numba-0.55.1/.coveragerc

# configuration file used by run_coverage.py
[run]
branch = True
source = numba
concurrency = multiprocessing
parallel = True

[report]
omit =
    */__main__.py
    # Vendored packages
    numba/appdirs.py
    numba/six.py
    numba/testing/ddt.py
    numba/_version.py
exclude_lines =
    pragma: no cover
    if __name__ == .__main__.:

[html]

numba-0.55.1/.flake8

[flake8]
ignore =
    E20,        # Extra space in brackets
    E231,E241,  # Multiple spaces around ","
    E26,        # Comments
    E731,       # Assigning lambda expression
    E741,       # Ambiguous variable names
    W503,       # line break before binary operator
    W504,       # line break after binary operator
max-line-length = 80
exclude =
    __pycache__ .git *.pyc *~ *.o *.so *.cpp *.c *.h __init__.py
    # Ignore vendored files
    numba/cloudpickle/*
    # Grandfather in existing failing files. This list should shrink over time
    numba/stencils/stencil.py numba/core/transforms.py numba/core/tracing.py
    numba/core/withcontexts.py numba/_version.py numba/core/inline_closurecall.py
    numba/core/ir_utils.py numba/core/pylowering.py numba/python_utils.py
    numba/parfors/parfor.py numba/misc/numba_entry.py numba/stencils/stencilparfor.py
    numba/core/ir.py numba/core/generators.py numba/misc/appdirs.py
    numba/core/caching.py numba/core/debuginfo.py numba/core/annotations/pretty_annotate.py
    numba/misc/dummyarray.py numba/core/dataflow.py numba/core/pythonapi.py
    numba/core/decorators.py numba/core/typeconv/rules.py numba/core/typeconv/castgraph.py
    numba/core/rewrites/registry.py numba/core/rewrites/macros.py numba/core/rewrites/static_binop.py
    numba/core/rewrites/ir_print.py numba/core/types/abstract.py numba/core/types/misc.py
    numba/core/types/npytypes.py numba/core/types/common.py numba/core/types/iterators.py
    numba/core/types/scalars.py numba/core/fastmathpass.py numba/cpython/setobj.py
    numba/core/options.py numba/cpython/printimpl.py numba/cpython/cmathimpl.py
    numba/cpython/tupleobj.py numba/cpython/mathimpl.py numba/core/registry.py
    numba/core/imputils.py numba/cpython/builtins.py numba/core/cpu.py
    numba/misc/quicksort.py numba/core/callconv.py numba/cpython/randomimpl.py
    numba/np/npyimpl.py numba/cpython/slicing.py numba/cpython/numbers.py
    numba/cpython/listobj.py numba/core/removerefctpass.py numba/core/boxing.py
    numba/misc/cffiimpl.py numba/np/linalg.py numba/cpython/rangeobj.py
    numba/np/npyfuncs.py numba/cpython/iterators.py numba/core/codegen.py
    numba/np/polynomial.py numba/misc/mergesort.py numba/core/base.py
    numba/np/npdatetime.py numba/pycc/cc.py numba/pycc/compiler.py
    numba/pycc/llvm_types.py numba/pycc/platform.py numba/pycc/decorators.py
    numba/core/runtime/nrtdynmod.py numba/core/runtime/context.py
    numba/tests/test_support.py numba/tests/test_llvm_version_check.py
    numba/tests/test_builtins.py numba/tests/test_jitmethod.py numba/tests/test_inlining.py
    numba/tests/test_array_manipulation.py numba/tests/test_dummyarray.py
    numba/tests/test_smart_array.py numba/tests/test_linalg.py numba/tests/test_threadsafety.py
    numba/tests/test_utils.py numba/tests/cfunc_cache_usecases.py numba/tests/enum_usecases.py
    numba/tests/test_func_lifetime.py numba/tests/test_typeinfer.py numba/tests/test_return_values.py
    numba/tests/test_npdatetime.py numba/tests/test_fancy_indexing.py numba/tests/support.py
    numba/tests/test_print.py numba/tests/test_debug.py
numba/tests/test_interproc.py numba/tests/test_typeconv.py numba/tests/test_tracing.py numba/tests/usecases.py numba/tests/test_vectorization_type_inference.py numba/tests/matmul_usecase.py numba/tests/complex_usecases.py numba/tests/test_array_exprs.py numba/tests/test_polynomial.py numba/tests/test_wrapper.py numba/tests/test_obj_lifetime.py numba/tests/test_intwidth.py numba/tests/test_remove_dead.py numba/tests/serialize_usecases.py numba/tests/test_del.py numba/tests/test_gil.py numba/tests/cffi_usecases.py numba/tests/test_slices.py numba/tests/test_mandelbrot.py numba/tests/compile_with_pycc.py numba/tests/test_deprecations.py numba/tests/test_looplifting.py numba/tests/test_storeslice.py numba/tests/recursion_usecases.py numba/tests/dummy_module.py numba/tests/test_operators.py numba/tests/test_comprehension.py numba/tests/ctypes_usecases.py numba/tests/test_locals.py numba/tests/test_dicts.py numba/tests/test_optional.py numba/tests/test_mathlib.py numba/tests/test_numberctor.py numba/tests/test_globals.py numba/tests/test_typingerror.py numba/tests/test_object_mode.py numba/tests/test_copy_propagate.py numba/tests/test_ctypes.py numba/tests/test_typeof.py numba/tests/test_usecases.py numba/tests/test_auto_constants.py numba/tests/test_cffi.py numba/tests/test_sort.py numba/tests/test_cfunc.py numba/tests/test_conversion.py numba/tests/test_indexing.py numba/tests/test_pycc.py numba/tests/annotation_usecases.py numba/tests/test_extended_arg.py numba/tests/test_alignment.py numba/tests/test_multi3.py numba/tests/test_overlap.py numba/tests/test_array_attr.py numba/tests/test_array_methods.py numba/tests/test_enums.py numba/tests/test_profiler.py numba/tests/test_numpyadapt.py numba/tests/test_stencils.py numba/tests/cache_usecases.py numba/tests/true_div_usecase.py numba/tests/test_dataflow.py numba/tests/test_tuples.py numba/tests/test_svml.py numba/tests/test_array_iterators.py numba/tests/test_buffer_protocol.py numba/tests/test_casting.py numba/tests/test_lists.py numba/tests/test_array_analysis.py numba/tests/test_serialize.py numba/tests/test_iteration.py numba/tests/test_recarray_usecases.py numba/tests/test_target_overloadselector.py numba/tests/test_compile_cache.py numba/tests/test_array_reductions.py numba/tests/test_dyn_func.py numba/tests/test_unpack_sequence.py numba/tests/test_cgutils.py numba/tests/test_complex.py numba/tests/test_hashing.py numba/tests/test_sys_stdin_assignment.py numba/tests/test_ufuncs.py numba/tests/pdlike_usecase.py numba/tests/test_range.py numba/tests/test_nrt_refct.py numba/misc/timsort.py numba/tests/test_nested_calls.py numba/tests/test_chained_assign.py numba/tests/test_withlifting.py numba/tests/test_parfors.py numba/tests/test_sets.py numba/tests/test_dyn_array.py numba/tests/test_objects.py numba/tests/test_random.py numba/tests/test_nan.py numba/tests/pycc_distutils_usecase/source_module.py numba/tests/npyufunc/test_ufuncbuilding.py numba/tests/npyufunc/test_errors.py numba/tests/npyufunc/test_vectorize_decor.py numba/tests/npyufunc/test_parallel_ufunc_issues.py numba/tests/npyufunc/test_parallel_env_variable.py numba/tests/npyufunc/test_gufunc.py numba/core/typing/cmathdecl.py numba/core/typing/bufproto.py numba/core/typing/mathdecl.py numba/core/typing/listdecl.py numba/core/typing/builtins.py numba/core/typing/randomdecl.py numba/core/typing/setdecl.py numba/core/typing/npydecl.py numba/core/typing/arraydecl.py numba/core/typing/collections.py numba/core/typing/ctypes_utils.py numba/core/typing/enumdecl.py 
    numba/core/typing/cffi_utils.py numba/core/typing/npdatetime.py
    numba/core/annotations/type_annotations.py numba/testing/ddt.py
    numba/testing/loader.py numba/testing/notebook.py numba/testing/main.py
    numba/np/unsafe/ndarray.py numba/np/ufunc/deviceufunc.py
    numba/np/ufunc/sigparse.py numba/parfors/parfor_lowering.py
    numba/np/ufunc/array_exprs.py numba/np/ufunc/decorators.py
    numba/core/datamodel/models.py numba/core/datamodel/packer.py
    numba/core/datamodel/testing.py numba/core/datamodel/manager.py
per-file-ignores =
    # Ignore star imports, unused imports, and "may be defined by star imports"
    # errors in device_init because its purpose is to bring together a lot of
    # the public API to be star-imported in numba.cuda.__init__
    numba/cuda/device_init.py:F401,F403,F405
    # libdevice.py is an autogenerated file containing stubs for all the device
    # functions. Some of the lines in docstrings are a little over-long, as they
    # contain the URLs of the reference pages in the online libdevice
    # documentation.
    numba/cuda/libdevice.py:E501
    # Ignore too-long lines in the CUDA doc examples, prioritising readability
    # in the docs over line length in the example source (especially given that
    # the test code is already indented by 8 spaces)
    numba/cuda/tests/doc_examples/test_random.py:E501
    numba/cuda/tests/doc_examples/test_cg.py:E501
    numba/cuda/tests/doc_examples/test_matmul.py:E501

numba-0.55.1/.github/CODEOWNERS

# Numba's codeowners file is dual-purpose, it:
#
# 1. Provides information to GitHub about who should be requested to review a
#    PR.
# 2. Provides contributors/czars general information about who to contact
#    first about various parts of the code base. A lot of concepts in Numba
#    are necessarily spread throughout the code base; consequently, some of
#    the "code ownership"/first contact is concept-based as opposed to
#    file/directory-based.
#
# ------------------------------------------------------------------------------
# Information for GitHub
# ------------------------------------------------------------------------------

# These people are the default "owners" for everything in the repo unless a
# later match is made; they will automatically be requested to review PRs.
* @sklam @stuartarchibald @esc

# Owners of specific parts of the code will be requested to review if a PR
# touches code in the matched pattern
/numba/cuda/ @gmarkall
/numba/parfors/ @DrTodd13
/numba/stencils/ @DrTodd13

# ------------------------------------------------------------------------------
# Information for contributors
# ------------------------------------------------------------------------------

# This section provides a rough list of who to contact first for help with
# various parts/concepts in the code base; first contact does not imply
# ownership!
#
# Parts of the code base:
#
# * Parfors/Parallel Accelerator (@DrTodd13)
#   - Array Analysis (@DrTodd13)
#   - Parfors transforms (@DrTodd13)
# * Stencils (@DrTodd13)
# * Experimental:
#   - Jitclasses (@sklam)
#   - StructRef (@sklam)
# * Typed containers:
#   - Typed.List (@esc)
#   - Typed.Dict (@sklam)
# * Documentation (Needs first contact/owner)
# * NumPy (Needs first contact/owner)
#   - ufuncs (Needs first contact/owner)
#   - linalg (@stuartarchibald)
#   - Implementation of specific functions (Needs first contact/owner)
#   - Parallel backends/threading layers (@stuartarchibald)
# * CPython implementation (Needs first contact/owner)
# * Extension API (Needs first contact/owner)
# * AOT (Needs first contact/owner)
# * Compiler:
#   - Type inference (@sklam)
#   - Bytecode analysis/CFA/DFA (@sklam)
#   - Compiler Pipeline infrastructure (@stuartarchibald)
#   - Compiler passes:
#     - Rewrites (Needs first contact/owner)
#     - Branch pruning (@stuartarchibald)
#     - Literal unroll (@stuartarchibald)
#     - Rewrite Semantic Constants (@stuartarchibald)
#     - MakeFunction To Jit function (@stuartarchibald)
#     - Overload and function inlining (@stuartarchibald)
#     - With Lifting (@sklam)
#     - Exception handling (@sklam)
#     - Literally (@sklam)
#     - SSA (@sklam)
#   - lowering.py, codegen.py (@sklam)
#   - Datamodels/call conventions (@sklam)
#   - Inlining in general (@stuartarchibald)
#
# Additional Concepts:
#
# * Reference counting and NRT (@sklam)
# * Testing (Needs first contact/owner)
# * CI:
#   - public CI (azure) (Needs first contact/owner)
#   - Numba build farm (@esc)
# * Integration testing (https://github.com/numba/numba-integration-testing)
#   (@esc)
# * ASV profiling (@esc)
# * Type Annotations (@luk-f-a and @EPronovost)
# * Ufunc/GUfunc (Needs first contact/owner)
# * Profiling (Needs first contact/owner (and code!))
# * Debugging:
#   - DWARF (@sklam)
#   - gdb support (@stuartarchibald)
# * Hardware targets:
#   - The CUDA target (@gmarkall)
#   - The ROCm target (@stuartarchibald)
#   - ARM* (@stuartarchibald)
#   - POWER (Needs first contact/owner)
#   - X86* (Needs first contact/owner)
# * OS:
#   - Linux (@stuartarchibald)
#   - OSX
#   - Windows
#   - BSD (@stuartarchibald)
#
# Anything not covered by someone else... ping @sklam and @stuartarchibald

numba-0.55.1/.github/ISSUE_TEMPLATE/Bug_report.md

---
name: Bug Report
about: Report a bug. Not for asking general questions - see below.
---

## Reporting a bug

- [ ] I have tried using the latest released version of Numba (most recent is
  visible in the change log: https://github.com/numba/numba/blob/master/CHANGE_LOG).
- [ ] I have included a self-contained code sample to reproduce the problem,
  i.e. it's possible to run it as 'python bug.py'.

numba-0.55.1/.github/ISSUE_TEMPLATE/Feature_request.md

---
name: Feature Request
about: Tell us about something in the Python language/NumPy you'd like Numba
  to support. Not for asking general questions - see below.
---

## Feature request

numba-0.55.1/.github/ISSUE_TEMPLATE/config.yml

blank_issues_enabled: false
contact_links:
  - name: General Question
    url: https://numba.discourse.group/c/numba/community-support/
    about: "If you have a general question (not a bug report or feature request) then please ask on Numba's discourse instance."
  - name: Quick Question/Just want to say Hi!
    url: https://gitter.im/numba/numba
    about: "If you have a quick question or want to chat with users/developers in real time then please use gitter.im/numba/numba"
  - name: Discuss an involved feature
    url: https://numba.discourse.group/c/numba/development/
    about: "If you would like to suggest a more involved feature like *Can a new compiler pass be added to do X* then please start a discussion on Numba's discourse instance."

numba-0.55.1/.github/ISSUE_TEMPLATE/first_rc_checklist.md

---
name: First Release Candidate Checklist (maintainer only)
about: Checklist template for the first release of every series
title: Numba X.Y.Zrc1 Checklist (FIXME)
labels: task
---

## Numba X.Y.Z

* [ ] Merge to master.
    - [ ] "remaining Pull-Requests from milestone".
* [ ] Merge change log changes.
    - [ ] "PR with changelog entries".
* [ ] Create X.Y release branch.
* [ ] Pin llvmlite to `>=0.A.0rc1,<0.A+1.0`.
* [ ] Annotated tag X.Y.Zrc1 on release branch.
* [ ] Build and upload conda packages on buildfarm (check "upload").
* [ ] Build wheels (`$PYTHON_VERSIONS`) on the buildfarm.
* [ ] Upload wheels and sdist to PyPI (upload from `ci_artifacts`).
* [ ] Verify packages uploaded to Anaconda Cloud and move to `numba/label/main`.
* [ ] Verify wheels for all platforms arrived on PyPI.
* [ ] Initialize and verify ReadTheDocs build.
* [ ] Clean up `ci_artifacts`.
* [ ] Send RC announcement email / post announcement to discourse group.
* [ ] Post link to Twitter.

### Post Release:

* [ ] Tag X.Y+1.0dev0 to start new development cycle on `master`.
* [ ] Update llvmlite dependency spec to match next version via PR to `master`.
* [ ] Update release checklist template with any additional bullet points that
      may have arisen during the release.
* [ ] Close milestone (and then close this release issue).

numba-0.55.1/.github/ISSUE_TEMPLATE/sub_rc_checklist.md

---
name: Subsequent Release Candidate Checklist (maintainer only)
about: Checklist template for all subsequent releases (RC 2-N, FINAL and PATCH) of every series
title: Numba X.Y.Zrc1 Checklist (FIXME)
labels: task
---

## numba X.Y.Z

* [ ] Cherry-pick items from the X.Y.Z milestone into a PR.
* [ ] Merge change log modifications and cherry-picks to X.Y release branch.
    - [ ] https://github.com/numba/numba/pull/XXXX
* [ ] Review, merge and check execution of release notebook. (FINAL ONLY)
* [ ] Annotated tag X.Y.Z on release branch (no `v` prefix).
* [ ] Build and upload conda packages on buildfarm (check "upload").
* [ ] Build wheels (`$PYTHON_VERSIONS`) on the buildfarm.
* [ ] Upload wheels and sdist to PyPI (upload from `ci_artifacts`).
* [ ] Verify packages uploaded to Anaconda Cloud and move to `numba/label/main`.
* [ ] Verify wheels for all platforms arrived on PyPI.
* [ ] Verify ReadTheDocs build.
* [ ] Clean up `ci_artifacts`.
* [ ] Send RC/FINAL announcement email / post announcement to discourse group.
* [ ] Post link to Twitter.
* [ ] Post link to python-announce-list@python.org.

### Post release

* [ ] Update release checklist template.
* [ ] Ping Anaconda Distro team to trigger a build for `defaults` (FINAL ONLY).
* [ ] Create a release on GitHub at https://github.com/numba/numba/releases (FINAL ONLY).
* [ ] Close milestone (and then close this release issue).

numba-0.55.1/.github/PULL_REQUEST_TEMPLATE.md

## Reference an existing issue

numba-0.55.1/.github/workflows/stale.yml

name: 'Mark stale issues'
on:
  schedule:
    - cron: '30 1 * * *'

jobs:
  stale:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/stale@v3
        with:
          stale-issue-message: >
            This issue is marked as stale as it has had no activity in the
            past 30 days. Please close this issue if no further response or
            action is needed. Otherwise, please respond with any updates and
            confirm that this issue still needs to be addressed.
          stale-issue-label: 'stale'
          any-of-labels: 'question,needtriage,more info needed'
          days-before-issue-stale: 30
          days-before-issue-close: -1

numba-0.55.1/.pre-commit-config.yaml

repos:
  - repo: https://gitlab.com/pycqa/flake8
    rev: 3.7.8
    hooks:
      - id: flake8

numba-0.55.1/.readthedocs.yml

# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Build documentation in the docs/ directory with Sphinx
sphinx:
  configuration: docs/source/conf.py

# Optionally build your docs in additional formats such as PDF
formats:
  - pdf

# use conda for the environment
conda:
  environment: docs/environment.yml

# Optionally set the version of Python and requirements required to build your docs
python:
  version: 3.7
  install:
    - method: setuptools
      path: .

numba-0.55.1/CHANGE_LOG

Version 0.55.1 (27 January, 2022)
---------------------------------

This is a bugfix release that closes all the remaining issues from the
accelerated release of 0.55.0 and also any release critical regressions
discovered since then.

CUDA target deprecation notices:

* Support for CUDA toolkits < 10.2 is deprecated and will be removed in
  Numba 0.56.
* Support for devices with Compute Capability < 5.3 is deprecated and will be
  removed in Numba 0.56.

Pull-Requests:

* PR #7755: CUDA: Deprecate support for CC < 5.3 and CTK < 10.2 (Graham Markall)
* PR #7749: Refactor threading layer priority tests to not use stdout/stderr (stuartarchibald)
* PR #7744: Fix issues with locating/parsing source during DebugInfo emission. (stuartarchibald)
* PR #7712: Fixing issue 7693 (Graham Markall, luk-f-a, stuartarchibald)
* PR #7729: Handle Omitted/OmittedArgDataModel in DI generation. (stuartarchibald)
* PR #7788: Avoid issue with DI gen for arrayexprs. (stuartarchibald)
* PR #7752: Fix #7751: Use original filename for array exprs (Graham Markall)
* PR #7748: Fix #7713: Ensure _prng_random_hash return has correct bitwidth (Graham Markall)
* PR #7745: Fix the release year for Numba 0.55 change log entry.
  (stuartarchibald)
* PR #7740: CUDA Python 11.6 support (Graham Markall)
* PR #7724: Update URLs in error messages to refer to RTD docs. (stuartarchibald)
* PR #7709: CUDA: Fixes missing type annotation pass following #7704 (stuartarchibald)
* PR #7704: Move the type annotation pass to post legalization. (stuartarchibald)
* PR #7619: CUDA: Fix linking with PTX when compiling lazily (Graham Markall)

Authors:

* Graham Markall
* luk-f-a
* stuartarchibald

Version 0.55.0 (13 January, 2022)
---------------------------------

This release includes a significant number of important dependency upgrades
along with a number of new features and bug fixes.

NOTE: Due to NumPy CVE-2021-33430 this release has bypassed the usual release
process so as to promptly provide a Numba release that supports NumPy 1.21. A
single release candidate (RC1) was made and a few issues were reported; these
are summarised as follows and will be fixed in a subsequent 0.55.1 release.

Known issues with this release:

* Incorrect result copying array-typed field of structured array (#7693)
* Two issues in DebugInfo generation (#7726, #7730)
* Compilation failure for ``hash`` of floating point values on 32 bit Windows
  when using Python 3.10 (#7713).

Highlights of core dependency upgrades:

* Support for Python 3.10
* Support for NumPy 1.21

Python language support enhancements:

* Experimental support for ``isinstance``.

NumPy features/enhancements:

The following functions are now supported:

* ``np.broadcast_to``
* ``np.float_power``
* ``np.cbrt``
* ``np.logspace``
* ``np.take_along_axis``
* ``np.average``
* ``np.argmin`` gains support for the ``axis`` kwarg.
* ``np.ndarray.astype`` gains support for types expressed as literal strings.

Highlights of core changes:

* For users of the Numba extension API, Numba now has a new error handling
  mode whereby it will treat all exceptions that do not inherit from
  ``numba.errors.NumbaException`` as a "hard error" and immediately unwind
  the stack. This makes it much easier to debug when writing ``@overload``\s
  etc. from the extension API as there's now no confusion between Python
  errors and Numba errors. This feature can be enabled by setting the
  environment variable: ``NUMBA_CAPTURED_ERRORS='new_style'``.
* The threading layer selection priority can now be changed via the
  environment variable ``NUMBA_THREADING_LAYER_PRIORITY``.

Highlights of changes for the CUDA target:

* Support for NVIDIA's CUDA Python bindings.
* Support for 16-bit floating point numbers and their basic operations via
  intrinsics.
* Streams are provided in the ``Stream.async_done`` result, making it easier
  to implement asynchronous work queues.
* Support for structured types in device arrays, character sequences in
  NumPy arrays, and some array operations on nested arrays.
* Much underlying refactoring to align the CUDA target more closely with the
  CPU target, which lays the groundwork for supporting the high level
  extension API in CUDA in future releases.

Intel also kindly sponsored research and development into native debug
(DWARF) support and handling per-function compilation flags:

* Line number/location tracking is much improved.
* Numba's internal representation of containers (e.g. tuples, arrays) is now
  encoded as structures.
* Numba's per-function compilation flags are encoded into the ABI field of
  the mangled name of the function such that it's possible to compile and
  differentiate between versions of the same function with different flags
  set.
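For illustration, a minimal sketch of the new NumPy support (a hypothetical
usage example, assuming a standard 0.55 install; the exact supported argument
forms are described in the pull requests listed below)::

    import numpy as np
    from numba import njit

    @njit
    def demo(a):
        # Illustrative sketch only -- supported forms are per the PRs below.
        # np.argmin now accepts the axis keyword argument.
        idx = np.argmin(a, axis=0)
        # ndarray.astype now accepts a type spelled as a literal string.
        b = a.astype('float64')
        # np.average and np.cbrt are newly supported.
        return idx, np.average(b), np.cbrt(b)

    demo(np.arange(6.0).reshape(2, 3))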
General deprecation notices: * There are no new general deprecations. CUDA target deprecation notices: * There are no new CUDA target deprecations. Version support/dependency changes: * Python 3.10 is supported. * NumPy version 1.21 is supported. * The minimum supported NumPy version is raised to 1.18 for runtime (compilation however remains compatible with NumPy 1.11). Pull-Requests: * PR `#6075 `_: add np.float_power and np.cbrt (`Guilherme Leobas `_) * PR `#7047 `_: Support __hash__ for numpy.datetime64 (`Guilherme Leobas `_ `stuartarchibald `_) * PR `#7057 `_: Fix #7041: Add charseq registry to CUDA target (`Graham Markall `_ `stuartarchibald `_) * PR `#7082 `_: Added Add/Sub between datetime64 array and timedelta64 scalar (`Nick Riasanovsky `_ `stuartarchibald `_) * PR `#7119 `_: Add support for `np.broadcast_to` (`Guilherme Leobas `_) * PR `#7129 `_: Add support for axis keyword argument to np.argmin() (`Itamar Turner-Trauring `_) * PR `#7132 `_: gh #7131 Support for astype with literal strings (`Nick Riasanovsky `_) * PR `#7177 `_: Add debug infomation support based on datamodel. (`stuartarchibald `_) * PR `#7185 `_: Add get_impl_key as abstract method to types.Callable (`Alexey Kozlov `_) * PR `#7186 `_: Add support for np.logspace. (`Guoqiang QI `_) * PR `#7189 `_: CUDA: Skip IPC tests on ARM (`Graham Markall `_) * PR `#7190 `_: CUDA: Fix test_pinned on Jetson (`Graham Markall `_) * PR `#7192 `_: Fix missing import in array.argsort impl and add more tests. (`stuartarchibald `_) * PR `#7196 `_: Fixes for lineinfo emission (`stuartarchibald `_) * PR `#7197 `_: don't post to python announce on the first RC (`esc `_) * PR `#7202 `_: Initial implementation of np.take_along_axis (`Itamar Turner-Trauring `_) * PR `#7203 `_: remove duplicate changelog entries (`esc `_) * PR `#7216 `_: Update CHANGE_LOG for 0.54.0rc2 (`stuartarchibald `_) * PR `#7219 `_: bump llvmlite dependency to 0.38.0dev0 for Numba 0.55.0dev0 (`esc `_) * PR `#7220 `_: update release checklist post 0.54rc1+2 (`esc `_) * PR `#7221 `_: Show GPU UUIDs in cuda.detect() output (`Graham Markall `_) * PR `#7222 `_: CUDA: Warn when debug=True and opt=True (`Graham Markall `_) * PR `#7223 `_: Replace assertion errors on IR assumption violation (`Siu Kwan Lam `_) * PR `#7226 `_: Add support for structured types in Device Arrays (`Michael Collison `_) * PR `#7227 `_: FIX: Typo (`Srinath Kailasa `_) * PR `#7230 `_: PR #7171 bugfix only (`stuartarchibald `_ `Todd A. Anderson `_) * PR `#7234 `_: add THREADING_LAYER_PRIORITY & NUMBA_THREADING_LAYER_PRIORITY (`Kolen Cheung `_) * PR `#7235 `_: replace wordings of WIP by draft PR (`Kolen Cheung `_) * PR `#7236 `_: CUDA: Skip managed alloc tests on ARM (`Graham Markall `_) * PR `#7237 `_: fix a typo in a string (`Kolen Cheung `_) * PR `#7241 `_: Set aliasing information for inplace_binops.. (`Todd A. 
Anderson `_) * PR `#7242 `_: FIX: typo (`Srinath Kailasa `_) * PR `#7244 `_: Implement partial literal propagation pass (support 'isinstance') (`Guilherme Leobas `_ `stuartarchibald `_) * PR `#7247 `_: Solve memory leak to fix issue #7210 (`Siu Kwan Lam `_ `Graham Markall `_ `ysheffer `_) * PR `#7251 `_: Fix #6001: typed.List ignores ctor arguments with JIT disabled (`Graham Markall `_) * PR `#7256 `_: Fix link to the discourse forum in README (`Kenichi Maehashi `_) * PR `#7257 `_: Use normal list constructor in List.__new__() (`Graham Markall `_) * PR `#7260 `_: Support typed lists in `heapq` (`Graham Markall `_) * PR `#7263 `_: Updated issue URL for error messages #7261 (`DeviousLab `_) * PR `#7265 `_: Fix linspace to use np.divide and clamp to stop. (`stuartarchibald `_) * PR `#7266 `_: CUDA: Skip multi-GPU copy test with peer access disabled (`Graham Markall `_) * PR `#7267 `_: Fix #7258. Bug in SROA optimization (`Siu Kwan Lam `_) * PR `#7271 `_: Update 3rd party license text. (`stuartarchibald `_) * PR `#7272 `_: Allow annotations in njit-ed functions (`LunarLanding `_) * PR `#7273 `_: Update CHANGE_LOG for 0.54.0rc3. (`stuartarchibald `_) * PR `#7283 `_: Added NPM to Glossary and linked to mentions (`Nihal Shetty `_) * PR `#7285 `_: CUDA: Fix OOB in test_kernel_arg (`Graham Markall `_) * PR `#7288 `_: Handle cval as a np attr in stencil generation. (`stuartarchibald `_) * PR `#7294 `_: Continuation of PR #7280, fixing lifetime of TBB task_scheduler_handle (`Sergey Pokhodenko `_ `stuartarchibald `_) * PR `#7296 `_: Fix generator lowering not casting to the actual yielded type (`Siu Kwan Lam `_) * PR `#7298 `_: Use CBC to pin GCC to 7 on most linux and 9 on aarch64. (`stuartarchibald `_) * PR `#7304 `_: Continue PR#3655: add support for np.average (`Hadia Ahmed `_ `slnguyen `_) * PR `#7307 `_: Prevent mutation of arrays in global tuples. (`stuartarchibald `_) * PR `#7309 `_: Update MapConstraint to handle type coercion for typed.Dict correctly. (`stuartarchibald `_) * PR `#7312 `_: Fix #7302. Workaround missing pthread problem on ppc64le (`Siu Kwan Lam `_) * PR `#7315 `_: Link ELF obj as DSO for radare2 disassembly CFG (`stuartarchibald `_) * PR `#7316 `_: Use float64 for consistent typing in heapq tests. (`stuartarchibald `_) * PR `#7317 `_: In TBB tsh test switch os.fork for mp fork ctx (`stuartarchibald `_) * PR `#7319 `_: Update CHANGE_LOG for 0.54.0 final. (`stuartarchibald `_) * PR `#7329 `_: Improve documentation in reference to CUDA local memory (`Sterling Baird `_) * PR `#7330 `_: Cuda matmul docs (`Sterling Baird `_) * PR `#7340 `_: Add size_t and ssize_t types (`Bruce Merry `_) * PR `#7345 `_: Add check for ipykernel file in IPython cache locator (`Sahil Gupta `_) * PR `#7347 `_: fix:updated url for error report and feature rquest using issue template (`DEBARGHA SAHA `_) * PR `#7349 `_: Allow arbitrary walk-back in reduction nodes to find inplace_binop. (`Todd A. Anderson `_) * PR `#7359 `_: Extend support for nested arrays inside numpy records (`Graham Markall `_ `luk-f-a `_) * PR `#7375 `_: CUDA: Run doctests as part of numba.cuda.tests and fix test_cg (`Graham Markall `_) * PR `#7395 `_: Fix #7394 and #6550 & Added test & improved error message (`MegaIng `_) * PR `#7397 `_: Add option to catch only Numba `numba.core.errors` derived exceptions. (`stuartarchibald `_) * PR `#7398 `_: Add support for arrayanalysis of tuple args. (`Todd A. 
Anderson `_) * PR `#7403 `_: Fix for issue 7402: implement missing numpy ufunc interface (`Guilherme Leobas `_) * PR `#7404 `_: fix typo in literal_unroll docs (`esc `_) * PR `#7419 `_: insert missing backtick in comment (`esc `_) * PR `#7422 `_: Update Omitted Type to use Hashable Values as Keys for Caching (`Nick Riasanovsky `_) * PR `#7429 `_: Update CHANGE_LOG for 0.54.1 (`stuartarchibald `_) * PR `#7432 `_: add github release task to checklist (`esc `_) * PR `#7440 `_: Refactor TargetConfig naming. (`stuartarchibald `_) * PR `#7441 `_: Permit any string as a key in literalstrkeydict type. (`stuartarchibald `_) * PR `#7442 `_: Add some diagnostics to SVML test failures. (`stuartarchibald `_) * PR `#7443 `_: Refactor template selection logic for targets. (`stuartarchibald `_) * PR `#7444 `_: use correct variable name in closure (`esc `_) * PR `#7447 `_: cleanup Numba metadata (`esc `_) * PR `#7453 `_: CUDA: Provide stream in async_done result (`Graham Markall `_) * PR `#7456 `_: Fix invalid codegen for #7451. (`stuartarchibald `_) * PR `#7457 `_: Factor out target registry selection logic (`stuartarchibald `_) * PR `#7459 `_: Include compiler flags in symbol mangling (`Siu Kwan Lam `_) * PR `#7460 `_: Add FP16 support for CUDA (`Michael Collison `_ `Graham Markall `_) * PR `#7461 `_: Support NVIDIA's CUDA Python bindings (`Graham Markall `_) * PR `#7465 `_: Update changelog for 0.54.1 release (`Siu Kwan Lam `_) * PR `#7477 `_: Fix unicode operator.eq handling of Optional types. (`stuartarchibald `_) * PR `#7479 `_: CUDA: Print format string and warn for > 32 print() args (`Graham Markall `_) * PR `#7483 `_: NumPy 1.21 support (`Sebastian Berg `_ `stuartarchibald `_) * PR `#7484 `_: Fixed outgoing link to nvidia documentation. (`Dhruv Patel `_) * PR `#7493 `_: Consolidate TLS stacks in target configuration (`Siu Kwan Lam `_) * PR `#7496 `_: CUDA: Use a single dispatcher class for all kinds of functions (`Graham Markall `_) * PR `#7498 `_: refactor with-detection logic (`stuartarchibald `_ `esc `_) * PR `#7499 `_: Add build scripts for CUDA testing on gpuCI (`Charles Blackmon-Luca `_ `Graham Markall `_) * PR `#7500 `_: Update parallel.rst (`Julius Bier Kirkegaard `_) * PR `#7506 `_: Enhance Flags mangling/demangling (`Siu Kwan Lam `_) * PR `#7514 `_: Fixup cuda debuginfo emission for 7177 (`Siu Kwan Lam `_) * PR `#7525 `_: Make sure` demangle()` returns `str` type. (`Siu Kwan Lam `_) * PR `#7538 `_: Fix `@overload_glue` performance regression. (`stuartarchibald `_) * PR `#7539 `_: Fix str decode issue from merge #7525/#7506 (`stuartarchibald `_) * PR `#7546 `_: Fix handling of missing const key in LiteralStrKeyDict (`Siu Kwan Lam `_ `stuartarchibald `_) * PR `#7547 `_: Remove 32bit linux scipy installation. (`stuartarchibald `_) * PR `#7548 `_: Correct evaluation order in assert statement (`Graham Markall `_) * PR `#7552 `_: Prepend the inlined function name to inlined variables. (`stuartarchibald `_) * PR `#7557 `_: Python3.10 v2 (`stuartarchibald `_ `esc `_) * PR `#7560 `_: Refactor with detection py310 (`Siu Kwan Lam `_ `esc `_) * PR `#7561 `_: fix a typo (`Kolen Cheung `_) * PR `#7567 `_: Update docs to note meetings are public. (`stuartarchibald `_) * PR `#7570 `_: Update the docs and error message for errors when importing Numba. (`stuartarchibald `_) * PR `#7580 `_: Fix #7507. 
catch `NotImplementedError` in `.get_function()` (`Siu Kwan Lam `_) * PR `#7581 `_: Add support for casting from int enums (`Michael Collison `_) * PR `#7583 `_: Make numba.types.Optional __str__ less verbose. (`stuartarchibald `_) * PR `#7588 `_: Fix casting of start/stop in linspace (`stuartarchibald `_) * PR `#7591 `_: Remove deprecations (`Graham Markall `_) * PR `#7596 `_: Fix max symbol match length for r2 (`stuartarchibald `_) * PR `#7597 `_: Update gdb docs for new DWARF enhancements. (`stuartarchibald `_) * PR `#7603 `_: Fix list.insert() for refcounted values (`Ehsan Totoni `_) * PR `#7605 `_: Fix TBB 2021 DSO names on OSX/Win and make TBB reporting consistent (`stuartarchibald `_) * PR `#7606 `_: Ensure a prescribed threading layer can load in CI. (`stuartarchibald `_) * PR `#7610 `_: Fix #7609. Type should not be mutated. (`Siu Kwan Lam `_) * PR `#7618 `_: Fix the doc build: docutils 0.18 not compatible with pinned sphinx (`stuartarchibald `_) * PR `#7626 `_: Fix issues with package dependencies. (`stuartarchibald `_ `esc `_) * PR `#7627 `_: PR 7321 continued (`stuartarchibald `_ `Eric Wieser `_) * PR `#7628 `_: Move to using windows-2019 images in Azure (`stuartarchibald `_) * PR `#7632 `_: Capture output in CUDA matmul doctest (`Graham Markall `_) * PR `#7636 `_: Copy prange loop header to after the parfor. (`Todd A. Anderson `_) * PR `#7637 `_: Increase the timeout on the SVML tests for loaded machines. (`stuartarchibald `_) * PR `#7645 `_: In debuginfo, do not add noinline to functions marked alwaysinline (`stuartarchibald `_) * PR `#7650 `_: Move Azure builds to OSX 10.15 (`stuartarchibald `_ `esc `_ `Siu Kwan Lam `_) Authors: * `Bruce Merry `_ * `Charles Blackmon-Luca `_ * `DeviousLab `_ * `Dhruv Patel `_ * `Todd A. Anderson `_ * `Ehsan Totoni `_ * `Eric Wieser `_ * `esc `_ * `Graham Markall `_ * `Guilherme Leobas `_ * `Guoqiang QI `_ * `Hadia Ahmed `_ * `Kolen Cheung `_ * `Itamar Turner-Trauring `_ * `Julius Bier Kirkegaard `_ * `Kenichi Maehashi `_ * `Alexey Kozlov `_ * `luk-f-a `_ * `LunarLanding `_ * `MegaIng `_ * `Nihal Shetty `_ * `Nick Riasanovsky `_ * `Sergey Pokhodenko `_ * `Sahil Gupta `_ * `Sebastian Berg `_ * `Sterling Baird `_ * `Srinath Kailasa `_ * `Siu Kwan Lam `_ * `slnguyen `_ * `DEBARGHA SAHA `_ * `stuartarchibald `_ * `Michael Collison `_ * `ysheffer `_ Version 0.54.1 (7 October, 2021) -------------------------------- This is a bugfix release for 0.54.0. It fixes a regression in structured array type handling, a potential leak on initialization failure in the CUDA target, a regression caused by Numba's vendored cloudpickle module resetting dynamic classes and a few minor testing/infrastructure related problems. * PR `#7348 `_: test_inspect_cli: Decode exception with default (utf-8) codec (`Graham Markall `_) * PR `#7360 `_: CUDA: Fix potential leaks when initialization fails (`Graham Markall `_) * PR `#7386 `_: Ensure the NRT is initialized prior to use in external NRT tests. 
  (stuartarchibald)
* PR #7388: Patch cloudpickle to not reset dynamic class each time it is unpickled (Siu Kwan Lam)
* PR #7393: skip azure pipeline test if file not present (esc)
* PR #7428: Fix regression #7355: cannot set items in structured array data types (Siu Kwan Lam)

Authors:

* esc
* Graham Markall
* Siu Kwan Lam
* stuartarchibald

Version 0.54.0 (19 August, 2021)
--------------------------------

This release includes a significant number of new features, important
refactoring, critical bug fixes and a number of dependency upgrades.

Python language support enhancements:

* Basic support for ``f-strings``.
* ``dict`` comprehensions are now supported.
* The ``sum`` built-in function is implemented.

NumPy features/enhancements:

The following functions are now supported:

* ``np.clip``
* ``np.iscomplex``
* ``np.iscomplexobj``
* ``np.isneginf``
* ``np.isposinf``
* ``np.isreal``
* ``np.isrealobj``
* ``np.isscalar``
* ``np.random.dirichlet``
* ``np.rot90``
* ``np.swapaxes``

Also ``np.argmax`` has gained support for the ``axis`` keyword argument and
it's now possible to use ``0d`` NumPy arrays as scalars in ``__setitem__``
calls.
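For illustration, a minimal sketch combining these new language features (a
hypothetical usage example, assuming a standard 0.54 install; only the basic
forms described above are supported, e.g. f-strings without format specs)::

    import numpy as np
    from numba import njit

    @njit
    def demo(a):
        # Illustrative sketch only -- supported forms are per the PRs below.
        # dict comprehension: produces a typed dictionary.
        squares = {i: i * i for i in range(4)}
        # basic f-string support.
        label = f"n={len(squares)}"
        # builtin sum and np.clip are newly supported.
        return label, sum(a), np.clip(a, 0.0, 1.0)

    demo(np.linspace(-1.0, 2.0, 5))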
Internal changes:

* Debugging support through DWARF has been fixed and enhanced.
* Numba now optimises the way in which locals are emitted to help reduce time
  spent in LLVM's SROA passes.

CUDA target changes:

* Support for emitting ``lineinfo`` to be consumed by profiling tools such as
  Nsight Compute
* Improved fastmath code generation for various trig, division, and other
  functions
* Faster compilation using lazy addition of libdevice to compiled units
* Support for IPC on Windows
* Support for passing tuples to CUDA ufuncs
* Performance warnings:

  * When making implicit copies by calling a kernel on arrays in host memory
  * When occupancy is poor due to kernel or ufunc/gufunc configuration

* Support for implementing warp-aggregated intrinsics:

  * Using support for more CUDA functions: ``activemask()``, ``lanemask_lt()``
  * The ``ffs()`` function now works correctly!

* Support for ``@overload`` in the CUDA target

Intel kindly sponsored research and development that led to a number of new
features and internal support changes:

* Dispatchers can now be retargeted to a new target via a user-defined
  context manager.
* Support for custom NumPy array subclasses has been added (including an
  overloadable memory allocator).
* An inheritance based model for targets that permits targets to share
  ``@overload`` implementations.
* Per function compiler flags with inheritance behaviours.
* The extension API now has support for overloading class methods via the
  ``@overload_classmethod`` decorator.

Deprecations:

* The ``ROCm`` target (for AMD ROC GPUs) has been moved to an "unmaintained"
  status and a separate repository stub has been created for it at:
  https://github.com/numba/numba-rocm

CUDA target deprecations and breaking changes:

* Relaxed strides checking is now the default when computing the contiguity
  of device arrays.
* The ``inspect_ptx()`` method is deprecated. For use cases that obtain PTX
  for further compilation outside of Numba, use ``compile_ptx()`` instead.
* Eager compilation of device functions (the case when ``device=True`` and a
  signature is provided) is deprecated.

Version support/dependency changes:

* LLVM 11 is now supported on all platforms via llvmlite.
* The minimum supported Python version is raised to 3.7.
* NumPy version 1.20 is supported.
* The minimum supported NumPy version is raised to 1.17 for runtime
  (compilation however remains compatible with NumPy 1.11).
* Vendor cloudpickle v1.6.0 -- now used for all ``pickle`` operations.
* TBB >= 2021 is now supported and all prior versions are unsupported (it is
  not easily possible to maintain support across its ABI-breaking changes).

Pull-Requests:

* PR #4516: Make setitem accept 0d np-arrays (Guilherme Leobas)
* PR #4610: Implement np.is* functions (Guilherme Leobas)
* PR #5984: Handle idx and size unification in wrap_index manually. (Todd A. Anderson)
* PR #6468: Access ``replace_functions_map`` via PreParforPass instance (Sergey Pokhodenko, Reazul Hoque)
* PR #6469: Add address space in pointer type (Sergey Pokhodenko, Reazul Hoque)
* PR #6608: Support f-strings for common cases (Ehsan Totoni)
* PR #6619: Improved fastmath code generation for trig, log, and exp/pow. (Graham Markall, Michael Collison)
* PR #6681: Explicitly catch ``with..as`` and raise error. (stuartarchibald)
* PR #6689: Fix setup.py build command detection (Hannes Pahl)
* PR #6695: Enable negative indexing for cuda atomic operations (Ashutosh Varma)
* PR #6696: flake8: made more files flake8 compliant (Ashutosh Varma)
* PR #6698: Fix #6697: Wrong dtype when using np.asarray on DeviceNDArray (Ashutosh Varma)
* PR #6700: Add UUID to CUDA devices (Graham Markall)
* PR #6709: Block matplotlib in test examples (Graham Markall)
* PR #6718: doc: fix typo in rewrites.rst (extra iterates) (Alexander-Makaryev)
* PR #6720: Faster compile (Siu Kwan Lam)
* PR #6730: Fix Typeguard error (Graham Markall)
* PR #6731: Add CUDA-specific pipeline (Graham Markall)
* PR #6735: CUDA: Don't parse IR for modules with llvmlite (Graham Markall)
* PR #6736: Support for dict comprehension (stuartarchibald)
* PR #6742: Do not add overload function definitions to index. (stuartarchibald)
* PR #6750: Bump to llvmlite 0.37 series (Siu Kwan Lam)
* PR #6751: Suppress typeguard warnings that affect testing. (Siu Kwan Lam)
* PR #6753: The check for internal types in RewriteArrayExprs (Alexander-Makaryev)
* PR #6755: install llvmlite from numba/label/dev (esc)
* PR #6758: patch to compile _devicearray.cpp with c++11 (esc)
* PR #6760: Fix scheduler bug where it rounds to 0 divisions for a chunk. (Todd A. Anderson)
* PR #6762: Glue wrappers to create @overload from split typing and lowering. (stuartarchibald, Siu Kwan Lam)
* PR #6766: Fix DeviceNDArray null shape issue (Michael Collison)
* PR #6769: CUDA: Replace ``CachedPTX`` and ``CachedCUFunction`` with ``CUDACodeLibrary`` functionality (Graham Markall)
* PR #6776: Fix issue with TBB interface causing warnings and parfors counting them (stuartarchibald)
* PR `#6779 `_: Fix wrap_index type unification. (`Todd A.
Anderson `_) * PR `#6786 `_: Fix gufunc kwargs support (`Siu Kwan Lam `_) * PR `#6788 `_: Add support for fastmath 32-bit floating point divide (`Michael Collison `_) * PR `#6789 `_: Fix warnings struct ref typeguard (`stuartarchibald `_ `Siu Kwan Lam `_ `esc `_) * PR `#6794 `_: refactor and move create_temp_module into numba.tests.support (`Alexander-Makaryev `_) * PR `#6795 `_: CUDA: Lazily add libdevice to compilation units (`Graham Markall `_) * PR `#6798 `_: CUDA: Add optional Driver API argument logging (`Graham Markall `_) * PR `#6799 `_: Print Numba and llvmlite versions in sysinfo (`Graham Markall `_) * PR `#6800 `_: Make a common standard API for querying ufunc impl (`Sergey Pokhodenko `_ `Siu Kwan Lam `_) * PR `#6801 `_: ParallelAccelerator no long will convert StaticSetItem to SetItem because record arrays require StaticSetItems. (`Todd A. Anderson `_) * PR `#6802 `_: Add lineinfo flag to PTX and SASS compilation (`Graham Markall `_ `Max Katz `_) * PR `#6804 `_: added runtime version to ``numba -s`` (`Kalyan `_) * PR `#6808 `_: #3468 continued: Add support for ``np.clip`` (`Graham Markall `_ `Aaron Russell Voelker `_) * PR `#6809 `_: #3203 additional info in cuda detect (`Kalyan `_) * PR `#6810 `_: Fix tiny formatting error in ROC kernel docs (`Felix Divo `_) * PR `#6811 `_: CUDA: Remove test of runtime being a supported version (`Graham Markall `_) * PR `#6813 `_: Mostly CUDA: Replace llvmpy API usage with llvmlite APIs (`Graham Markall `_) * PR `#6814 `_: Improving context stack (`stuartarchibald `_ `Siu Kwan Lam `_) * PR `#6818 `_: CUDA: Support IPC on Windows (`Graham Markall `_) * PR `#6822 `_: Add support for np.rot90 (`stuartarchibald `_ `Daniel Nagel `_) * PR `#6829 `_: Fix accuracy of np.arange and np.linspace (`stuartarchibald `_) * PR `#6830 `_: CUDA: Use relaxed strides checking to compute contiguity (`Graham Markall `_) * PR `#6833 `_: Raise TypeError exception if numpy array is cast to scalar (`Michael Collison `_) * PR `#6834 `_: Remove illegal "debug" kw argument (`Shaun Cutts `_) * PR `#6836 `_: CUDA: Documentation updates (`Graham Markall `_) * PR `#6840 `_: CUDA: Remove items deprecated in 0.53 + simulator test fixes (`Graham Markall `_) * PR `#6841 `_: CUDA: Fix source location on kernel entry and enable breakpoints to be set on kernels by mangled name (`Graham Markall `_) * PR `#6843 `_: cross-referenced Array type in docs (`Kalyan `_) * PR `#6844 `_: CUDA: Remove NUMBAPRO env var warnings, envvars.py + other small tidy-ups (`Graham Markall `_) * PR `#6848 `_: Ignore .ycm_extra_conf.py (`Graham Markall `_) * PR `#6849 `_: Add __hash__ for IntEnum (`Hannes Pahl `_) * PR `#6850 `_: Fix up more internal warnings (`stuartarchibald `_) * PR `#6854 `_: PR 6096 continued (`stuartarchibald `_ `Ivan Butygin `_) * PR `#6861 `_: updated reference to hsa with roc (`Kalyan `_) * PR `#6867 `_: Update changelog for 0.53.1 (`esc `_) * PR `#6869 `_: Implement builtin sum() (`stuartarchibald `_) * PR `#6870 `_: Add support for dispatcher retargeting using with-context (`stuartarchibald `_ `Siu Kwan Lam `_) * PR `#6871 `_: Force text-align:left when using Annotate (`Guilherme Leobas `_) * PR `#6873 `_: docs: Update reference to @jitclass location (`David Nadlinger `_) * PR `#6876 `_: Add trailing slashes to dir paths in CODEOWNERS (`Graham Markall `_) * PR `#6877 `_: Add doc for recent target extension features (`Siu Kwan Lam `_) * PR `#6878 `_: CUDA: Support passing tuples to ufuncs (`Graham Markall `_) * PR `#6879 `_: CUDA: NumPy and string dtypes for local and shared 
arrays (`Graham Markall `_) * PR `#6880 `_: Add attribute lower_extension to CPUContext (`Reazul Hoque `_) * PR `#6883 `_: Add support of np.swapaxes #4074 (`Daniel Nagel `_) * PR `#6885 `_: CUDA: Explicitly specify objmode + looplifting for jit functions in cuda.random (`Graham Markall `_) * PR `#6886 `_: CUDA: Fix parallel testing for all testsuite submodules (`Graham Markall `_) * PR `#6888 `_: Get overload to consider compiler flags in cache lookup (`Siu Kwan Lam `_) * PR `#6889 `_: Address guvectorize too slow for cuda target (`Michael Collison `_) * PR `#6890 `_: fixes #6884 (`Kalyan `_) * PR `#6898 `_: Work on overloading by hardware target. (`stuartarchibald `_) * PR `#6911 `_: CUDA: Add support for activemask(), lanemask_lt(), and nanosleep() (`Graham Markall `_) * PR `#6912 `_: Prevent use of varargs in closure calls. (`stuartarchibald `_) * PR `#6913 `_: Add runtests option to gitdiff on the common ancestor (`Siu Kwan Lam `_) * PR `#6915 `_: Update _Intrinsic for sphinx to capture the inner docstring (`Guilherme Leobas `_) * PR `#6917 `_: Add type conversion for StringLiteral to unicode_type and test. (`stuartarchibald `_) * PR `#6918 `_: Start section on commonly encounted unsupported parfors code. (`stuartarchibald `_) * PR `#6924 `_: CUDA: Fix ``ffs`` (`Graham Markall `_) * PR `#6928 `_: Add support for axis keyword arg to numpy.argmax() (`stuartarchibald `_ `Itamar Turner-Trauring `_) * PR `#6929 `_: Fix CI failure when gitpython is missing. (`Siu Kwan Lam `_) * PR `#6935 `_: fixes broken link in numba-runtime.rst (`Kalyan `_) * PR `#6936 `_: CUDA: Implement support for PTDS globally (`Graham Markall `_) * PR `#6937 `_: Fix memory leak in bytes boxing (`stuartarchibald `_) * PR `#6940 `_: Fix function resolution for intrinsics across hardware. (`stuartarchibald `_) * PR `#6941 `_: ABC the target descriptor and make consistent throughout. (`stuartarchibald `_) * PR `#6944 `_: CUDA: Support for ``@overload`` (`Graham Markall `_) * PR `#6945 `_: Fix issue with array analysis tests needing scipy. (`stuartarchibald `_) * PR `#6948 `_: Refactor registry init. (`stuartarchibald `_ `Graham Markall `_ `Siu Kwan Lam `_) * PR `#6953 `_: CUDA: Fix and deprecate ``inspect_ptx()``, fix NVVM option setup for device functions (`Graham Markall `_) * PR `#6958 `_: Inconsistent behavior of reshape between numpy and numba/cuda device array (`Lauren Arnett `_) * PR `#6961 `_: Update overload glue to deal with typing_key (`stuartarchibald `_) * PR `#6964 `_: Move minimum supported Python version to 3.7 (`stuartarchibald `_) * PR `#6966 `_: Fix issue with TBB test detecting forks from incorrect state. (`stuartarchibald `_) * PR `#6971 `_: Fix CUDA ``@intrinsic`` use (`stuartarchibald `_) * PR `#6977 `_: Vendor cloudpickle (`Siu Kwan Lam `_) * PR `#6978 `_: Implement operator.contains for empty Tuples (`Brandon T. Willard `_) * PR `#6981 `_: Fix LLVM IR parsing error on use of ``np.bool_`` in globals (`stuartarchibald `_) * PR `#6983 `_: Support Optional types in ufuncs. (`stuartarchibald `_) * PR `#6985 `_: Implement static set/get items on records with integer index (`stuartarchibald `_) * PR `#6986 `_: document release checklist (`esc `_) * PR `#6989 `_: update threading docs for function loading (`esc `_) * PR `#6990 `_: Refactor hardware extension API to refer to "target" instead. (`stuartarchibald `_) * PR `#6991 `_: Move ROCm target status to "unmaintained". 
(`stuartarchibald `_) * PR `#6995 `_: Resolve issue where nan was being assigned to int type numpy array (`Michael Collison `_) * PR `#6996 `_: Add constant lowering support for `SliceType`s (`Brandon T. Willard `_) * PR `#6997 `_: CUDA: Remove catch of NotImplementedError in target.py (`Graham Markall `_) * PR `#6999 `_: Fix errors introduced by the cloudpickle patch (`Siu Kwan Lam `_) * PR `#7003 `_: More mainline fixes (`stuartarchibald `_ `Graham Markall `_ `Siu Kwan Lam `_) * PR `#7004 `_: Test extending the CUDA target (`Graham Markall `_) * PR `#7007 `_: Made stencil compilation not fail for arrays of conflicting types. (`MegaIng `_) * PR `#7008 `_: Added support for np.random.dirichlet with all size arguments (`Rishi Kulkarni `_) * PR `#7016 `_: Docs: Add DALI to list of CAI-supporting libraries (`Graham Markall `_) * PR `#7018 `_: Remove cu{blas,sparse,rand,fft} from library checks (`Graham Markall `_) * PR `#7019 `_: Support NumPy 1.20 (`stuartarchibald `_) * PR `#7020 `_: Fix #7017. Adds util class PickleCallableByPath (`Siu Kwan Lam `_) * PR `#7024 `_: fixed llvmir usage in create_module method (`stuartarchibald `_ `Kalyan `_) * PR `#7027 `_: Fix nrt debug print (`MegaIng `_) * PR `#7031 `_: Fix inliner to use a single scope for all blocks (`Alexey Kozlov `_ `Siu Kwan Lam `_) * PR `#7040 `_: Add Github action to mark issues as stale (`Graham Markall `_) * PR `#7044 `_: Fixes for LLVM 11 (`stuartarchibald `_) * PR `#7049 `_: Make NumPy random module use @overload_glue (`stuartarchibald `_) * PR `#7050 `_: Add overload_classmethod (`Siu Kwan Lam `_) * PR `#7052 `_: Fix string support in CUDA target (`Graham Markall `_) * PR `#7056 `_: Change prange conversion approach to reuse header block. (`Todd A. Anderson `_) * PR `#7061 `_: Add ndarray allocator classmethod (`stuartarchibald `_ `Siu Kwan Lam `_) * PR `#7064 `_: Testhound/host array performance warning (`Michael Collison `_) * PR `#7066 `_: Fix #7065: Add expected exception messages for NumPy 1.20 to tests (`Graham Markall `_) * PR `#7068 `_: Enhancing docs about PRNG seeding (`Jérome Eertmans `_) * PR `#7070 `_: Improve the issue templates and pull request template. (`Guoqiang QI `_) * PR `#7080 `_: Fix ``__eq__`` for Flags and cpu_options classes (`Siu Kwan Lam `_) * PR `#7087 `_: Add note to docs about zero-initialization of variables. (`stuartarchibald `_) * PR `#7088 `_: Initialize NUMBA_DEFAULT_NUM_THREADS with a batch scheduler aware value (`Thomas VINCENT `_) * PR `#7100 `_: Replace deprecated call to cuDeviceComputeCapability (`Graham Markall `_) * PR `#7113 `_: Temporarily disable debug env export. (`stuartarchibald `_) * PR `#7114 `_: CUDA: Deprecate eager compilation of device functions (`Graham Markall `_) * PR `#7116 `_: Fix various issues with dwarf emission: (`stuartarchibald `_ `vlad-perevezentsev `_) * PR `#7118 `_: Remove print to stdout (`stuartarchibald `_) * PR `#7121 `_: Continue work on numpy subclasses (`Todd A. Anderson `_ `Siu Kwan Lam `_) * PR `#7122 `_: Rtd/sphinx compat (`esc `_) * PR `#7134 `_: Move minimum LLVM version to 11. (`stuartarchibald `_) * PR `#7137 `_: skip pycc test on Python 3.7 + macOS because of distutils issue (`esc `_) * PR `#7138 `_: Update the Azure default linux image to Ubuntu 18.04 (`stuartarchibald `_) * PR `#7141 `_: Require llvmlite 0.37 as minimum supported. 
(`stuartarchibald `_) * PR `#7143 `_: Update version checks in __init__ for np 1.17 (`stuartarchibald `_) * PR `#7145 `_: Fix mainline (`stuartarchibald `_) * PR `#7146 `_: Fix ``inline_closurecall`` may not be imported (`Siu Kwan Lam `_) * PR `#7147 `_: Revert "Workaround gitpython 3.1.18 dependency issue" (`stuartarchibald `_) * PR `#7149 `_: Fix issue in bytecode analysis where target and next are same. (`stuartarchibald `_) * PR `#7152 `_: Fix iterators in CUDA (`Graham Markall `_) * PR `#7156 `_: Fix ``ir_utils._max_label`` being updated incorrectly (`Siu Kwan Lam `_) * PR `#7160 `_: Split parfors tests (`stuartarchibald `_) * PR `#7161 `_: Update README for 0.54 (`stuartarchibald `_) * PR `#7162 `_: CUDA: Fix linkage of device functions when compiling for debug (`Graham Markall `_) * PR `#7163 `_: Split legalization pass to consider IR and features separately. (`stuartarchibald `_) * PR `#7165 `_: Fix use of np.clip where out is not provided. (`stuartarchibald `_) * PR `#7189 `_: CUDA: Skip IPC tests on ARM (`Graham Markall `_) * PR `#7190 `_: CUDA: Fix test_pinned on Jetson (`Graham Markall `_) * PR `#7192 `_: Fix missing import in array.argsort impl and add more tests. (`stuartarchibald `_) * PR `#7196 `_: Fixes for lineinfo emission. (`stuartarchibald `_) * PR `#7203 `_: remove duplicate changelog entries (`esc `_) * PR `#7209 `_: Clamp numpy (`esc `_) * PR `#7216 `_: Update CHANGE_LOG for 0.54.0rc2. (`stuartarchibald `_) * PR `#7223 `_: Replace assertion errors on IR assumption violation (`Siu Kwan Lam `_) * PR `#7230 `_: PR #7171 bugfix only (`Todd A. Anderson `_ `stuartarchibald `_) * PR `#7236 `_: CUDA: Skip managed alloc tests on ARM (`Graham Markall `_) * PR `#7267 `_: Fix #7258. Bug in SROA optimization (`Siu Kwan Lam `_) * PR `#7271 `_: Update 3rd party license text. (`stuartarchibald `_) * PR `#7272 `_: Allow annotations in njit-ed functions (`LunarLanding `_) * PR `#7273 `_: Update CHANGE_LOG for 0.54.0rc3. (`stuartarchibald `_) * PR `#7285 `_: CUDA: Fix OOB in test_kernel_arg (`Graham Markall `_) * PR `#7294 `_: Continuation of PR #7280, fixing lifetime of TBB task_scheduler_handle (`Sergey Pokhodenko `_ `stuartarchibald `_) * PR `#7298 `_: Use CBC to pin GCC to 7 on most linux and 9 on aarch64. (`stuartarchibald `_) * PR `#7312 `_: Fix #7302. Workaround missing pthread problem on ppc64le (`Siu Kwan Lam `_) * PR `#7317 `_: In TBB tsh test switch os.fork for mp fork ctx (`stuartarchibald `_) * PR `#7319 `_: Update CHANGE_LOG for 0.54.0 final. (`stuartarchibald `_) Authors: * `Alexander-Makaryev `_ * `Todd A. Anderson `_ * `Hannes Pahl `_ * `Ivan Butygin `_ * `MegaIng `_ * `Sergey Pokhodenko `_ * `Aaron Russell Voelker `_ * `Ashutosh Varma `_ * `Ben Greiner `_ * `Brandon T. Willard `_ * `Daniel Nagel `_ * `David Nadlinger `_ * `Ehsan Totoni `_ * `esc `_ * `Felix Divo `_ * `Graham Markall `_ * `Guilherme Leobas `_ * `Guoqiang QI `_ * `Itamar Turner-Trauring `_ * `Jérome Eertmans `_ * `Alexey Kozlov `_ * `Lauren Arnett `_ * `LunarLanding `_ * `Max Katz `_ * `Kalyan `_ * `Reazul Hoque `_ * `Rishi Kulkarni `_ * `Shaun Cutts `_ * `Siu Kwan Lam `_ * `stuartarchibald `_ * `Thomas VINCENT `_ * `Michael Collison `_ * `vlad-perevezentsev `_ Version 0.53.1 (25 March, 2021) ------------------------------- This is a bugfix release for 0.53.0. 
It contains the following four pull-requests which fix two critical
regressions and two build failures reported by the openSUSE team:

* PR #6826: Fix regression on gufunc serialization
* PR #6828: Fix regression in CUDA: Set stream in mapped and managed array device_setup
* PR #6837: Ignore warnings from packaging module when testing import behaviour.
* PR #6851: set non-reported llvm timing values to 0.0

Authors:

* Ben Greiner
* Graham Markall
* Siu Kwan Lam
* Stuart Archibald

Version 0.53.0 (11 March, 2021)
-------------------------------

This release continues to add new features, bug fixes and stability
improvements to Numba.

Highlights of core changes:

* Support for Python 3.9 (Stuart Archibald).
* Function sub-typing (Lucio Fernandez-Arjona).
* Initial support for dynamic ``gufuncs`` (i.e. from ``@guvectorize``)
  (Guilherme Leobas).
* Parallel Accelerator (``@njit(parallel=True)``) now supports
  Fortran-ordered arrays (Todd A. Anderson and Siu Kwan Lam).

Intel also kindly sponsored research and development that led to two new
features:

* Exposing LLVM compilation pass timings for diagnostic purposes (Siu Kwan
  Lam).
* An event system for broadcasting compiler events (Siu Kwan Lam).

Highlights of changes for the CUDA target:

* CUDA 11.2 onwards (versions of the toolkit using NVVM IR 1.6 / LLVM IR
  7.0.1) are now supported (Graham Markall).
* A fast cube root function is added (Michael Collison).
* Support for atomic ``xor``, increment, decrement and exchange is added, and
  compare-and-swap is extended to support 64-bit integers (Michael Collison).
* Addition of ``cuda.is_supported_version()`` to check if the CUDA runtime
  version is supported (Graham Markall).
* The CUDA dispatcher now shares infrastructure with the CPU dispatcher,
  improving launch times for lazily-compiled kernels (Graham Markall).
* The CUDA Array Interface is updated to version 3, with support for streams
  added (Graham Markall).
* Tuples and ``namedtuples`` can now be passed to kernels (Graham Markall).
* Initial support for Cooperative Groups is added, with support for Grid
  Groups and Grid Sync (Graham Markall and Nick White).
* Support for ``math.log2`` and ``math.remainder`` is added (Guilherme
  Leobas).
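For illustration, a minimal sketch of the dynamic gufunc support (a
hypothetical usage example, assuming a standard 0.53 install): declaring
``@guvectorize`` with only a layout string and no type signatures produces a
gufunc that compiles lazily for the argument types it first receives::

    import numpy as np
    from numba import guvectorize

    # Illustrative sketch only: a layout string with no type signatures
    # creates a dynamic gufunc that compiles on first call.
    @guvectorize('(n),()->(n)')
    def g(x, y, res):
        for i in range(x.shape[0]):
            res[i] = x[i] + y

    g(np.arange(5.0), 2.0)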
General Enhancements:

* PR #4769: objmode complex type spelling (Siu Kwan Lam)
* PR #5579: Function subtyping (Lucio Fernandez-Arjona)
* PR #5659: Add support for parfors creating 'F'ortran layout Numpy arrays. (Todd A. Anderson)
* PR #5936: Improve array analysis for user-defined data types. (Todd A. Anderson)
* PR #5938: Initial support for dynamic gufuncs (Guilherme Leobas)
* PR #5958: Making typed.List a typing Generic (Lucio Fernandez-Arjona)
* PR #6334: Support attribute access from other modules (Farah Hariri)
* PR #6373: Allow Dispatchers to be cached (Eric Wieser)
* PR #6519: Avoid unnecessary ir.Del generation and removal (Ehsan Totoni)
* PR #6545: Refactoring ParforDiagnostics (Elena Totmenina)
* PR #6560: Add LLVM pass timer (Siu Kwan Lam)
* PR #6573: Improve ``__str__`` for typed.List when invoked from IPython shell (Amin Sadeghi)
* PR #6575: Avoid temp variable assignments (Ehsan Totoni)
* PR #6578: Add support for numpy ``intersect1d`` and basic test cases (``@caljrobe``)
* PR #6579: Python 3.9 support. (Stuart Archibald)
* PR #6580: Store partial typing errors in compiler state (Ehsan Totoni)
* PR #6626: A simple event system to broadcast compiler events (Siu Kwan Lam)
* PR #6635: Try to resolve dynamic getitems as static post unroll transform. (Stuart Archibald)
* PR #6636: Adds llvm_lock event (Siu Kwan Lam)
* PR #6664: Adds tests for PR 5659 (Siu Kwan Lam)
* PR #6680: Allow getattr to work in objmode output type spec (Siu Kwan Lam)

Fixes:

* PR #6176: Remove references to deprecated numpy globals (Eric Wieser)
* PR #6374: Use Python 3 style OSError handling (Eric Wieser)
* PR #6402: Fix ``typed.Dict`` and ``typed.List`` crashing on parametrized types (Andreas Sodeur)
* PR #6403: Add ``types.ListType.key`` (Andreas Sodeur)
* PR #6410: Fixes issue #6386 (Danny Weitekamp)
* PR #6425: Fix unicode join for issue #6405 (Teugea Ioan-Teodor)
* PR #6437: Don't pass reduction variables known in an outer parfor to inner parfors when analyzing reductions. (Todd A. Anderson)
* PR #6453: Keep original variable names in metadata to improve diagnostics (Ehsan Totoni)
* PR #6454: FIX: Fixes for literals (Eric Larson)
* PR #6463: Bump llvmlite to 0.36 series (Stuart Archibald)
* PR #6466: Remove the misspelling of finalize_dynamic_globals (Sergey Pokhodenko)
* PR #6489: Improve the error message for unsupported Buffer in Buffer situation. (Stuart Archibald)
* PR #6503: Add test to ensure Numba imports without warnings. (Stuart Archibald)
* PR #6508: Defer requirements to setup.py (Siu Kwan Lam)
* PR #6521: Skip annotated jitclass test if typeguard is running. (Stuart Archibald)
* PR #6524: Fix typed.List return value (Lucio Fernandez-Arjona)
* PR #6562: Correcting typo in numba sysinfo output (Nick Sutcliffe)
* PR #6574: Run parfor fusion if 2 or more parfors (Ehsan Totoni)
* PR #6582: Fix typed dict error with uninitialized padding bytes (Siu Kwan Lam)
* PR #6584: Remove jitclass from ``__init__`` ``__all__``. (Stuart Archibald)
* PR #6586: Run closure inlining ahead of branch pruning in case of nonlocal (Stuart Archibald)
* PR #6591: Fix inlineasm test failure. (Siu Kwan Lam)
* PR #6622: Fix 6534, handle unpack of assign-like tuples. (Stuart Archibald)
* PR #6652: Simplify PR-6334 (Siu Kwan Lam)
* PR #6653: Fix get_numba_envvar (Siu Kwan Lam)
* PR #6654: Fix #6632 support alternative dtype string spellings (Stuart Archibald)
* PR #6685: Add Python 3.9 to classifiers. (Stuart Archibald)
* PR #6693: patch to compile _devicearray.cpp with c++11 (Valentin Haenel)
* PR #6716: Consider assignment lhs live if used in rhs (Fixes #6715) (Ehsan Totoni)
* PR #6727: Avoid errors in array analysis for global tuples with non-int (Ehsan Totoni)
* PR #6733: Fix segfault and errors in #6668 (Siu Kwan Lam)
* PR #6741: Enable SSA in IR inliner (Ehsan Totoni)
* PR #6763: use an alternative constraint for the conda packages (Valentin Haenel)
* PR #6786: Fix gufunc kwargs support (Siu Kwan Lam)

CUDA Enhancements/Fixes:

* PR #5162: Specify synchronization semantics of CUDA Array Interface (Graham Markall)
* PR #6245: CUDA Cooperative grid groups (Graham Markall and Nick White)
* PR #6333: Remove dead ``_Kernel.__call__`` (Graham Markall)
* PR #6343: CUDA: Add support for passing tuples and namedtuples to kernels (Graham Markall)
* PR #6349: Refactor Dispatcher to remove unnecessary indirection (Graham Markall)
* PR #6358: Add log2 and remainder implementations for cuda (Guilherme Leobas)
* PR #6376: Added a fixed seed in test_atomics.py for issue #6370 (Teugea Ioan-Teodor)
* PR #6377: CUDA: Fix various issues in test suite (Graham Markall)
* PR #6409: Implement cuda atomic xor (Michael Collison)
* PR #6422: CUDA: Remove deprecated items, expect CUDA 11.1 (Graham Markall)
* PR #6427: Remove duplicate repeated definition of gufunc (Amit Kumar)
* PR #6432: CUDA: Use ``_dispatcher.Dispatcher`` as base Dispatcher class (Graham Markall)
* PR #6447: CUDA: Add get_regs_per_thread method to Dispatcher (Graham Markall)
* PR #6499: CUDA atomic increment, decrement, exchange and compare and swap (Michael Collison)
* PR #6510: CUDA: Make device array assignment synchronous where necessary (Graham Markall)
* PR #6517: CUDA: Add NVVM test of all 8-bit characters (Graham Markall)
* PR #6567: Refactor llvm replacement code into separate function (Michael Collison)
* PR #6642: Testhound/cuda cuberoot (Michael Collison)
* PR #6661: CUDA: Support NVVM70 / CUDA 11.2 (Graham Markall)
* PR #6663: Fix error caused by missing "-static" libraries defined for some platforms (Siu Kwan Lam)
* PR #6666: CUDA: Add a function to query whether the runtime version is supported. (Graham Markall)
* PR #6725: CUDA: Fix compile to PTX with debug for CUDA 11.2 (Graham Markall)

Documentation Updates:

* PR #5740: Add FAQ entry on how to create a MWR. (Stuart Archibald)
* PR #6346: DOC: add where to get dev builds from to FAQ (Eyal Trabelsi)
* PR #6418: docs: use https for homepage (``@imba-tjd``)
* PR #6430: CUDA docs: Add RNG example with 3D grid and strided loops (Graham Markall)
* PR #6436: docs: remove typo in Deprecation Notices (Thibault Ballier)
* PR #6440: Add note about performance of typed containers from the interpreter. (Stuart Archibald)
* PR #6457: Link to read the docs instead of numba homepage (Hannes Pahl)
* PR #6470: Adding PyCon Sweden 2020 talk on numba (Ankit Mahato)
* PR #6472: Document ``numba.extending.is_jitted`` (Stuart Archibald)
* PR #6495: Fix typo in literal list docs. (Stuart Archibald)
* PR #6501: Add doc entry on Numba's limited resources and how to help. (Stuart Archibald)
* PR #6502: Add CODEOWNERS file. (Stuart Archibald)
* PR #6531: Update canonical URL. (Stuart Archibald)
* PR #6544: Minor typo / grammar fixes to 5 minute guide (Ollin Boer Bohan)
* PR #6599: docs: fix simple typo, consevatively -> conservatively (Tim Gates)
* PR #6609: Recommend miniforge instead of c4aarch64 (Isuru Fernando)
* PR #6671: Update environment creation example to python 3.8 (Lucio Fernandez-Arjona)
* PR #6676: Update hardware and software versions in various docs. (Stuart Archibald)
* PR #6682: Update deprecation notices for 0.53 (Stuart Archibald)

CI/Infrastructure Updates:

* PR #6458: Enable typeguard in CI (Siu Kwan Lam)
* PR #6500: Update bug and feature request templates. (Stuart Archibald)
* PR #6516: Fix RTD build by using conda. (Stuart Archibald)
* PR #6587: Add zenodo badge (Siu Kwan Lam)

Authors:

* Amin Sadeghi
* Amit Kumar
* Andreas Sodeur
* Ankit Mahato
* Chris Barnes
* Danny Weitekamp
* Ehsan Totoni (core dev)
* Eric Larson
* Eric Wieser
* Eyal Trabelsi
* Farah Hariri
* Graham Markall
* Guilherme Leobas
* Hannes Pahl
* Isuru Fernando
* Lucio Fernandez-Arjona
* Michael Collison
* Nick Sutcliffe
* Nick White
* Ollin Boer Bohan
* Sergey Pokhodenko
* Siu Kwan Lam (core dev)
* Stuart Archibald (core dev)
* Teugea Ioan-Teodor
* Thibault Ballier
* Tim Gates
* Todd A. Anderson (core dev)
* Valentin Haenel (core dev)
* ``@caljrobe``
* ``@imba-tjd``

Version 0.52.0 (30 November, 2020)
----------------------------------

This release focuses on performance improvements, but also adds some new features and contains numerous bug fixes and stability improvements.

Highlights of core performance improvements include:

* Intel kindly sponsored research and development into producing a new reference count pruning pass. This pass operates at the LLVM level and can prune a number of common reference counting patterns. This will improve performance for two primary reasons:

  * There will be less pressure on the atomic locks used to do the reference counting.
  * Removal of reference counting operations permits more inlining and the optimisation passes can in general do more with what is present.

  (Siu Kwan Lam).

* Intel also sponsored work to improve the performance of the ``numba.typed.List`` container, particularly in the case of ``__getitem__`` and iteration (Stuart Archibald; a short usage sketch follows this list).
* Superword-level parallelism vectorization is now switched on and the optimisation pipeline has been lightly analysed and tuned so as to be able to vectorize more and more often (Stuart Archibald).
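To make the ``numba.typed.List`` highlight above concrete, here is a minimal usage sketch; the container API shown is the existing public one, the speed-up in this release is internal to ``__getitem__`` and iteration::

    from numba import njit
    from numba.typed import List

    @njit
    def total(values):
        acc = 0.0
        for v in values:   # iteration and indexing are the fast paths
            acc += v
        return acc

    values = List()        # the item type is refined on first append
    for i in range(1000):
        values.append(float(i))

    print(total(values))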
Highlights of core feature changes include:

* The ``inspect_cfg`` method on the JIT dispatcher object has been significantly enhanced and now includes highlighted output and interleaved line markers and Python source (Stuart Archibald).
* The BSD operating system is now unofficially supported (Stuart Archibald).
* Numerous features/functionality improvements to NumPy support, including support for:

  * ``np.asfarray`` (Guilherme Leobas)
  * "subtyping" in record arrays (Lucio Fernandez-Arjona)
  * ``np.split`` and ``np.array_split`` (Isaac Virshup)
  * ``operator.contains`` with ``ndarray`` (``@mugoh``).
  * ``np.asarray_chkfinite`` (Rishabh Varshney).
  * NumPy 1.19 (Stuart Archibald).
  * the ``ndarray`` allocators, ``empty``, ``ones`` and ``zeros``, accepting a ``dtype`` specified as a string literal (Stuart Archibald; a sketch follows the deprecation notes below).

* Booleans are now supported as literal types (Alexey Kozlov).
* On the CUDA target:

  * CUDA 9.0 is now the minimum supported version (Graham Markall).
  * Support for Unified Memory has been added (Max Katz).
  * Kernel launch overhead is reduced (Graham Markall).
  * Cudasim support for mapped array, memcopies and memset has been added (Mike Williams).
  * Access has been wired in to all libdevice functions (Graham Markall).
  * Additional CUDA atomic operations have been added (Michael Collison).
  * Additional math library functions (``frexp``, ``ldexp``, ``isfinite``) (Zhihao Yuan).
  * Support for ``power`` on complex numbers (Graham Markall).

Deprecations to note:

There are no new deprecations. However, note that "compatibility" mode, which was added some 40 releases ago to help transition from 0.11 to 0.12+, has been removed! Also, the shim to permit the import of ``jitclass`` from Numba's top level namespace has now been removed as per the deprecation schedule.
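A short sketch of the "``dtype`` as a string literal" enhancement noted in the highlights above, assuming only that the usual NumPy allocators are used inside an ``@njit`` function::

    from numba import njit
    import numpy as np

    @njit
    def make_buffers(n):
        # the dtype may be spelled as a string literal; it is resolved
        # at compile time like a NumPy dtype object would be
        a = np.zeros(n, "float64")
        b = np.ones((n, 2), "int8")
        c = np.empty(n, "complex128")
        return a, b, c

    a, b, c = make_buffers(4)
    print(a.dtype, b.dtype, c.dtype)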
General Enhancements:

* PR #5418: Add np.asfarray impl (Guilherme Leobas)
* PR #5560: Record subtyping (Lucio Fernandez-Arjona)
* PR #5609: Jitclass Infer Spec from Type Annotations (Ethan Pronovost)
* PR #5699: Implement np.split and np.array_split (Isaac Virshup)
* PR #6015: Adding BooleanLiteral type (Alexey Kozlov)
* PR #6027: Support operators inlining in InlineOverloads (Alexey Kozlov)
* PR #6038: Closes #6037, fixing FreeBSD compilation (László Károlyi)
* PR #6086: Add more accessible version information (Stuart Archibald)
* PR #6157: Add pipeline_class argument to @cfunc as supported by @jit. (Arthur Peters)
* PR #6262: Support dtype from str literal. (Stuart Archibald)
* PR #6271: Support ``ndarray`` contains (``@mugoh``)
* PR #6295: Enhance inspect_cfg (Stuart Archibald)
* PR #6304: Support NumPy 1.19 (Stuart Archibald)
* PR #6309: Add suitable file search path for BSDs. (Stuart Archibald)
* PR #6341: Re roll 6279 (Rishabh Varshney and Valentin Haenel)

Performance Enhancements:

* PR #6145: Patch to fingerprint namedtuples. (Stuart Archibald)
* PR #6202: Speed up str(int) (Stuart Archibald)
* PR #6261: Add np.ndarray.ptp() support. (Stuart Archibald)
* PR #6266: Use custom LLVM refcount pruning pass (Siu Kwan Lam)
* PR #6275: Switch on SLP vectorize. (Stuart Archibald)
* PR #6278: Improve typed list performance. (Stuart Archibald)
* PR #6335: Split optimisation passes. (Stuart Archibald)
* PR #6455: Fix refprune on obfuscated refs and stabilize optimisation WRT wrappers. (Stuart Archibald)

Fixes:

* PR #5639: Make UnicodeType inherit from Hashable (Stuart Archibald)
* PR #6006: Resolves incorrectly hoisted list in parfor. (Todd A. Anderson)
* PR #6126: fix version_info if version can not be determined (Valentin Haenel)
* PR #6137: Remove references to Python 2's long (Eric Wieser)
* PR #6139: Use direct syntax instead of the ``add_metaclass`` decorator (Eric Wieser)
* PR #6140: Replace calls to utils.iteritems(d) with d.items() (Eric Wieser)
* PR #6141: Fix #6130 objmode cache segfault (Siu Kwan Lam)
* PR #6156: Remove callers of ``reraise`` in favor of using ``with_traceback`` directly (Eric Wieser)
* PR #6162: Move charseq support out of init (Stuart Archibald)
* PR #6165: #5425 continued (Amos Bird and Stuart Archibald)
* PR #6166: Remove Python 2 compatibility from numba.core.utils (Eric Wieser)
* PR #6185: Better error message on NotDefinedError (Luiz Almeida)
* PR #6194: Remove recursion from traverse_types (Radu Popovici)
* PR #6200: Workaround #5973 (Stuart Archibald)
* PR #6203: Make find_callname only lookup functions that are likely part of NumPy. (Stuart Archibald)
* PR #6204: Fix unicode kind selection for getitem. (Stuart Archibald)
* PR #6206: Build all extension modules with -g -Wall -Werror on Linux x86, provide -O0 flag option (Graham Markall)
* PR #6212: Fix for objmode recompilation issue (Alexey Kozlov)
* PR #6213: Fix #6177. Remove AOT dependency on the Numba package (Siu Kwan Lam)
* PR #6224: Add support for tuple concatenation to array analysis. (#5396 continued) (Todd A. Anderson)
* PR #6231: Remove compatibility mode (Graham Markall)
* PR #6254: Fix win-32 hashing bug (from Stuart Archibald) (Ray Donnelly)
* PR #6265: Fix #6260 (Stuart Archibald)
* PR #6267: speed up a couple of really slow unittests (Stuart Archibald)
* PR #6281: Remove numba.jitclass shim as per deprecation schedule. (Stuart Archibald)
* PR #6294: Make return type propagate to all return variables (Andreas Sodeur)
* PR #6300: Un-skip tests that were skipped because of #4026. (Owen Anderson)
* PR #6307: Remove restrictions on SVML version due to bug in LLVM SVML CC (Stuart Archibald)
* PR #6316: Make IR inliner tests not self mutating. (Stuart Archibald)
* PR #6318: PR #5892 continued (Todd A. Anderson, via Stuart Archibald)
* PR #6319: Permit switching off boundschecking when debug is on. (Stuart Archibald)
* PR #6324: PR 6208 continued (Ivan Butygin and Stuart Archibald)
* PR #6337: Implements ``key`` on ``types.TypeRef`` (Andreas Sodeur)
* PR #6354: Bump llvmlite to 0.35 series. (Stuart Archibald)
* PR #6357: Fix enumerate invalid decref (Siu Kwan Lam)
* PR #6359: Fixes typed list indexing on 32bit (Stuart Archibald)
* PR #6378: Fix incorrect CPU override in vectorization test. (Stuart Archibald)
* PR #6379: Use O0 to enable inline and not affect loop-vectorization by later O3... (Siu Kwan Lam)
* PR #6384: Fix failing tests to match on platform invariant int spelling. (Stuart Archibald)
* PR #6390: Updates inspect_cfg (Stuart Archibald)
* PR #6396: Remove hard dependency on tbb package. (Stuart Archibald)
* PR #6408: Don't do array analysis for tuples that contain arrays. (Todd A. Anderson)
* PR #6441: Fix ASCII flag in Unicode slicing (0.52.0rc2 regression) (Ehsan Totoni)
* PR #6442: Fix array analysis regression in 0.52 RC2 for tuple of 1D arrays (Ehsan Totoni)
* PR #6446: Fix #6444: pruner issues with reference stealing functions (Siu Kwan Lam)
* PR #6450: Fix asfarray kwarg default handling. (Stuart Archibald)
* PR #6486: fix abstract base class import (Valentin Haenel)
* PR #6487: Restrict maximum version of python (Siu Kwan Lam)
* PR #6527: setup.py: fix py version guard (Chris Barnes)

CUDA Enhancements/Fixes:

* PR #5465: Remove macro expansion and replace uses with FE typing + BE lowering (Graham Markall)
* PR #5741: CUDA: Add two-argument implementation of round() (Graham Markall)
* PR #5900: Enable CUDA Unified Memory (Max Katz)
* PR #6042: CUDA: Lower launch overhead by launching kernel directly (Graham Markall)
* PR #6064: Lower math.frexp and math.ldexp in numba.cuda (Zhihao Yuan)
* PR #6066: Lower math.isfinite in numba.cuda (Zhihao Yuan)
* PR #6092: CUDA: Add mapped_array_like and pinned_array_like (Graham Markall)
* PR #6127: Fix race in reduction kernels on Volta, require CUDA 9, add syncwarp with default mask (Graham Markall)
* PR #6129: Extend Cudasim to support most of the memory functionality. (Mike Williams)
* PR #6150: CUDA: Turn on flake8 for cudadrv and fix errors (Graham Markall)
* PR #6152: CUDA: Provide wrappers for all libdevice functions, and fix typing of math function (#4618) (Graham Markall)
* PR #6227: Raise exception when no supported architectures are found (Jacob Tomlinson)
* PR #6244: CUDA Docs: Make workflow using simulator more explicit (Graham Markall)
* PR #6248: Add support for CUDA atomic subtract operations (Michael Collison)
* PR #6289: Refactor atomic test cases to reduce code duplication (Michael Collison)
* PR #6290: CUDA: Add support for complex power (Graham Markall)
* PR #6296: Fix flake8 violations in numba.cuda module (Graham Markall)
* PR #6297: Fix flake8 violations in numba.cuda.tests.cudapy module (Graham Markall)
* PR #6298: Fix flake8 violations in numba.cuda.tests.cudadrv (Graham Markall)
* PR #6299: Fix flake8 violations in numba.cuda.simulator (Graham Markall)
* PR #6306: Fix flake8 in cuda atomic test from merge. (Stuart Archibald)
* PR #6325: Refactor code for atomic operations (Michael Collison)
* PR #6329: Flake8 fix for a CUDA test (Stuart Archibald)
* PR #6331: Explicitly state that NUMBA_ENABLE_CUDASIM needs to be set before import (Graham Markall)
* PR #6340: CUDA: Fix #6339, performance regression launching specialized kernels (Graham Markall)
* PR #6380: Only test managed allocations on Linux (Graham Markall)

Documentation Updates:

* PR #6090: doc: Add doc on direct creation of Numba typed-list (``@rht``)
* PR #6110: Update CONTRIBUTING.md (Stuart Archibald)
* PR #6128: CUDA Docs: Restore Dispatcher.forall() docs (Graham Markall)
* PR #6277: fix: cross2d wrong doc. reference (issue #6276) (``@jeertmans``)
* PR #6282: Remove docs on Python 2(.7) EOL. (Stuart Archibald)
* PR #6283: Add note on how public CI is impl and what users can do to help. (Stuart Archibald)
* PR #6292: Document support for structured array attribute access (Graham Markall)
* PR #6310: Declare unofficial \*BSD support (Stuart Archibald)
* PR #6342: Fix docs on literally usage. (Stuart Archibald)
* PR #6348: doc: fix typo in jitclass.rst ("initilising" -> "initialising") (``@muxator``)
* PR #6362: Move llvmlite support in README to 0.35 (Stuart Archibald)
* PR #6363: Note that reference counted types are not permitted in set(). (Stuart Archibald)
* PR #6364: Move deprecation schedules for 0.52 (Stuart Archibald)

CI/Infrastructure Updates:

* PR #6252: Show channel URLs (Siu Kwan Lam)
* PR #6338: Direct user questions to Discourse instead of the Google Group. (Stan Seibert)
* PR #6474: Add skip on PPC64LE for tests causing SIGABRT in LLVM. (Stuart Archibald)

Authors:

* Alexey Kozlov
* Amos Bird
* Andreas Sodeur
* Arthur Peters
* Chris Barnes
* Ehsan Totoni (core dev)
* Eric Wieser
* Ethan Pronovost
* Graham Markall
* Guilherme Leobas
* Isaac Virshup
* Ivan Butygin
* Jacob Tomlinson
* Luiz Almeida
* László Károlyi
* Lucio Fernandez-Arjona
* Max Katz
* Michael Collison
* Mike Williams
* Owen Anderson
* Radu Popovici
* Ray Donnelly
* Rishabh Varshney
* Siu Kwan Lam (core dev)
* Stan Seibert (core dev)
* Stuart Archibald (core dev)
* Todd A. Anderson (core dev)
* Valentin Haenel (core dev)
* Zhihao Yuan
* ``@jeertmans``
* ``@mugoh``
* ``@muxator``
* ``@rht``

Version 0.51.2 (September 2, 2020)
----------------------------------

This is a bugfix release for 0.51.1. It fixes a critical performance bug in the CFG back edge computation algorithm that leads to exponential time complexity arising in compilation for use cases with certain pathological properties.
* PR #6195: PR 6187 Continue. Don't visit already checked successors

Authors:

* Graham Markall
* Siu Kwan Lam (core dev)

Version 0.51.1 (August 26, 2020)
--------------------------------

This is a bugfix release for 0.51.0; it fixes a critical bug in caching, another critical bug in the CUDA target initialisation sequence and also fixes some compile time performance regressions:

* PR #6141: Fix #6130 objmode cache segfault
* PR #6146: Fix compilation slowdown due to controlflow analysis
* PR #6147: CUDA: Don't make a runtime call on import
* PR #6153: Fix for #6151. Make UnicodeCharSeq into str for comparison.
* PR #6168: Fix Issue #6167: Failure in test_cuda_submodules

Authors:

* Graham Markall
* Siu Kwan Lam (core dev)
* Stuart Archibald (core dev)

Version 0.51.0 (August 12, 2020)
--------------------------------

This release continues to add new features to Numba and also contains a significant number of bug fixes and stability improvements.

Highlights of core feature changes include:

* The compilation chain is now based on LLVM 10 (Valentin Haenel).
* Numba has internally switched to prefer non-literal types over literal ones so as to reduce function over-specialisation, with a view to speeding up compile times (Siu Kwan Lam).
* On the CUDA target: Support for CUDA Toolkit 11, Ampere, and Compute Capability 8.0; Printing of ``SASS`` code for kernels; Callbacks to Python functions can be inserted into CUDA streams, and streams are async awaitable; Atomic ``nanmin`` and ``nanmax`` functions are added; Fixes for various miscompilations and segfaults. (mostly Graham Markall; callbacks on streams by Peter Würtz).

Intel also kindly sponsored research and development that led to some exciting new features:

* Support for heterogeneous immutable lists and heterogeneous immutable string key dictionaries. Also optional initial/construction value capturing for all lists and dictionaries containing literal values (Stuart Archibald; a sketch follows the deprecation notes below).
* A new pass-by-reference mutable structure extension type ``StructRef`` (Siu Kwan Lam).
* Object mode blocks are now cacheable, with the side effect of numerous bug fixes and performance improvements in caching. This also permits caching of functions defined in closures (Siu Kwan Lam).

Deprecations to note:

To align with other targets, the ``argtypes`` and ``restypes`` kwargs to ``@cuda.jit`` are now deprecated, as is the ``bind`` kwarg. Further, the ``target`` kwarg to the ``numba.jit`` decorator family is deprecated.
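As a rough illustration of the heterogeneous immutable container highlight above, a sketch of a string-key dictionary with mixed value types; the dictionary is captured as an immutable literal and constant-key lookups are resolved at compile time (a sketch of the intent, not an exhaustive description of the feature)::

    from numba import njit

    @njit
    def describe():
        # string keys with heterogeneous value types form an immutable
        # "literal" dictionary inside the compiled function
        cfg = {"name": "numba", "major": 0, "parallel": True}
        return cfg["major"], cfg["parallel"]

    print(describe())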
General Enhancements:

* PR #5463: Add str(int) impl
* PR #5526: Impl. np.asarray(literal)
* PR #5619: Add support for multi-output ufuncs
* PR #5711: Division with timedelta input
* PR #5763: Support minlength argument to np.bincount
* PR #5779: Return zero array from np.dot when the arguments are empty.
* PR #5796: Add implementation for np.positive
* PR #5849: Setitem for records when index is StringLiteral, including literal unroll
* PR #5856: Add support for conversion of inplace_binop to parfor.
* PR #5893: Allocate 1D iteration space one at a time for more even distribution.
* PR #5922: Reduce objmode and unpickling overhead
* PR #5944: re-enable OpenMP in wheels
* PR #5946: Implement literal dictionaries and lists.
* PR #5956: Update numba_sysinfo.py
* PR #5978: Add structref as a mutable struct that is pass-by-ref
* PR #5980: Deprecate target kwarg for numba.jit.
* PR #6058: Add prefer_literal option to overload API

Fixes:

* PR #5674: Fix #3955. Allow `with objmode` to be cached
* PR #5724: Initialize process lock lazily to prevent multiprocessing issue
* PR #5783: Make np.divide and np.remainder code more similar
* PR #5808: Fix 5665 Block jit(nopython=True, forceobj=True) and suppress njit(forceobj=True)
* PR #5834: Fix the is operator on Ellipsis
* PR #5838: Ensure ``Dispatcher.__eq__`` always returns a bool
* PR #5841: cleanup: Use PythonAPI.bool_from_bool in more places
* PR #5862: Do not leak loop iteration variables into the numba.np.npyimpl namespace
* PR #5869: Update repomap
* PR #5879: Fix erroneous input mutation in linalg routines
* PR #5882: Type check function in jit decorator
* PR #5925: Use np.inf and -np.inf for max and min float values respectively.
* PR #5935: Fix default arguments with multiprocessing
* PR #5952: Fix "Internal error ... local variable 'errstr' referenced before assignment during BoundFunction(...)"
* PR #5962: Fix SVML tests with LLVM 10 and AVX512
* PR #5972: fix flake8 for numba/runtests.py
* PR #5995: Update setup.py with new llvmlite versions
* PR #5996: Set lower bound for llvmlite to 0.33
* PR #6004: Fix problem in branch pruning with LiteralStrKeyDict
* PR #6017: Fixing up numba_do_raise
* PR #6028: Fix #6023
* PR #6031: Continue 5821
* PR #6035: Fix overspecialize of literal
* PR #6046: Fixes statement reordering bug in maximize fusion step.
* PR #6056: Fix issue on invalid inlining of non-empty build_list by inline_arraycall
* PR #6057: fix aarch64/python_3.8 failure on master
* PR #6070: Fix overspecialized containers
* PR #6071: Remove f-strings in setup.py
* PR #6072: Fix for #6005
* PR #6073: Fixes invalid C prototype in helper function.
* PR #6078: Duplicate NumPy's PyArray_DescrCheck macro
* PR #6081: Fix issue with cross drive use and relpath.
* PR #6083: Fix bug in initial value unify.
* PR #6087: remove invalid sanity check from randrange tests
* PR #6089: Fix invalid reference to TypingError
* PR #6097: Add function code and closure bytes into cache key
* PR #6099: Restrict upper limit of TBB version due to ABI changes.
* PR #6101: Restrict lower limit of icc_rt version due to assumed SVML bug.
* PR #6107: Fix and test #6095
* PR #6109: Fixes an issue reported in #6094
* PR #6111: Decouple LiteralList and LiteralStrKeyDict from tuple
* PR #6116: Fix #6102. Problem with non-unique label.

CUDA Enhancements/Fixes:

* PR #5359: Remove special-casing of 0d arrays
* PR #5709: CUDA: Refactoring of cuda.jit and kernel / dispatcher abstractions
* PR #5732: CUDA Docs: document ``forall`` method of kernels
* PR #5745: CUDA stream callbacks and async awaitable streams
* PR #5761: Add implementation for int types for isnan and isinf for CUDA
* PR #5819: Add support for CUDA 11 and Ampere / CC 8.0
* PR #5826: CUDA: Add function to get SASS for kernels
* PR #5846: CUDA: Allow disabling NVVM optimizations, and fix debug issues
* PR #5851: CUDA EMM enhancements - add default get_ipc_handle implementation, skip a test conditionally
* PR #5852: CUDA: Fix ``cuda.test()``
* PR #5857: CUDA docs: Add notes on resetting the EMM plugin
* PR #5859: CUDA: Fix reduce docs and style improvements
* PR #6016: Fixes change of list spelling in a cuda test.
* PR #6020: CUDA: Fix #5820, adding atomic nanmin / nanmax
* PR #6030: CUDA: Don't optimize IR before sending it to NVVM
* PR #6052: Fix dtype for atomic_add_double testsuite
* PR #6080: CUDA: Prevent auto-upgrade of atomic intrinsics
* PR #6123: Fix #6121

Documentation Updates:

* PR #5782: Host docs on Read the Docs
* PR #5830: doc: Mention that caching uses pickle
* PR #5963: Fix broken link to numpy ufunc signature docs
* PR #5975: restructure communication section
* PR #5981: Document bounds-checking behavior in python deviations page
* PR #5993: Docs for structref
* PR #6008: Small fix so bullet points are rendered by sphinx
* PR #6013: emphasize cuda kernel functions are asynchronous
* PR #6036: Update deprecation doc from numba.errors to numba.core.errors
* PR #6062: Change references to numba.pydata.org to https

CI updates:

* PR #5850: Updates the "New Issue" behaviour to better redirect users.
* PR #5940: Add discourse badge
* PR #5960: Setting mypy on CI

Enhancements from user contributed PRs (with thanks!):

* Aisha Tammy added the ability to switch off TBB support at compile time in #5821 (continued in #6031 by Stuart Archibald).
* Alexander Stiebing fixed a reference before assignment bug in #5952.
* Alexey Kozlov fixed a bug in tuple getitem for literals in #6028.
* Andrew Eckart updated the repomap in #5869, added support for Read the Docs in #5782, fixed a bug in the ``np.dot`` implementation to correctly handle empty arrays in #5779 and added support for ``minlength`` to ``np.bincount`` in #5763.
* ``@bitsisbits`` updated ``numba_sysinfo.py`` to handle HSA agents correctly in #5956.
* Daichi Suzuo fixed a bug in the threading backend initialisation sequence such that it is now correctly a lazy lock in #5724.
* Eric Wieser contributed a number of patches, particularly in enhancing and improving the ``ufunc`` capabilities:

  * #5359: Remove special-casing of 0d arrays
  * #5834: Fix the is operator on Ellipsis
  * #5619: Add support for multi-output ufuncs
  * #5841: cleanup: Use PythonAPI.bool_from_bool in more places
  * #5862: Do not leak loop iteration variables into the numba.np.npyimpl namespace
  * #5838: Ensure ``Dispatcher.__eq__`` always returns a bool
  * #5830: doc: Mention that caching uses pickle
  * #5783: Make np.divide and np.remainder code more similar

* Ethan Pronovost added a guard to prevent the common mistake of applying a jit decorator to the same function twice in #5881.
* Graham Markall contributed many patches to the CUDA target, as follows:

  * #6052: Fix dtype for atomic_add_double tests
  * #6030: CUDA: Don't optimize IR before sending it to NVVM
  * #5846: CUDA: Allow disabling NVVM optimizations, and fix debug issues
  * #5826: CUDA: Add function to get SASS for kernels
  * #5851: CUDA EMM enhancements - add default get_ipc_handle implementation, skip a test conditionally
  * #5709: CUDA: Refactoring of cuda.jit and kernel / dispatcher abstractions
  * #5819: Add support for CUDA 11 and Ampere / CC 8.0
  * #6020: CUDA: Fix #5820, adding atomic nanmin / nanmax
  * #5857: CUDA docs: Add notes on resetting the EMM plugin
  * #5859: CUDA: Fix reduce docs and style improvements
  * #5852: CUDA: Fix ``cuda.test()``
  * #5732: CUDA Docs: document ``forall`` method of kernels

* Guilherme Leobas added support for ``str(int)`` in #5463 and ``np.asarray(literal value)`` in #5526.
* Hameer Abbasi deprecated the ``target`` kwarg for ``numba.jit`` in #5980.
* Hannes Pahl added a badge to the Numba github page linking to the new discourse forum in #5940 and also fixed a bug that permitted illegal combinations of flags to be passed into ``@jit`` in #5808.
* Kayran Schmidt emphasized that CUDA kernel functions are asynchronous in the documentation in #6013.
* Leonardo Uieda fixed a broken link to the NumPy ufunc signature docs in #5963.
* Lucio Fernandez-Arjona added mypy to CI and started adding type annotations to the code base in #5960, also fixed a (de)serialization problem on the dispatcher in #5935, improved the undefined variable error message in #5876, added support for division with timedelta input in #5711 and implemented ``setitem`` for records when the index is a ``StringLiteral`` in #5849.
* Ludovic Tiako documented Numba's bounds-checking behavior in the python deviations page in #5981.
* Matt Roeschke changed all ``http`` references to ``https`` in #6062.
* ``@niteya-shah`` implemented ``isnan`` and ``isinf`` for integer types on the CUDA target in #5761 and implemented ``np.positive`` in #5796.
* Peter Würtz added CUDA stream callbacks and async awaitable streams in #5745.
* ``@rht`` fixed an invalid import referred to in the deprecation documentation in #6036.
* Sergey Pokhodenko updated the SVML tests for LLVM 10 in #5962.
* Shyam Saladi fixed a Sphinx rendering bug in #6008.

Authors:

* Aisha Tammy
* Alexander Stiebing
* Alexey Kozlov
* Andrew Eckart
* ``@bitsisbits``
* Daichi Suzuo
* Eric Wieser
* Ethan Pronovost
* Graham Markall
* Guilherme Leobas
* Hameer Abbasi
* Hannes Pahl
* Kayran Schmidt
* Kozlov, Alexey
* Leonardo Uieda
* Lucio Fernandez-Arjona
* Ludovic Tiako
* Matt Roeschke
* ``@niteya-shah``
* Peter Würtz
* Sergey Pokhodenko
* Shyam Saladi
* ``@rht``
* Siu Kwan Lam (core dev)
* Stuart Archibald (core dev)
* Todd A. Anderson (core dev)
* Valentin Haenel (core dev)

Version 0.50.1 (Jun 24, 2020)
-----------------------------

This is a bugfix release for 0.50.0; it fixes a critical bug in error reporting and a number of other smaller issues:

* PR #5861: Added except for possible Windows get_terminal_size exception
* PR #5876: Improve undefined variable error message
* PR #5884: Update the deprecation notices for 0.50.1
* PR #5889: Fixes literally not forcing re-dispatch for inline='always'
* PR #5912: Fix bad attr access on certain typing templates breaking exceptions.
* PR #5918: Fix cuda test due to #5876

Authors:

* ``@pepping_dore``
* Lucio Fernandez-Arjona
* Siu Kwan Lam (core dev)
* Stuart Archibald (core dev)

Version 0.50.0 (Jun 10, 2020)
-----------------------------

This is a more usual release in comparison to the others that have been made in the last six months. It comprises the result of a number of maintenance tasks along with some new features and a lot of bug fixes.

Highlights of core feature changes include:

* The compilation chain is now based on LLVM 9.
* The error handling and reporting system has been improved to reduce the size of error messages, and also improve quality and specificity.
* The CUDA target has more stream constructors available and a new function for compiling to PTX without linking and loading the code to a device (a sketch follows below). Further, the macro-based system for describing CUDA threads and blocks has been replaced with standard typing and lowering implementations, for improved debugging and extensibility.
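The PTX compilation entry point referred to above is understood to be ``numba.cuda.compile_ptx`` (added in #5561). A minimal sketch, assuming a CUDA toolkit is installed (no device is needed to compile)::

    from numba import cuda, float32

    def axpy(r, a, x, y):
        i = cuda.grid(1)
        if i < r.size:
            r[i] = a * x[i] + y[i]

    # Compile to PTX for inspection, without linking or loading the code
    # onto a device; returns the PTX source and the inferred return type.
    ptx, resty = cuda.compile_ptx(
        axpy, (float32[:], float32, float32[:], float32[:])
    )
    print(ptx.splitlines()[0])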
IMPORTANT: The backwards compatibility shim, that was present in 0.49.x to accommodate the refactoring of Numba's internals, has been removed. If a module is imported from a moved location an ``ImportError`` will occur.

General Enhancements:

* PR #5060: Enables np.sum for timedelta64
* PR #5225: Adjust interpreter to make conditionals predicates via bool() call.
* PR #5506: Jitclass static methods
* PR #5580: Revert shim
* PR #5591: Fix #5525 Add figure for total memory to ``numba -s`` output.
* PR #5616: Simplify the ufunc kernel registration
* PR #5617: Remove /examples from the Numba repo.
* PR #5673: Fix inliners to run all passes on IR and clean up correctly.
* PR #5700: Make it easier to understand type inference: add SSA dump, use for ``DEBUG_TYPEINFER``
* PR #5702: Fixes for LLVM 9
* PR #5722: Improve error messages.
* PR #5758: Support NumPy 1.18

Fixes:

* PR #5390: add error handling for lookup_module
* PR #5464: Jitclass drops annotations to avoid error
* PR #5478: Fix #5471. Issue with omitted type not recognized as literal value.
* PR #5517: Fix numba.typed.List extend for singleton and empty iterable
* PR #5549: Check type getitem
* PR #5568: Add skip to entrypoint test on windows
* PR #5581: Revert #5568
* PR #5602: Fix segfault caused by pop from numba.typed.List
* PR #5645: Fix SSA redundant CFG computation
* PR #5686: Fix issue with SSA not minimal
* PR #5689: Fix bug in unified_function_type (issue 5685)
* PR #5694: Skip part of slice array analysis if any part is not analyzable.
* PR #5697: Fix usedef issue with parfor loopnest variables.
* PR #5705: A fix for cases where SSA looks like a reduction variable.
* PR #5714: Fix bug in test
* PR #5717: Initialise Numba extensions ahead of any compilation starting.
* PR #5721: Fix array iterator layout.
* PR #5738: Unbreak master on buildfarm
* PR #5757: Force LLVM to use ZMM registers for vectorization.
* PR #5764: fix flake8 errors
* PR #5768: Interval example: fix import
* PR #5781: Moving record array examples to a test module
* PR #5791: Fix up no cgroups problem
* PR #5795: Restore refct removal pass and make it strict
* PR #5807: Skip failing test on POWER8 due to PPC CTR Loop problem.
* PR #5812: Fix side issue from #5792, @overload inliner cached IR being mutated.
* PR #5815: Pin llvmlite to 0.33
* PR #5833: Fixes the source location appearing incorrectly in error messages.

CUDA Enhancements/Fixes:

* PR #5347: CUDA: Provide more stream constructors
* PR #5388: CUDA: Fix OOB write in test_round{f4,f8}
* PR #5437: Fix #5429: Exception using ``.get_ipc_handle(...)`` on array from ``as_cuda_array(...)``
* PR #5481: CUDA: Replace macros with typing and lowering implementations
* PR #5556: CUDA: Make atomic semantics match Python / NumPy, and fix #5458
* PR #5558: CUDA: Only release primary ctx if retained
* PR #5561: CUDA: Add function for compiling to PTX (+ other small fixes)
* PR #5573: CUDA: Skip tests under cuda-memcheck that hang it
* PR #5578: Implement math.modf for CUDA target
* PR #5704: CUDA Eager compilation: Fix max_registers kwarg
* PR #5718: CUDA lib path tests: unset CUDA_PATH when CUDA_HOME unset
* PR #5800: Fix LLVM 9 IR for NVVM
* PR #5803: CUDA Update expected error messages to fix #5797

Documentation Updates:

* PR #5546: DOC: Add documentation about cost model to inlining notes.
* PR #5653: Update doc with respect to try-finally case

Enhancements from user contributed PRs (with thanks!):

* Elias Kuthe fixed an issue with imports in the Interval example in #5768
* Eric Wieser simplified the ufunc kernel registration mechanism in #5616
* Ethan Pronovost patched a problem with ``__annotations__`` in ``jitclass`` in #5464, fixed a bug that led to infinite loops in Numba's ``Type.__getitem__`` in #5549, fixed a bug in ``np.arange`` testing in #5714 and added support for ``@staticmethod`` to ``jitclass`` in #5506.
* Gabriele Gemmi implemented ``math.modf`` for the CUDA target in #5578
* Graham Markall contributed many patches, largely to the CUDA target, as follows:

  * #5347: CUDA: Provide more stream constructors
  * #5388: CUDA: Fix OOB write in test_round{f4,f8}
  * #5437: Fix #5429: Exception using ``.get_ipc_handle(...)`` on array from ``as_cuda_array(...)``
  * #5481: CUDA: Replace macros with typing and lowering implementations
  * #5556: CUDA: Make atomic semantics match Python / NumPy, and fix #5458
  * #5558: CUDA: Only release primary ctx if retained
  * #5561: CUDA: Add function for compiling to PTX (+ other small fixes)
  * #5573: CUDA: Skip tests under cuda-memcheck that hang it
  * #5648: Unset the memory manager after EMM Plugin tests
  * #5700: Make it easier to understand type inference: add SSA dump, use for ``DEBUG_TYPEINFER``
  * #5704: CUDA Eager compilation: Fix max_registers kwarg
  * #5718: CUDA lib path tests: unset CUDA_PATH when CUDA_HOME unset
  * #5800: Fix LLVM 9 IR for NVVM
  * #5803: CUDA Update expected error messages to fix #5797

* Guilherme Leobas updated the documentation surrounding try-finally in #5653
* Hameer Abbasi added documentation about the cost model to the notes on inlining in #5546
* Jacques Gaudin rewrote ``numba -s`` to produce and consume a dictionary of output about the current system in #5591
* James Bourbeau updated min/argmin and max/argmax to handle non-leading nans (via #5758)
* Lucio Fernandez-Arjona moved the record array examples to a test module in #5781 and added ``np.timedelta64`` handling to ``np.sum`` in #5060
* Pearu Peterson fixed a bug in unified_function_type in #5689
* Sergey Pokhodenko fixed an issue impacting LLVM 10 regarding vectorization widths on Intel SkyLake processors in #5757
* Shan Sikdar added error handling for ``lookup_module`` in #5390
* @toddrme2178 added CI testing for NumPy 1.18 (via #5758)

Authors:

* Elias Kuthe
* Eric Wieser
* Ethan Pronovost
* Gabriele Gemmi
* Graham Markall
* Guilherme Leobas
* Hameer Abbasi
* Jacques Gaudin
* James Bourbeau
* Lucio Fernandez-Arjona
* Pearu Peterson
* Sergey Pokhodenko
* Shan Sikdar
* Siu Kwan Lam (core dev)
* Stuart Archibald (core dev)
* Todd A. Anderson (core dev)
* ``@toddrme2178``
* Valentin Haenel (core dev)

Version 0.49.1 (May 7, 2020)
----------------------------

This is a bugfix release for 0.49.0; it fixes some residual issues with SSA form, a critical bug in the branch pruning logic and a number of other smaller issues:

* PR #5587: Fixed #5586 Threading Implementation Typos
* PR #5592: Fixes #5583 Remove references to cffi_support from docs and examples
* PR #5614: Fix invalid type in resolve for comparison expr in parfors.
* PR #5624: Fix erroneous rewrite of predicate to bit const on prune.
* PR #5627: Fixes #5623, SSA local def scan based on invalid equality assumption.
* PR #5629: Fixes naming error in array_exprs
* PR #5630: Fix #5570. Incorrect race variable detection due to SSA naming.
* PR #5638: Make literal_unroll function work as a freevar.
* PR #5648: Unset the memory manager after EMM Plugin tests
* PR #5651: Fix some SSA issues
* PR #5652: Pin to sphinx=2.4.4 to avoid problem with C declaration
* PR #5658: Fix unifying undefined first class function types issue
* PR #5669: Update example in 5m guide WRT SSA type stability.
* PR #5676: Restore ``numba.types`` as public API

Authors:

* Graham Markall
* Juan Manuel Cruz Martinez
* Pearu Peterson
* Sean Law
* Stuart Archibald (core dev)
* Siu Kwan Lam (core dev)

Version 0.49.0 (Apr 16, 2020)
-----------------------------

This release is very large in terms of code changes. Large scale removal of unsupported Python and NumPy versions has taken place along with a significant amount of refactoring to simplify the Numba code base to make it easier for contributors. Numba's intermediate representation has also undergone some important changes to solve a number of long standing issues. In addition some new features have been added and a large number of bugs have been fixed!

IMPORTANT: In this release Numba's internals have moved about a lot. A backwards compatibility "shim" is provided for this release so as to not immediately break projects using Numba's internals. If a module is imported from a moved location the shim will issue a deprecation warning and suggest how to update the import statement for the new location. The shim will be removed in 0.50.0!

Highlights of core feature changes include:

* Removal of all Python 2 related code and also updating the minimum supported Python version to 3.6, the minimum supported NumPy version to 1.15 and the minimum supported SciPy version to 1.0. (Stuart Archibald).
* Refactoring of the Numba code base. The code is now organised into submodules by functionality. This cleans up Numba's top level namespace. (Stuart Archibald).
* Introduction of an ``ir.Del`` free static single assignment form for Numba's intermediate representation (Siu Kwan Lam and Stuart Archibald).
* An OpenMP-like thread masking API has been added for use with code using the parallel CPU backends (Aaron Meurer and Stuart Archibald; a sketch follows this list).
* For the CUDA target, all kernel launches now require a configuration; this prevents accidental launches of kernels with the old default of a single thread in a single block. The hard-coded autotuner is also now removed, such tuning is deferred to CUDA API calls that provide the same functionality (Graham Markall).
* The CUDA target also gained an External Memory Management plugin interface to allow Numba to use another CUDA-aware library for all memory allocations and deallocations (Graham Markall).
* The Numba Typed List container gained support for construction from iterables (Valentin Haenel).
* Experimental support was added for first-class function types (Pearu Peterson).
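A minimal sketch of the OpenMP-like thread masking API mentioned above, assuming a parallel-capable build; ``set_num_threads`` masks out part of the launched thread pool at runtime and ``get_num_threads`` reports the current mask::

    import numpy as np
    from numba import njit, prange, get_num_threads, set_num_threads

    @njit(parallel=True)
    def row_sums(a):
        out = np.empty(a.shape[0])
        for i in prange(a.shape[0]):
            out[i] = a[i].sum()
        return out

    a = np.ones((8, 1000))
    set_num_threads(2)   # mask: use only 2 of the available threads
    print(row_sums(a), get_num_threads())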
Enhancements from user contributed PRs (with thanks!):

* Aaron Meurer added support for thread masking at runtime in #4615.
* Andreas Sodeur fixed a long standing bug that was preventing ``cProfile`` from working with Numba JIT compiled functions in #4476.
* Arik Funke fixed error messages in ``test_array_reductions`` (#5278), fixed an issue with test discovery (#5239), made it so the documentation would build again on windows (#5453) and fixed a nested list problem in the docs in #5489.
* Antonio Russo fixed a SyntaxWarning in #5252.
* Eric Wieser added support for inferring the types of object arrays (#5348) and iterating over 2D arrays (#5115), also fixed some compiler warnings due to missing (void) in #5222. He also helped improve the "shim" and associated warnings in #5485, #5488, #5498 and partly #5532.
* Ethan Pronovost fixed a problem with the shim erroneously warning for jitclass use in #5454 and also prevented illegal return values in jitclass ``__init__`` in #5505.
* Gabriel Majeri added SciPy 2019 talks to the docs in #5106.
* Graham Markall changed the Numba HTML documentation theme to resolve a number of long standing issues in #5346. Also contributed were a large number of CUDA enhancements and fixes, namely:

  * #5519: CUDA: Silence the test suite - Fix #4809, remove autojit, delete prints
  * #5443: Fix #5196: Docs: assert in CUDA only enabled for debug
  * #5436: Fix #5408: test_set_registers_57 fails on Maxwell
  * #5423: Fix #5421: Add notes on printing in CUDA kernels
  * #5400: Fix #4954, and some other small CUDA testsuite fixes
  * #5328: NBEP 7: External Memory Management Plugin Interface
  * #5144: Fix #4875: Make #2655 test with debug expect to pass
  * #5323: Document lifetime semantics of CUDA Array Interface
  * #5061: Prevent kernel launch with no configuration, remove autotuner
  * #5099: Fix #5073: Slices of dynamic shared memory all alias
  * #5136: CUDA: Enable asynchronous operations on the default stream
  * #5085: Support other itemsizes with view
  * #5059: Docs: Explain how to use Memcheck with Numba, fixups in CUDA documentation
  * #4957: Add notes on overwriting gufunc inputs to docs

* Greg Jennings fixed an issue with ``np.random.choice`` not acknowledging the RNG seed correctly in #3897/#5310.
* Guilherme Leobas added support for ``np.isnat`` in #5293.
* Henry Schreiner made the llvmlite requirements more explicit in requirements.txt in #5150.
* Ivan Butygin helped fix an issue with parfors sequential lowering in #5114/#5250.
* Jacques Gaudin fixed a bug for Python >= 3.8 in ``numba -s`` in #5548.
* Jim Pivarski added some hints for debugging entry points in #5280.
* John Kirkham added ``numpy.dtype`` coercion for the ``dtype`` argument to CUDA device arrays in #5252.
* Leo Fang added a list of libraries that support ``__cuda_array_interface__`` in #5104.
* Lucio Fernandez-Arjona added ``getitem`` for the NumPy record type when the index is a ``StringLiteral`` type in #5182 and improved the documentation rendering via additions to the TOC and removal of numbering in #5450.
* Mads R. B. Kristensen fixed an issue with ``__cuda_array_interface__`` not requiring the context in #5189.
* Marcin Tolysz added support for nested modules in AOT compilation in #5174.
* Mike Williams fixed some issues with NumPy records and ``getitem`` in the CUDA simulator in #5343.
* Pearu Peterson added experimental support for first-class function types in #5287 (and fixes in #5459, #5473/#5429, and #5557).
* Ravi Teja Gutta added support for ``np.flip`` in #4376/#5313.
* Rohit Sanjay fixed an issue with type refinement for unicode input supplied to typed-list ``extend()`` (#5295) and fixed unicode ``.strip()`` to strip all whitespace characters in #5213.
* Vladimir Lukyanov fixed an awkward bug in ``typed.dict`` in #5361, added a fix to ensure the LLVM and assembly dumps are highlighted correctly in #5357 and implemented a Numba IR Lexer and added highlighting to Numba IR dumps in #5333.
* hdf fixed an issue with the ``boundscheck`` flag in the CUDA jit target in #5257.

General Enhancements:

* PR #4615: Allow masking threads out at runtime
* PR #4798: Add branch pruning based on raw predicates.
* PR #5115: Add support for iterating over 2D arrays
* PR #5117: Implement ord()/chr()
* PR #5122: Remove Python 2.
* PR #5127: Calling convention adaptor for boxer/unboxer to call jitcode
* PR #5151: implement None-typed typed-list
* PR #5174: Nested modules https://github.com/numba/numba/issues/4739
* PR #5182: Add getitem for Record type when index is StringLiteral
* PR #5185: extract code-gen utilities from closures
* PR #5197: Refactor Numba, part I
* PR #5210: Remove more unsupported Python versions from build tooling.
* PR #5212: Adds support for viewing the CFG of the ELF disassembly.
* PR #5227: Immutable typed-list
* PR #5231: Added support for ``np.asarray`` to be used with ``numba.typed.List``
* PR #5235: Added property ``dtype`` to ``numba.typed.List``
* PR #5272: Refactor parfor: split up ParforPass
* PR #5281: Make IR ir.Del free until legalized.
* PR #5287: First-class function type
* PR #5293: np.isnat
* PR #5294: Create typed-list from iterable
* PR #5295: refine typed-list on unicode input to extend
* PR #5296: Refactor parfor: better exception from passes
* PR #5308: Provide ``numba.extending.is_jitted``
* PR #5320: refactor array_analysis
* PR #5325: Let literal_unroll accept types.Named*Tuple
* PR #5330: refactor common operation in parfor lowering into a new util
* PR #5333: Add: highlight Numba IR dump
* PR #5342: Support for tuples passed to parfors.
* PR #5348: Add support for inferring the types of object arrays
* PR #5351: SSA again
* PR #5352: Add shim to accommodate refactoring.
* PR #5356: implement allocated parameter in njit
* PR #5369: Make test ordering more consistent across feature availability
* PR #5428: Wip/deprecate jitclass location
* PR #5441: Additional changes to first class function
* PR #5455: Move to llvmlite 0.32.*
* PR #5457: implement repr for untyped lists

Fixes:

* PR #4476: Another attempt at fixing frame injection in the dispatcher tracing path
* PR #4942: Prevent some parfor aliasing. Rename copied function var to prevent recursive type locking.
* PR #5092: Fix 5087
* PR #5150: More explicit llvmlite requirement in requirements.txt
* PR #5172: fix version spec for llvmlite
* PR #5176: Normalize kws going into fold_arguments.
* PR #5183: pass 'inline' explicitly to overload
* PR #5193: Fix CI failure due to missing files when installed
* PR #5213: Fix ``.strip()`` to strip all whitespace characters
* PR #5216: Fix namedtuple mistreated by dispatcher as simple tuple
* PR #5222: Fix compiler warnings due to missing (void)
* PR #5232: Fixes a bad import that breaks master
* PR #5239: fix test discovery for unittest
* PR #5247: Continue PR #5126
* PR #5250: Part fix/5098
* PR #5252: Trivially fix SyntaxWarning
* PR #5276: Add prange variant to has_no_side_effect.
* PR #5278: fix error messages in test_array_reductions
* PR #5310: PR #3897 continued
* PR #5313: Continues PR #4376
* PR #5318: Remove AUTHORS file reference from MANIFEST.in
* PR #5327: Add warning if FNV hashing is found as the default for CPython.
* PR #5338: Remove refcount pruning pass
* PR #5345: Disable test failing due to removed pass.
* PR #5357: Small fix to have llvm and asm highlighted properly
* PR #5361: 5081 typed.dict
* PR #5431: Add tolerance to numba extension module entrypoints.
* PR #5432: Fix code causing compiler warnings.
* PR #5445: Remove undefined variable
* PR #5454: Don't warn for numba.experimental.jitclass
* PR #5459: Fixes issue 5448
* PR #5480: Fix for #5477, literal_unroll KeyError searching for getitems
* PR #5485: Show the offending module in "no direct replacement" error message
* PR #5488: Add missing ``numba.config`` shim
* PR #5495: Fix missing null initializer for variable after phi strip
* PR #5498: Make the shim deprecation warnings work on python 3.6 too
* PR #5505: Better error message if __init__ returns value
* PR #5527: Attempt to fix #5518
* PR #5529: PR #5473 continued
* PR #5532: Make ``numba.`` available without an import
* PR #5542: Fixes RC2 module shim bug
* PR #5548: Fix #5537 Removed reference to ``platform.linux_distribution``
* PR #5555: Fix #5515 by reverting changes to ArrayAnalysis
* PR #5557: First-class function call cannot use keyword arguments
* PR #5569: Fix RewriteConstGetitems not registering calltype for new expr
* PR #5571: Pin down llvmlite requirement

CUDA Enhancements/Fixes:

* PR #5061: Prevent kernel launch with no configuration, remove autotuner
* PR #5085: Support other itemsizes with view
* PR #5099: Fix #5073: Slices of dynamic shared memory all alias
* PR #5104: Add a list of libraries that support __cuda_array_interface__
* PR #5136: CUDA: Enable asynchronous operations on the default stream
* PR #5144: Fix #4875: Make #2655 test with debug expect to pass
* PR #5189: __cuda_array_interface__ not requiring context
* PR #5253: Coerce ``dtype`` to ``numpy.dtype``
* PR #5257: boundscheck fix
* PR #5319: Make user facing error string use abs path not rel.
* PR #5323: Document lifetime semantics of CUDA Array Interface
* PR #5328: NBEP 7: External Memory Management Plugin Interface
* PR #5343: Fix cuda spoof
* PR #5400: Fix #4954, and some other small CUDA testsuite fixes
* PR #5436: Fix #5408: test_set_registers_57 fails on Maxwell
* PR #5519: CUDA: Silence the test suite - Fix #4809, remove autojit, delete prints

Documentation Updates:

* PR #4957: Add notes on overwriting gufunc inputs to docs
* PR #5059: Docs: Explain how to use Memcheck with Numba, fixups in CUDA documentation
* PR #5106: Add SciPy 2019 talks to docs
* PR #5147: Update master for 0.48.0 updates
* PR #5155: Explain what inlining at Numba IR level will do
* PR #5161: Fix README.rst formatting
* PR #5207: Remove AUTHORS list
* PR #5249: fix target path for See also
* PR #5262: fix typo in inlining docs
* PR #5270: fix 'see also' in typeddict docs
* PR #5280: Added some hints for debugging entry points.
* PR #5297: Update docs with intro to {g,}ufuncs.
* PR #5326: Update installation docs with OpenMP requirements.
* PR #5346: Docs: use sphinx_rtd_theme
* PR #5366: Remove reference to Python 2.7 in install check output
* PR #5423: Fix #5421: Add notes on printing in CUDA kernels
* PR #5438: Update package deps for doc building.
* PR #5440: Bump deprecation notices.
* PR #5443: Fix #5196: Docs: assert in CUDA only enabled for debug
* PR #5450: Docs: remove numbers and add titles to TOC
* PR #5453: fix building docs on windows
* PR #5489: docs: fix rendering of nested bulleted list

CI updates:

* PR #5314: Update the image used in Azure CI for OSX.
* PR #5360: Remove Travis CI badge.

Authors:

* Aaron Meurer
* Andreas Sodeur
* Antonio Russo
* Arik Funke
* Eric Wieser
* Ethan Pronovost
* Gabriel Majeri
* Graham Markall
* Greg Jennings
* Guilherme Leobas
* hdf
* Henry Schreiner
* Ivan Butygin
* Jacques Gaudin
* Jim Pivarski
* John Kirkham
* Leo Fang
* Lucio Fernandez-Arjona
* Mads R. B. Kristensen
* Marcin Tolysz
* Mike Williams
* Pearu Peterson
* Ravi Teja Gutta
* Rohit Sanjay
* Siu Kwan Lam (core dev)
* Stan Seibert (core dev)
* Stuart Archibald (core dev)
* Todd A. Anderson (core dev)
* Valentin Haenel (core dev)
* Vladimir Lukyanov

Version 0.48.0 (Jan 27, 2020)
-----------------------------

This release is particularly small as its purpose was to catch anything that missed the 0.47.0 deadline (the deadline deliberately coincided with the end of support for Python 2.7). The next release will be considerably larger.

The core changes in this release are dominated by the start of the clean up needed for the end of Python 2.7 support, improvements to the CUDA target and support for numerous additional unicode string methods (a sketch of a few of these follows the contributor list below).

Enhancements from user contributed PRs (with thanks!):

* Brian Wignall fixed more spelling typos in #4998.
* Denis Smirnov added support for string methods ``capitalize`` (#4823), ``casefold`` (#4824), ``swapcase`` (#4825), ``rsplit`` (#4834), ``partition`` (#4845) and ``splitlines`` (#4849).
* Elena Totmenina extended support for string methods ``startswith`` (#4867) and added ``endswith`` (#4868).
* Eric Wieser made ``type_callable`` return the decorated function itself in #4760
* Ethan Pronovost added support for ``np.argwhere`` in #4617
* Graham Markall contributed a large number of CUDA enhancements and fixes, namely:

  * #5068: Remove Python 3.4 backports from utils
  * #4975: Make ``device_array_like`` create contiguous arrays (Fixes #4832)
  * #5023: Don't launch ForAll kernels with 0 elements (Fixes #5017)
  * #5016: Fix various issues in CUDA library search (Fixes #4979)
  * #5014: Enable use of records and bools for shared memory, remove ddt, add additional transpose tests
  * #4964: Fix #4628: Add more appropriate typing for CUDA device arrays
  * #5007: test_consuming_strides: Keep dev array alive
  * #4997: State that CUDA Toolkit 8.0 required in docs

* James Bourbeau added the Python 3.8 classifier to setup.py in #5027.
* John Kirkham added a clarification to the ``__cuda_array_interface__`` documentation in #5049.
* Leo Fang fixed an indexing problem in ``dummyarray`` in #5012.
* Marcel Bargull fixed a build and test issue for Python 3.8 in #5029.
* Maria Rubtsov added support for string methods ``isdecimal`` (#4842), ``isdigit`` (#4843), ``isnumeric`` (#4844) and ``replace`` (#4865).
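A small sketch exercising a few of the newly supported string methods inside an ``@njit`` function (the method choice here is illustrative; see the PR lists for the full set)::

    from numba import njit

    @njit
    def tidy(s):
        # capitalize/casefold/replace/isdecimal are among the methods
        # gaining nopython-mode support in this release
        if s.isdecimal():
            return s
        return s.casefold().replace("-", " ").capitalize()

    print(tidy("nUMBA-0.48"))   # -> "Numba 0.48"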
* PR #5027: Add Python 3.8 classifier to setup.py
* PR #5046: Update setup.py and buildscripts for dependency requirements
* PR #5053: Convert from arrays to names in define() and don't invalidate
  for multiple consistent defines.
* PR #5058: Permit mixed int types in wrap_index
* PR #5078: Catch the use of global typed-list in JITed functions
* PR #5092: Fix #5087, bug in bytecode analysis.

CUDA Enhancements/Fixes:

* PR #4964: Fix #4628: Add more appropriate typing for CUDA device arrays
* PR #4975: Make ``device_array_like`` create contiguous arrays (Fixes #4832)
* PR #4997: State that CUDA Toolkit 8.0 required in docs
* PR #5007: test_consuming_strides: Keep dev array alive
* PR #5012: Fix IndexError when accessing the "-1" element of dummyarray
* PR #5014: Enable use of records and bools for shared memory, remove ddt,
  add additional transpose tests
* PR #5016: Fix various issues in CUDA library search (Fixes #4979)
* PR #5023: Don't launch ForAll kernels with 0 elements (Fixes #5017)
* PR #5068: Remove Python 3.4 backports from utils

Documentation Updates:

* PR #5049: Clarify what dictionary means
* PR #5062: Update docs for updated version requirements
* PR #5090: Update deprecation notices for 0.48.0

CI updates:

* PR #5029: Install optional dependencies for Python 3.8 tests
* PR #5040: Drop Py2.7 and Py3.5 from public CI
* PR #5048: Fix CI py38

Authors:

* Brian Wignall
* Denis Smirnov
* Elena Totmenina
* Eric Wieser
* Ethan Pronovost
* Graham Markall
* James Bourbeau
* John Kirkham
* Leo Fang
* Marcel Bargull
* Maria Rubtsov
* Siu Kwan Lam (core dev)
* Stan Seibert (core dev)
* Stuart Archibald (core dev)
* Todd A. Anderson (core dev)
* Valentin Haenel (core dev)

Version 0.47.0 (Jan 2, 2020)
----------------------------

This release expands the capability of Numba in a number of important areas
and is also significant as it is the last major point release with support
for Python 2 and Python 3.5 included. The next release (0.48.0) will be for
Python 3.6+ only! (This follows NumPy's deprecation schedule as specified in
NEP 29.)

Highlights of core feature changes include:

* Full support for Python 3.8 (Siu Kwan Lam)
* Opt-in bounds checking (Aaron Meurer)
* Support for ``map``, ``filter`` and ``reduce`` (Stuart Archibald)

Intel also kindly sponsored research and development that led to some
exciting new features:

* Initial support for basic ``try``/``except`` use (Siu Kwan Lam)
* The ability to pass functions created from closures/lambdas as arguments
  (Stuart Archibald)
* ``sorted`` and ``list.sort()`` now accept the ``key`` argument (Stuart
  Archibald and Siu Kwan Lam)
* A new compiler pass triggered through the use of the function
  ``numba.literal_unroll`` which permits iteration over heterogeneous tuples
  and constant lists of constants (Stuart Archibald).

Enhancements from user contributed PRs (with thanks!):

* Ankit Mahato added a reference to a new talk on Numba at PyCon India 2019
  in #4862
* Brian Wignall kindly fixed some spelling mistakes and typos in #4909
* Denis Smirnov wrote numerous methods to considerably enhance string
  support, including:

  * ``str.rindex()`` in #4861
  * ``str.isprintable()`` in #4836
  * ``str.index()`` in #4860
  * ``start/end`` parameters for ``str.find()`` in #4866
  * ``str.isspace()`` in #4835
  * ``str.isidentifier()`` in #4837
  * ``str.rpartition()`` in #4841
  * ``str.lower()`` and ``str.islower()`` in #4651

* Elena Totmenina implemented ``str.isalnum()``, ``str.isalpha()`` and
  ``str.isascii()`` in #4839, #4840 and #4847 respectively.
* Eric Larson fixed a bug in literal comparison in #4710
* Ethan Pronovost updated the ``np.arange`` implementation in #4770 to allow
  the use of the ``dtype`` keyword argument and also added ``bool``
  implementations for several types in #4715.
* Graham Markall fixed some issues with the CUDA target, namely:

  * #4931: Added physical limits for CC 7.0 / 7.5 to CUDA autotune
  * #4934: Fixed bugs in TestCudaWarpOperations
  * #4938: Improved errors / warnings for the CUDA vectorize decorator

* Guilherme Leobas fixed a typo in the ``urem`` implementation in #4667
* Isaac Virshup contributed a number of patches that fixed bugs, added
  support for more NumPy functions and enhanced Python feature support.
  These contributions included:

  * #4729: Allow array construction with mixed type shape tuples
  * #4904: Implementing ``np.lcm``
  * #4780: Implement np.gcd and math.gcd
  * #4779: Make slice constructor more similar to python.
  * #4707: Added support for slice.indices
  * #4578: Clarify numba ufunc supported features

* James Bourbeau fixed some issues with tooling: #4794 added ``setuptools``
  as a dependency and #4501 added pre-commit hooks for ``flake8`` compliance.
* Leo Fang made ``numba.dummyarray.Array`` iterable in #4629
* Marc Garcia fixed the ``numba.jit`` parameter name signature_or_function
  in #4703
* Marcelo Duarte Trevisani patched the llvmlite requirement to ``>=0.30.0``
  in #4725
* Matt Cooper fixed a long standing CI problem in #4737 by removing
  maxParallel from Azure Pipelines.
* Matti Picus fixed an issue with ``collections.abc`` in #4734.
* Rob Ennis patched a bug in ``np.interp`` ``float32`` handling in #4911
* VDimir fixed a bug in array transposition layouts in #4777 and re-enabled
  and fixed some idle tests in #4776.
* Vyacheslav Smirnov enabled support for ``str.istitle()`` in #4645

General Enhancements:

* PR #4432: Bounds checking
* PR #4501: Add pre-commit hooks
* PR #4536: Handle kw args in inliner when callee is a function
* PR #4599: Permits closures to become functions, enables map(), filter()
* PR #4611: Implement method title() for unicode based on CPython
* PR #4645: Enable support for istitle() method for unicode string
* PR #4651: Implement str.lower() and str.islower()
* PR #4652: Implement str.rfind()
* PR #4695: Refactor `overload*` and support `jit_options` and `inline`
* PR #4707: Added support for slice.indices
* PR #4715: Add `bool` overload for several types
* PR #4729: Allow array construction with mixed type shape tuples
* PR #4755: Python3.8 support
* PR #4756: Add parfor support for ndarray.fill.
* PR #4768: Update typeconv error message to ask for sys.executable.
* PR #4770: Update `np.arange` implementation with `@overload`
* PR #4779: Make slice constructor more similar to python.
* PR #4780: Implement np.gcd and math.gcd
* PR #4794: Add setuptools as a dependency
* PR #4802: put git hash into build string
* PR #4803: Better compiler error messages for improperly used reduction
  variables.
* PR #4817: Typed list implement and expose allocation * PR #4818: Typed list faster copy * PR #4835: Implement str.isspace() based on CPython * PR #4836: Implement str.isprintable() based on CPython * PR #4837: Implement str.isidentifier() based on CPython * PR #4839: Implement str.isalnum() based on CPython * PR #4840: Implement str.isalpha() based on CPython * PR #4841: Implement str.rpartition() based on CPython * PR #4847: Implement str.isascii() based on CPython * PR #4851: Add graphviz output for FunctionIR * PR #4854: Python3.8 looplifting * PR #4858: Implement str.expandtabs() based on CPython * PR #4860: Implement str.index() based on CPython * PR #4861: Implement str.rindex() based on CPython * PR #4866: Support params start/end for str.find() * PR #4874: Bump to llvmlite 0.31 * PR #4896: Specialise arange dtype on arch + python version. * PR #4902: basic support for try except * PR #4904: Implement np.lcm * PR #4910: loop canonicalisation and type aware tuple unroller/loop body versioning passes * PR #4961: Update hash(tuple) for Python 3.8. * PR #4977: Implement sort/sorted with key. * PR #4987: Add `is_internal` property to all Type classes. Fixes: * PR #4090: Update to LLVM8 memset/memcpy intrinsic * PR #4582: Convert sub to add and div to mul when doing the reduction across the per-thread reduction array. * PR #4648: Handle 0 correctly as slice parameter. * PR #4660: Remove multiply defined variables from all blocks' equivalence sets. * PR #4672: Fix pickling of dufunc * PR #4710: BUG: Comparison for literal * PR #4718: Change get_call_table to support intermediate Vars. * PR #4725: Requires llvmlite >=0.30.0 * PR #4734: prefer to import from collections.abc * PR #4736: fix flake8 errors * PR #4776: Fix and enable idle tests from test_array_manipulation * PR #4777: Fix transpose output array layout * PR #4782: Fix issue with SVML (and knock-on function resolution effects). * PR #4785: Treat 0d arrays like scalars. * PR #4787: fix missing incref on flags * PR #4789: fix typos in numba/targets/base.py * PR #4791: fix typos * PR #4811: fix spelling in now-failing tests * PR #4852: windowing test should check equality only up to double precision errors * PR #4881: fix refining list by using extend on an iterator * PR #4882: Fix return type in arange and zero step size handling. * PR #4885: suppress spurious RuntimeWarning about ufunc sizes * PR #4891: skip the xfail test for now. Py3.8 CFG refactor seems to have changed the test case * PR #4892: regex needs to accept singular form of "argument" * PR #4901: fix typed list equals * PR #4909: Fix some spelling typos * PR #4911: np.interp bugfix for float32 handling * PR #4920: fix creating list with JIT disabled * PR #4921: fix creating dict with JIT disabled * PR #4935: Better handling of prange with multiple reductions on the same variable. * PR #4946: Improve the error message for `raise `. * PR #4955: Move overload of literal_unroll to avoid circular dependency that breaks Python 2.7 * PR #4962: Fix test error on windows * PR #4973: Fixes a bug in the relabelling logic in literal_unroll. * PR #4978: Fix overload_method problem with stararg * PR #4981: Add ind_to_const to enable fewer equivalence classes. * PR #4991: Continuation of #4588 (Let dead code removal handle removing more of the unneeded code after prange conversion to parfor) * PR #4994: Remove xfail for test which has since had underlying issue fixed. * PR #5018: Fix #5011. 
* PR #5019: skip pycc test on Python 3.8 + macOS because of distutils issue

CUDA Enhancements/Fixes:

* PR #4629: Make numba.dummyarray.Array iterable
* PR #4675: Bump cuda array interface to version 2
* PR #4741: Update choosing the "CUDA_PATH" for windows
* PR #4838: Permit ravel('A') for contig device arrays in CUDA target
* PR #4931: Add physical limits for CC 7.0 / 7.5 to autotune
* PR #4934: Fix fails in TestCudaWarpOperations
* PR #4938: Improve errors / warnings for cuda vectorize decorator

Documentation Updates:

* PR #4418: Directed graph task roadmap
* PR #4578: Clarify numba ufunc supported features
* PR #4655: fix sphinx build warning
* PR #4667: Fix typo on urem implementation
* PR #4669: Add link to ParallelAccelerator paper.
* PR #4703: Fix numba.jit parameter name signature_or_function
* PR #4862: Addition of PyCon India 2019 talk on Numba
* PR #4947: Document jitclass with numba.typed use.
* PR #4958: Add docs for `try..except`
* PR #4993: Update deprecations for 0.47

CI Updates:

* PR #4737: remove maxParallel from Azure Pipelines
* PR #4767: pin to 2.7.16 for py27 on osx
* PR #4781: WIP/runtest cf pytest

Authors:

* Aaron Meurer
* Ankit Mahato
* Brian Wignall
* Denis Smirnov
* Ehsan Totoni (core dev)
* Elena Totmenina
* Eric Larson
* Ethan Pronovost
* Giovanni Cavallin
* Graham Markall
* Guilherme Leobas
* Isaac Virshup
* James Bourbeau
* Leo Fang
* Marc Garcia
* Marcelo Duarte Trevisani
* Matt Cooper
* Matti Picus
* Rob Ennis
* Rujal Desai
* Siu Kwan Lam (core dev)
* Stan Seibert (core dev)
* Stuart Archibald (core dev)
* Todd A. Anderson (core dev)
* VDimir
* Valentin Haenel (core dev)
* Vyacheslav Smirnov

Version 0.46.0
--------------

This release significantly reworked one of the main parts of Numba, the
compiler pipeline, to make it more extensible and easier to use. The purpose
of this was to continue enhancing Numba's ability for use as a compiler
toolkit. In a similar vein, Numba now has an extension registration
mechanism to allow other Numba-using projects to automatically have their
Numba JIT compilable functions discovered. There were also a number of other
related compiler toolkit enhancements added along with some more NumPy
features and a lot of bug fixes.

This release has updated the CUDA Array Interface specification to version
2, which clarifies the `strides` attribute for C-contiguous arrays and
specifies the treatment for zero-size arrays. The implementation in Numba
has been changed and may affect downstream packages relying on the old
behavior (see issue #4661).

Enhancements from user contributed PRs (with thanks!):

* Aaron Meurer fixed some Python issues in the code base in #4345 and #4341.
* Ashwin Srinath fixed a CUDA performance bug via #4576.
* Ethan Pronovost added support for triangular indices functions in #4601
  (the NumPy functions ``tril_indices``, ``tril_indices_from``,
  ``triu_indices``, and ``triu_indices_from``).
* Gerald Dalley fixed a tear-down race occurring in Python 2.
* Gregory R. Lee fixed the use of deprecated ``inspect.getargspec``.
* Guilherme Leobas contributed five PRs, adding support for ``np.append``
  and ``np.count_nonzero`` in #4518 and #4386. The typed List was fixed to
  accept unsigned integers in #4510. #4463 made a fix to NamedTuple
  internals and #4397 updated the docs for ``np.sum``.
* James Bourbeau added a new feature to permit the automatic application of
  the `jit` decorator to a whole module in #4331.
Also some small fixes to the docs and the code base were made in #4447 and #4433, and a fix to inplace array operation in #4228. * Jim Crist fixed a bug in the rendering of patched errors in #4464. * Leo Fang updated the CUDA Array Interface contract in #4609. * Pearu Peterson added support for Unicode based NumPy arrays in #4425. * Peter Andreas Entschev fixed a CUDA concurrency bug in #4581. * Lucio Fernandez-Arjona extended Numba's ``np.sum`` support to now accept the ``dtype`` kwarg in #4472. * Pedro A. Morales Maries added support for ``np.cross`` in #4128 and also added the necessary extension ``numba.numpy_extensions.cross2d`` in #4595. * David Hoese, Eric Firing, Joshua Adelman, and Juan Nunez-Iglesias all made documentation fixes in #4565, #4482, #4455, #4375 respectively. * Vyacheslav Smirnov and Rujal Desai enabled support for ``count()`` on unicode strings in #4606. General Enhancements: * PR #4113: Add rewrite for semantic constants. * PR #4128: Add np.cross support * PR #4162: Make IR comparable and legalize it. * PR #4208: R&D inlining, jitted and overloaded. * PR #4331: Automatic JIT of called functions * PR #4353: Inspection tool to check what numba supports * PR #4386: Implement np.count_nonzero * PR #4425: Unicode array support * PR #4427: Entrypoints for numba extensions * PR #4467: Literal dispatch * PR #4472: Allow dtype input argument in np.sum * PR #4513: New compiler. * PR #4518: add support for np.append * PR #4554: Refactor NRT C-API * PR #4556: 0.46 scheduled deprecations * PR #4567: Add env var to disable performance warnings. * PR #4568: add np.array_equal support * PR #4595: Implement numba.cross2d * PR #4601: Add triangular indices functions * PR #4606: Enable support for count() method for unicode string Fixes: * PR #4228: Fix inplace operator error for arrays * PR #4282: Detect and raise unsupported on generator expressions * PR #4305: Don't allow the allocation of mutable objects written into a container to be hoisted. * PR #4311: Avoid deprecated use of inspect.getargspec * PR #4328: Replace GC macro with function call * PR #4330: Loosen up typed container casting checks * PR #4341: Fix some coding lines at the top of some files (utf8 -> utf-8) * PR #4345: Replace "import \*" with explicit imports in numba/types * PR #4346: Fix incorrect alg in isupper for ascii strings. * PR #4349: test using jitclass in typed-list * PR #4361: Add allocation hoisting info to LICM section at diagnostic L4 * PR #4366: Offset search box to avoid wrapping on some pages with Safari. Fixes #4365. * PR #4372: Replace all "except BaseException" with "except Exception". * PR #4407: Restore the "free" conda channel for NumPy 1.10 support. * PR #4408: Add lowering for constant bytes. * PR #4409: Add exception chaining for better error context * PR #4411: Name of type should not contain user facing description for debug. * PR #4412: Fix #4387. Limit the number of return types for recursive functions * PR #4426: Fixed two module teardown races in py2. * PR #4431: Fix and test numpy.random.random_sample(n) for np117 * PR #4463: NamedTuple - Raises an error on non-iterable elements * PR #4464: Add a newline in patched errors * PR #4474: Fix liveness for remove dead of parfors (and other IR extensions) * PR #4510: Make List.__getitem__ accept unsigned parameters * PR #4512: Raise specific error at typing time for iteration on >1D array. * PR #4532: Fix static_getitem with Literal type as index * PR #4547: Update to inliner cost model information. 
* PR #4557: Use specific random number seed when generating arbitrary test
  data
* PR #4559: Adjust test timeouts
* PR #4564: Skip unicode array tests on ppc64le that trigger an LLVM bug
* PR #4621: Fix packaging issue due to missing numba/cext
* PR #4623: Fix issue 4520 due to storage model mismatch
* PR #4644: Updates for llvmlite 0.30.0

CUDA Enhancements/Fixes:

* PR #4410: Fix #4111. cudasim mishandling recarray
* PR #4576: Replace use of `np.prod` with `functools.reduce` for computing
  size from shape
* PR #4581: Prevent taking the GIL in ForAll
* PR #4592: Fix #4589. Just pass NULL for b2d_func for constant dynamic
  sharedmem
* PR #4609: Update CUDA Array Interface & Enforce Numba compliance
* PR #4619: Implement math.{degrees, radians} for the CUDA target.
* PR #4675: Bump cuda array interface to version 2

Documentation Updates:

* PR #4317: Add docs for ARMv8/AArch64
* PR #4318: Add supported platforms to the docs. Closes #4316
* PR #4375: Add docstrings to inspect methods
* PR #4388: Update Python 2.7 EOL statement
* PR #4397: Add note about np.sum
* PR #4447: Minor parallel performance tips edits
* PR #4455: Clarify docs for typed dict with regard to arrays
* PR #4482: Fix example in guvectorize docstring.
* PR #4541: fix two typos in architecture.rst
* PR #4548: Document numba.extending.intrinsic and inlining.
* PR #4565: Fix typo in jit-compilation docs
* PR #4607: add dependency list to docs
* PR #4614: Add documentation for implementing new compiler passes.

CI Updates:

* PR #4415: Make 32bit incremental builds on linux not use free channel
* PR #4433: Removes stale azure comment
* PR #4493: Fix Overload Inliner wrt CUDA Intrinsics
* PR #4593: Enable Azure CI batching

Contributors:

* Aaron Meurer
* Ashwin Srinath
* David Hoese
* Ehsan Totoni (core dev)
* Eric Firing
* Ethan Pronovost
* Gerald Dalley
* Gregory R. Lee
* Guilherme Leobas
* James Bourbeau
* Jim Crist
* Joshua Adelman
* Juan Nunez-Iglesias
* Leo Fang
* Lucio Fernandez-Arjona
* Pearu Peterson
* Pedro A. Morales Marie
* Peter Andreas Entschev
* Rujal Desai
* Siu Kwan Lam (core dev)
* Stan Seibert (core dev)
* Stuart Archibald (core dev)
* Todd A. Anderson (core dev)
* Valentin Haenel (core dev)
* Vyacheslav Smirnov

Version 0.45.1
--------------

This patch release addresses some regressions reported in the 0.45.0 release
and adds support for NumPy 1.17:

* PR #4325: accept scalar/0d-arrays
* PR #4338: Fix #4299. Parfors reduction vars not deleted.
* PR #4350: Use process level locks for fork() only.
* PR #4354: Try to fix #4352.
* PR #4357: Fix np1.17 isnan, isinf, isfinite ufuncs
* PR #4363: Fix np.interp for np1.17 nan handling
* PR #4371: Fix np1.17 random function non-aliasing

Contributors:

* Siu Kwan Lam (core dev)
* Stuart Archibald (core dev)
* Valentin Haenel (core dev)

Version 0.45.0
--------------

In this release, Numba gained an experimental ``numba.typed.List`` container
as a future replacement of the reflected list. In addition, functions
decorated with ``parallel=True`` can now be cached to reduce compilation
overhead associated with the auto-parallelization.
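As a quick illustration of the headline feature, here is a minimal sketch of
the typed list in use (a hedged example written against current releases;
the function and variable names are illustrative, not taken from the release
itself)::

    from numba import njit
    from numba.typed import List

    @njit
    def scale(values, factor):
        out = List()            # item type is inferred on first append
        for v in values:
            out.append(v * factor)
        return out

    src = List()                # build a typed list on the Python side
    for x in (1.0, 2.0, 3.0):
        src.append(x)

    print(list(scale(src, 10.0)))   # [10.0, 20.0, 30.0]

Unlike the reflected list, the typed list is not converted back and forth at
each call boundary, which is what makes it a candidate replacement.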
Enhancements from user contributed PRs (with thanks!): * James Bourbeau added the Numba version to reportable error messages in #4227, added the ``signature`` parameter to ``inspect_types`` in #4200, improved the docstring of ``normalize_signature`` in #4205, and fixed #3658 by adding reference counting to ``register_dispatcher`` in #4254 * Guilherme Leobas implemented the dominator tree and dominance frontier algorithms in #4216 and #4149, respectively. * Nick White fixed the issue with ``round`` in the CUDA target in #4137. * Joshua Adelman added support for determining if a value is in a `range` (i.e. ``x in range(...)``) in #4129, and added windowing functions (``np.bartlett``, ``np.hamming``, ``np.blackman``, ``np.hanning``, ``np.kaiser``) from NumPy in #4076. * Lucio Fernandez-Arjona added support for ``np.select`` in #4077 * Rob Ennis added support for ``np.flatnonzero`` in #4157 * Keith Kraus extended the ``__cuda_array_interface__`` with an optional mask attribute in #4199. * Gregory R. Lee replaced deprecated use of ``inspect.getargspec`` in #4311. General Enhancements: * PR #4328: Replace GC macro with function call * PR #4311: Avoid deprecated use of inspect.getargspec * PR #4296: Slacken window function testing tol on ppc64le * PR #4254: Add reference counting to register_dispatcher * PR #4239: Support len() of multi-dim arrays in array analysis * PR #4234: Raise informative error for np.kron array order * PR #4232: Add unicodetype db, low level str functions and examples. * PR #4229: Make hashing cacheable * PR #4227: Include numba version in reportable error message * PR #4216: Add dominator tree * PR #4200: Add signature parameter to inspect_types * PR #4196: Catch missing imports of internal functions. * PR #4180: Update use of unlowerable global message. * PR #4166: Add tests for PR #4149 * PR #4157: Support for np.flatnonzero * PR #4149: Implement dominance frontier for SSA for the Numba IR * PR #4148: Call branch pruning in inline_closure_call() * PR #4132: Reduce usage of inttoptr * PR #4129: Support contains for range * PR #4112: better error messages for np.transpose and tuples * PR #4110: Add range attrs, start, stop, step * PR #4077: Add np select * PR #4076: Add numpy windowing functions support (np.bartlett, np.hamming, np.blackman, np.hanning, np.kaiser) * PR #4095: Support ir.Global/FreeVar in find_const() * PR #3691: Make TypingError abort compiling earlier * PR #3646: Log internal errors encountered in typeinfer Fixes: * PR #4303: Work around scipy bug 10206 * PR #4302: Fix flake8 issue on master * PR #4301: Fix integer literal bug in np.select impl * PR #4291: Fix pickling of jitclass type * PR #4262: Resolves #4251 - Fix bug in reshape analysis. * PR #4233: Fixes issue revealed by #4215 * PR #4224: Fix #4223. Looplifting error due to StaticSetItem in objectmode * PR #4222: Fix bad python path. * PR #4178: Fix unary operator overload, check with unicode impl * PR #4173: Fix return type in np.bincount with weights * PR #4153: Fix slice shape assignment in array analysis * PR #4152: fix status check in dict lookup * PR #4145: Use callable instead of checking __module__ * PR #4118: Fix inline assembly support on CPU. * PR #4088: Resolves #4075 - parfors array_analysis bug. * PR #4085: Resolves #3314 - parfors array_analysis bug with reshape. 
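Two of the smaller 0.45 additions listed above, ``np.flatnonzero`` (#4157)
and membership tests against ``range`` (#4129), compose well in jitted code.
The following is a hedged sketch; the function names are illustrative only::

    import numpy as np
    from numba import njit

    @njit
    def first_positive_index(a):
        nz = np.flatnonzero(a > 0)      # supported per #4157
        return nz[0] if nz.size > 0 else -1

    @njit
    def is_small_even(x):
        return x in range(0, 100, 2)    # range containment per #4129

    print(first_positive_index(np.array([-1.0, 0.0, 7.0])))  # 2
    print(is_small_even(4), is_small_even(5))                # True False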
CUDA Enhancements/Fixes: * PR #4199: Extend `__cuda_array_interface__` with optional mask attribute, bump version to 1 * PR #4137: CUDA - Fix round Builtin * PR #4114: Support 3rd party activated CUDA context Documentation Updates: * PR #4317: Add docs for ARMv8/AArch64 * PR #4318: Add supported platforms to the docs. Closes #4316 * PR #4295: Alter deprecation schedules * PR #4253: fix typo in pysupported docs * PR #4252: fix typo on repomap * PR #4241: remove unused import * PR #4240: fix typo in jitclass docs * PR #4205: Update return value order in normalize_signature docstring * PR #4237: Update doc links to point to latest not dev docs. * PR #4197: hyperlink repomap * PR #4170: Clarify docs on accumulating into arrays in prange * PR #4147: fix docstring for DictType iterables * PR #3951: A guide to overloading CI Updates: * PR #4300: AArch64 has no faulthandler package * PR #4273: pin to MKL BLAS for testing to get consistent results * PR #4209: Revert previous network tol patch and try with conda config * PR #4138: Remove tbb before Azure test only on Python 3, since it was already removed for Python 2 Contributors: * Ehsan Totoni (core dev) * Gregory R. Lee * Guilherme Leobas * James Bourbeau * Joshua L. Adelman * Keith Kraus * Lucio Fernandez-Arjona * Nick White * Rob Ennis * Siu Kwan Lam (core dev) * Stan Seibert (core dev) * Stuart Archibald (core dev) * Todd A. Anderson (core dev) * Valentin Haenel (core dev) Version 0.44.1 -------------- This patch release addresses some regressions reported in the 0.44.0 release: - PR #4165: Fix #4164 issue with NUMBAPRO_NVVM. - PR #4172: Abandon branch pruning if an arg name is redefined. (Fixes #4163) - PR #4183: Fix #4156. Problem with defining in-loop variables. Version 0.44.0 -------------- IMPORTANT: In this release a few significant deprecations (and some less significant ones) are being made, users are encouraged to read the related documentation. General enhancements in this release include: - Numba is backed by LLVM 8 on all platforms apart from ppc64le, which, due to bugs, remains on the LLVM 7.x series. - Numba's dictionary support now includes type inference for keys and values. - The .view() method now works for NumPy scalar types. - Newly supported NumPy functions added: np.delete, np.nanquantile, np.quantile, np.repeat, np.shape. In addition considerable effort has been made to fix some long standing bugs and a large number of other bugs, the "Fixes" section is very large this time! Enhancements from user contributed PRs (with thanks!): - Max Bolingbroke added support for the selective use of ``fastmath`` flags in #3847. - Rob Ennis made min() and max() work on iterables in #3820 and added np.quantile and np.nanquantile in #3899. - Sergey Shalnov added numerous unicode string related features, zfill in #3978, ljust in #4001, rjust and center in #4044 and strip, lstrip and rstrip in #4048. - Guilherme Leobas added support for np.delete in #3890 - Christoph Deil exposed the Numba CLI via ``python -m numba`` in #4066 and made numerous documentation fixes. - Leo Schwarz wrote the bulk of the code for jitclass default constructor arguments in #3852. - Nick White enhanced the CUDA backend to use min/max PTX instructions where possible in #4054. - Lucio Fernandez-Arjona implemented the unicode string ``__mul__`` function in #3952. - Dimitri Vorona wrote the bulk of the code to implement getitem and setitem for jitclass in #3861. 
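The jitclass enhancements called out in the highlights above (default
constructor arguments from #3852 and getitem/setitem from #3861) combine as
in the following hedged sketch. The import path shown is the current
``numba.experimental.jitclass`` location (at the time of 0.44 the decorator
lived at ``numba.jitclass``), and the class and function names are
illustrative only::

    import numpy as np
    from numba import njit, float64
    from numba.experimental import jitclass

    @jitclass([('data', float64[:])])
    class Buffer:
        def __init__(self, n=4):           # default ctor argument (#3852)
            self.data = np.zeros(n)

        def __getitem__(self, i):          # getitem support (#3861)
            return self.data[i]

        def __setitem__(self, i, value):   # setitem support (#3861)
            self.data[i] = value

    @njit
    def set_and_get(buf, i, value):
        buf[i] = value                     # dispatches to __setitem__
        return buf[i]                      # dispatches to __getitem__

    buf = Buffer()                         # the n=4 default applies
    print(set_and_get(buf, 0, 3.5), buf.data.shape)  # 3.5 (4,)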
General Enhancements: * PR #3820: Min max on iterables * PR #3842: Unicode type iteration * PR #3847: Allow fine-grained control of fastmath flags to partially address #2923 * PR #3852: Continuation of PR #2894 * PR #3861: Continuation of PR #3730 * PR #3890: Add support for np.delete * PR #3899: Support for np.quantile and np.nanquantile * PR #3900: Fix 3457 :: Implements np.repeat * PR #3928: Add .view() method for NumPy scalars * PR #3939: Update icc_rt clone recipe. * PR #3952: __mul__ for strings, initial implementation and tests * PR #3956: Type-inferred dictionary * PR #3959: Create a view for string slicing to avoid extra allocations * PR #3978: zfill operation implementation * PR #4001: ljust operation implementation * PR #4010: Support `dict()` and `{}` * PR #4022: Support for llvm 8 * PR #4034: Make type.Optional str more representative * PR #4041: Deprecation warnings * PR #4044: rjust and center operations implementation * PR #4048: strip, lstrip and rstrip operations implementation * PR #4066: Expose numba CLI via python -m numba * PR #4081: Impl `np.shape` and support function for `asarray`. * PR #4091: Deprecate the use of iternext_impl without RefType CUDA Enhancements/Fixes: * PR #3933: Adds `.nbytes` property to CUDA device array objects. * PR #4011: Add .inspect_ptx() to cuda device function * PR #4054: CUDA: Use min/max PTX Instructions * PR #4096: Update env-vars for CUDA libraries lookup Documentation Updates: * PR #3867: Code repository map * PR #3918: adding Joris' Fosdem 2019 presentation * PR #3926: order talks on applications of Numba by date * PR #3943: fix two small typos in vectorize docs * PR #3944: Fixup jitclass docs * PR #3990: mention preprint repo in FAQ. Fixes #3981 * PR #4012: Correct runtests command in contributing.rst * PR #4043: fix typo * PR #4047: Ambiguous Documentation fix for guvectorize. * PR #4060: Remove remaining mentions of autojit in docs * PR #4063: Fix annotate example in docstring * PR #4065: Add FAQ entry explaining Numba project name * PR #4079: Add Documentation for atomicity of typed.Dict * PR #4105: Remove info about CUDA ENVVAR potential replacement Fixes: * PR #3719: Resolves issue #3528. Adds support for slices when not using parallel=True. * PR #3727: Remove dels for known dead vars. * PR #3845: Fix mutable flag transmission in .astype * PR #3853: Fix some minor issues in the C source. * PR #3862: Correct boolean reinterpretation of data * PR #3863: Comments out the appveyor badge * PR #3869: fixes flake8 after merge * PR #3871: Add assert to ir.py to help enforce correct structuring * PR #3881: fix preparfor dtype transform for datetime64 * PR #3884: Prevent mutation of objmode fallback IR. * PR #3885: Updates for llvmlite 0.29 * PR #3886: Use `safe_load` from pyyaml. * PR #3887: Add tolerance to network errors by permitting conda to retry * PR #3893: Fix casting in namedtuple ctor. * PR #3894: Fix array inliner for multiple array definition. * PR #3905: Cherrypick #3903 to main * PR #3920: Raise better error if unsupported jump opcode found. 
* PR #3927: Apply flake8 to the numpy related files
* PR #3935: Silence DeprecationWarning
* PR #3938: Better error message for unknown opcode
* PR #3941: Fix typing of ufuncs in parfor conversion
* PR #3946: Return variable renaming dict from inline_closurecall
* PR #3962: Fix bug in alignment computation of `Record.make_c_struct`
* PR #3967: Fix error with pickling unicode
* PR #3964: Unicode split algo versioning
* PR #3975: Add handler for unknown locale to numba -s
* PR #3991: Permit Optionals in ufunc machinery
* PR #3995: Remove assert in type inference causing poor error message.
* PR #3996: add is_ascii flag to UnicodeType
* PR #4009: Prevent zero division error in np.linalg.cond
* PR #4014: Resolves #4007.
* PR #4021: Add a more specific error message for invalid write to a global.
* PR #4023: Fix handling of titles in record dtype
* PR #4024: Do a check if a call is const before saying that an object is
  multiply defined.
* PR #4027: Fix issue #4020. Turn off no_cpython_wrapper flag when compiling
  for…
* PR #4033: [WIP] Fixing wrong dtype of array inside reflected list #4028
* PR #4061: Change IPython cache dir name to numba_cache
* PR #4067: Delete examples/notebooks/LinearRegr.py
* PR #4070: Catch writes to global typed.Dict and raise.
* PR #4078: Check tuple length
* PR #4084: Fix missing incref on optional return None
* PR #4089: Make the warnings fixer flush work for warning comparing on
  type.
* PR #4094: Fix function definition finding logic for commented def
* PR #4100: Fix alignment check on 32-bit.
* PR #4104: Use PEP 508 compliant env markers for install deps

Contributors:

* Benjamin Zaitlen
* Christoph Deil
* David Hirschfeld
* Dimitri Vorona
* Ehsan Totoni (core dev)
* Guilherme Leobas
* Leo Schwarz
* Lucio Fernandez-Arjona
* Max Bolingbroke
* NanduTej
* Nick White
* Ravi Teja Gutta
* Rob Ennis
* Sergey Shalnov
* Siu Kwan Lam (core dev)
* Stan Seibert (core dev)
* Stuart Archibald (core dev)
* Todd A. Anderson (core dev)
* Valentin Haenel (core dev)

Version 0.43.1
--------------

This is a bugfix release that provides minor changes to fix a bug in branch
pruning, bugs in `np.interp` functionality, and to fully accommodate the
NumPy 1.16 release series.

* PR #3826: NumPy 1.16 support
* PR #3850: Refactor np.interp
* PR #3883: Rewrite pruned conditionals as their evaluated constants.

Contributors:

* Rob Ennis
* Siu Kwan Lam (core dev)
* Stuart Archibald (core dev)

Version 0.43.0
--------------

In this release, the major new features are:

- Initial support for statically typed dictionaries
- Improvements to `hash()` to match Python 3 behavior
- Support for the heapq module
- Ability to pass C structs to Numba
- More NumPy functions: asarray, trapz, roll, ptp, extract

NOTE: The vast majority of NumPy 1.16 behaviour is supported, however
``datetime`` and ``timedelta`` use involving ``NaT`` matches the behaviour
present in earlier releases. The ufunc suite has not been extended to
accommodate the two new time computation related additions present in NumPy
1.16. In addition the functions ``ediff1d`` and ``interp`` have known minor
issues in replicating outputs exactly when ``NaN``'s occur in certain input
patterns.

General Enhancements:

* PR #3563: Support for np.roll
* PR #3572: Support for np.ptp
* PR #3592: Add dead branch prune before type inference.
* PR #3598: Implement np.asarray()
* PR #3604: Support for np.interp
* PR #3607: Some simplification to lowering
* PR #3612: Exact match flag in dispatcher
* PR #3627: Support for np.trapz
* PR #3630: np.where with broadcasting
* PR #3633: Support for np.extract
* PR #3657: np.max, np.min, np.nanmax, np.nanmin - support for complex
  dtypes
* PR #3661: Access C Struct as Numpy Structured Array
* PR #3678: Support for str.split and str.join
* PR #3684: Support C array in C struct
* PR #3696: Add intrinsic to help debug refcount
* PR #3703: Implementations of type hashing.
* PR #3715: Port CPython3.7 dictionary for numba internal use
* PR #3716: Support inplace concat of strings
* PR #3718: Add location to ConstantInferenceError exceptions.
* PR #3720: improve error msg about invalid signature
* PR #3731: Support for heapq
* PR #3754: Updates for llvmlite 0.28
* PR #3760: Overloadable operator.setitem
* PR #3775: Support overloading operator.delitem
* PR #3777: Implement compiler support for dictionary
* PR #3791: Implement interpreter-side interface for numba dict
* PR #3799: Support refcount'ed types in numba dict

CUDA Enhancements/Fixes:

* PR #3713: Fix the NvvmSupportError message when CC too low
* PR #3722: Fix #3705: slicing error with negative strides
* PR #3755: Make cuda.to_device accept readonly host array
* PR #3773: Adapt library search to accommodate multiple locations

Documentation Updates:

* PR #3651: fix link to berryconda in docs
* PR #3668: Add Azure Pipelines build badge
* PR #3749: DOC: Clarify when prange is different from range
* PR #3771: fix a few typos
* PR #3785: Clarify use of range as function only.
* PR #3829: Add docs for typed-dict

Fixes:

* PR #3614: Resolve #3586
* PR #3618: Skip gdb tests on ARM.
* PR #3643: Remove support_literals usage
* PR #3645: Enforce and fix that AbstractTemplate.generic must be returning
  a Signature
* PR #3648: Fail on @overload signature mismatch.
* PR #3660: Added Ignore message to test
  numba.tests.test_lists.TestLists.test_mul_error
* PR #3662: Replace six with numba.six
* PR #3663: Removes coverage computation from travisci builds
* PR #3672: Avoid leaking memory when iterating over uniform tuple
* PR #3676: Fixes constant string lowering inside tuples
* PR #3677: Ensure all referenced compiled functions are linked properly
* PR #3692: Fix test failure due to overly strict test on floating point
  values.
* PR #3693: Intercept failed import to help users.
* PR #3694: Fix memory leak in enumerate iterator
* PR #3695: Convert return of None from intrinsic implementation to dummy
  value
* PR #3697: Fix for issue #3687
* PR #3701: Fix array.T analysis (fixes #3700)
* PR #3704: Fixes for overload_method
* PR #3706: Don't push call vars recursively into nested parfors. Resolves
  #3686.
* PR #3710: Set as non-hoistable if a mutable variable is passed to a
  function in a loop. Resolves #3699.
* PR #3712: parallel=True to use better builtin mechanism to resolve call
  types. Resolves issue #3671
* PR #3725: Fix invalid removal of dead empty list
* PR #3740: add uintp as a valid type to the tuple operator.getitem
* PR #3758: Fix target definition update in inlining
* PR #3782: Raise typing error on yield optional.
* PR #3792: Fix non-module object used as the module of a function.
* PR #3800: Bugfix for np.interp
* PR #3808: Bump macro to include VS2014 to fix py3.5 build
* PR #3809: Add debug guard to debug only C function.
* PR #3816: Fix array.sum(axis) 1d input return type.
* PR #3821: Replace PySys_WriteStdout with PySys_FormatStdout to ensure no
  truncation.
* PR #3830: Getitem should not return optional type
* PR #3832: Handle single string as path in find_file()

Contributors:

* Ehsan Totoni
* Gryllos Prokopis
* Jonathan J. Helmus
* Kayla Ngan
* lalitparate
* luk-f-a
* Matyt
* Max Bolingbroke
* Michael Seifert
* Rob Ennis
* Siu Kwan Lam
* Stan Seibert
* Stuart Archibald
* Todd A. Anderson
* Tao He
* Valentin Haenel

Version 0.42.1
--------------

Bugfix release to fix the incorrect hash in OSX wheel packages. No change in
source code.

Version 0.42.0
--------------

In this release the major features are:

- The capability to launch and attach the GDB debugger from within a jitted
  function (see the usage sketch after the CUDA list below).
- The upgrading of LLVM to version 7.0.0.

We added a draft of the project roadmap to the developer manual. The roadmap
is for informational purposes only as priorities and resources may change.

Here are some enhancements from contributed PRs:

- #3532. Daniel Wennberg improved the ``cuda.{pinned, mapped}`` API so that
  the associated memory is released immediately at the exit of the context
  manager.
- #3531. Dimitri Vorona enabled the inlining of jitclass methods.
- #3516. Simon Perkins added the support for passing numpy dtypes (i.e.
  ``np.dtype("int32")``) and their type constructor (i.e. ``np.int32``) into
  a jitted function.
- #3509. Rob Ennis added support for ``np.corrcoef``.

A regression issue (#3554, #3461) relating to making an empty slice in
parallel mode is resolved by #3558.

General Enhancements:

* PR #3392: Launch and attach gdb directly from Numba.
* PR #3437: Changes to accommodate LLVM 7.0.x
* PR #3509: Support for np.corrcoef
* PR #3516: Typeof dtype values
* PR #3520: Fix @stencil ignoring cval if out kwarg supplied.
* PR #3531: Fix jitclass method inlining and avoid unnecessary increfs
* PR #3538: Avoid future C-level assertion error due to invalid visibility
* PR #3543: Avoid implementation error being hidden by the try-except
* PR #3544: Add `long_running` test flag and feature to exclude tests.
* PR #3549: ParallelAccelerator caching improvements
* PR #3558: Fixes array analysis for inplace binary operators.
* PR #3566: Skip alignment tests on armv7l.
* PR #3567: Fix unifying literal types in namedtuple
* PR #3576: Add special copy routine for NumPy out arrays
* PR #3577: Fix example and docs typos for `objmode` context manager.
* PR #3580: Use alias information when determining whether it is safe to
  reorder statements.
* PR #3583: Use `ir.unknown_loc` for unknown `Loc`, as #3390 with tests
* PR #3587: Fix llvm.memset usage changes in llvm7
* PR #3596: Fix Array Analysis for Global Namedtuples
* PR #3597: Warn users if threading backend init unsafe.
* PR #3605: Add guard for writing to read only arrays from ufunc calls
* PR #3606: Improve the accuracy of error message wording for undefined
  type.
* PR #3611: gdb test guard needs to ack ptrace permissions
* PR #3616: Skip gdb tests on ARM.

CUDA Enhancements:

* PR #3532: Unregister temporarily pinned host arrays at once
* PR #3552: Handle broadcast arrays correctly in host->device transfer.
* PR #3578: Align cuda and cuda simulator kwarg names.
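For the gdb capability mentioned at the top of this section, usage follows
the pattern below (a hedged sketch: it assumes a Linux host with gdb
installed and ptrace permitted, and the function name is illustrative)::

    from numba import njit, gdb

    @njit(debug=True)
    def broken(a):
        b = a + 1
        gdb()          # launch gdb and attach it at this point
        return b * 2

    # broken(3)  # uncommenting drops into the debugger at the gdb() call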
Documentation Updates:

* PR #3545: Fix @njit description in 5 min guide
* PR #3570: Minor documentation fixes for numba.cuda
* PR #3581: Fixing minor typo in `reference/types.rst`
* PR #3594: Changing `@stencil` docs to correctly reflect `func_or_mode`
  param
* PR #3617: Draft roadmap as of Dec 2018

Contributors:

* Aaron Critchley
* Daniel Wennberg
* Dimitri Vorona
* Dominik Stańczak
* Ehsan Totoni (core dev)
* Iskander Sharipov
* Rob Ennis
* Simon Muller
* Simon Perkins
* Siu Kwan Lam (core dev)
* Stan Seibert (core dev)
* Stuart Archibald (core dev)
* Todd A. Anderson (core dev)

Version 0.41.0
--------------

This release adds the following major features:

* Diagnostics showing the optimizations done by ParallelAccelerator
* Support for profiling Numba-compiled functions in Intel VTune
* Additional NumPy functions: partition, nancumsum, nancumprod, ediff1d,
  cov, conj, conjugate, tri, tril, triu
* Initial support for Python 3 Unicode strings

General Enhancements:

* PR #1968: armv7 support
* PR #2983: invert mapping b/w binop operators and the operator module #2297
* PR #3160: First attempt at parallel diagnostics
* PR #3307: Adding NUMBA_ENABLE_PROFILING envvar, enabling jit event
* PR #3320: Support for np.partition
* PR #3324: Support for np.nancumsum and np.nancumprod
* PR #3325: Add location information to exceptions.
* PR #3337: Support for np.ediff1d
* PR #3345: Support for np.cov
* PR #3348: Support user pipeline class in with lifting
* PR #3363: string support
* PR #3373: Improve error message for empty imprecise lists.
* PR #3375: Enable overload(operator.getitem)
* PR #3402: Support negative indexing in tuple.
* PR #3414: Refactor Const type
* PR #3416: Optimized usage of alloca out of the loop
* PR #3424: Updates for llvmlite 0.26
* PR #3462: Add support for `np.conj/np.conjugate`.
* PR #3480: np.tri, np.tril, np.triu - default optional args
* PR #3481: Permit dtype argument as sole kwarg in np.eye

CUDA Enhancements:

* PR #3399: Add max_registers Option to cuda.jit

Continuous Integration / Testing:

* PR #3303: CI with Azure Pipelines
* PR #3309: Workaround race condition with apt
* PR #3371: Fix issues with Azure Pipelines
* PR #3362: Fix #3360: `RuntimeWarning: 'numba.runtests' found in
  sys.modules`
* PR #3374: Disable openmp in wheel building
* PR #3404: Azure Pipelines templates
* PR #3419: Fix cuda tests and error reporting in test discovery
* PR #3491: Prevent faulthandler installation on armv7l
* PR #3493: Fix CUDA test that used negative indexing behaviour that's
  fixed.
* PR #3495: Start Flake8 checking of Numba source

Fixes:

* PR #2950: Fix dispatcher to only consider contiguous-ness.
* PR #3124: Fix #3119, raise for 0d arrays in reductions
* PR #3228: Reduce redundant module linking
* PR #3329: Fix AOT on windows.
* PR #3335: Fix memory management of __cuda_array_interface__ views.
* PR #3340: Fix typo in error name.
* PR #3365: Fix the default unboxing logic
* PR #3367: Allow non-global reference to objmode() context-manager
* PR #3381: Fix global reference in objmode for dynamically created function
* PR #3382: CUDA_ERROR_MISALIGNED_ADDRESS Using Multiple Const Arrays
* PR #3384: Correctly handle very old versions of colorama
* PR #3394: Add 32bit package guard for non-32bit installs
* PR #3397: Fix with-objmode warning
* PR #3403: Fix label offset in call inline after parfor pass
* PR #3429: Fixes raising of user defined exceptions for exec().
* PR #3432: Fix error due to function naming in CI in py2.7
* PR #3444: Fixed TBB's single thread execution and test added for #3440
* PR #3449: Allow matching non-array objects in find_callname()
* PR #3455: Change getiter and iternext to not be pure. Resolves #3425
* PR #3467: Make ir.UndefinedType singleton class.
* PR #3478: Fix np.random.shuffle sideeffect
* PR #3487: Raise unsupported for kwargs given to `print()`
* PR #3488: Remove dead script.
* PR #3498: Fix stencil support for boolean as return type
* PR #3511: Fix handling make_function literals (regression of #3414)
* PR #3514: Add missing unicode != unicode
* PR #3527: Fix complex math sqrt implementation for large -ve values
* PR #3530: This adds an argument check for the pattern supplied to
  Parfors.
* PR #3536: Sets list dtor linkage to `linkonce_odr` to fix visibility in
  AOT

Documentation Updates:

* PR #3316: Update 0.40 changelog with additional PRs
* PR #3318: Tweak spacing to avoid search box wrapping onto second line
* PR #3321: Add note about memory leaks with exceptions to docs. Fixes
  #3263
* PR #3322: Add FAQ on CUDA + fork issue. Fixes #3315.
* PR #3343: Update docs for argsort, kind kwarg partially supported.
* PR #3357: Added mention of njit in 5minguide.rst
* PR #3434: Fix parallel reduction example in docs.
* PR #3452: Fix broken link and mark up problem.
* PR #3484: Size Numba logo in docs in em units. Fixes #3313
* PR #3502: just two typos
* PR #3506: Document string support
* PR #3513: Documentation for parallel diagnostics.
* PR #3526: Fix 5 min guide with respect to @njit decl

Contributors:

* Alex Ford
* Andreas Sodeur
* Anton Malakhov
* Daniel Stender
* Ehsan Totoni (core dev)
* Henry Schreiner
* Marcel Bargull
* Matt Cooper
* Nick White
* Nicolas Hug
* rjenc29
* Siu Kwan Lam (core dev)
* Stan Seibert (core dev)
* Stuart Archibald (core dev)
* Todd A. Anderson (core dev)

Version 0.40.1
--------------

This is a PyPI-only patch release to ensure that PyPI wheels can enable the
TBB threading backend, and to disable the OpenMP backend in the wheels.
Limitations of manylinux1 and variation in user environments can cause
segfaults when OpenMP is enabled on wheel builds. Note that this release has
no functional changes for users who obtained Numba 0.40.0 via conda.

Patches:

* PR #3338: Accidentally left Anton off contributor list for 0.40.0
* PR #3374: Disable OpenMP in wheel building
* PR #3376: Update 0.40.1 changelog and docs on OpenMP backend

Version 0.40.0
--------------

This release adds a number of major features:

* A new GPU backend: kernels for AMD GPUs can now be compiled using the ROCm
  driver on Linux.
* The thread pool implementation used by Numba for automatic multithreading
  is configurable to use TBB, OpenMP, or the old "workqueue" implementation.
  (TBB is likely to become the preferred default in a future release.)
* New documentation on thread and fork-safety with Numba, along with overall
  improvements in thread-safety.
* Experimental support for executing a block of code inside a nopython mode
  function in object mode.
* Parallel loops now allow arrays as reduction variables
* CUDA improvements: FMA, faster float64 atomics on supporting hardware,
  records in const memory, and improved datetime dtype support
* More NumPy functions: vander, tri, triu, tril, fill_diagonal

General Enhancements:

* PR #3017: Add facility to support with-contexts
* PR #3033: Add support for multidimensional CFFI arrays
* PR #3122: Add inliner to object mode pipeline
* PR #3127: Support for reductions on arrays.
* PR #3145: Support for np.fill_diagonal * PR #3151: Keep a queue of references to last N deserialized functions. Fixes #3026 * PR #3154: Support use of list() if typeable. * PR #3166: Objmode with-block * PR #3179: Updates for llvmlite 0.25 * PR #3181: Support function extension in alias analysis * PR #3189: Support literal constants in typing of object methods * PR #3190: Support passing closures as literal values in typing * PR #3199: Support inferring stencil index as constant in simple unary expressions * PR #3202: Threading layer backend refactor/rewrite/reinvention! * PR #3209: Support for np.tri, np.tril and np.triu * PR #3211: Handle unpacking in building tuple (BUILD_TUPLE_UNPACK opcode) * PR #3212: Support for np.vander * PR #3227: Add NumPy 1.15 support * PR #3272: Add MemInfo_data to runtime._nrt_python.c_helpers * PR #3273: Refactor. Removing thread-local-storage based context nesting. * PR #3278: compiler threadsafety lockdown * PR #3291: Add CPU count and CFS restrictions info to numba -s. CUDA Enhancements: * PR #3152: Use cuda driver api to get best blocksize for best occupancy * PR #3165: Add FMA intrinsic support * PR #3172: Use float64 add Atomics, Where Available * PR #3186: Support Records in CUDA Const Memory * PR #3191: CUDA: fix log size * PR #3198: Fix GPU datetime timedelta types usage * PR #3221: Support datetime/timedelta scalar argument to a CUDA kernel. * PR #3259: Add DeviceNDArray.view method to reinterpret data as a different type. * PR #3310: Fix IPC handling of sliced cuda array. ROCm Enhancements: * PR #3023: Support for AMDGCN/ROCm. * PR #3108: Add ROC info to `numba -s` output. * PR #3176: Move ROC vectorize init to npyufunc * PR #3177: Add auto_synchronize support to ROC stream * PR #3178: Update ROC target documentation. * PR #3294: Add compiler lock to ROC compilation path. * PR #3280: Add wavebits property to the HSA Agent. * PR #3281: Fix ds_permute types and add tests Continuous Integration / Testing: * PR #3091: Remove old recipes, switch to test config based on env var. * PR #3094: Add higher ULP tolerance for products in complex space. * PR #3096: Set exit on error in incremental scripts * PR #3109: Add skip to test needing jinja2 if no jinja2. * PR #3125: Skip cudasim only tests * PR #3126: add slack, drop flowdock * PR #3147: Improve error message for arg type unsupported during typing. * PR #3128: Fix recipe/build for jetson tx2/ARM * PR #3167: In build script activate env before installing. * PR #3180: Add skip to broken test. * PR #3216: Fix libcuda.so loading in some container setup * PR #3224: Switch to new Gitter notification webhook URL and encrypt it * PR #3235: Add 32bit Travis CI jobs * PR #3257: This adds scipy/ipython back into windows conda test phase. Fixes: * PR #3038: Fix random integer generation to match results from NumPy. * PR #3045: Fix #3027 - Numba reassigns sys.stdout * PR #3059: Handler for known LoweringErrors. * PR #3060: Adjust attribute error for NumPy functions. * PR #3067: Abort simulator threads on exception in thread block. * PR #3079: Implement +/-(types.boolean) Fix #2624 * PR #3080: Compute np.var and np.std correctly for complex types. * PR #3088: Fix #3066 (array.dtype.type in prange) * PR #3089: Fix invalid ParallelAccelerator hoisting issue. 
* PR #3136: Fix #3135 (lowering error)
* PR #3137: Fix for issue #3103 (race condition detection)
* PR #3142: Fix Issue #3139 (parfors reuse of reduction variable across
  prange blocks)
* PR #3148: Remove dead array equal @infer code
* PR #3153: Fix canonicalize_array_math typing for calls with kw args
* PR #3156: Fixes issue with missing pygments in testing and adds guards.
* PR #3168: Py37 bytes output fix.
* PR #3171: Fix #3146. Fix CFUNCTYPE void* return-type handling
* PR #3193: Fix setitem/getitem resolvers
* PR #3222: Fix #3214. Mishandling of POP_BLOCK in while True loop.
* PR #3230: Fixes liveness analysis issue in looplifting
* PR #3233: Fix return type difference for 32bit ctypes.c_void_p
* PR #3234: Fix types and layout for `np.where`.
* PR #3237: Fix DeprecationWarning about imp module
* PR #3241: Fix #3225. Normalize 0d array to scalar in typing of indexing
  code.
* PR #3256: Fix #3251: Move imports of ABCs to collections.abc for Python
  >= 3.3
* PR #3292: Fix issue #3279.
* PR #3302: Fix error due to mismatching dtype

Documentation Updates:

* PR #3104: Workaround for #3098 (test_optional_unpack Heisenbug)
* PR #3132: Adds an ~5 minute guide to Numba.
* PR #3194: Fix docs RE: np.random generator fork/thread safety
* PR #3242: Page with Numba talks and tutorial links
* PR #3258: Allow users to choose the type of issue they are reporting.
* PR #3260: Fixed broken link
* PR #3266: Fix cuda pointer ownership problem with user/externally
  allocated pointer
* PR #3269: Tweak typography with CSS
* PR #3270: Update FAQ for functions passed as arguments
* PR #3274: Update installation instructions
* PR #3275: Note pyobject and voidptr are types in docs
* PR #3288: Do not need to call parallel optimizations "experimental"
  anymore
* PR #3318: Tweak spacing to avoid search box wrapping onto second line

Contributors:

* Anton Malakhov
* Alex Ford
* Anthony Bisulco
* Ehsan Totoni (core dev)
* Leonard Lausen
* Matthew Petroff
* Nick White
* Ray Donnelly
* rjenc29
* Siu Kwan Lam (core dev)
* Stan Seibert (core dev)
* Stuart Archibald (core dev)
* Stuart Reynolds
* Todd A. Anderson (core dev)

Version 0.39.0
--------------

Here are the highlights for the Numba 0.39.0 release.

* This is the first version that supports Python 3.7.
* With help from Intel, we have fixed the issues with SVML support (related
  issues #2938, #2998, #3006).
* List has gained support for containing reference-counted types like NumPy
  arrays and `list`. Note, list still cannot hold heterogeneous types.
* We have made a significant change to the internal calling-convention,
  which should be transparent to most users, to allow for a future feature
  that will permit jumping back into python-mode from a nopython-mode
  function. This also fixes a limitation to `print` that disabled its use
  from nopython functions that were deep in the call-stack.
* For CUDA GPU support, we added a `__cuda_array_interface__` following the
  NumPy array interface specification to allow Numba to consume externally
  defined device arrays. We have opened a corresponding pull request to CuPy
  to test out the concept and be able to use a CuPy GPU array.
* The Numba dispatcher `inspect_types()` method now supports the kwarg
  `pretty` which if set to `True` will produce ANSI/HTML output, showing the
  annotated types, when invoked from ipython/jupyter-notebook respectively.
* The NumPy functions `ndarray.dot`, `np.percentile` and `np.nanpercentile`,
  and `np.unique` are now supported.
* Numba now supports the use of a per-project configuration file to
  permanently set behaviours typically set via `NUMBA_*` family environment
  variables.
* Support for the `ppc64le` architecture has been added.

Enhancements:

* PR #2793: Simplify and remove javascript from html_annotate templates.
* PR #2840: Support list of refcounted types
* PR #2902: Support for np.unique
* PR #2926: Enable fence for all architectures and add developer notes
* PR #2928: Making error about untyped list more informative.
* PR #2930: Add configuration file and color schemes.
* PR #2932: Fix encoding to 'UTF-8' in `check_output` decode.
* PR #2938: Python 3.7 compat: _Py_Finalizing becomes _Py_IsFinalizing()
* PR #2939: Comprehensive SVML unit test
* PR #2946: Add support for `ndarray.dot` method and tests.
* PR #2953: percentile and nanpercentile
* PR #2957: Add new 3.7 opcode support.
* PR #2963: Improve alias analysis to be more comprehensive
* PR #2984: Support for namedtuples in array analysis
* PR #2986: Fix environment propagation
* PR #2990: Improve function call matching for intrinsics
* PR #3002: Second pass at error rewrites (interpreter errors).
* PR #3004: Add numpy.empty to the list of pure functions.
* PR #3008: Augment SVML detection with llvmlite SVML patch detection.
* PR #3012: Make use of the common spelling of heterogeneous/homogeneous.
* PR #3032: Fix pycc ctypes test due to mismatch in calling-convention
* PR #3039: Add SVML detection to Numba environment diagnostic tool.
* PR #3041: This adds @needs_blas to tests that use BLAS
* PR #3056: Require llvmlite>=0.24.0

CUDA Enhancements:

* PR #2860: __cuda_array_interface__
* PR #2910: More CUDA intrinsics
* PR #2929: Add Flag To Prevent Unnecessary D->H Copies
* PR #3037: Add CUDA IPC support on non-peer-accessible devices

CI Enhancements:

* PR #3021: Update appveyor config.
* PR #3040: Add fault handler to all builds
* PR #3042: Add catchsegv
* PR #3077: Adds optional number of processes for `-m` in testing

Fixes:

* PR #2897: Fix line position of delete statement in numba ir
* PR #2905: Fix for #2862
* PR #3009: Fix optional type returning in recursive call
* PR #3019: workaround and unittest for issue #3016
* PR #3035: [TESTING] Attempt delayed removal of Env
* PR #3048: [WIP] Fix cuda tests failure on buildfarm
* PR #3054: Make test work on 32-bit
* PR #3062: Fix cuda.In freeing devary before the kernel launch
* PR #3073: Workaround #3072
* PR #3076: Avoid ignored exception due to missing globals at interpreter
  teardown

Documentation Updates:

* PR #2966: Fix syntax in env var docs.
* PR #2967: Fix typo in CUDA kernel layout example.
* PR #2970: Fix docstring copy paste error.

Contributors:

The following people contributed to this release.

* Anton Malakhov
* Ehsan Totoni (core dev)
* Julia Tatz
* Matthias Bussonnier
* Nick White
* Ray Donnelly
* Siu Kwan Lam (core dev)
* Stan Seibert (core dev)
* Stuart Archibald (core dev)
* Todd A. Anderson (core dev)
* Rik-de-Kort
* rjenc29

Version 0.38.1
--------------

This is a critical bug fix release addressing:
https://github.com/numba/numba/issues/3006

The bug does not impact users using conda packages from Anaconda or Intel
Python Distribution (but it does impact conda-forge). It does not impact
users of pip using wheels from PyPI.

This only impacts a small number of users where:

* The ICC runtime (specifically libsvml) is present in the user's
  environment.
* The user is using an llvmlite statically linked against a version of LLVM
  that has not been patched with SVML support.
* The platform is 64-bit.

The release fixes a code generation path that could lead to the production of incorrect results under the above situation.

Fixes:

* PR #3007: Augment SVML detection with llvmlite SVML patch detection.

Contributors:

The following people contributed to this release.

* Stuart Archibald (core dev)

Version 0.38.0
--------------

Following on from the bug fix focus of the last release, this release swings back towards the addition of new features and usability improvements based on community feedback. This release is comparatively large! Three key features/changes to note are:

* Numba (via llvmlite) is now backed by LLVM 6.0; general vectorization is improved as a result. A significant long-standing LLVM bug that was causing corruption was also found and fixed.
* Further considerable improvements in vectorization are made available as Numba now supports Intel's short vector math library (SVML). Try it out with `conda install -c numba icc_rt`.
* CUDA 8.0 is now the minimum supported CUDA version.

Other highlights include:

* Bug fixes to `parallel=True` have enabled more vectorization opportunities when using the ParallelAccelerator technology.
* Much effort has gone into improving error reporting and the general usability of Numba. This includes highlighted error messages and performance tips documentation. Try it out with `conda install colorama`.
* A number of new NumPy functions are supported: `np.convolve`, `np.correlate`, `np.reshape`, `np.transpose`, `np.permutation`, `np.real` and `np.imag`; also, `np.searchsorted` now supports the `side` kwarg, and `np.argsort` now supports the `kind` kwarg with `quicksort` and `mergesort` available.
* The Numba extension API has gained the ability to operate more easily with functions from Cython modules through the use of `numba.extending.get_cython_function_address` to obtain function addresses for direct use in `ctypes.CFUNCTYPE`.
* Numba now allows the passing of jitted functions (and containers of jitted functions) as arguments to other jitted functions.
* The CUDA functionality has gained support for a larger selection of bit manipulation intrinsics, also SELP, and has had a number of bugs fixed.
* Initial work to support the PPC64LE platform has been added; full support is, however, waiting on the LLVM 6.0.1 release, as it contains critical patches not present in 6.0.0. It is hoped that any remaining issues will be fixed in the next release.
* The capacity for advanced users/compiler engineers to define their own compilation pipelines has been added.

Enhancements:

* PR #2660: Support bools from cffi in nopython.
* PR #2741: Enhance error message for undefined variables.
* PR #2744: Add diagnostic error message to test suite discovery failure.
* PR #2748: Added Intel SVML optimizations as opt-out choice working by default
* PR #2762: Support transpose with axes arguments.
* PR #2777: Add support for np.correlate and np.convolve
* PR #2779: Implement np.random.permutation
* PR #2801: Passing jitted functions as args
* PR #2802: Support np.real() and np.imag()
* PR #2807: Expose `import_cython_function`
* PR #2821: Add kwarg 'side' to np.searchsorted
* PR #2822: Adds stable argsort
* PR #2832: Fixups for llvmlite 0.23/llvm 6
* PR #2836: Support `index` method on tuples
* PR #2839: Support for np.transpose and np.reshape.
* PR #2843: Custom pipeline
* PR #2847: Replace signed array access indices in unsigned prange loop body
* PR #2859: Add support for improved error reporting.
* PR #2880: This adds a github issue template.
* PR #2881: Build recipe to clone Intel ICC runtime.
* PR #2882: Update TravisCI to test SVML
* PR #2893: Add reference to the data buffer in array.ctypes object
* PR #2895: Move to CUDA 8.0

Fixes:

* PR #2737: Fix #2007 (part 1). Empty array handling in np.linalg.
* PR #2738: Fix install_requires to allow pip getting pre-release version
* PR #2740: Fix #2208. Generate better error message.
* PR #2765: Fix Bit-ness
* PR #2780: PowerPC reference counting memory fences
* PR #2805: Fix six imports.
* PR #2813: Fix #2812: gufunc scalar output bug.
* PR #2814: Fix the build post #2727
* PR #2831: Attempt to fix #2473
* PR #2842: Fix issue with test discovery and broken CUDA drivers.
* PR #2850: Add rtsys init guard and test.
* PR #2852: Skip vectorization test with targets that are not x86
* PR #2856: Prevent printing to stdout in `test_extending.py`
* PR #2864: Correct C code to prevent compiler warnings.
* PR #2889: Attempt to fix #2386.
* PR #2891: Removed test skipping for inspect_cfg
* PR #2898: Add guard to parallel test on unsupported platforms
* PR #2907: Update change log for PPC64LE LLVM dependency.
* PR #2911: Move build requirement to llvmlite>=0.23.0dev0
* PR #2912: Fix random permutation test.
* PR #2914: Fix MD list syntax in issue template.

Documentation Updates:

* PR #2739: Explicitly state default value of error_model in docstring
* PR #2803: DOC: parallel vectorize requires signatures
* PR #2829: Add Python 2.7 EOL plan to docs
* PR #2838: Use automatic numbering syntax in list.
* PR #2877: Add performance tips documentation.
* PR #2883: Fix #2872: update rng doc about thread/fork-safety
* PR #2908: Add missing link and ref to docs.
* PR #2909: Tiny typo correction

ParallelAccelerator enhancements/fixes:

* PR #2727: Changes to enable vectorization in ParallelAccelerator.
* PR #2816: Array analysis for transpose with arbitrary arguments
* PR #2874: Fix dead code eliminator not to remove a call with side-effect
* PR #2886: Fix ParallelAccelerator arrayexpr repr

CUDA enhancements:

* PR #2734: More Constants From cuda.h
* PR #2767: Add len(..) Support to DeviceNDArray
* PR #2778: Add More Device Array API Functions to CUDA Simulator
* PR #2824: Add CUDA Primitives for Population Count
* PR #2835: Emit selp Instructions to Avoid Branching
* PR #2867: Full support for CUDA device attributes

CUDA fixes:

* PR #2768: Don't Compile Code on Every Assignment
* PR #2878: Fixes a Win64 issue with the test in Pr/2865

Contributors:

The following people contributed to this release.

* Abutalib Aghayev
* Alex Olivas
* Anton Malakhov
* Dong-hee Na
* Ehsan Totoni (core dev)
* John Zwinck
* Josh Wilson
* Kelsey Jordahl
* Nick White
* Olexa Bilaniuk
* Rik-de-Kort
* Siu Kwan Lam (core dev)
* Stan Seibert (core dev)
* Stuart Archibald (core dev)
* Thomas Arildsen
* Todd A. Anderson (core dev)

Version 0.37.0
--------------

This release focuses on bug fixing and stability, but also adds a few new features including support for Numpy 1.14. The key change for Numba core was the long-awaited addition of the final tranche of thread safety improvements that allow Numba to be run concurrently on multiple threads without hitting known thread safety issues inside LLVM itself. Further, a number of fixes and enhancements went into the CUDA implementation, and ParallelAccelerator gained some new features and underwent some internal refactoring.
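As an illustration of this thread-safety guarantee, here is a minimal sketch (the function name, thread count and inputs are illustrative, not taken from this release) of compiling and calling a jitted function from several threads at once::

    from concurrent.futures import ThreadPoolExecutor

    from numba import njit

    @njit
    def square(x):
        return x * x

    # Compilation may be triggered concurrently from any of these threads;
    # as of 0.37.0 this no longer trips known race conditions inside LLVM.
    with ThreadPoolExecutor(max_workers=4) as pool:
        results = list(pool.map(square, range(8)))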
Misc enhancements:

* PR #2627: Remove hacks to make llvmlite threadsafe
* PR #2672: Add ascontiguousarray
* PR #2678: Add Gitter badge
* PR #2691: Fix #2690: add intrinsic to convert array to tuple
* PR #2703: Test runner feature: failed-first and last-failed
* PR #2708: Patch for issue #1907
* PR #2732: Add support for array.fill

Misc Fixes:

* PR #2610: Fix #2606 lowering of optional.setattr
* PR #2650: Remove skip for win32 cosine test
* PR #2668: Fix empty_like from readonly arrays.
* PR #2682: Fixes 2210, remove _DisableJitWrapper
* PR #2684: Fix #2340, generator error yielding bool
* PR #2693: Add travis-ci testing of NumPy 1.14, and also check on Python 2.7
* PR #2694: Avoid type inference failure due to a typing template rejection
* PR #2695: Update llvmlite version dependency.
* PR #2696: Fix tuple indexing code generation for empty tuple
* PR #2698: Fix #2697 by deferring deletion in the simplify_CFG loop.
* PR #2701: Small fix to avoid tempfiles being created in the current directory
* PR #2725: Fix 2481, LLVM IR parsing error due to mutated IR
* PR #2726: Fix #2673: incorrect fork error msg.
* PR #2728: Alternative to #2620. Remove dead code ByteCodeInst.get.
* PR #2730: Add guard for test needing SciPy/BLAS

Documentation updates:

* PR #2670: Update communication channels
* PR #2671: Add docs about diagnosing loop vectorizer
* PR #2683: Add docs on const arg requirements and on const mem alloc
* PR #2722: Add docs on numpy support in cuda
* PR #2724: Update doc: warning about unsupported arguments

ParallelAccelerator enhancements/fixes:

Parallel support for `np.arange` and `np.linspace`, also `np.mean`, `np.std` and `np.var`, is added. This was performed as part of a general refactor and cleanup of the core ParallelAccelerator code.

* PR #2674: Core pa
* PR #2704: Generate Dels after parfor sequential lowering
* PR #2716: Handle matching directly supported functions

CUDA enhancements:

* PR #2665: CUDA DeviceNDArray: Support numpy transpose API
* PR #2681: Allow Assigning to DeviceNDArrays
* PR #2702: Make DummyArray do High Dimensional Reshapes
* PR #2714: Use CFFI to Reuse Code

CUDA fixes:

* PR #2667: Fix CUDA DeviceNDArray slicing
* PR #2686: Fix #2663: incorrect offset when indexing cuda array.
* PR #2687: Ensure Constructed Stream Bound
* PR #2706: Workaround for unexpected warp divergence due to exception raising code
* PR #2707: Fix regression: cuda test submodules not loading properly in runtests
* PR #2731: Use more challenging values in slice tests.
* PR #2720: A quick testsuite fix to not run the new cuda testcase in the multiprocess pool

Contributors:

The following people contributed to this release.

* Coutinho Menezes Nilo
* Daniel
* Ehsan Totoni
* Nick White
* Paul H. Liu
* Siu Kwan Lam
* Stan Seibert
* Stuart Archibald
* Todd A. Anderson

Version 0.36.2
--------------

This is a bugfix release that provides minor changes to address:

* PR #2645: Avoid CPython bug with ``exec`` in older 2.7.x.
* PR #2652: Add support for CUDA 9.

Version 0.36.1
--------------

This release continues to add new features to the work undertaken in partnership with Intel on ParallelAccelerator technology. Other changes of note include the compilation chain being updated to use LLVM 5.0 and the production of conda packages using conda-build 3 and the new compilers that ship with it.

NOTE: A version 0.36.0 was tagged for internal use but not released.

ParallelAccelerator:

NOTE: The ParallelAccelerator technology is under active development and should be considered experimental.
New features relating to ParallelAccelerator, from work undertaken with Intel, include the addition of the `@stencil` decorator for ease of implementation of stencil-like computations, support for general reductions, and slice and range fusion for parallel slice/bit-array assignments. Documentation on both the use and implementation of the above has been added. Further, a new debug environment variable `NUMBA_DEBUG_ARRAY_OPT_STATS` is made available to give information about which operators/calls are converted to parallel for-loops.

ParallelAccelerator features:

* PR #2457: Stencil Computations in ParallelAccelerator
* PR #2548: Slice and range fusion, parallelizing bitarray and slice assignment
* PR #2516: Support general reductions in ParallelAccelerator

ParallelAccelerator fixes:

* PR #2540: Fix bug #2537
* PR #2566: Fix issue #2564.
* PR #2599: Fix nested multi-dimensional parfor type inference issue
* PR #2604: Fixes for stencil tests and cmath sin().
* PR #2605: Fixes issue #2603.

Additional features of note:

This release of Numba (and llvmlite) is updated to use LLVM version 5.0 as the compiler back end. The main change to Numba to support this was the addition of a custom symbol tracker to avoid the calls to LLVM's `ExecutionEngine` that were crashing when asking for non-existent symbol addresses. Further, the conda packages for this release of Numba are built using conda-build version 3 and the new compilers/recipe grammar that are present in that release.

* PR #2568: Update for LLVM 5
* PR #2607: Fixes abort when getting address to "nrt_unresolved_abort"
* PR #2615: Working towards conda build 3

Thanks to community feedback and bug reports, the following fixes were also made.

Misc fixes/enhancements:

* PR #2534: Add tuple support to np.take.
* PR #2551: Rebranding fix
* PR #2552: relative doc links
* PR #2570: Fix issue #2561, handle missing successor on loop exit
* PR #2588: Fix #2555. Disable libpython.so linking on linux
* PR #2601: Update llvmlite version dependency.
* PR #2608: Fix potential cache file collision
* PR #2612: Fix NRT test failure due to increased overhead when running in coverage
* PR #2619: Fix dubious pthread_cond_signal not in lock
* PR #2622: Fix `np.nanmedian` for all NaN case.
* PR #2633: Fix markdown in CONTRIBUTING.md
* PR #2635: Make the dependency on compilers for AOT optional.

CUDA support fixes:

* PR #2523: Fix invalid cuda context in memory transfer calls in another thread
* PR #2575: Use CPU to initialize xoroshiro states for GPU RNG. Fixes #2573
* PR #2581: Fix cuda gufunc mishandling of scalar arg as array and out argument

Version 0.35.0
--------------

This release includes some exciting new features as part of the work performed in partnership with Intel on ParallelAccelerator technology. There are also some additions made to Numpy support and small but significant fixes made as a result of considerable effort spent chasing bugs and implementing stability improvements.

ParallelAccelerator:

NOTE: The ParallelAccelerator technology is under active development and should be considered experimental.

New features relating to ParallelAccelerator, from work undertaken with Intel, include support for a larger range of `np.random` functions in `parallel` mode, printing Numpy arrays in nopython mode, the capacity to initialize Numpy arrays directly from list comprehensions, and the axis argument to `.sum()`. Documentation on the ParallelAccelerator technology implementation has also been added.
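A minimal sketch combining several of these capabilities (the function and variable names are illustrative, and the exact combination shown is an assumption rather than an example from this release)::

    import numpy as np
    from numba import njit

    @njit(parallel=True)
    def demo(n):
        # initialize a NumPy array directly from a list comprehension
        a = np.array([0.5 * i for i in range(n * n)]).reshape(n, n)
        print(a)              # printing a NumPy array in nopython mode
        return a.sum(axis=0)  # the new axis argument to .sum()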
Further, a large amount of work on equivalence relations was undertaken to enable runtime checks of broadcasting behaviours in parallel mode.

ParallelAccelerator features:

* PR #2400: Array comprehension
* PR #2405: Support printing Numpy arrays
* PR #2438: Support more np.random functions in ParallelAccelerator
* PR #2482: Support for sum with axis in nopython mode.
* PR #2487: Adding developer documentation for ParallelAccelerator technology.
* PR #2492: Core PA refactor adds assertions for broadcast semantics

ParallelAccelerator fixes:

* PR #2478: Rename cfg before parfor translation (#2477)
* PR #2479: Fix broken array comprehension tests on unsupported platforms
* PR #2484: Fix array comprehension test on win64
* PR #2506: Fix for 32-bit machines.

Additional features of note:

Support for `np.take`, `np.finfo`, `np.iinfo` and `np.MachAr` in nopython mode is added. Further, three new environment variables are added: two for overriding the CPU target/features and another to warn if `parallel=True` was set but no such transform was possible.

* PR #2490: Implement np.take and ndarray.take
* PR #2493: Display a warning if parallel=True is set but not possible.
* PR #2513: Add np.MachAr, np.finfo, np.iinfo
* PR #2515: Allow environ overriding of cpu target and cpu features.

Due to expansion of the test farm and a focus on fixing bugs, the following fixes were also made.

Misc fixes/enhancements:

* PR #2455: add contextual information to runtime errors
* PR #2470: Fixes #2458, poor performance in np.median
* PR #2471: Ensure LLVM threadsafety in {g,}ufunc building.
* PR #2494: Update doc theme
* PR #2503: Remove hacky code added in 2482 and feature enhancement
* PR #2505: Serialise env mutation tests during multithreaded testing.
* PR #2520: Fix failing cpu-target override tests

CUDA support fixes:

* PR #2504: Enable CUDA toolkit version testing
* PR #2509: Disable tests generating code unavailable in lower CC versions.
* PR #2511: Fix Windows 64 bit CUDA tests.

Version 0.34.0
--------------

This release adds a significant set of new features arising from combined work with Intel on ParallelAccelerator technology. It also adds list comprehension and closure support, support for Numpy 1.13 and a new, faster, CUDA reduction algorithm. For Linux users this release is the first to be built on Centos 6, which will be the new base platform for future releases. Finally, a number of thread-safety and type inference issues have been fixed, along with other smaller enhancements.

ParallelAccelerator features:

NOTE: The ParallelAccelerator technology is under active development and should be considered experimental.

The ParallelAccelerator technology is accessed via a new "nopython" mode option "parallel". The ParallelAccelerator technology attempts to identify operations which have parallel semantics (for instance adding a scalar to a vector), fuse together adjacent such operations, and then parallelize their execution across a number of CPU cores. This is essentially auto-parallelization.

In addition to the auto-parallelization feature, explicit loop-based parallelism is made available through the use of `prange` in place of `range` as a loop iterator. More information and examples on both auto-parallelization and `prange` are available in the documentation and examples directory respectively.
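For instance, a minimal sketch of an explicitly parallel loop using `prange` (the function name and reduction are illustrative)::

    from numba import njit, prange

    @njit(parallel=True)
    def total(a):
        s = 0.0
        # iterations may execute across multiple CPU cores; the += reduction
        # on s is recognised and handled by ParallelAccelerator
        for i in prange(a.shape[0]):
            s += a[i]
        return s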
As part of the necessary work for ParallelAccelerator, support for closures and list comprehensions is added:

* PR #2318: Transfer ParallelAccelerator technology to Numba
* PR #2379: ParallelAccelerator Core Improvements
* PR #2367: Add support for len(range(...))
* PR #2369: List comprehension
* PR #2391: Explicit Parallel Loop Support (prange)

The ParallelAccelerator features are available on all supported platforms and Python versions with the exceptions of (with a view to supporting these in a future release):

* The combination of Windows operating systems with Python 2.7.
* Systems running 32-bit Python.

CUDA support enhancements:

* PR #2377: New GPU reduction algorithm

CUDA support fixes:

* PR #2397: Fix #2393, always set alignment of cuda static memory regions

Misc Fixes:

* PR #2373, Issue #2372: 32-bit compatibility fix for parfor related code
* PR #2376: Fix #2375 missing stdint.h for py2.7 vc9
* PR #2378: Fix deadlock in parallel gufunc when kernel acquires the GIL.
* PR #2382: Forbid unsafe casting in bitwise operation
* PR #2385: docs: fix Sphinx errors
* PR #2396: Use 64-bit RHS operand for shift
* PR #2404: Fix threadsafety logic issue in ufunc compilation cache.
* PR #2424: Ensure consistent iteration order of blocks for type inference.
* PR #2425: Guard code to prevent the use of 'parallel' on win32 + py27
* PR #2426: Basic test for Enum member type recovery.
* PR #2433: Fix up the parfors tests with respect to windows py2.7
* PR #2442: Skip tests that need BLAS/LAPACK if scipy is not available.
* PR #2444: Add test for invalid array setitem
* PR #2449: Make the runtime initialiser threadsafe
* PR #2452: Skip CFG test on 64bit windows

Misc Enhancements:

* PR #2366: Improvements to IR utils
* PR #2388: Update README.rst to indicate the proper version of LLVM
* PR #2394: Upgrade to llvmlite 0.19.*
* PR #2395: Update llvmlite version to 0.19
* PR #2406: Expose environment object to ufuncs
* PR #2407: Expose environment object to target-context inside lowerer
* PR #2413: Add flags to pass through to conda build for buildbot
* PR #2414: Add cross compile flags to local recipe
* PR #2415: A few cleanups for rewrites
* PR #2418: Add getitem support for Enum classes
* PR #2419: Add support for returning enums in vectorize
* PR #2421: Add copyright notice for Intel contributed files.
* PR #2422: Patch code base to work with np 1.13 release
* PR #2448: Adds in warning message when using 'parallel' if cache=True
* PR #2450: Add test for keyword arg on .sum-like and .cumsum-like array methods

Version 0.33.0
--------------

This release resolved several performance issues caused by atomic reference counting operations inside loop bodies. New optimization passes have been added to reduce the impact of these operations. We observe speed improvements between 2x and 10x in affected programs due to the removal of unnecessary reference counting operations.

There are also several enhancements to the CUDA GPU support:

* A GPU random number generator based on the xoroshiro128+ algorithm is added. See details and examples in the documentation.
* ``@cuda.jit`` CUDA kernels can now call ``@jit`` and ``@njit`` CPU functions and they will automatically be compiled as CUDA device functions.
* CUDA IPC memory API is exposed for sharing memory between processes. See usage details in the documentation.

Reference counting enhancements:

* PR #2346, Issue #2345, #2248: Add extra refcount pruning after inlining
* PR #2349: Fix refct pruning not removing refct op with tail call.
* PR #2352, Issue #2350: Add refcount pruning pass for function that does not need refcount

CUDA support enhancements:

* PR #2023: Supports CUDA IPC for device array
* PR #2343, Issue #2335: Allow CPU jit decorated function to be used as cuda device function
* PR #2347: Add random number generator support for CUDA device code
* PR #2361: Update autotune table for CC: 5.3, 6.0, 6.1, 6.2

Misc fixes:

* PR #2362: Avoid test failure due to typing to int32 on 32-bit platforms
* PR #2359: Fixed nogil example that threw a TypeError when executed.
* PR #2357, Issue #2356: Fix fragile test that depends on how the script is executed.
* PR #2355: Fix cpu dispatcher referenced as attribute of another module
* PR #2354: Fixes an issue with caching when function needs NRT and refcount pruning
* PR #2342, Issue #2339: Add warnings to inspection when it is used on unserialized cached code
* PR #2329, Issue #2250: Better handling of missing op codes

Misc enhancements:

* PR #2360: Adds missing values in error message interp.
* PR #2353: Handle when get_host_cpu_features() raises RuntimeError
* PR #2351: Enable SVML for erf/erfc/gamma/lgamma/log2
* PR #2344: Expose error_model setting in jit decorator
* PR #2337: Align blocking terminate support for fork() with new TBB version
* PR #2336: Bump llvmlite version to 0.18
* PR #2330: Core changes in PR #2318

Version 0.32.0
--------------

In this release, we are upgrading to LLVM 4.0. A lot of work has been done to fix many race-condition issues inside LLVM when the compiler is used concurrently, which is likely when Numba is used with Dask.

Improvements:

* PR #2322: Suppress test error due to unknown but consistent error with tgamma
* PR #2320: Update llvmlite dependency to 0.17
* PR #2308: Add details to error message on why cuda support is disabled.
* PR #2302: Add os x to travis
* PR #2294: Disable remove_module on MCJIT due to memory leak inside LLVM
* PR #2291: Split parallel tests and recycle workers to tame memory usage
* PR #2253: Remove the pointer-stuffing hack for storing meminfos in lists

Fixes:

* PR #2331: Fix a bug in the GPU array indexing
* PR #2326: Fix #2321 docs referring to non-existing function.
* PR #2316: Fixing more race-condition problems
* PR #2315: Fix #2314. Relax strict type check to allow optional type.
* PR #2310: Fix race condition due to concurrent compilation and cache loading
* PR #2304: Fix intrinsic 1st arg not a typing.Context as stated by the docs.
* PR #2287: Fix int64 atomic min-max
* PR #2286: Fix #2285 `@overload_method` not linking dependent libs
* PR #2303: Missing import statements to interval-example.rst

Version 0.31.0
--------------

In this release, we added preliminary support for debugging with GDB version >= 7.0. The feature is enabled by setting the ``debug=True`` compiler option, which causes GDB-compatible debug info to be generated. The CUDA backend also gained limited debugging support so that source locations are shown in memory-checking and profiling tools. For details, see :ref:`numba-troubleshooting`.

Also, we added the ``fastmath=True`` compiler option to enable unsafe floating-point transformations, which allows LLVM to auto-vectorize more code.

Other important changes include upgrading to LLVM 3.9.1 and adding support for Numpy 1.12.
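A minimal sketch of both new compiler options (the function names are illustrative)::

    from numba import njit

    @njit(debug=True)       # emit GDB-compatible debug info
    def traced(x):
        return x + 1

    @njit(fastmath=True)    # permit unsafe floating-point transformations
    def dot(a, b):
        acc = 0.0
        for i in range(a.shape[0]):
            acc += a[i] * b[i]  # fastmath lets LLVM vectorize this reduction
        return acc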
Improvements:

* PR #2281: Update for numpy1.12
* PR #2278: Add CUDA atomic.{max, min, compare_and_swap}
* PR #2277: Add about section to conda recipes to identify license and other metadata in Anaconda Cloud
* PR #2271: Adopt itanium C++-style mangling for CPU and CUDA targets
* PR #2267: Add fastmath flags
* PR #2261: Support dtype.type
* PR #2249: Changes for llvm3.9
* PR #2234: Bump llvmlite requirement to 0.16 and add install_name_tool_fixer to mviewbuf for OS X
* PR #2230: Add python3.6 to TravisCi
* PR #2227: Enable caching for gufunc wrapper
* PR #2170: Add debugging support
* PR #2037: inspect_cfg() for easier visualization of the function operation

Fixes:

* PR #2274: Fix nvvm ir patch in mishandling "load"
* PR #2272: Fix breakage to cuda7.5
* PR #2269: Fix caching of copy_strides kernel in cuda.reduce
* PR #2265: Fix #2263: error when linking two modules with dynamic globals
* PR #2252: Fix path separator in test
* PR #2246: Fix overuse of memory in some system with fork
* PR #2241: Fix #2240: __module__ in dynamically created function not a str
* PR #2239: Fix fingerprint computation failure preventing fallback

Version 0.30.1
--------------

This is a bug-fix release to enable Python 3.6 support. In addition, there is now early Intel TBB support for parallel ufuncs when building from source with TBBROOT defined. The TBB feature is not enabled in our official builds.

Fixes:

* PR #2232: Fix name clashes with _Py_hashtable_xxx in Python 3.6.

Improvements:

* PR #2217: Add Intel TBB threadpool implementation for parallel ufunc.

Version 0.30.0
--------------

This release adds preliminary support for Python 3.6, but no official build is available yet. A new system reporting tool (``numba --sysinfo``) is added to provide system information to help core developers in replication and debugging. See below for other improvements and bug fixes.

Improvements:

* PR #2209: Support Python 3.6.
* PR #2175: Support ``np.trace()``, ``np.outer()`` and ``np.kron()``.
* PR #2197: Support ``np.nanprod()``.
* PR #2190: Support caching for ufunc.
* PR #2186: Add system reporting tool.

Fixes:

* PR #2214, Issue #2212: Fix memory error with ndenumerate and flat iterators.
* PR #2206, Issue #2163: Fix ``zip()`` consuming extra elements in early exhaustion.
* PR #2185, Issue #2159, #2169: Fix rewrite pass affecting objmode fallback.
* PR #2204, Issue #2178: Fix annotation for liftedloop.
* PR #2203: Fix Appveyor segfault with Python 3.5.
* PR #2202, Issue #2198: Fix target context not initialized when loading from ufunc cache.
* PR #2172, Issue #2171: Fix optional type unpacking.
* PR #2189, Issue #2188: Disable freezing of big (>1MB) global arrays.
* PR #2180, Issue #2179: Fix invalid variable version in looplifting.
* PR #2156, Issue #2155: Fix divmod, floordiv segfault on CUDA.

Version 0.29.0
--------------

This release extends the support of recursive functions to include direct and indirect recursion without explicit function type annotations. See the new example in `examples/mergesort.py`. Newly supported numpy features include array stacking functions, np.linalg.eig* functions, np.linalg.matrix_power, np.roots and array-to-array broadcasting in assignments.

This release depends on llvmlite 0.14.0 and supports CUDA 8, though CUDA is not required.

Improvements:

* PR #2130, #2137: Add type-inferred recursion with docs and examples.
* PR #2134: Add ``np.linalg.matrix_power``.
* PR #2125: Add ``np.roots``.
* PR #2129: Add ``np.linalg.{eigvals,eigh,eigvalsh}``.
* PR #2126: Add array-to-array broadcasting.
* PR #2069: Add hstack and related functions.
* PR #2128: Allow for vectorizing a jitted function. (thanks to @dhirschfeld)
* PR #2117: Update examples and make them test-able.
* PR #2127: Refactor interpreter class and its results.

Fixes:

* PR #2149: Workaround MSVC9.0 SP1 fmod bug kb982107.
* PR #2145, Issue #2009: Fixes kwargs for jitclass ``__init__`` method.
* PR #2150: Fix slowdown in objmode fallback.
* PR #2050, Issue #1259: Fix liveness problem with some generator loops.
* PR #2072, Issue #1995: Right shift of unsigned LHS should be logical.
* PR #2115, Issue #1466: Fix inspect_types() error due to mangled variable name.
* PR #2119, Issue #2118: Fix array type created from record-dtype.
* PR #2122, Issue #1808: Fix returning a generator due to datamodel error.

Version 0.28.1
--------------

This is a bug-fix release to resolve packaging issues with the setuptools dependency.

Version 0.28.0
--------------

Amongst other improvements, this version again improves the level of support for linear algebra -- functions from the :mod:`numpy.linalg` module. Also, our random generator is now guaranteed to be thread-safe and fork-safe.

Improvements:

* PR #2019: Add the ``@intrinsic`` decorator to define low-level subroutines callable from JIT functions (this is considered a private API for now).
* PR #2059: Implement ``np.concatenate`` and ``np.stack``.
* PR #2048: Make random generation fork-safe and thread-safe, producing independent streams of random numbers for each thread or process.
* PR #2031: Add documentation of floating-point pitfalls.
* Issue #2053: Avoid polling in parallel CPU target (fixes severe performance regression on Windows).
* Issue #2029: Make default arguments fast.
* PR #2052: Add logging to the CUDA driver.
* PR #2049: Implement the built-in ``divmod()`` function.
* PR #2036: Implement the ``argsort()`` method on arrays.
* PR #2046: Improving CUDA memory management by deferring deallocations until certain thresholds are reached, so as to avoid breaking asynchronous execution.
* PR #2040: Switch the CUDA driver implementation to use CUDA's "primary context" API.
* PR #2017: Allow ``min(tuple)`` and ``max(tuple)``.
* PR #2039: Reduce fork() detection overhead in CUDA.
* PR #2021: Handle structured dtypes with titles.
* PR #1996: Rewrite looplifting as a transformation on Numba IR.
* PR #2014: Implement ``np.linalg.matrix_rank``.
* PR #2012: Implement ``np.linalg.cond``.
* PR #1985: Rewrite even trivial array expressions, which opens the door for other optimizations (for example, ``array ** 2`` can be converted into ``array * array``).
* PR #1950: Have ``typeof()`` always raise ValueError on failure. Previously, it would either raise or return None, depending on the input.
* PR #1994: Implement ``np.linalg.norm``.
* PR #1987: Implement ``np.linalg.det`` and ``np.linalg.slogdet``.
* Issue #1979: Document integer width inference and how to work around it.
* PR #1938: Numba is now compatible with LLVM 3.8.
* PR #1967: Restrict ``np.linalg`` functions to homogeneous dtypes. Users wanting to pass mixed-typed inputs have to convert explicitly, which makes the performance implications more obvious.

Fixes:

* PR #2006: ``array(float32) ** int`` should return ``array(float32)``.
* PR #2044: Allow reshaping empty arrays.
* Issue #2051: Fix refcounting issue when concatenating tuples.
* Issue #2000: Make Numpy optional for setup.py, to allow ``pip install`` to work without Numpy pre-installed.
* PR #1989: Fix assertion in ``Dispatcher.disable_compile()``.
* Issue #2028: Ignore filesystem errors when caching from multiple processes.
* Issue #2003: Allow unicode variable and function names (on Python 3).
* Issue #1998: Fix deadlock in parallel ufuncs that reacquire the GIL.
* PR #1997: Fix random crashes when AOT compiling on certain Windows platforms.
* Issue #1988: Propagate jitclass docstring.
* Issue #1933: Ensure array constants are emitted with the right alignment.

Version 0.27.0
--------------

Improvements:

* Issue #1976: Improve error message when non-integral dimensions are given to a CUDA kernel.
* PR #1970: Optimize the power operator with a static exponent.
* PR #1710: Improve contextual information for compiler errors.
* PR #1961: Support printing constant strings.
* PR #1959: Support more types in the print() function.
* PR #1823: Support ``compute_50`` in CUDA backend.
* PR #1955: Support ``np.linalg.pinv``.
* PR #1896: Improve the ``SmartArray`` API.
* PR #1947: Support ``np.linalg.solve``.
* Issue #1943: Improve error message when an argument fails typing.
* PR #1927: Support ``np.linalg.lstsq``.
* PR #1934: Use system functions for hypot() where possible, instead of our own implementation.
* PR #1929: Add cffi support to ``@cfunc`` objects.
* PR #1932: Add user-controllable thread pool limits for parallel CPU target.
* PR #1928: Support self-recursion when the signature is explicit.
* PR #1890: List all lowering implementations in the developer docs.
* Issue #1884: Support ``np.lib.stride_tricks.as_strided()``.

Fixes:

* Issue #1960: Fix sliced assignment when source and destination areas are overlapping.
* PR #1963: Make CUDA print() atomic.
* PR #1956: Allow 0d array constants.
* Issue #1945: Allow using Numpy ufuncs in AOT compiled code.
* Issue #1916: Fix documentation example for ``@generated_jit``.
* Issue #1926: Fix regression when caching functions in an IPython session.
* Issue #1923: Allow non-intp integer arguments to carray() and farray().
* Issue #1908: Accept non-ASCII unicode docstrings on Python 2.
* Issue #1874: Allow ``del container[key]`` in object mode.
* Issue #1913: Fix set insertion bug when the lookup chain contains deleted entries.
* Issue #1911: Allow function annotations on jitclass methods.

Version 0.26.0
--------------

This release adds support for the ``cfunc`` decorator for exporting Numba-jitted functions to third-party APIs that take C callbacks (a short sketch appears at the end of this section). Most of the overhead of using jitclasses inside the interpreter is eliminated. Support for decompositions in ``numpy.linalg`` is added. Finally, Numpy 1.11 is supported.

Improvements:

* PR #1889: Export BLAS and LAPACK wrappers for pycc.
* PR #1888: Faster array power.
* Issue #1867: Allow "out" keyword arg for dufuncs.
* PR #1871: ``carray()`` and ``farray()`` for creating arrays from pointers.
* PR #1855: ``@cfunc`` decorator for exporting as ctypes function.
* PR #1862: Add support for ``numpy.linalg.qr``.
* PR #1851: jitclass support for '_' and '__' prefixed attributes.
* PR #1842: Optimize jitclass in Python interpreter.
* Issue #1837: Fix CUDA simulator issues with device function.
* PR #1839: Add support for decompositions from ``numpy.linalg``.
* PR #1829: Support Python enums.
* PR #1828: Add support for ``numpy.random.rand()`` and ``numpy.random.randn()``
* Issue #1825: Use of 0-darray in place of scalar index.
* Issue #1824: Scalar arguments to object mode gufuncs.
* Issue #1813: Let bitwise bool operators return booleans, not integers.
* Issue #1760: Optional arguments in generators.
* PR #1780: Numpy 1.11 support.
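As a minimal sketch of the ``cfunc`` highlight above (the callback signature and names are illustrative)::

    from numba import cfunc

    @cfunc("float64(float64, float64)")
    def add(x, y):
        return x + y

    # add.ctypes is a ready-made ctypes callable, and add.address holds the
    # raw function pointer for APIs that expect a C callback.
    print(add.ctypes(1.0, 2.5))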
Version 0.25.0
--------------

This release adds support for ``set`` objects in nopython mode. It also adds support for many missing Numpy features and functions. It improves Numba's compatibility and performance when using a distributed execution framework such as dask, distributed or Spark. Finally, it removes compatibility with Python 2.6, Python 3.3 and Numpy 1.6.

Improvements:

* Issue #1800: Add erf(), erfc(), gamma() and lgamma() to CUDA targets.
* PR #1793: Implement more Numpy functions: np.bincount(), np.diff(), np.digitize(), np.histogram(), np.searchsorted() as well as NaN-aware reduction functions (np.nansum(), np.nanmedian(), etc.)
* PR #1789: Optimize some reduction functions such as np.sum(), np.prod(), np.median(), etc.
* PR #1752: Make CUDA features work in dask, distributed and Spark.
* PR #1787: Support np.nditer() for fast multi-array indexing with broadcasting.
* PR #1799: Report JIT-compiled functions as regular Python functions when profiling (allowing one to see the filename and line number where a function is defined).
* PR #1782: Support np.any() and np.all().
* Issue #1788: Support the iter() and next() built-in functions.
* PR #1778: Support array.astype().
* Issue #1775: Allow the user to set the target CPU model for AOT compilation.
* PR #1758: Support creating random arrays using the ``size`` parameter to the np.random APIs.
* PR #1757: Support len() on array.flat objects.
* PR #1749: Remove Numpy 1.6 compatibility.
* PR #1748: Remove Python 2.6 and 3.3 compatibility.
* PR #1735: Support the ``not in`` operator as well as operator.contains().
* PR #1724: Support homogeneous sets in nopython mode.
* Issue #875: Make compilation of array constants faster.

Fixes:

* PR #1795: Fix a massive performance issue when calling Numba functions with distributed, Spark or a similar mechanism using serialization.
* Issue #1784: Make jitclasses usable with NUMBA_DISABLE_JIT=1.
* Issue #1786: Allow using linear algebra functions when profiling.
* Issue #1796: Fix np.dot() memory leak on non-contiguous inputs.
* PR #1792: Fix static negative indexing of tuples.
* Issue #1771: Use fallback cache directory when __pycache__ isn't writable, such as when user code is installed in a system location.
* Issue #1223: Use Numpy error model in array expressions (e.g. division by zero returns ``inf`` or ``nan`` instead of raising an error).
* Issue #1640: Fix np.random.binomial() for large n values.
* Issue #1643: Improve error reporting when passing an invalid spec to ``@jitclass``.
* PR #1756: Fix slicing with a negative step and an omitted start.

Version 0.24.0
--------------

This release introduces several major changes, including the ``@generated_jit`` decorator for flexible specializations, as with Julia's "``@generated``" macro, and the SmartArray array wrapper type that allows seamless transfer of array data between the CPU and the GPU.

This will be the last version to support Python 2.6, Python 3.3 and Numpy 1.6.

Improvements:

* PR #1723: Improve compatibility of JIT functions with the Python profiler.
* PR #1509: Support array.ravel() and array.flatten().
* PR #1676: Add SmartArray type to support transparent data management in multiple address spaces (host & GPU).
* PR #1689: Reduce startup overhead of importing Numba.
* PR #1705: Support registration of CFFI types as corresponding to known Numba types.
* PR #1686: Document the extension API.
* PR #1698: Improve warnings raised during type inference.
* PR #1697: Support np.dot() and friends on non-contiguous arrays.
* PR #1692: cffi.from_buffer() improvements (allow more pointer types, allow non-Numpy buffer objects).
* PR #1648: Add the ``@generated_jit`` decorator.
* PR #1651: Implementation of np.linalg.inv using LAPACK. Thanks to Matthieu Dartiailh.
* PR #1674: Support np.diag().
* PR #1673: Improve error message when looking up an attribute on an unknown global.
* Issue #1569: Implement runtime check for the LLVM locale bug.
* PR #1612: Switch to LLVM 3.7 in sync with llvmlite.
* PR #1624: Allow slice assignment of sequence to array.
* PR #1622: Support slicing tuples with a constant slice.

Fixes:

* Issue #1722: Fix returning an optional boolean (bool or None).
* Issue #1734: NRT decref bug when variable is del'ed before being defined, leading to a possible memory leak.
* PR #1732: Fix tuple getitem regression for CUDA target.
* PR #1718: Mishandling of optional to optional casting.
* PR #1714: Fix .compile() on a JIT function not respecting ._can_compile.
* Issue #1667: Fix np.angle() on arrays.
* Issue #1690: Fix slicing with an omitted stop and a negative step value.
* PR #1693: Fix gufunc bug in handling scalar formal arg with non-scalar input value.
* PR #1683: Fix parallel testing under Windows.
* Issue #1616: Use system-provided versions of C99 math where possible.
* Issue #1652: Reductions of bool arrays (e.g. sum() or mean()) should return integers or floats, not bools.
* Issue #1664: Fix regression when indexing a record array with a constant index.
* PR #1661: Disable AVX on old Linux kernels.
* Issue #1636: Allow raising an exception looked up on a module.

Version 0.23.1
--------------

This is a bug-fix release to address several regressions introduced in the 0.23.0 release, and a couple of other issues.

Fixes:

* Issue #1645: CUDA ufuncs were broken in 0.23.0.
* Issue #1638: Check tuple sizes when passing a list of tuples.
* Issue #1630: Parallel ufunc would keep eating CPU even after finishing under Windows.
* Issue #1628: Fix ctypes and cffi tests under Windows with Python 3.5.
* Issue #1627: Fix xrange() support.
* PR #1611: Rewrite variable liveness analysis.
* Issue #1610: Allow nested calls between explicitly-typed ufuncs.
* Issue #1593: Fix `*args` in object mode.

Version 0.23.0
--------------

This release introduces JIT classes using the new ``@jitclass`` decorator, allowing user-defined structures for nopython mode. Other improvements and bug fixes are listed below.

Improvements:

* PR #1609: Speed up some simple math functions by inlining them in their caller
* PR #1571: Implement JIT classes
* PR #1584: Improve typing of array indexing
* PR #1583: Allow printing booleans
* PR #1542: Allow negative values in np.reshape()
* PR #1560: Support vector and matrix dot product, including ``np.dot()`` and the ``@`` operator in Python 3.5
* PR #1546: Support field lookup on record arrays and scalars (i.e. ``array['field']`` in addition to ``array.field``)
* PR #1440: Support the HSA wavebarrier() and activelanepermute_wavewidth() intrinsics
* PR #1540: Support np.angle()
* PR #1543: Implement CPU multithreaded gufuncs (target="parallel")
* PR #1551: Allow scalar arguments in np.where(), np.empty_like().
* PR #1516: Add some more examples from NumbaPro
* PR #1517: Support np.sinc()

Fixes:

* Issue #1603: Fix calling a non-cached function from a cached function
* Issue #1594: Ensure a list is homogeneous when unboxing
* Issue #1595: Replace deprecated use of get_pointer_to_function()
* Issue #1586: Allow tests to be run by different users on the same machine
* Issue #1587: Make CudaAPIError picklable
* Issue #1568: Fix using Numba from inside Visual Studio 2015
* Issue #1559: Fix serializing a jit function referring a renamed module
* PR #1508: Let reshape() accept integer argument(s), not just a tuple
* Issue #1545: Improve error checking when unboxing list objects
* Issue #1538: Fix array broadcasting in CUDA gufuncs
* Issue #1526: Fix a reference count handling bug

Version 0.22.1
--------------

This is a bug-fix release to resolve some packaging issues and other problems found in the 0.22.0 release.

Fixes:

* PR #1515: Include MANIFEST.in in MANIFEST.in so that sdist still works from source tar files.
* PR #1518: Fix reference counting bug caused by hidden alias
* PR #1519: Fix erroneous assert when passing nopython=True to guvectorize.
* PR #1521: Fix cuda.test()

Version 0.22.0
--------------

This release features several highlights: Python 3.5 support, Numpy 1.10 support, Ahead-of-Time compilation of extension modules, additional vectorization features that were previously only available with the proprietary extension NumbaPro, and improvements in array indexing.

Improvements:

* PR #1497: Allow scalar input type instead of size-1 array to @guvectorize
* PR #1480: Add distutils support for AOT compilation
* PR #1460: Create a new API for Ahead-of-Time (AOT) compilation
* PR #1451: Allow passing Python lists to JIT-compiled functions, and reflect mutations on function return
* PR #1387: Numpy 1.10 support
* PR #1464: Support cffi.FFI.from_buffer()
* PR #1437: Propagate errors raised from Numba-compiled ufuncs; also, let "division by zero" and other math errors produce a warning instead of exiting the function early
* PR #1445: Support a subset of fancy indexing
* PR #1454: Support "out-of-line" CFFI modules
* PR #1442: Improve array indexing to support more kinds of basic slicing
* PR #1409: Support explicit CUDA memory fences
* PR #1435: Add support for vectorize() and guvectorize() with HSA
* PR #1432: Implement numpy.nonzero() and numpy.where()
* PR #1416: Add support for vectorize() and guvectorize() with CUDA, as originally provided in NumbaPro
* PR #1424: Support in-place array operators
* PR #1414: Python 3.5 support
* PR #1404: Add the parallel ufunc functionality originally provided in NumbaPro
* PR #1393: Implement sorting on arrays and lists
* PR #1415: Add functions to estimate the occupancy of a CUDA kernel
* PR #1360: The JIT cache now stores the compiled object code, yielding even larger speedups.
* PR #1402: Fixes for the ARMv7 (armv7l) architecture under Linux
* PR #1400: Add the cuda.reduce() decorator originally provided in NumbaPro

Fixes:

* PR #1483: Allow np.empty_like() and friends on non-contiguous arrays
* Issue #1471: Allow caching JIT functions defined in IPython
* PR #1457: Fix flat indexing of boolean arrays
* PR #1421: Allow calling Numpy ufuncs, without an explicit output, on non-contiguous arrays
* Issue #1411: Fix crash when unpacking a tuple containing a Numba-allocated array
* Issue #1394: Allow unifying range_state32 and range_state64
* Issue #1373: Fix code generation error on lists of bools

Version 0.21.0
--------------

This release introduces support for AMD's Heterogeneous System Architecture, which allows memory to be shared directly between the CPU and the GPU. Other major enhancements are support for lists and the introduction of an opt-in compilation cache.

Improvements:

* PR #1391: Implement print() for CUDA code
* PR #1366: Implement integer typing enhancement proposal (NBEP 1)
* PR #1380: Support the one-argument type() builtin
* PR #1375: Allow boolean evaluation of lists and tuples
* PR #1371: Support array.view() in CUDA mode
* PR #1369: Support named tuples in nopython mode
* PR #1250: Implement numpy.median().
* PR #1289: Make dispatching faster when calling a JIT-compiled function from regular Python
* Issue #1226: Improve performance of integer power
* PR #1321: Document features supported with CUDA
* PR #1345: HSA support
* PR #1343: Support lists in nopython mode
* PR #1356: Make Numba-allocated memory visible to tracemalloc
* PR #1363: Add an environment variable NUMBA_DEBUG_TYPEINFER
* PR #1051: Add an opt-in, per-function compilation cache

Fixes:

* Issue #1372: Some array expressions would fail rewriting when they involved the same variable more than once, or a unary operator
* Issue #1385: Allow CUDA local arrays to be declared anywhere in a function
* Issue #1285: Support datetime64 and timedelta64 in Numpy reduction functions
* Issue #1332: Handle the EXTENDED_ARG opcode.
* PR #1329: Handle the ``in`` operator in object mode
* Issue #1322: Fix augmented slice assignment on Python 2
* PR #1357: Fix slicing with some negative bounds or step values.

Version 0.20.0
--------------

This release updates Numba to use LLVM 3.6 and CUDA 7 for CUDA support. Following the platform deprecation in CUDA 7, Numba's CUDA feature is no longer supported on 32-bit platforms. The oldest supported version of Windows is Windows 7.
Improvements:

* Issue #1203: Support indexing ndarray.flat
* PR #1200: Migrate cgutils to llvmlite
* PR #1190: Support more array methods: .transpose(), .T, .copy(), .reshape(), .view()
* PR #1214: Simplify setup.py and avoid manual maintenance
* PR #1217: Support datetime64 and timedelta64 constants
* PR #1236: Reload environment variables when compiling
* PR #1225: Various speed improvements in generated code
* PR #1252: Support cmath module in CUDA
* PR #1238: Use 32-byte aligned allocator to optimize for AVX
* PR #1258: Support numpy.frombuffer()
* PR #1274: Use TravisCI container infrastructure for lower wait time
* PR #1279: Micro-optimize overload resolution in call dispatch
* Issue #1248: Improve error message when return type unification fails

Fixes:

* Issue #1131: Handling of negative zeros in np.conjugate() and np.arccos()
* Issue #1188: Fix slow array return
* Issue #1164: Avoid warnings from CUDA context at shutdown
* Issue #1229: Respect the writeable flag in arrays
* Issue #1244: Fix bug in refcount pruning pass
* Issue #1251: Fix partial left-indexing of Fortran contiguous array
* Issue #1264: Fix compilation error in array expression
* Issue #1254: Fix error when yielding array objects
* Issue #1276: Fix nested generator use

Version 0.19.2
--------------

This release fixes the source distribution on pypi. The only change is in the setup.py file. We do not plan to provide a conda package, as this release is essentially the same as 0.19.1 for conda users.

Version 0.19.1
--------------

* Issue #1196:

  * fix double-free segfault due to redundant variable deletion in the Numba IR (#1195)
  * fix use-after-delete in array expression rewrite pass

Version 0.19.0
--------------

This version introduces memory management in the Numba runtime, allowing new arrays to be allocated inside Numba-compiled functions. There is also a rework of the ufunc infrastructure, and an optimization pass to collapse cascading array operations into a single efficient loop.

.. warning::
   Support for Windows XP and Vista with all compiler targets and support for 32-bit platforms (Win/Mac/Linux) with the CUDA compiler target are deprecated. In the next release of Numba, the oldest version of Windows supported will be Windows 7. CPU compilation will remain supported on 32-bit Linux and Windows platforms.

Known issues:

* There are some performance regressions in very short running ``nopython`` functions due to the additional overhead incurred by memory management. We will work to reduce this overhead in future releases.

Features:

* Issue #1181: Add a Frequently Asked Questions section to the documentation.
* Issue #1162: Support the ``cumsum()`` and ``cumprod()`` methods on Numpy arrays.
* Issue #1152: Support the ``*args`` argument-passing style.
* Issue #1147: Allow passing character sequences as arguments to JIT-compiled functions.
* Issue #1110: Shortcut deforestation and loop fusion for array expressions.
* Issue #1136: Support various Numpy array constructors, for example numpy.zeros() and numpy.zeros_like().
* Issue #1127: Add a CUDA simulator running on the CPU, enabled with the NUMBA_ENABLE_CUDASIM environment variable.
* Issue #1086: Allow calling standard Numpy ufuncs without an explicit output array from ``nopython`` functions.
* Issue #1113: Support keyword arguments when calling numpy.empty() and related functions.
* Issue #1108: Support the ``ctypes.data`` attribute of Numpy arrays.
* Issue #1077: Memory management for array allocations in ``nopython`` mode.
* Issue #1105: Support calling a ctypes function that takes ctypes.py_object parameters.
* Issue #1084: Environment variable NUMBA_DISABLE_JIT disables compilation of ``@jit`` functions, instead calling into the Python interpreter when called. This allows easier debugging of multiple jitted functions.
* Issue #927: Allow gufuncs with no output array.
* Issue #1097: Support comparisons between tuples.
* Issue #1075: Numba-generated ufuncs can now be called from ``nopython`` functions.
* Issue #1062: ``@vectorize`` now allows omitting the signatures, and will compile the required specializations on the fly (like ``@jit`` does).
* Issue #1027: Support numpy.round().
* Issue #1085: Allow returning a character sequence (as fetched from a structured array) from a JIT-compiled function.

Fixes:

* Issue #1170: Ensure ``ndindex()``, ``ndenumerate()`` and ``ndarray.flat`` work properly inside generators.
* Issue #1151: Disallow unpacking of tuples with the wrong size.
* Issue #1141: Specify install dependencies in setup.py.
* Issue #1106: Loop-lifting would fail when the lifted loop does not produce any output values for the function tail.
* Issue #1103: Fix mishandling of some inputs when a JIT-compiled function is called with multiple array layouts.
* Issue #1089: Fix range() with large unsigned integers.
* Issue #1088: Install entry-point scripts (numba, pycc) from the conda build recipe.
* Issue #1081: Constant structured scalars now work properly.
* Issue #1080: Fix automatic promotion of booleans to integers.

Version 0.18.2
--------------

Bug fixes:

* Issue #1073: Fixes missing template file for HTML annotation
* Issue #1074: Fixes CUDA support on Windows machine due to NVVM API mismatch

Version 0.18.1
--------------

Version 0.18.0 is not officially released.

This version removes the old deprecated and undocumented ``argtypes`` and ``restype`` arguments to the ``@jit`` decorator. Function signatures should always be passed as the first argument to ``@jit``.

Features:

* Issue #960: Add inspect_llvm() and inspect_asm() methods to JIT-compiled functions: they output the LLVM IR and the native assembler source of the compiled function, respectively.
* Issue #990: Allow passing tuples as arguments to JIT-compiled functions in ``nopython`` mode.
* Issue #774: Support two-argument round() in ``nopython`` mode.
* Issue #987: Support missing functions from the math module in nopython mode: frexp(), ldexp(), gamma(), lgamma(), erf(), erfc().
* Issue #995: Improve code generation for round() on Python 3.
* Issue #981: Support functions from the random and numpy.random modules in ``nopython`` mode.
* Issue #979: Add cuda.atomic.max().
* Issue #1006: Improve exception raising and reporting. It is now allowed to raise an exception with an error message in ``nopython`` mode.
* Issue #821: Allow ctypes- and cffi-defined functions as arguments to ``nopython`` functions.
* Issue #901: Allow multiple explicit signatures with ``@jit``. The signatures must be passed in a list, as with ``@vectorize``.
* Issue #884: Better error message when a JIT-compiled function is called with the wrong types.
* Issue #1010: Simpler and faster CUDA argument marshalling thanks to a refactoring of the data model.
* Issue #1018: Support arrays of scalars inside Numpy structured types.
* Issue #808: Reduce Numba import time by half.
* Issue #1021: Support the buffer protocol in ``nopython`` mode.
  Buffer-providing objects, such as ``bytearray``, ``array.array`` or ``memoryview``, support array-like operations such as indexing and iterating. Furthermore, some standard attributes on the ``memoryview`` object are supported.
* Issue #1030: Support nested arrays in Numpy structured arrays.
* Issue #1033: Implement the inspect_types(), inspect_llvm() and inspect_asm() methods for CUDA kernels.
* Issue #1029: Support Numpy structured arrays with CUDA as well.
* Issue #1034: Support for generators in nopython and object mode.
* Issue #1044: Support default argument values when calling Numba-compiled functions.
* Issue #1048: Allow calling Numpy scalar constructors from CUDA functions.
* Issue #1047: Allow indexing a multi-dimensional array with a single integer, to take a view.
* Issue #1050: Support len() on tuples.
* Issue #1011: Revive HTML annotation.

Fixes:

* Issue #977: Assignment optimization was too aggressive.
* Issue #561: One-argument round() now returns an int on Python 3.
* Issue #1001: Fix an unlikely bug where two closures with the same name and id() would compile to the same LLVM function name, despite different closure values.
* Issue #1006: Fix reference leak when a JIT-compiled function is disposed of.
* Issue #1017: Update instructions for CUDA in the README.
* Issue #1008: Generate shorter LLVM type names to avoid segfaults with CUDA.
* Issue #1005: Properly clean up references when raising an exception from object mode.
* Issue #1041: Fix incompatibility between Numba and the third-party library "future".
* Issue #1053: Fix the size attribute of CUDA shared arrays.

Version 0.17.0
--------------

The major focus in this release has been a rewrite of the documentation. The new documentation is better structured and has more detailed coverage of Numba features and APIs. It can be found online at https://numba.pydata.org/numba-doc/dev/index.html

Features:

* Issue #895: LLVM can now inline nested function calls in ``nopython`` mode.
* Issue #863: CUDA kernels can now infer the types of their arguments ("autojit"-like).
* Issue #833: Support numpy.{min,max,argmin,argmax,sum,mean,var,std} in ``nopython`` mode.
* Issue #905: Add a ``nogil`` argument to the ``@jit`` decorator, to release the GIL in ``nopython`` mode.
* Issue #829: Add an ``identity`` argument to ``@vectorize`` and ``@guvectorize``, to set the identity value of the ufunc.
* Issue #843: Allow indexing 0-d arrays with the empty tuple.
* Issue #933: Allow named arguments, not only positional arguments, when calling a Numba-compiled function.
* Issue #902: Support numpy.ndenumerate() in ``nopython`` mode.
* Issue #950: AVX is now enabled by default except on Sandy Bridge and Ivy Bridge CPUs, where it can produce slower code than SSE.
* Issue #956: Support constant arrays of structured type.
* Issue #959: Indexing arrays with floating-point numbers isn't allowed anymore.
* Issue #955: Add support for 3D CUDA grids and thread blocks.
* Issue #902: Support numpy.ndindex() in ``nopython`` mode.
* Issue #951: Numpy number types (``numpy.int8``, etc.) can be used as constructors for type conversion in ``nopython`` mode.

Fixes:

* Issue #889: Fix ``NUMBA_DUMP_ASSEMBLY`` for the CUDA backend.
* Issue #903: Fix calling of stdcall functions with ctypes under Windows.
* Issue #908: Allow lazy-compiling from several threads at once.
* Issue #868: Wrong error message when multiplying a scalar by a non-scalar.
Fixes:

* Issue #889: Fix ``NUMBA_DUMP_ASSEMBLY`` for the CUDA backend.
* Issue #903: Fix calling of stdcall functions with ctypes under Windows.
* Issue #908: Allow lazy-compiling from several threads at once.
* Issue #868: Wrong error message when multiplying a scalar by a non-scalar.
* Issue #917: Allow vectorizing with datetime64 and timedelta64 in the signature (only with unit-less values, though, because of a Numpy limitation).
* Issue #431: Allow overloading of CUDA device functions.
* Issue #917: Print out errors that occurred in object mode ufuncs.
* Issue #923: Numba-compiled ufuncs now inherit the name and doc of the original Python function.
* Issue #928: Fix boolean return value in nested calls.
* Issue #915: ``@jit`` called with an explicit signature with a mismatching type of arguments now raises an error.
* Issue #784: Fix the truth value of NaNs.
* Issue #953: Fix using shared memory in more than one function (kernel or device).
* Issue #970: Fix an uncommon double to uint64 conversion bug on CentOS5 32-bit (C compiler issue).

Version 0.16.0
--------------

This release contains a major refactor to switch from llvmpy to llvmlite as our code generation backend. The switch is necessary to reconcile different compiler requirements for LLVM 3.5 (needs C++11) and Python extensions (need specific compiler versions on Windows). As a bonus, we have found the use of llvmlite speeds up compilation by a factor of 2!

Other Major Changes:

* Faster dispatch for numpy structured arrays
* Optimized array.flat()
* Improved CPU feature selection
* Fix constant tuple regression in macro expansion code

Known Issues:

* AVX code generation is still disabled by default due to performance regressions when operating on misaligned NumPy arrays. We hope to have a workaround in the future.
* In *extremely* rare circumstances, a known issue with LLVM 3.5 code generation can cause an ELF relocation error on 64-bit Linux systems.

Version 0.15.1
--------------

(This was a bug-fix release that superseded version 0.15 before it was announced.)

Fixes:

* Workaround for missing __ftol2 on Windows XP.
* Do not lift loops for compilation that contain break statements.
* Fix a bug in loop-lifting when multiple values need to be returned to the enclosing scope.
* Handle the loop-lifting case where an accumulator needs to be updated when the loop count is zero.

Version 0.15
------------

Features:

* Support for the Python ``cmath`` module. (NumPy complex functions were already supported.)
* Support for ``.real``, ``.imag``, and ``.conjugate()`` on non-complex numbers.
* Add support for ``math.isfinite()`` and ``math.copysign()``.
* Compatibility mode: If enabled (off by default), a failure to compile in object mode will fall back to using the pure Python implementation of the function.
* *Experimental* support for serializing JIT functions with cloudpickle.
* Loop-jitting in object mode now works with loops that modify scalars that are accessed after the loop, such as accumulators.
* ``@vectorize`` functions can be compiled in object mode.
* Numba can now be built using the Visual C++ Compiler for Python 2.7 on Windows platforms.
* CUDA JIT functions can be returned by factory functions with variables in the closure frozen as constants.
* Support for "optional" types in nopython mode, which allow ``None`` to be a valid value.
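A minimal sketch of an optional value (the function and input are hypothetical, shown only for illustration)::

    import numpy as np
    from numba import njit

    @njit
    def first_positive(arr):
        for x in arr:
            if x > 0:
                return x
        return None  # the inferred return type becomes optional

    first_positive(np.array([-1.0, 2.0]))  # 2.0
    first_positive(np.array([-1.0, -2.0]))  # None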
Fixes:

* If nopython mode compilation fails for any reason, automatically fall back to object mode (unless nopython=True is passed to @jit) rather than raise an exception.
* Allow function objects to be returned from a function compiled in object mode.
* Fix a linking problem that caused slower platform math functions (such as ``exp()``) to be used on Windows, leading to performance regressions against NumPy.
* ``min()`` and ``max()`` no longer accept scalar arguments in nopython mode.
* Fix handling of ambiguous type promotion among several compiled versions of a JIT function. The dispatcher will now compile a new version to resolve the problem. (issue #776)
* Fix float32 to uint64 casting bug on 32-bit Linux.
* Fix type inference to allow forced casting of return types.
* Allow the shape of a 1D ``cuda.shared.array`` and ``cuda.local.array`` to be a one-element tuple.
* More correct handling of signed zeros.
* Add custom implementation of ``atan2()`` on Windows to handle special cases properly.
* Eliminated race condition in the handling of the pagelocked staging area used when transferring CUDA arrays.
* Fix non-deterministic type unification leading to varying performance. (issue #797)

Version 0.14
------------

Features:

* Support for nearly all the Numpy math functions (including comparison, logical, bitwise and some previously missing float functions) in nopython mode.
* The Numpy datetime64 and timedelta64 dtypes are supported in nopython mode with Numpy 1.7 and later.
* Support for Numpy math functions on complex numbers in nopython mode.
* ndarray.sum() is supported in nopython mode.
* Better error messages when unsupported types are used in Numpy math functions.
* Set NUMBA_WARNINGS=1 in the environment to see which functions are compiled in object mode vs. nopython mode.
* Add support for the two-argument pow() builtin function in nopython mode.
* New developer documentation describing how Numba works, and how to add new types.
* Support for Numpy record arrays on the GPU. (Note: Improper alignment of dtype fields will cause an exception to be raised.)
* Slices on GPU device arrays.
* GPU objects can be used as Python context managers to select the active device in a block.
* GPU device arrays can be bound to a CUDA stream. All subsequent operations (such as memory copies) will be queued on that stream instead of the default. This can prevent unnecessary synchronization with other streams. (A sketch appears after the fixes list below.)

Fixes:

* Generation of AVX instructions has been disabled to avoid performance bugs when calling external math functions that may use SSE instructions, especially on OS X.
* JIT functions can be removed by the garbage collector when they are no longer accessible.
* Various other reference counting fixes to prevent memory leaks.
* Fixed handling of exceptions when an input argument is out of range.
* Prevent autojit functions from making unsafe numeric conversions when called with different numeric types.
* Fix a compilation error when an unhashable global value is accessed.
* Gracefully handle failure to enable faulthandler in the IPython Notebook.
* Fix a bug that caused loop lifting to fail if the loop was inside an ``else`` block.
* Fixed a problem with selecting CUDA devices in multithreaded programs on Linux.
* The ``pow()`` function (and ``**`` operation) applied to two integers now returns an integer rather than a float.
* Numpy arrays using the object dtype no longer cause an exception in the autojit.
* Attempts to write to a global array will cause compilation to fall back to object mode, rather than attempt and fail at nopython mode.
* ``range()`` works with all negative arguments (ex: ``range(-10, -12, -1)``)
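To illustrate binding a device array to a stream, a minimal sketch (the array size and dtype are hypothetical, and a CUDA-capable GPU is assumed)::

    import numpy as np
    from numba import cuda

    a = np.arange(1024, dtype=np.float32)
    stream = cuda.stream()
    # Transfers that specify the stream are queued on it instead of the
    # default stream, avoiding synchronization with other streams.
    d_a = cuda.to_device(a, stream=stream)
    d_a.copy_to_host(a, stream=stream)
    stream.synchronize()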
Version 0.13.4
--------------

Features:

* Setting and deleting attributes in object mode
* Added documentation of supported and currently unsupported numpy ufuncs
* Assignment to 1-D numpy array slices
* Closure variables and functions can be used in object mode
* All numeric global values in modules can be used as constants in JIT compiled code
* Support for the start argument in enumerate()
* Inplace arithmetic operations (+=, -=, etc.)
* Direct iteration over a 1D numpy array (e.g. "for x in array: ...") in nopython mode

Fixes:

* Support for NVIDIA compute capability 5.0 devices (such as the GTX 750)
* Vectorize no longer crashes/gives an error when bool\_ is used as return type
* Return the correct dictionary when globals() is used in JIT functions
* Fix crash bug when creating dictionary literals in object mode
* Report more informative error message on import if llvmpy is too old
* Temporarily disable pycc --header, which generates incorrect function signatures.

Version 0.13.3
--------------

Features:

* Support for enumerate() and zip() in nopython mode
* Increased LLVM optimization of JIT functions to -O1, enabling automatic vectorization of compiled code in some cases
* Iteration over tuples and unpacking of tuples in nopython mode
* Support for dict and set (Python >= 2.7) literals in object mode

Fixes:

* JIT functions have the same __name__ and __doc__ as the original function.
* Numerous improvements to better match the data types and behavior of Python math functions in JIT compiled code on different platforms.
* Importing Numba will no longer throw an exception if the CUDA driver is present, but cannot be initialized.
* guvectorize now properly supports functions with scalar arguments.
* CUDA driver is lazily initialized

Version 0.13.2
--------------

Features:

* @vectorize ufunc now can generate a SIMD fast path for unit-strided arrays
* Added cuda.gridsize
* Added preliminary exception handling (raise exception class)

Fixes:

* UNARY_POSITIVE
* Handling of closures and dynamically generated functions
* Global None value

Version 0.13.1
--------------

Features:

* Initial support for CUDA array slicing

Fixes:

* Indirectly fixes numbapro when the system has an incompatible CUDA driver
* Fix numba.cuda.detect
* Export numba.intp and numba.intc

Version 0.13
------------

Features:

* Opensourcing NumbaPro CUDA python support in `numba.cuda`
* Add support for ufunc array broadcasting
* Add support for mixed input types for ufuncs
* Add support for returning tuple from jitted function

Fixes:

* Fix store slice bytecode handling for Python2
* Fix inplace subtract
* Fix pycc so that correct header is emitted
* Allow vectorize to work on functions with jit decorator

Version 0.12.2
--------------

Fixes:

* Improved NumPy ufunc support in nopython mode
* Misc bug fixes

Version 0.12.1
--------------

This version fixed many regressions reported by users for the 0.12 release. This release contains a new loop-lifting mechanism that specializes certain loop patterns for nopython mode compilation. This avoids the need to directly support heap allocation and other very dynamic operations.

Improvements:

* Add loop-lifting: JIT-compiling loops in nopython mode inside object mode code. This allows functions to allocate NumPy arrays and use Python objects, while the tight loops in the function can still be compiled in nopython mode. Any arrays that the tight loop uses should be created before the loop is entered.
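A minimal sketch of the kind of code this mechanism targets (the function is hypothetical; in the era of this release, the array allocation forced object mode, while the tight loop could be lifted)::

    import numpy as np
    from numba import jit

    @jit  # compiled in object mode; the loop below can be lifted
    def process(n):
        result = np.empty(n)   # array allocated before the loop
        acc = 0.0
        for i in range(n):     # this tight loop runs in nopython mode
            result[i] = i * 0.5
            acc += result[i]
        return result, acc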
Fixes:

* Add support for the majority of "math" module functions
* Fix for...else handling
* Add support for builtin round()
* Fix ternary if...else support
* Revive "numba" script
* Fix problems with some boolean expressions
* Add support for more NumPy ufuncs

Version 0.12
------------

Version 0.12 contains a big refactor of the compiler. The main objective for this refactor was to simplify the code base to create a better foundation for further work. A secondary objective was to improve the worst case performance to ensure that compiled functions in object mode never run slower than pure Python code (this was a problem in several cases with the old code base). This refactor is still a work in progress and further testing is needed.

Main improvements:

* Major refactor of compiler for performance and maintenance reasons
* Better fallback to object mode when native mode fails
* Improved worst case performance in object mode

The public interface of numba has been slightly changed. The idea is to make it cleaner and more rational:

* The jit decorator has been modified so that it can be called without a signature. When called without a signature, it behaves as the old autojit. Autojit has been deprecated in favour of this approach.
* Jitted functions can now be overloaded.
* Added an "njit" decorator that behaves like the "jit" decorator with nopython=True.
* The numba.vectorize namespace is gone. The vectorize decorator will be in the main numba namespace.
* Added a guvectorize decorator in the main numba namespace. It is similar to numba.vectorize, but takes a dimension signature. It generates gufuncs. This is a replacement for the GUVectorize gufunc factory, which has been deprecated.

Main regressions (will be fixed in a future release):

* Creating new NumPy arrays is not supported in nopython mode
* Returning NumPy arrays is not supported in nopython mode
* NumPy array slicing is not supported in nopython mode
* lists and tuples are not supported in nopython mode
* string, datetime, cdecimal, and struct types are not implemented yet
* Extension types (classes) are not supported in nopython mode
* Closures are not supported
* Raise keyword is not supported
* Recursion is not supported in nopython mode

Version 0.11
------------

* Experimental support for NumPy datetime type

Version 0.10
------------

* Annotation tool (./bin/numba --annotate --fancy) (thanks to Jay Bourque)
* Open sourced prange
* Support for raise statement
* Pluggable array representation
* Support for enumerate and zip (thanks to Eugene Toder)
* Better string formatting support (thanks to Eugene Toder)
* Builtins min(), max() and bool() (thanks to Eugene Toder)
* Fix some code reloading issues (thanks to Björn Linse)
* Recognize NumPy scalar objects (thanks to Björn Linse)

Version 0.9
-----------

* Improved math support
* Open sourced generalized ufuncs
* Improved array expressions

Version 0.8
-----------

* Support for autojit classes
  * Inheritance not yet supported
* Python 3 support for pycc
* Allow retrieval of ctypes function wrapper
  * And hence support retrieval of a pointer to the function
* Fixed a memory leak of array slicing views

Version 0.7.2
-------------

* Official Python 3 support (python 3.2 and 3.3)
* Support for intrinsics and instructions
* Various bug fixes (see https://github.com/numba/numba/issues?milestone=7&state=closed)

Version 0.7.1
-------------

* Various bug fixes

Version 0.7
-----------

* Open sourced single-threaded ufunc vectorizer
* Open sourced NumPy array expression compilation
* Open sourced fast NumPy array slicing
* Experimental Python 3 support
* Support for typed containers
  * typed lists and tuples
* Support for iteration over objects
* Support object comparisons
* Preliminary CFFI support
  * Jit calls to CFFI functions (passed into autojit functions)
  * TODO: Recognize ffi_lib.my_func attributes
* Improved support for ctypes
* Allow declaring extension attribute types through class attributes
* Support for type casting in Python
  * Get the same semantics with or without numba compilation
* Support for recursion
  * For jit methods and extension classes
* Allow jit functions as C callbacks
* Friendlier error reporting
* Internal improvements
* A variety of bug fixes

Version 0.6.1
--------------

* Support for bitwise operations

Version 0.6
--------------

* Python 2.6 support
* Programmable typing
  * Allow users to add type inference for external code
* Better NumPy type inference
  * outer, inner, dot, vdot, tensordot, nonzero, where, binary ufuncs + methods (reduce, accumulate, reduceat, outer)
* Type based alias analysis
  * Support for strict aliasing
* Much faster autojit dispatch when calling from Python
* Faster numerical loops through data and stride pre-loading
* Integral overflow and underflow checking for conversions from objects
* Make Meta dependency optional

Version 0.5
--------------

* SSA-based type inference
  * Allows variable reuse
  * Allow referring to variables before lexical definition
* Support multiple comparisons
* Support for template types
* List comprehensions
* Support for pointers
* Many bug fixes
* Added user documentation

Version 0.4
--------------

Version 0.3.2
--------------

* Add support for object arithmetic (issue 56).
* Bug fixes (issue 55).

Version 0.3
--------------

* Changed default compilation approach to ast
* Added support for cross-module linking
* Added support for closures (can jit inner functions and return them) (see examples/closure.py)
* Added support for dtype structures (can access elements of structure with attribute access) (see examples/structures.py)
* Added support for extension types (numba classes) (see examples/numbaclasses.py)
* Added support for general Python code (use nopython to raise an error if the Python C-API would be used, avoiding unexpected slowness from unimplemented operations falling back to generic Python)
* Fixed many bugs
* Added support to detect math operations.
* Added with python and with nopython contexts
* Added more examples

Many features need to be documented still. Look at examples and tests for more information.

Version 0.2
--------------

* Added an ast approach to compilation
* Removed d, f, i, b from numba namespace (use f8, f4, i4, b1)
* Changed function to autojit2
* Added autojit function to decorate calls to the function and use types of the variable to create compiled versions.
* Changed keyword arguments to jit and autojit functions to restype and argtypes to be consistent with ctypes module.
* Added pycc -- a python to shared library compiler

numba-0.55.1/CONTRIBUTING.md000664 000000 000000 00000004700 14174536160 015125 0ustar00rootroot000000 000000
We welcome people who want to make contributions to Numba, big or small! Even simple documentation improvements are encouraged.

# Asking questions

Numba has a [discourse forum](https://numba.discourse.group/) for longer/more involved questions and an IRC channel on [gitter.im](https://gitter.im/numba/numba) for quick questions and interactive help.
# Ways to help:

There are lots of ways to help improve Numba, some of which require making code changes; see **Contributing patches** below.

## Quick things:

* Answer a question asked on [discourse](https://numba.discourse.group/) or [gitter.im](https://gitter.im/numba/numba).
* Review a page of documentation: check that it makes sense, that it's clear and still relevant, and that the examples are present, good and working. Fix anything that needs updating in a pull request.
* Make a file that is not `flake8` compliant meet the standard; a list of all failing files is in the `exclude` section of the [`.flake8` config](https://github.com/numba/numba/blob/master/.flake8). Then create a pull request with the change.

## More involved things:

* Review a pull request. You don't need to be a compiler engineer to do an initial review of a pull request. It's incredibly helpful to have pull requests go through a review just to make sure the code change is well formed, documented, efficient and clear, and, if the code fixes a bug, that tests are present which demonstrate the fix! Look out for PRs with the [`needs initial review`](https://github.com/numba/numba/labels/needs%20initial%20review) label.
* Work on fixing or implementing something in the code base. There are a lot of [`good first issues`](https://github.com/numba/numba/labels/good%20first%20issue) and [`good second issues`](https://github.com/numba/numba/labels/good%20first%20issue). For implementing new features/functionality, the extension API is the best thing to use; a guide to using `@overload` in particular is [here](https://numba.pydata.org/numba-doc/dev/extending/overloading-guide.html) and the API documentation is [here](https://numba.pydata.org/numba-doc/latest/extending/high-level.html#implementing-functions).

## Contributing patches

Please fork the Numba repository on Github, and create a new branch containing your work. When you are done, open a pull request.

# Further reading

Please read the [contributing guide](https://numba.pydata.org/numba-doc/dev/developer/contributing.html).

numba-0.55.1/LICENSE000664 000000 000000 00000002406 14174536160 013702 0ustar00rootroot000000 000000
Copyright (c) 2012, Anaconda, Inc. All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
numba-0.55.1/LICENSES.third-party000664 000000 000000 00000055356 14174536160 016307 0ustar00rootroot000000 000000 The Numba source tree includes vendored libraries governed by the following licenses. appdirs ------- # This is the MIT license Copyright (c) 2010 ActiveState Software Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. NetworkX -------- The dominance frontier algorithm is from a pull request https://github.com/numba/numba/pull/4149/files which is based on the implementation of NetworkX of dominance. NetworkX has the following license: NetworkX is distributed with the 3-clause BSD license. :: Copyright (C) 2004-2019, NetworkX Developers Aric Hagberg Dan Schult Pieter Swart All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the NetworkX Developers nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. jquery.graphviz.svg (https://github.com/mountainstorm/jquery.graphviz.svg/) --------------------------------------------------------------------------- The DAG roadmap rendering code in docs/dagmap/ uses Javascript from this package to draw graphs in HTML. 
Copyright (c) 2015 Mountainstorm Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. CPython (https://github.com/python/cpython) ------------------------------------------- Numba source code that references URLs starting with: https://github.com/python/cpython/ relates to use/inclusion of CPython source code which has the following license: A. HISTORY OF THE SOFTWARE ========================== Python was created in the early 1990s by Guido van Rossum at Stichting Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands as a successor of a language called ABC. Guido remains Python's principal author, although it includes many contributions from others. In 1995, Guido continued his work on Python at the Corporation for National Research Initiatives (CNRI, see http://www.cnri.reston.va.us) in Reston, Virginia where he released several versions of the software. In May 2000, Guido and the Python core development team moved to BeOpen.com to form the BeOpen PythonLabs team. In October of the same year, the PythonLabs team moved to Digital Creations, which became Zope Corporation. In 2001, the Python Software Foundation (PSF, see https://www.python.org/psf/) was formed, a non-profit organization created specifically to own Python-related Intellectual Property. Zope Corporation was a sponsoring member of the PSF. All Python releases are Open Source (see http://www.opensource.org for the Open Source Definition). Historically, most, but not all, Python releases have also been GPL-compatible; the table below summarizes the various releases. Release Derived Year Owner GPL- from compatible? (1) 0.9.0 thru 1.2 1991-1995 CWI yes 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes 1.6 1.5.2 2000 CNRI no 2.0 1.6 2000 BeOpen.com no 1.6.1 1.6 2001 CNRI yes (2) 2.1 2.0+1.6.1 2001 PSF no 2.0.1 2.0+1.6.1 2001 PSF yes 2.1.1 2.1+2.0.1 2001 PSF yes 2.1.2 2.1.1 2002 PSF yes 2.1.3 2.1.2 2002 PSF yes 2.2 and above 2.1.1 2001-now PSF yes Footnotes: (1) GPL-compatible doesn't mean that we're distributing Python under the GPL. All Python licenses, unlike the GPL, let you distribute a modified version without making your changes open source. The GPL-compatible licenses make it possible to combine Python with other software that is released under the GPL; the others don't. (2) According to Richard Stallman, 1.6.1 is not GPL-compatible, because its license has a choice of law clause. According to CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 is "not incompatible" with the GPL. 
Thanks to the many outside volunteers who have worked under Guido's direction to make these releases possible. B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON =============================================================== PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 -------------------------------------------- 1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and the Individual or Organization ("Licensee") accessing and otherwise using this software ("Python") in source or binary form and its associated documentation. 2. Subject to the terms and conditions of this License Agreement, PSF hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use Python alone or in any derivative version, provided, however, that PSF's License Agreement and PSF's notice of copyright, i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019 Python Software Foundation; All Rights Reserved" are retained in Python alone or in any derivative version prepared by Licensee. 3. In the event Licensee prepares a derivative work that is based on or incorporates Python or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to Python. 4. PSF is making Python available to Licensee on an "AS IS" basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between PSF and Licensee. This License Agreement does not grant permission to use PSF trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 8. By copying, installing or otherwise using Python, Licensee agrees to be bound by the terms and conditions of this License Agreement. BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 ------------------------------------------- BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the Individual or Organization ("Licensee") accessing and otherwise using this software in source or binary form and its associated documentation ("the Software"). 2. 
Subject to the terms and conditions of this BeOpen Python License Agreement, BeOpen hereby grants Licensee a non-exclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use the Software alone or in any derivative version, provided, however, that the BeOpen Python License is retained in the Software, alone or in any derivative version prepared by Licensee. 3. BeOpen is making the Software available to Licensee on an "AS IS" basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 5. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 6. This License Agreement shall be governed by and interpreted in all respects by the law of the State of California, excluding conflict of law provisions. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between BeOpen and Licensee. This License Agreement does not grant permission to use BeOpen trademarks or trade names in a trademark sense to endorse or promote products or services of Licensee, or any third party. As an exception, the "BeOpen Python" logos available at http://www.pythonlabs.com/logos.html may be used according to the permissions granted on that web page. 7. By copying, installing or otherwise using the software, Licensee agrees to be bound by the terms and conditions of this License Agreement. CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 --------------------------------------- 1. This LICENSE AGREEMENT is between the Corporation for National Research Initiatives, having an office at 1895 Preston White Drive, Reston, VA 20191 ("CNRI"), and the Individual or Organization ("Licensee") accessing and otherwise using Python 1.6.1 software in source or binary form and its associated documentation. 2. Subject to the terms and conditions of this License Agreement, CNRI hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use Python 1.6.1 alone or in any derivative version, provided, however, that CNRI's License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) 1995-2001 Corporation for National Research Initiatives; All Rights Reserved" are retained in Python 1.6.1 alone or in any derivative version prepared by Licensee. Alternately, in lieu of CNRI's License Agreement, Licensee may substitute the following text (omitting the quotes): "Python 1.6.1 is made available subject to the terms and conditions in CNRI's License Agreement. This Agreement together with Python 1.6.1 may be located on the Internet using the following unique, persistent identifier (known as a handle): 1895.22/1013. This Agreement may also be obtained from a proxy server on the Internet using the following URL: http://hdl.handle.net/1895.22/1013". 3. 
In the event Licensee prepares a derivative work that is based on or incorporates Python 1.6.1 or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to Python 1.6.1. 4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 7. This License Agreement shall be governed by the federal intellectual property law of the United States, including without limitation the federal copyright law, and, to the extent such U.S. federal law does not apply, by the law of the Commonwealth of Virginia, excluding Virginia's conflict of law provisions. Notwithstanding the foregoing, with regard to derivative works based on Python 1.6.1 that incorporate non-separable material that was previously distributed under the GNU General Public License (GPL), the law of the Commonwealth of Virginia shall govern this License Agreement only as to issues arising under or with respect to Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between CNRI and Licensee. This License Agreement does not grant permission to use CNRI trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 8. By clicking on the "ACCEPT" button where indicated, or by copying, installing or otherwise using Python 1.6.1, Licensee agrees to be bound by the terms and conditions of this License Agreement. ACCEPT CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 -------------------------------------------------- Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, The Netherlands. All rights reserved. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that copyright notice and this permission notice appear in supporting documentation, and that the name of Stichting Mathematisch Centrum or CWI not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
CPython unicode (https://github.com/python/cpython) --------------------------------------------------- Numba's unicode support includes source code/algorithms from CPython's unicode implementation, Numba source code that has a reference starting with: https://github.com/python/cpython/ and contains in the path "Objects/unicodeobject.c" relates to use/inclusion of CPython source code which has the following license along with the standard CPython license: Unicode implementation based on original code by Fredrik Lundh, modified by Marc-Andre Lemburg . Major speed upgrades to the method implementations at the Reykjavik NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke. Copyright (c) Corporation for National Research Initiatives. -------------------------------------------------------------------- The original string type implementation is: Copyright (c) 1999 by Secret Labs AB Copyright (c) 1999 by Fredrik Lundh By obtaining, using, and/or copying this software and/or its associated documentation, you agree that you have read, understood, and will comply with the following terms and conditions: Permission to use, copy, modify, and distribute this software and its associated documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appears in all copies, and that both that copyright notice and this permission notice appear in supporting documentation, and that the name of Secret Labs AB or the author not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -------------------------------------------------------------------- cloudpickle ----------- This module was extracted from the `cloud` package, developed by PiCloud, Inc. Copyright (c) 2015, Cloudpickle contributors. Copyright (c) 2012, Regents of the University of California. Copyright (c) 2009 PiCloud, Inc. http://www.picloud.com. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the University of California, Berkeley nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

NumPy (https://github.com/numpy/numpy)
--------------------------------------

Numba source code that references URLs starting with: https://github.com/numpy/numpy relates to use of/inclusion of/derivative work based on NumPy source code which has the following license:

Copyright (c) 2005-2021, NumPy Developers. All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
* Neither the name of the NumPy Developers nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

numba-0.55.1/MANIFEST.in000664 000000 000000 00000000452 14174536160 014432 0ustar00rootroot000000 000000
include MANIFEST.in
include README.rst setup.py runtests.py versioneer.py CHANGE_LOG LICENSE
recursive-include numba *.c *.cpp *.h *.hpp *.inc
recursive-include docs *.ipynb *.txt *.py Makefile *.rst
recursive-include examples *.py
prune docs/_build
prune docs/gh-pages
include numba/_version.py

numba-0.55.1/README.rst000664 000000 000000 00000004544 14174536160 014371 0ustar00rootroot000000 000000
*****
Numba
*****

.. image:: https://badges.gitter.im/numba/numba.svg
   :target: https://gitter.im/numba/numba?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge
   :alt: Gitter

.. image:: https://img.shields.io/badge/discuss-on%20discourse-blue
   :target: https://numba.discourse.group/
   :alt: Discourse

.. image:: https://zenodo.org/badge/3659275.svg
   :target: https://zenodo.org/badge/latestdoi/3659275
   :alt: Zenodo DOI

A Just-In-Time Compiler for Numerical Functions in Python
#########################################################

Numba is an open source, NumPy-aware optimizing compiler for Python sponsored by Anaconda, Inc. It uses the LLVM compiler project to generate machine code from Python syntax.
Numba can compile a large subset of numerically-focused Python, including many NumPy functions. Additionally, Numba has support for automatic parallelization of loops, generation of GPU-accelerated code, and creation of ufuncs and C callbacks.

For more information about Numba, see the Numba homepage: https://numba.pydata.org

Supported Platforms
===================

* Operating systems and CPUs:

  - Linux: x86 (32-bit), x86_64, ppc64le (POWER8 and 9), ARMv7 (32-bit), ARMv8 (64-bit).
  - Windows: x86, x86_64.
  - macOS: x86_64 (M1/Arm64, unofficial support only).
  - \*BSD: (unofficial support only).

* (Optional) Accelerators and GPUs:

  * NVIDIA GPUs (Kepler architecture or later) via CUDA driver on Linux and Windows.

Dependencies
============

* Python versions: 3.7-3.10
* llvmlite 0.38.*
* NumPy >=1.18,<1.22 (can build with 1.11 for ABI compatibility).

Optionally:

* SciPy >=1.0.0 (for ``numpy.linalg`` support).

Installing
==========

The easiest way to install Numba and get updates is by using the Anaconda Distribution: https://www.anaconda.com/download

::

   $ conda install numba

For more options, see the Installation Guide: https://numba.readthedocs.io/en/stable/user/installing.html

Documentation
=============

https://numba.readthedocs.io/en/stable/index.html

Contact
=======

Numba has a discourse forum for discussions:

* https://numba.discourse.group

Continuous Integration
======================

.. image:: https://dev.azure.com/numba/numba/_apis/build/status/numba.numba?branchName=master
   :target: https://dev.azure.com/numba/numba/_build/latest?definitionId=1?branchName=master
   :alt: Azure Pipelines

numba-0.55.1/azure-pipelines.yml000664 000000 000000 00000006057 14174536160 016542 0ustar00rootroot000000 000000 trigger: batch: true variables: # Change the following along with adding new TEST_START_INDEX.
TEST_COUNT: 20 jobs: # Mac and Linux use the same template with different matrixes - template: buildscripts/azure/azure-linux-macos.yml parameters: name: macOS vmImage: macOS-10.15 matrix: py37_np118: PYTHON: '3.7' NUMPY: '1.18' CONDA_ENV: 'azure_ci' TEST_START_INDEX: 0 py39_np121: PYTHON: '3.9' NUMPY: '1.21' CONDA_ENV: 'azure_ci' TEST_THREADING: 'tbb' TEST_START_INDEX: 1 - template: buildscripts/azure/azure-linux-macos.yml parameters: name: Linux vmImage: ubuntu-18.04 matrix: py37_np118_32bit: # 32 bit linux only has np 1.15 PYTHON: '3.7' NUMPY: '1.18' CONDA_ENV: azure_ci BITS32: yes TEST_START_INDEX: 2 py37_np118_cov: PYTHON: '3.7' NUMPY: '1.18' CONDA_ENV: azure_ci RUN_COVERAGE: yes RUN_FLAKE8: yes RUN_MYPY: yes TEST_START_INDEX: 3 py37_np118_vanilla: PYTHON: '3.7' NUMPY: '1.18' CONDA_ENV: azure_ci VANILLA_INSTALL: yes TEST_START_INDEX: 4 py37_np118_tbb: PYTHON: '3.7' NUMPY: '1.18' CONDA_ENV: azure_ci TEST_THREADING: 'tbb' TEST_START_INDEX: 5 py37_np118_omp: PYTHON: '3.7' NUMPY: '1.18' CONDA_ENV: azure_ci TEST_THREADING: omp TEST_START_INDEX: 6 py37_np118_workqueue: PYTHON: '3.7' NUMPY: '1.18' CONDA_ENV: azure_ci TEST_THREADING: workqueue TEST_START_INDEX: 7 py37_np119_doc: PYTHON: '3.7' NUMPY: '1.19' CONDA_ENV: azure_ci BUILD_DOC: yes TEST_START_INDEX: 8 py37_np119_pickle5: PYTHON: '3.7' NUMPY: '1.19' CONDA_ENV: azure_ci TEST_PICKLE5: yes TEST_START_INDEX: 9 py37_np120_svml: PYTHON: '3.7' NUMPY: '1.20' CONDA_ENV: azure_ci TEST_SVML: yes TEST_START_INDEX: 10 py37_np121: PYTHON: '3.7' NUMPY: '1.21' CONDA_ENV: azure_ci TEST_START_INDEX: 11 py38_np119: PYTHON: '3.8' NUMPY: '1.19' CONDA_ENV: azure_ci TEST_START_INDEX: 12 py38_np120_typeguard: PYTHON: '3.8' NUMPY: '1.20' CONDA_ENV: azure_ci RUN_TYPEGUARD: yes TEST_START_INDEX: 13 py38_np121: PYTHON: '3.8' NUMPY: '1.21' CONDA_ENV: azure_ci TEST_START_INDEX: 14 py39_np120: PYTHON: '3.9' NUMPY: '1.20' CONDA_ENV: azure_ci TEST_START_INDEX: 15 py39_np121: PYTHON: '3.9' NUMPY: '1.21' CONDA_ENV: azure_ci TEST_START_INDEX: 16 py310_np121: PYTHON: '3.10' NUMPY: '1.21' CONDA_ENV: azure_ci TEST_START_INDEX: 17 - template: buildscripts/azure/azure-windows.yml parameters: name: Windows vmImage: windows-2019 numba-0.55.1/bin/000775 000000 000000 00000000000 14174536160 013443 5ustar00rootroot000000 000000 numba-0.55.1/bin/numba000775 000000 000000 00000000277 14174536160 014501 0ustar00rootroot000000 000000 #!/usr/bin/env python # -*- coding: UTF-8 -*- from __future__ import print_function, division, absolute_import from numba.misc.numba_entry import main if __name__ == "__main__": main() numba-0.55.1/buildscripts/000775 000000 000000 00000000000 14174536160 015402 5ustar00rootroot000000 000000 numba-0.55.1/buildscripts/appveyor/000775 000000 000000 00000000000 14174536160 017247 5ustar00rootroot000000 000000 numba-0.55.1/buildscripts/appveyor/run_with_env.cmd000664 000000 000000 00000006472 14174536160 022454 0ustar00rootroot000000 000000 :: From https://github.com/ogrisel/python-appveyor-demo :: :: To build extensions for 64 bit Python 3, we need to configure environment :: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: :: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) :: :: To build extensions for 64 bit Python 2, we need to configure environment :: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: :: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) :: :: 32 bit builds, and 64-bit builds for 3.5 and beyond, do not require specific :: environment configurations. 
:: :: Note: this script needs to be run with the /E:ON and /V:ON flags for the :: cmd interpreter, at least for (SDK v7.0) :: :: More details at: :: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows :: http://stackoverflow.com/a/13751649/163740 :: :: Author: Olivier Grisel :: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ :: :: Notes about batch files for Python people: :: :: Quotes in values are literally part of the values: :: SET FOO="bar" :: FOO is now five characters long: " b a r " :: If you don't want quotes, don't include them on the right-hand side. :: :: The CALL lines at the end of this file look redundant, but if you move them :: outside of the IF clauses, they do not run properly in the SET_SDK_64==Y :: case, I don't know why. @ECHO OFF SET COMMAND_TO_RUN=%* SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows SET WIN_WDK=c:\Program Files (x86)\Windows Kits\10\Include\wdf :: Extract the major and minor versions, and allow for the minor version to be :: more than 9. This requires the version number to have two dots in it. SET MAJOR_PYTHON_VERSION=%PYTHON:~0,1% IF "%PYTHON:~3,1%" == "." ( SET MINOR_PYTHON_VERSION=%PYTHON:~2,1% ) ELSE ( SET MINOR_PYTHON_VERSION=%PYTHON:~2,2% ) :: Based on the Python version, determine what SDK version to use, and whether :: to set the SDK for 64-bit. IF %MAJOR_PYTHON_VERSION% == 2 ( SET WINDOWS_SDK_VERSION="v7.0" SET SET_SDK_64=Y ) ELSE ( IF %MAJOR_PYTHON_VERSION% == 3 ( SET WINDOWS_SDK_VERSION="v7.1" IF %MINOR_PYTHON_VERSION% LEQ 4 ( SET SET_SDK_64=Y ) ELSE ( SET SET_SDK_64=N IF EXIST "%WIN_WDK%" ( :: See: https://connect.microsoft.com/VisualStudio/feedback/details/1610302/ REN "%WIN_WDK%" 0wdf ) ) ) ELSE ( ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%" EXIT 1 ) ) IF %ARCH% == 64 ( IF %SET_SDK_64% == Y ( ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture SET DISTUTILS_USE_SDK=1 SET MSSdk=1 "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release ECHO Executing: %COMMAND_TO_RUN% call %COMMAND_TO_RUN% || EXIT 1 ) ELSE ( ECHO Using default MSVC build environment for 64 bit architecture ECHO Executing: %COMMAND_TO_RUN% call %COMMAND_TO_RUN% || EXIT 1 ) ) ELSE ( ECHO Using default MSVC build environment for 32 bit architecture ECHO Executing: %COMMAND_TO_RUN% call %COMMAND_TO_RUN% || EXIT 1 ) numba-0.55.1/buildscripts/azure/000775 000000 000000 00000000000 14174536160 016530 5ustar00rootroot000000 000000 numba-0.55.1/buildscripts/azure/azure-linux-macos.yml000664 000000 000000 00000002632 14174536160 022641 0ustar00rootroot000000 000000 parameters: name: '' vmImage: '' matrix: [] jobs: - job: ${{ parameters.name }} pool: vmImage: ${{ parameters.vmImage }} strategy: matrix: ${{ insert }}: ${{ parameters.matrix }} steps: - script: | if [ "$(uname)" == "Linux" ] && [[ "$CONDA_SUBDIR" == "linux-32" || "$BITS32" == "yes" ]]; then sudo apt-get install -y libc6-dev-i386; fi if [ "$(uname)" == "Linux" ] && [[ "$CONDA_SUBDIR" != "linux-32" && "$BITS32" != "yes" ]]; then sudo apt-get install -y gdb; fi echo "Installing Miniconda" buildscripts/incremental/install_miniconda.sh export PATH=$HOME/miniconda3/bin:$PATH echo "Setting up Conda environment" buildscripts/incremental/setup_conda_environment.sh displayName: 'Before Install' - script: | export PATH=$HOME/miniconda3/bin:$PATH buildscripts/incremental/build.sh displayName: 'Build' 
- script: | export PATH=$HOME/miniconda3/bin:$PATH conda install -y flake8 flake8 numba displayName: 'Flake8' condition: eq(variables['RUN_FLAKE8'], 'yes') - script: | export PATH=$HOME/miniconda3/bin:$PATH conda install -y mypy mypy displayName: 'Mypy' condition: eq(variables['RUN_MYPY'], 'yes') - script: | export PATH=$HOME/miniconda3/bin:$PATH buildscripts/incremental/test.sh displayName: 'Test' numba-0.55.1/buildscripts/azure/azure-windows.yml000664 000000 000000 00000003405 14174536160 022073 0ustar00rootroot000000 000000 parameters: name: '' vmImage: '' jobs: - job: ${{ parameters.name }} pool: vmImage: ${{ parameters.vmImage }} strategy: matrix: py39_np121: PYTHON: '3.9' NUMPY: '1.21' CONDA_ENV: 'testenv' TEST_START_INDEX: 18 py37_np118: PYTHON: '3.7' NUMPY: '1.18' CONDA_ENV: 'testenv' TEST_START_INDEX: 19 steps: - task: CondaEnvironment@1 inputs: updateConda: no packageSpecs: '' - script: | buildscripts\\incremental\\setup_conda_environment.cmd displayName: 'Before Install' - script: | # use TBB call activate %CONDA_ENV% conda install -y -c numba tbb=2021 tbb-devel displayName: 'Add in TBB' - script: | buildscripts\\incremental\\build.cmd displayName: 'Build' - script: | call activate %CONDA_ENV% python -m numba -s displayName: 'Display numba system information' - script: | call activate %CONDA_ENV% python -m numba.tests.test_runtests displayName: 'Verify runtests' - script: | call activate %CONDA_ENV% python -m numba.runtests -l displayName: 'List discovered tests' - script: | call activate %CONDA_ENV% set NUMBA_CAPTURED_ERRORS=new_style echo "Running slice of discovered tests: %TEST_START_INDEX%,None,%TEST_COUNT%" python -m numba.runtests -b -v -g -m 2 -- numba.tests displayName: 'Test modified test files' - script: | call activate %CONDA_ENV% set NUMBA_CAPTURED_ERRORS=new_style python runtests.py -m 2 -b -j "%TEST_START_INDEX%,None,%TEST_COUNT%" --exclude-tags='long_running' -- numba.tests displayName: 'Test slice of test files' numba-0.55.1/buildscripts/condarecipe.local/000775 000000 000000 00000000000 14174536160 020747 5ustar00rootroot000000 000000 numba-0.55.1/buildscripts/condarecipe.local/bld.bat000664 000000 000000 00000000156 14174536160 022202 0ustar00rootroot000000 000000 %PYTHON% setup.py build install --single-version-externally-managed --record=record.txt exit /b %errorlevel% numba-0.55.1/buildscripts/condarecipe.local/build.sh000664 000000 000000 00000001077 14174536160 022407 0ustar00rootroot000000 000000 #!/bin/bash if [[ "$(uname -s)" == *"Linux"* ]] && [[ "$(uname -p)" == *"86"* ]]; then EXTRA_BUILD_EXT_FLAGS="--werror --wall" else EXTRA_BUILD_EXT_FLAGS="" fi if [[ "$(uname -s)" == *"Linux"* ]] && [[ "$(uname -p)" == *"ppc64le"* ]]; then # To workaround https://github.com/numba/numba/issues/7302 # because of a python build problem that the -pthread could be stripped. export CC="$CC -pthread" fi MACOSX_DEPLOYMENT_TARGET=10.10 $PYTHON setup.py build_ext $EXTRA_BUILD_EXT_FLAGS build install --single-version-externally-managed --record=record.txt numba-0.55.1/buildscripts/condarecipe.local/conda_build_config.yaml000664 000000 000000 00000001020 14174536160 025420 0ustar00rootroot000000 000000 # Numba/llvmlite stack needs an older compiler for backwards compatibility.
c_compiler_version: # [linux] - 7 # [linux and (x86_64 or ppc64le)] - 9 # [linux and aarch64] cxx_compiler_version: # [linux] - 7 # [linux and (x86_64 or ppc64le)] - 9 # [linux and aarch64] fortran_compiler_version: # [linux] - 7 # [linux and (x86_64 or ppc64le)] - 9 # [linux and aarch64] numba-0.55.1/buildscripts/condarecipe.local/license.txt000664 000000 000000 00000002406 14174536160 023134 0ustar00rootroot000000 000000 Copyright (c) 2012, Anaconda, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. numba-0.55.1/buildscripts/condarecipe.local/meta.yaml000664 000000 000000 00000005736 14174536160 022574 0ustar00rootroot000000 000000 package: name: numba version: {{ GIT_DESCRIBE_TAG }} source: path: ../.. build: number: {{ GIT_DESCRIBE_NUMBER|int }} string: np{{ NPY_VER }}py{{ PY_VER }}h{{ PKG_HASH }}_{{GIT_DESCRIBE_HASH}}_{{ GIT_DESCRIBE_NUMBER }} entry_points: - pycc = numba.pycc:main - numba = numba.misc.numba_entry:main script_env: - PY_VCRUNTIME_REDIST missing_dso_whitelist: # [osx] # optional dependency: required only when omp is chosen as the backend for # the threading layer - lib/libiomp5.dylib # [osx] ignore_run_exports: # tbb-devel triggers hard dependency on tbb, this is not the case. - tbb # [not (armv6l or armv7l or aarch64 or linux32)] requirements: # build and run dependencies are duplicated to avoid setuptools issues # when we also set install_requires in setup.py build: - {{ compiler('c') }} # [not (armv6l or armv7l or aarch64)] - {{ compiler('cxx') }} # [not (armv6l or armv7l or aarch64)] # both of these are needed on osx, llvm for the headers, Intel for the lib - llvm-openmp # [osx] - intel-openmp # [osx] host: - python - numpy - setuptools # On channel https://anaconda.org/numba/ - llvmlite >=0.38.0rc1,<0.39 # TBB devel version is to match TBB libs. # 2020.3 is the last version with the "old" ABI # NOTE: ppc64le exclusion is temporary until packages are more generally # available. 
- tbb-devel 2021.* # [not (armv6l or armv7l or aarch64 or linux32 or ppc64le)] run: - python >=3.7 - numpy >=1.18,<1.22 - setuptools # On channel https://anaconda.org/numba/ - llvmlite >=0.38.0rc1,<0.39 run_constrained: # If TBB is present it must be at least version 2021 - tbb 2021.* # [not (armv6l or armv7l or aarch64 or linux32 or ppc64le)] # avoid confusion from openblas bugs - libopenblas !=0.3.6 # [x86_64] # CUDA 9.2 or later is required for CUDA support - cudatoolkit >=9.2 # scipy 1.0 or later - scipy >=1.0 # CUDA Python 11.6 or later - cuda-python >=11.6 test: requires: - jinja2 # Required to test optional Numba features - cffi # temporarily disable scipy testing on ARM, need to build out more packages - scipy # [not (armv6l or armv7l)] - ipython # [not (armv6l or armv7l or aarch64)] - setuptools - tbb 2021.* # [not (armv6l or armv7l or aarch64 or linux32 or ppc64le)] - intel-openmp # [osx] # This is for driving gdb tests - pexpect # [linux64] # For testing ipython - ipykernel # Need these for AOT. Do not init msvc as it may not be present - {{ compiler('c') }} # [not (win or armv6l or armv7l or aarch64)] - {{ compiler('cxx') }} # [not (win or armv6l or armv7l or aarch64)] about: home: https://numba.pydata.org/ license: BSD license_file: LICENSE summary: a just-in-time Python function compiler based on LLVM numba-0.55.1/buildscripts/condarecipe.local/run_test.bat000664 000000 000000 00000000621 14174536160 023301 0ustar00rootroot000000 000000 set NUMBA_DEVELOPER_MODE=1 set NUMBA_DISABLE_ERROR_MESSAGE_HIGHLIGHTING=1 set NUMBA_CAPTURED_ERRORS=new_style set PYTHONFAULTHANDLER=1 @rem Check Numba executables are there pycc -h numba -h @rem Run system info tool numba -s @rem Check test discovery works python -m numba.tests.test_runtests @rem Run the whole test suite python -m numba.runtests -b -m -- %TESTS_TO_RUN% if errorlevel 1 exit 1 numba-0.55.1/buildscripts/condarecipe.local/run_test.sh000664 000000 000000 00000001474 14174536160 023154 0ustar00rootroot000000 000000 #!/bin/bash set -e export NUMBA_DEVELOPER_MODE=1 export NUMBA_DISABLE_ERROR_MESSAGE_HIGHLIGHTING=1 export NUMBA_CAPTURED_ERRORS="new_style" export PYTHONFAULTHANDLER=1 unamestr=`uname` if [[ "$unamestr" == 'Linux' ]]; then SEGVCATCH=catchsegv elif [[ "$unamestr" == 'Darwin' ]]; then SEGVCATCH="" else echo Error fi # limit CPUs in use on PPC64LE, fork() issues # occur on high core count systems archstr=`uname -m` if [[ "$archstr" == 'ppc64le' ]]; then TEST_NPROCS=16 fi # Check Numba executables are there pycc -h numba -h # run system info tool numba -s # Check test discovery works python -m numba.tests.test_runtests # Run the whole test suite echo "Running: $SEGVCATCH python -m numba.runtests -b -m $TEST_NPROCS -- $TESTS_TO_RUN" $SEGVCATCH python -m numba.runtests -b -m $TEST_NPROCS -- $TESTS_TO_RUN numba-0.55.1/buildscripts/condarecipe_clone_icc_rt/000775 000000 000000 00000000000 14174536160 022361 5ustar00rootroot000000 000000 numba-0.55.1/buildscripts/condarecipe_clone_icc_rt/bld.bat000664 000000 000000 00000000050 14174536160 023605 0ustar00rootroot000000 000000 %PYTHON% build.py exit /b %errorlevel% numba-0.55.1/buildscripts/condarecipe_clone_icc_rt/build.sh000664 000000 000000 00000000050 14174536160 024007 0ustar00rootroot000000 000000 #!/bin/bash set -x ${PYTHON} build.py numba-0.55.1/buildscripts/condarecipe_clone_icc_rt/meta.yaml000664 000000 000000 00000002770 14174536160 024201 0ustar00rootroot000000 000000 {% set version = "2019.3" %} # this is the intel version to get {% set win_build_number = "203" %} # 
the build number from the intel windows version {% set osx_build_number = "199" %} # the build number from the intel osx version {% set lnx_build_number = "199" %} # the build number from the intel linux version package: name: icc_rt version: {{ version }} build: number: {{ win_build_number }} # [win] number: {{ osx_build_number }} # [osx] number: {{ lnx_build_number }} # [linux] source: - url: https://anaconda.org/intel/icc_rt/{{ version }}/download/win-64/icc_rt-{{version}}-intel_{{win_build_number}}.tar.bz2 # [win] - md5: d39bae3218457a4ea045763fdcfc1562 # [win] - sha256: 2c55b8af1dea35ee4648b671050899a93b7eba1b26acad019bf569ca777a944e # [win] - url: https://anaconda.org/intel/icc_rt/{{ version }}/download/osx-64/icc_rt-{{version}}-intel_{{osx_build_number}}.tar.bz2 # [osx] - md5: 064566ac53e729d3f008e32b1f73d1fa # [osx] - sha256: 54a372b0d8d5b4d750c28ea122851b52ec9aa3cccb8d4cf4a2999494dfda6656 # [osx] - url: https://anaconda.org/intel/icc_rt/{{ version }}/download/linux-64/icc_rt-{{version}}-intel_{{lnx_build_number}}.tar.bz2 # [linux] - md5: 306c3ee9491577715dbd76c838147078 # [linux] - sha256: 4cedd10343d1ab4403af2ff080b47afe5399be550f1c215e5a7c7eceec672516 # [linux] - path: scripts requirements: build: - python>=3 about: license: "Intel" license_family: "Proprietary" license_file: LICENSE.txt summary: Intel ICC runtime. numba-0.55.1/buildscripts/condarecipe_clone_icc_rt/scripts/000775 000000 000000 00000000000 14174536160 024050 5ustar00rootroot000000 000000 numba-0.55.1/buildscripts/condarecipe_clone_icc_rt/scripts/build.py000664 000000 000000 00000001423 14174536160 025521 0ustar00rootroot000000 000000 import sys import os import shutil libdir = {'w': 'Library', 'l': 'lib', 'd': 'lib'} def run(): src_dir = os.environ.get('SRC_DIR') prefix = os.environ.get('PREFIX') libd = libdir.get(sys.platform[0], None) assert libd is not None # remove 'lib' from the prefix so a direct copy from the original # package can be made lib_dir = os.path.join(prefix, libd) shutil.rmtree(lib_dir) # copy in the original package lib dir shutil.copytree(os.path.join(src_dir, libd), lib_dir) # and copy the license info_dir = os.path.join(src_dir, 'info') shutil.copy(os.path.join(info_dir, 'LICENSE.txt'), src_dir) shutil.rmtree(info_dir) if __name__ == "__main__": args = sys.argv assert len(args) == 1 run() numba-0.55.1/buildscripts/gpuci/000775 000000 000000 00000000000 14174536160 016511 5ustar00rootroot000000 000000 numba-0.55.1/buildscripts/gpuci/axis.yaml000664 000000 000000 00000000254 14174536160 020342 0ustar00rootroot000000 000000 PYTHON_VER: - "3.8" CUDA_VER: - "11.2" CUDA_TOOLKIT_VER: - "9.2" - "10.0" - "10.2" - "11.0" - "11.2" - "11.4" LINUX_VER: - ubuntu18.04 RAPIDS_VER: - "21.12" excludes: numba-0.55.1/buildscripts/gpuci/build.sh000664 000000 000000 00000003615 14174536160 020151 0ustar00rootroot000000 000000 ############################################## # Numba GPU build and test script for CI # ############################################## set -e # Set path and build parallel level export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} # Set home to the job's workspace export HOME="$WORKSPACE" # Switch to project root; also root of repo checkout cd "$WORKSPACE" # Determine CUDA release version export CUDA_REL=${CUDA_VERSION%.*} # Test with NVIDIA Bindings on CUDA 11.4 if [ $CUDA_TOOLKIT_VER == "11.4" ] then export NUMBA_CUDA_USE_NVIDIA_BINDING=1; else export NUMBA_CUDA_USE_NVIDIA_BINDING=0; fi; 
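# For reference: ${CUDA_VERSION%.*} above strips the trailing ".<patch>"
# component, e.g. CUDA_VERSION=11.2.152 gives CUDA_REL=11.2. The toggle above
# also means the NVIDIA cuda-python bindings are only exercised on the
# CUDA 11.4 axis entry; all other toolkit versions test Numba's default
# (ctypes-based) driver binding.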
################################################################################ # SETUP - Check environment ################################################################################ gpuci_logger "Check environment variables" env gpuci_logger "Check GPU usage" nvidia-smi gpuci_logger "Create testing env" . /opt/conda/etc/profile.d/conda.sh gpuci_mamba_retry create -n numba_ci -y \ "python=${PYTHON_VER}" \ "cudatoolkit=${CUDA_TOOLKIT_VER}" \ "numba/label/dev::llvmlite" \ "numpy=1.21" \ "scipy" \ "cffi" \ "psutil" \ "gcc_linux-64=7" \ "gxx_linux-64=7" conda activate numba_ci if [ $NUMBA_CUDA_USE_NVIDIA_BINDING == "1" ] then gpuci_logger "Install NVIDIA CUDA Python bindings"; gpuci_mamba_retry install nvidia::cuda-python; fi; gpuci_logger "Install numba" python setup.py develop gpuci_logger "Check Compiler versions" $CC --version $CXX --version gpuci_logger "Check conda environment" conda info conda config --show-sources gpuci_logger "Dump system information from Numba" python -m numba -s gpuci_logger "Run tests in numba.cuda.tests" python -m numba.runtests numba.cuda.tests -v -m numba-0.55.1/buildscripts/incremental/000775 000000 000000 00000000000 14174536160 017703 5ustar00rootroot000000 000000 numba-0.55.1/buildscripts/incremental/MacOSX10.10.sdk.checksum000664 000000 000000 00000000103 14174536160 023653 0ustar00rootroot000000 000000 ea40a3b9dc48cd3593628490f2738b89282f00ab ./MacOSX10.10.sdk.tar.xz numba-0.55.1/buildscripts/incremental/after_success.sh000775 000000 000000 00000000256 14174536160 023076 0ustar00rootroot000000 000000 #!/bin/bash source activate $CONDA_ENV # Make sure any error below is reported as such set -v -e if [ "$RUN_COVERAGE" == "yes" ]; then coverage combine codecov fi numba-0.55.1/buildscripts/incremental/build.cmd000664 000000 000000 00000000424 14174536160 021467 0ustar00rootroot000000 000000 call activate %CONDA_ENV% @rem Build numba extensions without silencing compile errors python setup.py build_ext -q --inplace @rem Install numba locally for use in `numba -s` sys info tool at test time python -m pip install -e . if %errorlevel% neq 0 exit /b %errorlevel% numba-0.55.1/buildscripts/incremental/build.sh000775 000000 000000 00000002062 14174536160 021341 0ustar00rootroot000000 000000 #!/bin/bash source activate conda activate $CONDA_ENV # Make sure any error below is reported as such set -v -e # Build numba extensions without silencing compile errors if [[ "$(uname -s)" == *"Linux"* ]] && [[ "$(uname -p)" == *"86"* ]]; then EXTRA_BUILD_EXT_FLAGS="--werror --wall" else EXTRA_BUILD_EXT_FLAGS="" fi if [[ $(uname) == "Darwin" ]]; then # The following is suggested in https://docs.conda.io/projects/conda-build/en/latest/resources/compiler-tools.html?highlight=SDK#macos-sdk wget -q https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX10.10.sdk.tar.xz shasum -c ./buildscripts/incremental/MacOSX10.10.sdk.checksum tar -xf ./MacOSX10.10.sdk.tar.xz export SDKROOT=`pwd`/MacOSX10.10.sdk fi python setup.py build_ext -q --inplace --debug $EXTRA_BUILD_EXT_FLAGS --verbose # (note we don't install to avoid problems with extra long Windows paths # during distutils-dependent tests -- e.g. test_pycc) # Install numba locally for use in `numba -s` sys info tool at test time python -m pip install --no-deps -e . 
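# For reference: the "shasum -c" step above reads entries of the form
# "<sha1>  <path>" from MacOSX10.10.sdk.checksum (see that file earlier in
# this tree) and, because this script runs with "set -e", the build aborts
# if the downloaded SDK tarball does not match the recorded SHA1.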
numba-0.55.1/buildscripts/incremental/install_miniconda.sh000775 000000 000000 00000001042 14174536160 023726 0ustar00rootroot000000 000000 #!/bin/bash set -v -e # Install Miniconda unamestr=`uname` if [[ "$unamestr" == 'Linux' ]]; then if [[ "$BITS32" == "yes" ]]; then wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86.sh -O miniconda.sh else wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh fi elif [[ "$unamestr" == 'Darwin' ]]; then wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh else echo Error fi chmod +x miniconda.sh ./miniconda.sh -b numba-0.55.1/buildscripts/incremental/setup_conda_environment.cmd000664 000000 000000 00000003210 14174536160 025314 0ustar00rootroot000000 000000 @rem first configure conda to have more tolerance of network problems, these @rem numbers are not scientifically chosen, just merely larger than defaults set CONDA_CONFIG=cmd /C conda config %CONDA_CONFIG% --write-default %CONDA_CONFIG% --set remote_connect_timeout_secs 30.15 %CONDA_CONFIG% --set remote_max_retries 10 %CONDA_CONFIG% --set remote_read_timeout_secs 120.2 %CONDA_CONFIG% --set restore_free_channel true %CONDA_CONFIG% --set show_channel_urls true cmd /C conda info %CONDA_CONFIG% --show @rem The cmd /C hack circumvents a regression where conda installs a conda.bat @rem script in non-root environments. set CONDA_INSTALL=cmd /C conda install -q -y set PIP_INSTALL=pip install -q @echo on @rem Deactivate any environment call deactivate @rem Display root environment (for debugging) conda list @rem Scipy, CFFI, jinja2 and IPython are optional dependencies, but exercised in the test suite conda create -n %CONDA_ENV% -q -y python=%PYTHON% numpy=%NUMPY% cffi pip scipy jinja2 ipython gitpython pyyaml call activate %CONDA_ENV% @rem Install latest llvmlite build %CONDA_INSTALL% -c numba/label/dev llvmlite @rem Install dependencies for building the documentation if "%BUILD_DOC%" == "yes" (%CONDA_INSTALL% sphinx sphinx_rtd_theme pygments) @rem Install dependencies for code coverage (codecov.io) if "%RUN_COVERAGE%" == "yes" (%PIP_INSTALL% codecov) @rem Install TBB %CONDA_INSTALL% -c numba tbb=2021 tbb-devel if %errorlevel% neq 0 exit /b %errorlevel% echo "DEBUG ENV:" echo "-------------------------------------------------------------------------" conda env export echo "-------------------------------------------------------------------------" numba-0.55.1/buildscripts/incremental/setup_conda_environment.sh000775 000000 000000 00000010721 14174536160 025173 0ustar00rootroot000000 000000 #!/bin/bash set -v -e # first configure conda to have more tolerance of network problems, these # numbers are not scientifically chosen, just merely larger than defaults conda config --write-default conda config --set remote_connect_timeout_secs 30.15 conda config --set remote_max_retries 10 conda config --set remote_read_timeout_secs 120.2 conda config --set show_channel_urls true if [[ $(uname) == Linux ]]; then if [[ "$CONDA_SUBDIR" != "linux-32" && "$BITS32" != "yes" ]] ; then conda config --set restore_free_channel true fi fi conda info conda config --show CONDA_INSTALL="conda install -q -y" PIP_INSTALL="pip install -q" EXTRA_CHANNELS="" if [ "${USE_C3I_TEST_CHANNEL}" == "yes" ]; then EXTRA_CHANNELS="${EXTRA_CHANNELS} -c c3i_test" fi # Deactivate any environment source deactivate # Display root environment (for debugging) conda list # If VANILLA_INSTALL is yes, then only Python, NumPy and pip are installed, this # is to catch 
tests/code paths that require an optional package and are not # guarding against the possibility that it does not exist in the environment. # Create a base env first and then add to it... # NOTE: gitpython is needed for CI testing to do the test slicing # NOTE: pyyaml is used to ensure that the Azure CI config is valid # NOTE: 32 bit linux... do not install NumPy, there's no conda package for >1.15 # so it has to come from pip later # If it's Python 3.10, we get everything except for the interpreter and # compilers via pip from PyPi if [[ "$PYTHON" == "3.10" ]] ; then conda create -n $CONDA_ENV -q -y ${EXTRA_CHANNELS} python=$PYTHON elif [[ "$CONDA_SUBDIR" == "linux-32" || "$BITS32" == "yes" ]] ; then conda create -n $CONDA_ENV -q -y ${EXTRA_CHANNELS} python=$PYTHON pip gitpython pyyaml else conda create -n $CONDA_ENV -q -y ${EXTRA_CHANNELS} python=$PYTHON numpy=$NUMPY pip gitpython pyyaml fi # Activate first set +v source activate $CONDA_ENV set -v # Install optional packages into activated env echo "PYTHON=$PYTHON" echo "VANILLA_INSTALL=$VANILLA_INSTALL" if [[ "$PYTHON" != "3.10" && "$VANILLA_INSTALL" != "yes" ]]; then # Scipy, CFFI, jinja2, IPython, and pygments are optional dependencies, # but exercised in the test suite. # pexpect is used to run the gdb tests. # ipykernel is used for testing ipython behaviours. $CONDA_INSTALL ${EXTRA_CHANNELS} cffi jinja2 ipython ipykernel pygments pexpect # Only install scipy on 64bit, else it'll pull in NumPy, 32bit linux needs # to get scipy from pip if [[ "$CONDA_SUBDIR" != "linux-32" && "$BITS32" != "yes" ]] ; then $CONDA_INSTALL ${EXTRA_CHANNELS} scipy fi fi # Install the compiler toolchain if [[ $(uname) == Linux ]]; then if [[ "$CONDA_SUBDIR" == "linux-32" || "$BITS32" == "yes" ]] ; then $CONDA_INSTALL gcc_linux-32 gxx_linux-32 else $CONDA_INSTALL gcc_linux-64 gxx_linux-64 fi elif [[ $(uname) == Darwin ]]; then $CONDA_INSTALL clang_osx-64 clangxx_osx-64 # Install llvm-openmp and intel-openmp on OSX too $CONDA_INSTALL llvm-openmp intel-openmp fi # `pip install` all the dependencies on Python 3.10 if [[ "$PYTHON" == "3.10" ]] ; then $PIP_INSTALL -U pip pip --version $PIP_INSTALL gitpython pyyaml cffi jinja2 ipython ipykernel pygments pexpect scipy numpy # If on 32bit linux, now pip install NumPy (no conda package), SciPy is broken?! 
elif [[ "$CONDA_SUBDIR" == "linux-32" || "$BITS32" == "yes" ]] ; then $PIP_INSTALL numpy==$NUMPY fi # Install latest llvmlite build $CONDA_INSTALL -c numba/label/dev llvmlite # Install dependencies for building the documentation if [ "$BUILD_DOC" == "yes" ]; then $CONDA_INSTALL sphinx=2.4.4 docutils=0.17 sphinx_rtd_theme pygments numpydoc; fi if [ "$BUILD_DOC" == "yes" ]; then $PIP_INSTALL rstcheck; fi # Install dependencies for code coverage (codecov.io) if [ "$RUN_COVERAGE" == "yes" ]; then $PIP_INSTALL codecov; fi # Install SVML if [ "$TEST_SVML" == "yes" ]; then $CONDA_INSTALL -c numba icc_rt; fi # Install Intel TBB parallel backend if [ "$TEST_THREADING" == "tbb" ]; then $CONDA_INSTALL -c numba tbb=2021 tbb-devel; fi # Install pickle5 if [ "$TEST_PICKLE5" == "yes" ]; then $PIP_INSTALL pickle5; fi # Install typeguard if [ "$RUN_TYPEGUARD" == "yes" ]; then $CONDA_INSTALL conda-forge::typeguard; fi # environment dump for debug # echo "DEBUG ENV:" # echo "-------------------------------------------------------------------------" # conda env export # echo "-------------------------------------------------------------------------" numba-0.55.1/buildscripts/incremental/test.cmd000664 000000 000000 00000001740 14174536160 021351 0ustar00rootroot000000 000000 call activate %CONDA_ENV% @rem Ensure that the documentation builds without warnings if "%BUILD_DOC%" == "yes" python setup.py build_doc @rem Run system info tool pushd bin numba -s popd @rem switch off color messages set NUMBA_DISABLE_ERROR_MESSAGE_HIGHLIGHTING=1 @rem switch on developer mode set NUMBA_DEVELOPER_MODE=1 @rem enable the faulthandler set PYTHONFAULTHANDLER=1 @rem enable new style error handling set NUMBA_CAPTURED_ERRORS=new_style @rem First check that the test discovery works python -m numba.tests.test_runtests @rem Now run the Numba test suite @rem Note that coverage is run from the checkout dir to match the "source" @rem directive in .coveragerc if "%RUN_COVERAGE%" == "yes" ( set PYTHONPATH=. coverage erase coverage run runtests.py -b --exclude-tags='long_running' -m -- numba.tests ) else ( set NUMBA_ENABLE_CUDASIM=1 python -m numba.runtests -b --exclude-tags='long_running' -m -- numba.tests ) if %errorlevel% neq 0 exit /b %errorlevel% numba-0.55.1/buildscripts/incremental/test.sh000775 000000 000000 00000007121 14174536160 021222 0ustar00rootroot000000 000000 #!/bin/bash source activate $CONDA_ENV # Make sure any error below is reported as such set -v -e # Ensure the README is correctly formatted if [ "$BUILD_DOC" == "yes" ]; then rstcheck README.rst; fi # Ensure that the documentation builds without warnings pushd docs if [ "$BUILD_DOC" == "yes" ]; then make SPHINXOPTS=-W clean html; fi popd # Run system info tool pushd bin numba -s popd # switch off color messages export NUMBA_DISABLE_ERROR_MESSAGE_HIGHLIGHTING=1 # switch on developer mode export NUMBA_DEVELOPER_MODE=1 # enable the fault handler export PYTHONFAULTHANDLER=1 # enable new style error handling export NUMBA_CAPTURED_ERRORS="new_style" # deal with threading layers if [ -z ${TEST_THREADING+x} ]; then echo "INFO: Threading layer not explicitly set." else case "${TEST_THREADING}" in "workqueue"|"omp"|"tbb") export NUMBA_THREADING_LAYER="$TEST_THREADING" echo "INFO: Threading layer set as: $TEST_THREADING" ;; *) echo "INFO: Threading layer explicitly set to bad value: $TEST_THREADING." exit 1 ;; esac fi # If TEST_THREADING is set in the env, then check that Numba agrees that the # environment can support the requested threading. 
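# For example, `check_sysinfo "TBB"` runs a short Python snippet that asserts
# numba.misc.numba_sysinfo.get_sysinfo() reports 'TBB Threading' as True, so
# a job requesting an unsupported threading layer fails fast here.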
function check_sysinfo() { cmd="import os;\ from numba.misc.numba_sysinfo import get_sysinfo;\ assert get_sysinfo()['$1 Threading'] is True, 'Threading layer $1 '\ 'is not supported';\ print('Threading layer $1 is supported')" python -c "$cmd" } if [[ "$TEST_THREADING" ]]; then if [[ "$TEST_THREADING" == "tbb" ]]; then check_sysinfo "TBB" elif [[ "$TEST_THREADING" == "omp" ]]; then check_sysinfo "OpenMP" elif [[ "$TEST_THREADING" == "workqueue" ]]; then check_sysinfo "Workqueue" else echo "Unknown threading layer requested: $TEST_THREADING" exit 1 fi fi # Find catchsegv unamestr=`uname` if [[ "$unamestr" == 'Linux' ]]; then if [[ "${BITS32}" == "yes" ]]; then SEGVCATCH="" else SEGVCATCH=catchsegv fi elif [[ "$unamestr" == 'Darwin' ]]; then SEGVCATCH="" else echo Error fi # limit CPUs in use on PPC64LE, fork() issues # occur on high core count systems archstr=`uname -m` if [[ "$archstr" == 'ppc64le' ]]; then TEST_NPROCS=16 fi # setup SDKROOT on Mac if [[ $(uname) == "Darwin" ]]; then export SDKROOT=`pwd`/MacOSX10.10.sdk fi # First check that the test discovery works python -m numba.tests.test_runtests # Now run tests based on the changes identified via git NUMBA_ENABLE_CUDASIM=1 $SEGVCATCH python -m numba.runtests -b -v -g -m $TEST_NPROCS -- numba.tests # List the tests found echo "INFO: All discovered tests:" python -m numba.runtests -l # Now run the Numba test suite with slicing # Note that coverage is run from the checkout dir to match the "source" # directive in .coveragerc echo "INFO: Running slice of discovered tests: ($TEST_START_INDEX,None,$TEST_COUNT)" if [ "$RUN_COVERAGE" == "yes" ]; then export PYTHONPATH=. coverage erase $SEGVCATCH coverage run runtests.py -b -j "$TEST_START_INDEX,None,$TEST_COUNT" --exclude-tags='long_running' -m $TEST_NPROCS -- numba.tests elif [ "$RUN_TYPEGUARD" == "yes" ]; then echo "INFO: Running with typeguard" NUMBA_USE_TYPEGUARD=1 NUMBA_ENABLE_CUDASIM=1 PYTHONWARNINGS="ignore:::typeguard" $SEGVCATCH python runtests.py -b -j "$TEST_START_INDEX,None,$TEST_COUNT" --exclude-tags='long_running' -m $TEST_NPROCS -- numba.tests else NUMBA_ENABLE_CUDASIM=1 $SEGVCATCH python -m numba.runtests -b -j "$TEST_START_INDEX,None,$TEST_COUNT" --exclude-tags='long_running' -m $TEST_NPROCS -- numba.tests fi numba-0.55.1/codecov.yml000664 000000 000000 00000001244 14174536160 015041 0ustar00rootroot000000 000000 # Configuration for codecov.io # When editing this file, please validate its contents using: # curl -X POST --data-binary @- https://codecov.io/validate < codecov.yml comment: layout: "header, diff, changes, uncovered" coverage: ignore: - "numba/cuda/.*" - "numba/hsa/.*" status: project: default: # The build fails if total project coverage drops by more than 3% target: auto threshold: "3%" # These checks can mark a build failed if too much new code # is not covered (which happens often with JITted functions). 
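# Hence the 'changes' and 'patch' status checks are disabled below.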
changes: false patch: false numba-0.55.1/contrib/000775 000000 000000 00000000000 14174536160 014333 5ustar00rootroot000000 000000 numba-0.55.1/contrib/valgrind-numba.supp000664 000000 000000 00000000660 14174536160 020154 0ustar00rootroot000000 000000 { Memcheck:Cond fun:_ZN4llvm3sys14getHostCPUNameEv fun:LLVMPY_GetHostCPUName } { Memcheck:Value8 fun:_ZN4llvm3sys14getHostCPUNameEv fun:LLVMPY_GetHostCPUName } { Memcheck:Cond fun:__intel_sse2_strrchr fun:_ZN67_INTERNAL_45_______src_thirdparty_tbb_omp_dynamic_link_cpp_c306cade5__kmp12init_dl_dataEv fun:__sti__$E } numba-0.55.1/docs/000775 000000 000000 00000000000 14174536160 013623 5ustar00rootroot000000 000000 numba-0.55.1/docs/Makefile000664 000000 000000 00000015164 14174536160 015272 0ustar00rootroot000000 000000 # Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = -j1 SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build # User-friendly check for sphinx-build ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) endif # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext help: @echo "Please use \`make <target>' where <target> is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" @echo " text to make text files" @echo " man to make manual pages" @echo " texinfo to make Texinfo files" @echo " info to make Texinfo files and run them through makeinfo" @echo " gettext to make PO message catalogs" @echo " changes to make an overview of all changed/added/deprecated items" @echo " xml to make Docutils-native XML files" @echo " pseudoxml to make pseudoxml-XML files for display purposes" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. 
The HTML page is in $(BUILDDIR)/singlehtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Numba.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Numba.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/Numba" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Numba" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." latexpdfja: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through platex and dvipdfmx..." $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." xml: $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml @echo @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 
pseudoxml: $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml @echo @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." numba-0.55.1/docs/_templates/000775 000000 000000 00000000000 14174536160 015760 5ustar00rootroot000000 000000 numba-0.55.1/docs/_templates/EMPTY000664 000000 000000 00000000000 14174536160 016567 0ustar00rootroot000000 000000 numba-0.55.1/docs/dagmap/000775 000000 000000 00000000000 14174536160 015054 5ustar00rootroot000000 000000 numba-0.55.1/docs/dagmap/README.md000664 000000 000000 00000004300 14174536160 016330 0ustar00rootroot000000 000000 # DAG Roadmap This directory includes a representation of the Numba roadmap in the form of a DAG. We have done this to enable a highly granular display of enhancements to Numba that also shows the relationships between these tasks. Many tasks have prerequisites, and we've found that issue trackers, Kanban boards, and time-bucketed roadmap documentation all fail to represent this information in different ways. ## Requirements ``` conda install jinja2 python-graphviz pyyaml ``` ## Usage ``` ./render.py -o dagmap.html dagmap.yaml ``` The generated HTML file will look for `jquery.graphviz.svg.js` in the same directory. ## Updating the DAG Copy one of the existing tasks and edit: * `label`: text appears on the node. Embed `\n` for line breaks. * `id`: Referenced to indicate a dependency * `description`: Shown in the tooltip. Automatically word-wrapped. * `depends_on`: Optional list of task IDs which this task depends on. The `style` section of the file is not used yet. (An illustrative task entry is sketched in a comment inside `dagmap.yaml` below.) ## Notes The HTML rendering of the graph is based on a slightly modified version of [jquery.graphviz.svg](https://github.com/mountainstorm/jquery.graphviz.svg/). Its license is: ``` Copyright (c) 2015 Mountainstorm Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ```numba-0.55.1/docs/dagmap/dagmap.yaml000664 000000 000000 00000013020 14174536160 017166 0ustar00rootroot000000 000000 meta: version: 1 style: tags: performance: border: red tasks: - label: Track allocations in functions id: track_alloc description: | Maintain a list of allocations inside each function which can be used for freeing things on return, and also for debugging memory usage. - label: Catch exceptions id: catch_exceptions description: | Allow exceptions raised in nopython mode to be caught in nopython mode. 
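# An illustrative entry showing the task schema documented in the dagmap
# README above (the label, id and description here are hypothetical, not
# part of the real roadmap; track_alloc is an existing task id):
#
#   - label: Example task\nsecond heading line
#     id: example_task
#     description: |
#       Free-form tooltip text; word-wrapped automatically when rendered.
#     depends_on:
#       - track_alloc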
depends_on: - track_alloc - label: New IR id: new_ir description: | New intermediate representation for Numba that is backed by a dictionary - label: New Type Matching DSL id: type_matching description: | Replace the current DSL for Numba types with something more expressive that can match type patterns - label: Declarative type signatures\nfor @overload/@overload_method id: declarative_overload description: | Replace the current DSL for Numba types with something more expressive\n that can match type patterns depends_on: - type_matching - label: Rewrite "old-style" implementations id: rewrite_old_impls description: | Rewrite implementations of functions that use the old extension API that separates typing from implementation, and often uses the LLVM builder unnecessarily. depends_on: - declarative_overload - improve_test_suite_tooling - faster_pr_testing - label: Unify and add more test suite tooling id: improve_test_suite_tooling description: | Add tools to help with common patterns in testing and unify the ones we have, there's no need for 12 spellings of "is this Python 3" Also decide on "what to test", do all types need testing if inputs are being "as_array"'d? - label: Pipeline pass formalisation id: pass_formalisation description: | Decide on a formal description of a compiler pass and create supporting code for it - label: Array expression fusion pass id: new_array_expr_fusion_pass description: From parfors extract out the array expression fusion pass depends_on: - parfors_clean_up - pass_formalisation - label: LICM Pass id: new_licm_pass description: | Create a LICM Pass depends_on: - parfors_clean_up - pass_formalisation - label: Clean up Parfors id: parfors_clean_up description: | General clean up and refactoring of parfors ahead of any additional work - label: Mode based pipeline id: mode_based_pipeline description: | Switch the jit decorator to use a mode based pipeline with `nopython=True` equivalent as default. - label: Remove object mode fallback id: remove_objmode_fallback description: | Remove the deprecated object mode fallback depends_on: - mode_based_pipeline - label: Switch to ORC JIT id: orc_jit description: | MCJIT has been deprecated for some time. Need to switch to the newer ORC JIT class. - label: Performance analysis suite id: perform_analysis_suite description: | Meta task for all performance analysis related functionality depends_on: - line_profiling - assembly_analysis_tooling - vectorisation_analysis - label: Vectorisation analysis id: vectorisation_analysis description: | Obtain LLVMs vectorisation reports and present these in a user friendly manner - label: Line profiling id: line_profiling description: | Support collection of profiling statistics from compiled machine code and map back to lines of Python. 
depends_on: - orc_jit - assembly_analysis_tooling - label: Assembly analysis tooling id: assembly_analysis_tooling description: | Tie generated assembly back to python lines and annotate instruction quality depends_on: - capstone - label: Build capstone against llvmdev id: capstone description: | Build capstone against llvmdev and create conda packages/wheels - label: Increase JIT class method performance id: jit_class_method_performance description: | Increase the performance of jitclass methods depends_on: - llvm_ref_count_pruning - new_licm_pass - label: LLVM level ref count pruning id: llvm_ref_count_pruning description: | Add a LLVM compiler pass to prune refcounts across entire functions - label: JITted coverage information id: jitted_coverage_info description: | Work out how to leverage gcov support in LLVM to enable coverage information depends_on: - compiler_rt - label: LLVM compiler_rt support id: compiler_rt description: | Work out how to build compiler_rt into LLVM and how to use it in Numba - label: Switch to pytest id: pytest description: | Make it possible to use pytest as test runner for Numba - label: Option to run modified tests only id: run_new_tests description: | Use / make pytest plugin to detect all test files which are new / changed relative to a given branch, and run only those tests depends_on: - pytest - label: Option to run 1/N slice of tests id: run_test_slice description: | Use / make pytest plugin to run 1/N of enumerated tests. depends_on: - pytest - label: Faster PR testing id: faster_pr_testing description: | Make automated PR testing with public CI services give faster feedback. depends_on: - run_new_tests - run_test_slice numba-0.55.1/docs/dagmap/jquery.graphviz.svg.js000664 000000 000000 00000037127 14174536160 021372 0ustar00rootroot000000 000000 /* * Copyright (c) 2015 Mountainstorm * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ +function ($) { 'use strict' // Cross Browser starts/endsWith support // ===================================== String.prototype.startsWith = function(prefix) { return this.indexOf(prefix) == 0; }; String.prototype.endsWith = function(suffix) { return this.indexOf(suffix, this.length - suffix.length) !== -1; }; // GRAPHVIZSVG PUBLIC CLASS DEFINITION // =================================== var GraphvizSvg = function (element, options) { this.type = null this.options = null this.enabled = null this.$element = null this.init('graphviz.svg', element, options) } GraphvizSvg.VERSION = '1.0.1' GraphvizSvg.GVPT_2_PX = 32.5 // used to ease removal of extra space GraphvizSvg.DEFAULTS = { url: null, svg: null, shrink: '0.125pt', tooltips: { init: function ($graph) { var $a = $(this) $a.tooltip({ container: $graph, placement: 'auto left', animation: false, viewport: null }).on('hide.bs.tooltip', function() { // keep them visible even if you acidentally mouse over if ($a.attr('data-tooltip-keepvisible')) { return false } }) }, show: function () { var $a = $(this) $a.attr('data-tooltip-keepvisible', true) $a.tooltip('show') }, hide: function () { var $a = $(this) $a.removeAttr('data-tooltip-keepvisible') $a.tooltip('hide') }, update: function () { var $this = $(this) if ($this.attr('data-tooltip-keepvisible')) { $this.tooltip('show') return } } }, zoom: true, highlight: { selected: function (col, bg) { return col }, unselected: function (col, bg) { return jQuery.Color(col).transition(bg, 0.9) } }, ready: null } GraphvizSvg.prototype.init = function (type, element, options) { this.enabled = true this.type = type this.$element = $(element) this.options = this.getOptions(options) if (options.url) { var that = this $.get(options.url, null, function(data) { var svg = $("svg", data) that.$element.html(document.adoptNode(svg[0])) that.setup() }, "xml") } else { if (options.svg) { this.$element.html(options.svg) } this.setup() } } GraphvizSvg.prototype.getDefaults = function () { return GraphvizSvg.DEFAULTS } GraphvizSvg.prototype.getOptions = function (options) { options = $.extend({}, this.getDefaults(), this.$element.data(), options) if (options.shrink) { if (typeof options.shrink != 'object') { options.shrink = { x: options.shrink, y: options.shrink } } options.shrink.x = this.convertToPx(options.shrink.x) options.shrink.y = this.convertToPx(options.shrink.y) } return options } GraphvizSvg.prototype.setup = function () { var options = this.options // save key elements in the graph for easy access var $svg = $(this.$element.children('svg')) var $graph = $svg.children('g:first') this.$svg = $svg this.$graph = $graph this.$background = $graph.children('polygon:first') // might not exist this.$nodes = $graph.children('.node') this.$edges = $graph.children('.edge') this._nodesByName = {} this._edgesByName = {} // add top level class and copy background color to element this.$element.addClass('graphviz-svg') if (this.$background.length) { this.$element.css('background', this.$background.attr('fill')) } // setup all the nodes and edges var that = this this.$nodes.each(function () { that.setupNodesEdges($(this), true) }) this.$edges.each(function () { that.setupNodesEdges($(this), false) }) // remove the graph title element var $title = this.$graph.children('title') this.$graph.attr('data-name', $title.text()) $title.remove() if (options.zoom) { this.setupZoom() } // tell people we're done if (options.ready) { options.ready.call(this) } } GraphvizSvg.prototype.setupNodesEdges = function ($el, isNode) { var that = 
this var options = this.options // save the colors of the paths, ellipses and polygons $el.find('polygon, ellipse, path').each(function () { var $this = $(this) // save original colors $this.data('graphviz.svg.color', { fill: $this.attr('fill'), stroke: $this.attr('stroke') }) // shrink it if it's a node if (isNode && options.shrink) { that.scaleNode($this) } }) // save the node name and check if theres a comment above; save it var $title = $el.children('title') if ($title[0]) { // remove any compass points: var title = $title.text().replace(/:[snew][ew]?/g,'') $el.attr('data-name', title) $title.remove() if (isNode) { this._nodesByName[title] = $el[0] } else { this._edgesByName[title] = $el[0] } // without a title we can't tell if its a user comment or not var previousSibling = $el[0].previousSibling while (previousSibling && previousSibling.nodeType != 8) { previousSibling = previousSibling.previousSibling } if (previousSibling != null && previousSibling.nodeType == 8) { var htmlDecode = function (input) { var e = document.createElement('div') e.innerHTML = input return e.childNodes[0].nodeValue } var value = htmlDecode(previousSibling.nodeValue.trim()) if (value != title) { // user added comment $el.attr('data-comment', value) } } } // remove namespace from a[xlink:title] $el.find('a').filter(function () { return $(this).attr('xlink:title') }).each(function () { var $a = $(this) $a.attr('title', $a.attr('xlink:title')) $a.removeAttr('xlink:title') if (options.tooltips) { options.tooltips.init.call(this, that.$element) } }) } GraphvizSvg.prototype.setupZoom = function() { var that = this var $element = this.$element var $svg = this.$svg this.zoom = {width: $svg.attr('width'), height: $svg.attr('height'), percentage: null } this.scaleView(100.0) $element.mousewheel(function (evt) { if (evt.shiftKey) { var percentage = that.zoom.percentage percentage -= evt.deltaY * evt.deltaFactor if (percentage < 100.0) { percentage = 100.0 } // get pointer offset in view // ratio offset within svg var dx = evt.pageX - $svg.offset().left var dy = evt.pageY - $svg.offset().top var rx = dx / $svg.width() var ry = dy / $svg.height() // offset within frame ($element) var px = evt.pageX - $element.offset().left var py = evt.pageY - $element.offset().top that.scaleView(percentage) // scroll so pointer is still in same place $element.scrollLeft((rx * $svg.width()) + 0.5 - px) $element.scrollTop((ry * $svg.height()) + 0.5 - py) return false // stop propagation } }) } GraphvizSvg.prototype.scaleView = function(percentage) { var that = this var $svg = this.$svg $svg.attr('width', percentage + '%') $svg.attr('height', percentage + '%') this.zoom.percentage = percentage // now callback to update tooltip position var $everything = this.$nodes.add(this.$edges) $everything.children('a[title]').each(function () { that.options.tooltips.update.call(this) }) } GraphvizSvg.prototype.scaleNode = function($node) { var dx = this.options.shrink.x var dy = this.options.shrink.y var tagName = $node.prop('tagName') if (tagName == 'ellipse') { $node.attr('rx', parseFloat($node.attr('rx')) - dx) $node.attr('ry', parseFloat($node.attr('ry')) - dy) } else if (tagName == 'polygon') { // this is more complex - we need to scale it manually var bbox = $node[0].getBBox() var cx = bbox.x + (bbox.width / 2) var cy = bbox.y + (bbox.height / 2) var pts = $node.attr('points').split(' ') var points = '' // new value for (var i in pts) { var xy = pts[i].split(',') var ox = parseFloat(xy[0]) var oy = parseFloat(xy[1]) points += (((cx - ox) / 
(bbox.width / 2) * dx) + ox) + ',' + (((cy - oy) / (bbox.height / 2) * dy) + oy) + ' ' } $node.attr('points', points) } } GraphvizSvg.prototype.convertToPx = function (val) { var retval = val if (typeof val == 'string') { var end = val.length var factor = 1.0 if (val.endsWith('px')) { end -= 2 } else if (val.endsWith('pt')) { end -= 2 factor = GraphvizSvg.GVPT_2_PX } retval = parseFloat(val.substring(0, end)) * factor } return retval } GraphvizSvg.prototype.findEdge = function (nodeName, testEdge, $retval) { var retval = [] for (var name in this._edgesByName) { var match = testEdge(nodeName, name) if (match) { if ($retval) { $retval.push(this._edgesByName[name]) } retval.push(match) } } return retval } GraphvizSvg.prototype.findLinked = function (node, includeEdges, testEdge, $retval) { var that = this var $node = $(node) var $edges = null if (includeEdges) { $edges = $retval } var names = this.findEdge($node.attr('data-name'), testEdge, $edges) for (var i in names) { var n = this._nodesByName[names[i]] if (!$retval.is(n)) { $retval.push(n) that.findLinked(n, includeEdges, testEdge, $retval) } } } GraphvizSvg.prototype.colorElement = function ($el, getColor) { var bg = this.$element.css('background') $el.find('polygon, ellipse, path').each(function() { var $this = $(this) var color = $this.data('graphviz.svg.color') if (color.fill && $this.prop('tagName') != 'path') { $this.attr('fill', getColor(color.fill, bg)) // don't set fill if it's a path } if (color.stroke) { $this.attr('stroke', getColor(color.stroke, bg)) } }) } GraphvizSvg.prototype.restoreElement = function ($el) { $el.find('polygon, ellipse, path').each(function() { var $this = $(this) var color = $this.data('graphviz.svg.color') if (color.fill) { $this.attr('fill', color.fill) // don't set fill if it's a path } if (color.stroke) { $this.attr('stroke', color.stroke) } }) } // methods users can actually call GraphvizSvg.prototype.nodes = function () { return this.$nodes } GraphvizSvg.prototype.edges = function () { return this.$edges } GraphvizSvg.prototype.nodesByName = function () { return this._nodesByName } GraphvizSvg.prototype.edgesByName = function () { return this._edgesByName } GraphvizSvg.prototype.linkedTo = function (node, includeEdges) { var $retval = $() this.findLinked(node, includeEdges, function (nodeName, edgeName) { var other = null; var match = '->' + nodeName if (edgeName.endsWith(match)) { other = edgeName.substring(0, edgeName.length - match.length); } return other; }, $retval) return $retval } GraphvizSvg.prototype.linkedFrom = function (node, includeEdges) { var $retval = $() this.findLinked(node, includeEdges, function (nodeName, edgeName) { var other = null; var match = nodeName + '->' if (edgeName.startsWith(match)) { other = edgeName.substring(match.length); } return other; }, $retval) return $retval } GraphvizSvg.prototype.linked = function (node, includeEdges) { var $retval = $() this.findLinked(node, includeEdges, function (nodeName, edgeName) { return '^' + name + '--(.*)$' }, $retval) this.findLinked(node, includeEdges, function (nodeName, edgeName) { return '^(.*)--' + name + '$' }, $retval) return $retval } GraphvizSvg.prototype.tooltip = function ($elements, show) { var that = this var options = this.options $elements.each(function () { $(this).find('a[title]').each(function () { if (show) { options.tooltips.show.call(this) } else { options.tooltips.hide.call(this) } }) }) } GraphvizSvg.prototype.bringToFront = function ($elements) { $elements.detach().appendTo(this.$graph) } 
GraphvizSvg.prototype.sendToBack = function ($elements) { if (this.$background.length) { $elements.insertAfter(this.$background) } else { $elements.detach().prependTo(this.$graph) } } GraphvizSvg.prototype.highlight = function ($nodesEdges, tooltips) { var that = this var options = this.options var $everything = this.$nodes.add(this.$edges) if ($nodesEdges && $nodesEdges.length > 0) { // create set of all other elements and dim them $everything.not($nodesEdges).each(function () { that.colorElement($(this), options.highlight.unselected) $(this).css('font-weight', 'normal') that.tooltip($(this)) }) $nodesEdges.each(function () { that.colorElement($(this), options.highlight.selected) $(this).css('font-weight', 'normal') }) this.tooltip($nodesEdges, tooltips) } else { $everything.each(function () { that.restoreElement($(this)) $(this).css('font-weight', 'normal') }) this.tooltip($everything) } } GraphvizSvg.prototype.destroy = function () { var that = this this.hide(function () { that.$element.off('.' + that.type).removeData(that.type) }) } // GRAPHVIZSVG PLUGIN DEFINITION // ============================= function Plugin(option) { return this.each(function () { var $this = $(this) var data = $this.data('graphviz.svg') var options = typeof option == 'object' && option if (!data && /destroy/.test(option)) return if (!data) $this.data('graphviz.svg', (data = new GraphvizSvg(this, options))) if (typeof option == 'string') data[option]() }) } var old = $.fn.graphviz $.fn.graphviz = Plugin $.fn.graphviz.Constructor = GraphvizSvg // GRAPHVIZ NO CONFLICT // ==================== $.fn.graphviz.noConflict = function () { $.fn.graphviz = old return this } }(jQuery) numba-0.55.1/docs/dagmap/render.py000775 000000 000000 00000005056 14174536160 016716 0ustar00rootroot000000 000000 #!/usr/bin/env python import os.path import json import collections import yaml import graphviz from jinja2 import Environment, FileSystemLoader Dagmap = collections.namedtuple('Dagmap', ['version', 'meta', 'style', 'tasks']) def parse_yaml(filename): with open(filename, 'r') as f: contents = yaml.safe_load(f) meta = contents['meta'] version = meta['version'] if version > 1: raise Exception('Unsupported version %d' % version) del meta['version'] style = contents['style'] tasks = contents['tasks'] if not isinstance(tasks, list): raise Exception('"tasks" must be a list') return Dagmap(version=version, meta=meta, style=style, tasks=tasks) def to_graphviz(dagmap): G = graphviz.Digraph(format='svg', engine='neato', graph_attr=dict(bgcolor="#f4f4f4", pad="0.5", overlap="false"), node_attr=dict(width="0.6", style="filled", fillcolor="#83c6de", color="#83c6de", penwidth="3", label="", fontname="helvetica Neue Ultra Light", fontsize="28"), edge_attr=dict(color="#616a72", arrowsize="2.0", penwidth="4", fontname="helvetica Neue Ultra Light")) G.node(name='_nothing', label='', style='invis') for task in dagmap.tasks: G.node(name=task['id'], label=task['label'], tooltip=task['description'].strip()) depends_on = task.get('depends_on', ['_nothing']) for dep in depends_on: if dep == '_nothing': attrs = { 'style': 'invis', } else: attrs = {} G.edge(dep, task['id'], **attrs) return G def main(argv): import argparse parser = argparse.ArgumentParser(description='Render Dagmap to Graphviz') parser.add_argument('-o', '--output', required=True, help='output svg filename') parser.add_argument('-t', '--template', default='template.html', help='HTML rendering template') parser.add_argument('input', metavar='INPUT', type=str, help='YAML input filename') args 
= parser.parse_args(argv[1:]) dagmap = parse_yaml(args.input) graph = to_graphviz(dagmap) svg = graph.pipe().decode('utf-8') template_env = Environment(loader=FileSystemLoader(os.path.dirname(__file__))) template = template_env.get_template(args.template) html = template.render(svg=json.dumps(svg)) with open(args.output, 'w') as f: f.write(html) return 0 if __name__ == '__main__': import sys sys.exit(main(sys.argv)) numba-0.55.1/docs/dagmap/template.html000664 000000 000000 00000010266 14174536160 017562 0ustar00rootroot000000 000000

[template.html: the original Jinja2/HTML markup was lost to tag stripping in this archive; only its visible strings survive: "Click node to highlight; Shift-scroll to zoom; Esc to unhighlight", "Details", "(Click on a node for details)". render.py fills this template with the Graphviz SVG (via its `svg` variable) and the page loads `jquery.graphviz.svg.js` from the same directory.]
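<!-- What follows is an illustrative sketch only, reconstructed because the
     real markup was lost (see the placeholder note above). It shows roughly
     how render.py's output could be consumed, assuming jQuery, Bootstrap and
     jquery.mousewheel are loaded alongside jquery.graphviz.svg.js; the
     element id "graph" is hypothetical. -->
<div id="graph" style="overflow: auto;"></div>
<script src="jquery.graphviz.svg.js"></script>
<script>
  $(document).ready(function () {
    // render.py passes the Graphviz SVG as a JSON-encoded string, so the
    // Jinja2 substitution below expands to a quoted JavaScript string.
    $("#graph").graphviz({
      svg: {{ svg }},
      ready: function () {
        // click-to-highlight wiring would go here, e.g. using
        // this.highlight() and this.linkedFrom() from the plugin.
      }
    });
  });
</script>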
numba-0.55.1/docs/environment.yml000664 000000 000000 00000000773 14174536160 016721 0ustar00rootroot000000 000000 # This environment is used by the RTD config for PR builds. RTD uses this as the # base environment and then adds in the sphinx etc tools on top. # See: https://docs.readthedocs.io/en/stable/guides/conda.html name: rtd channels: - numba/label/dev dependencies: - python=3.7 - llvmlite - numpy - numpydoc - setuptools # https://stackoverflow.com/questions/67542699/readthedocs-sphinx-not-rendering-bullet-list-from-rst-fileA - docutils==0.16 # The following is needed to fix RTD. - conda numba-0.55.1/docs/gh-pages.py000664 000000 000000 00000011112 14174536160 015664 0ustar00rootroot000000 000000 #!/usr/bin/env python # -*- coding: utf-8 -*- """Script to commit the doc build outputs into the github-pages repo. Use: gh-pages.py [tag] If no tag is given, the current output of 'git describe' is used. If given, that is how the resulting directory will be named. In practice, you should use either actual clean tags from a current build or something like 'current' as a stable URL for the most current version of the """ from __future__ import print_function, division, absolute_import #----------------------------------------------------------------------------- # Imports #----------------------------------------------------------------------------- import os import re import shutil import sys from os import chdir as cd from os.path import join as pjoin from subprocess import Popen, PIPE, CalledProcessError, check_call #----------------------------------------------------------------------------- # Globals #----------------------------------------------------------------------------- pages_dir = 'gh-pages' html_dir = '_build/html' pdf_dir = '_build/latex' pages_repo = 'git@github.com:numba/numba-doc.git' #----------------------------------------------------------------------------- # Functions #----------------------------------------------------------------------------- def sub_environment(): """Return an environment dict for executing subcommands in.""" env = os.environ.copy() # Force untranslated messages for regex matching env['LANG'] = 'C' return env def sh(cmd): """Execute command in a subshell, return status code.""" return check_call(cmd, shell=True, env=sub_environment()) def sh2(cmd): """Execute command in a subshell, return stdout. 
Stderr is unbuffered from the subshell.""" p = Popen(cmd, stdout=PIPE, shell=True, env=sub_environment()) out = p.communicate()[0] retcode = p.returncode if retcode: raise CalledProcessError(retcode, cmd) else: return out.rstrip() def sh3(cmd): """Execute command in a subshell, return stdout, stderr If anything appears in stderr, print it out to sys.stderr""" p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True, env=sub_environment()) out, err = p.communicate() retcode = p.returncode if retcode: raise CalledProcessError(retcode, cmd) else: return out.rstrip(), err.rstrip() def init_repo(path): """clone the gh-pages repo if we haven't already.""" sh("git clone %s %s"%(pages_repo, path)) here = os.getcwd() cd(path) sh('git checkout gh-pages') cd(here) #----------------------------------------------------------------------------- # Script starts #----------------------------------------------------------------------------- if __name__ == '__main__': # The tag can be given as a positional argument try: tag = sys.argv[1] except IndexError: try: tag = sh2('git describe --exact-match').decode() except CalledProcessError: tag = "dev" # Fallback print("Using dev") startdir = os.getcwd() if not os.path.exists(pages_dir): # init the repo init_repo(pages_dir) else: # ensure up-to-date before operating cd(pages_dir) sh('git checkout gh-pages') sh('git pull') cd(startdir) dest = pjoin(pages_dir, tag) # don't `make html` here, because gh-pages already depends on html in Makefile # sh('make html') if tag != 'dev': # only build pdf for non-dev targets #sh2('make pdf') pass # This is pretty unforgiving: we unconditionally nuke the destination # directory, and then copy the html tree in there shutil.rmtree(dest, ignore_errors=True) shutil.copytree(html_dir, dest) if tag != 'dev': #shutil.copy(pjoin(pdf_dir, 'ipython.pdf'), pjoin(dest, 'ipython.pdf')) pass try: cd(pages_dir) status = sh2('git status | head -1').decode() branch = re.match('\#?\s*On branch (.*)$', status).group(1) if branch != 'gh-pages': e = 'On %r, git branch is %r, MUST be "gh-pages"' % (pages_dir, branch) raise RuntimeError(e) sh('git add -A %s' % tag) sh('git commit -m"Updated doc release: %s"' % tag) print() print('Most recent 3 commits:') sys.stdout.flush() sh('git --no-pager log --oneline HEAD~3..') finally: cd(startdir) print() print('Now verify the build in: %r' % dest) print("If everything looks good, 'git push'") numba-0.55.1/docs/make.bat000664 000000 000000 00000015065 14174536160 015237 0ustar00rootroot000000 000000 @ECHO OFF REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set BUILDDIR=_build set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source set I18NSPHINXOPTS=%SPHINXOPTS% source if NOT "%PAPER%" == "" ( set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% ) if "%1" == "" goto help if "%1" == "help" ( :help echo.Please use `make ^<target^>` where ^<target^> is one of echo. html to make standalone HTML files echo. dirhtml to make HTML files named index.html in directories echo. singlehtml to make a single large HTML file echo. pickle to make pickle files echo. json to make JSON files echo. htmlhelp to make HTML files and a HTML help project echo. qthelp to make HTML files and a qthelp project echo. devhelp to make HTML files and a Devhelp project echo. epub to make an epub echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter echo. text to make text files echo. man to make manual pages echo. 
texinfo to make Texinfo files echo. gettext to make PO message catalogs echo. changes to make an overview over all changed/added/deprecated items echo. xml to make Docutils-native XML files echo. pseudoxml to make pseudoxml-XML files for display purposes echo. linkcheck to check all external links for integrity echo. doctest to run all doctests embedded in the documentation if enabled goto end ) if "%1" == "clean" ( for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i del /q /s %BUILDDIR%\* goto end ) %SPHINXBUILD% 2> nul if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) if "%1" == "html" ( %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/html. goto end ) if "%1" == "dirhtml" ( %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. goto end ) if "%1" == "singlehtml" ( %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. goto end ) if "%1" == "pickle" ( %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the pickle files. goto end ) if "%1" == "json" ( %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the JSON files. goto end ) if "%1" == "htmlhelp" ( %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can run HTML Help Workshop with the ^ .hhp project file in %BUILDDIR%/htmlhelp. goto end ) if "%1" == "qthelp" ( %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can run "qcollectiongenerator" with the ^ .qhcp project file in %BUILDDIR%/qthelp, like this: echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Numba.qhcp echo.To view the help file: echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Numba.qhc goto end ) if "%1" == "devhelp" ( %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp if errorlevel 1 exit /b 1 echo. echo.Build finished. goto end ) if "%1" == "epub" ( %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub if errorlevel 1 exit /b 1 echo. echo.Build finished. The epub file is in %BUILDDIR%/epub. goto end ) if "%1" == "latex" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex if errorlevel 1 exit /b 1 echo. echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. goto end ) if "%1" == "latexpdf" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex cd %BUILDDIR%/latex make all-pdf cd %BUILDDIR%/.. echo. echo.Build finished; the PDF files are in %BUILDDIR%/latex. goto end ) if "%1" == "latexpdfja" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex cd %BUILDDIR%/latex make all-pdf-ja cd %BUILDDIR%/.. echo. echo.Build finished; the PDF files are in %BUILDDIR%/latex. goto end ) if "%1" == "text" ( %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text if errorlevel 1 exit /b 1 echo. echo.Build finished. The text files are in %BUILDDIR%/text.
goto end ) if "%1" == "man" ( %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man if errorlevel 1 exit /b 1 echo. echo.Build finished. The manual pages are in %BUILDDIR%/man. goto end ) if "%1" == "texinfo" ( %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo if errorlevel 1 exit /b 1 echo. echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. goto end ) if "%1" == "gettext" ( %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale if errorlevel 1 exit /b 1 echo. echo.Build finished. The message catalogs are in %BUILDDIR%/locale. goto end ) if "%1" == "changes" ( %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes if errorlevel 1 exit /b 1 echo. echo.The overview file is in %BUILDDIR%/changes. goto end ) if "%1" == "linkcheck" ( %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck if errorlevel 1 exit /b 1 echo. echo.Link check complete; look for any errors in the above output ^ or in %BUILDDIR%/linkcheck/output.txt. goto end ) if "%1" == "doctest" ( %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest if errorlevel 1 exit /b 1 echo. echo.Testing of doctests in the sources finished, look at the ^ results in %BUILDDIR%/doctest/output.txt. goto end ) if "%1" == "xml" ( %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml if errorlevel 1 exit /b 1 echo. echo.Build finished. The XML files are in %BUILDDIR%/xml. goto end ) if "%1" == "pseudoxml" ( %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml if errorlevel 1 exit /b 1 echo. echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. goto end ) :end numba-0.55.1/docs/requirements.txt000664 000000 000000 00000000010 14174536160 017106 0ustar00rootroot000000 000000 numpydocnumba-0.55.1/docs/source/000775 000000 000000 00000000000 14174536160 015123 5ustar00rootroot000000 000000 numba-0.55.1/docs/source/_ext/000775 000000 000000 00000000000 14174536160 016062 5ustar00rootroot000000 000000 numba-0.55.1/docs/source/_ext/ghfiles.py000664 000000 000000 00000004753 14174536160 020066 0ustar00rootroot000000 000000 import os.path as path import subprocess import shlex from sphinx.util import logging from docutils import nodes logger = logging.getLogger(__name__) # use an old git trick to get the top-level; could have used ../ etc., but # this will be fine. top = subprocess.check_output(shlex.split( "git rev-parse --show-toplevel")).strip().decode("utf-8") def make_ref(text): """ Make hyperlink to GitHub """ full_path = path.join(top, text) if path.isfile(full_path): ref = "https://www.github.com/numba/numba/blob/master/" + text elif path.isdir(full_path): ref = "https://www.github.com/numba/numba/tree/master/" + text else: logger.warning("Failed to find file in repomap: " + text) ref = "https://www.github.com/numba/numba" return ref def intersperse(lst, item): """ Insert item between each item in lst. Copied under CC-BY-SA from stackoverflow at: https://stackoverflow.com/questions/5920643/ add-an-item-between-each-item-already-in-the-list """ result = [item] * (len(lst) * 2 - 1) result[0::2] = lst return result def ghfile_role(name, rawtext, text, lineno, inliner, options={}, content=[]): """ Emit hyperlink nodes for a given file in repomap. """ my_nodes = [] if "{" in text: # myfile.{c,h} - make two nodes # could have used regexes, but this will be fine.
base = text[:text.find(".") + 1] exts = text[text.find("{") + 1:text.find("}")].split(",") for e in exts: node = nodes.reference(rawtext, base + e, refuri=make_ref(base + e), **options) my_nodes.append(node) elif "*" in text: # path/*_files.py - link to directory # Could have used something from os.path, but this will be fine. ref = path.dirname(text) + path.sep node = nodes.reference(rawtext, text, refuri=make_ref(ref), **options) my_nodes.append(node) else: # everything else is taken verbatim node = nodes.reference(rawtext, text, refuri=make_ref(text), **options) my_nodes.append(node) # insert separators if needed if len(my_nodes) > 1: my_nodes = intersperse(my_nodes, nodes.Text(" | ")) return my_nodes, [] def setup(app): logger.info('Initializing ghfiles plugin') app.add_role('ghfile', ghfile_role) metadata = {'parallel_read_safe': True, 'parallel_write_safe': True} return metadata numba-0.55.1/docs/source/conf.py000664 000000 000000 00000024623 14174536160 016431 0ustar00rootroot000000 000000 #!/usr/bin/env python3 # -*- coding: utf-8 -*- # # Numba documentation build configuration file, created by # sphinx-quickstart on Tue Dec 30 11:55:40 2014. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import sys import os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. try: # Numba is installed import numba except ImportError: # Numba is run from its source checkout sys.path.insert(0, os.path.abspath('../..')) import numba on_rtd = os.environ.get('READTHEDOCS') == 'True' if on_rtd: # The following is needed to fix RTD issue with numpydoc # https://github.com/readthedocs/sphinx_rtd_theme/issues/766 from conda.cli.python_api import run_command as conda_cmd conda_cmd("install", "-c", "conda-forge", "sphinx_rtd_theme>=0.5.1", "-y") # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. #needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.intersphinx', 'sphinx.ext.todo', #'sphinx.ext.mathjax', 'sphinx.ext.autodoc', #'sphinx.ext.graphviz', 'numpydoc', ] # Adding the github files extension sys.path.append(os.path.abspath(os.path.join(".", "_ext"))) extensions.append('ghfiles') todo_include_todos = True # Add any paths that contain templates here, relative to this directory. templates_path = ['../_templates'] # The suffix of source filenames. source_suffix = '.rst' # The encoding of source files. #source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' # General information about the project. project = u'Numba' copyright = u'2012-2020, Anaconda, Inc. and others' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # version = '.'.join(numba.__version__.split('.')[:2]) # The full version, including alpha/beta/rc tags. release = numba.__version__ # The language for content autogenerated by Sphinx.
Refer to documentation # for a list of supported languages. #language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: #today = '' # Else, today_fmt is used as the format for a strftime call. #today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = [] # The reST default role (used for this markup: `text`) to use for all # documents. #default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. #add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). #add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. #show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. #modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. #keep_warnings = False # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = 'sphinx_rtd_theme' # All sphinx_rtd_theme options. Default values commented out; uncomment to # change. html_theme_options = { 'canonical_url': 'https://numba.readthedocs.io/en/stable/', # 'logo_only': False, # 'display_version': True, # 'prev_next_buttons_location': 'bottom', 'style_external_links': True, # 'vcs_pageview_mode': '', 'style_nav_header_background': '#00A3E0', # Toc options 'collapse_navigation': False, # 'sticky_navigation': True, # 'navigation_depth': 4, # 'includehidden': True, # 'titles_only': False } # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = None # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". #html_title = None # A shorter title for the navigation bar. Default is the same as html_title. #html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. html_logo = "../_static/numba-white-icon-rgb.svg" # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. html_favicon = '../_static/numba-blue-icon-rgb.svg' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['../_static'] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. #html_extra_path = [] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. #html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. #html_use_smartypants = True # Custom sidebar templates, maps document names to template names. #html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. 
#html_additional_pages = {} # If false, no module index is generated. #html_domain_indices = True # If false, no index is generated. #html_use_index = True # If true, the index is split into individual pages for each letter. #html_split_index = False # If true, links to the reST sources are added to the pages. #html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. #html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. #html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. #html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). #html_file_suffix = None # Output file base name for HTML help builder. htmlhelp_basename = 'Numbadoc' # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). #'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). #'pointsize': '10pt', # Additional stuff for the LaTeX preamble. #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ ('index', 'numba.tex', u'Numba Documentation', u'Anaconda', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. #latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. #latex_use_parts = False # If true, show page references after internal links. #latex_show_pagerefs = False # If true, show URL addresses after external links. #latex_show_urls = False # Documents to append as an appendix to all manuals. #latex_appendices = [] # If false, no module index is generated. #latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ ('index', 'numba', 'Numba Documentation', ['Anaconda'], 1) ] # If true, show URL addresses after external links. #man_show_urls = False # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ('index', 'Numba', 'Numba Documentation', 'Anaconda', 'Numba', 'One line description of project.', 'Miscellaneous'), ] # Documents to append as an appendix to all manuals. #texinfo_appendices = [] # If false, no module index is generated. #texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. #texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. #texinfo_no_detailmenu = False # Configuration for intersphinx: refer to the Python standard library # and the Numpy documentation. 
intersphinx_mapping = { 'python': ('https://docs.python.org/3', None), 'numpy': ('http://docs.scipy.org/doc/numpy', None), 'llvmlite': ('http://llvmlite.pydata.org/en/latest/', None), } # numpydoc options # To silence "WARNING: toctree contains reference to nonexisting document" numpydoc_show_class_members = False # -- Custom autogeneration ------------------------------------------------ def _autogenerate(): from numba.scripts.generate_lower_listing import gen_lower_listing from numba.misc.help.inspector import write_listings basedir = os.path.dirname(__file__) gen_lower_listing(os.path.join(basedir, 'developer/autogen_lower_listing.rst')) # Run inspector on supported packages for package in ['builtins', 'math', 'cmath', 'numpy']: write_listings( package_name=package, filename=os.path.join( basedir, 'developer', 'autogen_{}_listing'.format(package), ), output_format='rst', ) _autogenerate() def setup(app): app.add_css_file('rtd-overrides.css') numba-0.55.1/docs/source/cuda-reference/000775 000000 000000 00000000000 14174536160 017773 5ustar00rootroot000000 000000 numba-0.55.1/docs/source/cuda-reference/host.rst000664 000000 000000 00000016622 14174536160 021511 0ustar00rootroot000000 000000 CUDA Host API ============= Device Management ----------------- Device detection and enquiry ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The following functions are available for querying the available hardware: .. autofunction:: numba.cuda.is_available .. autofunction:: numba.cuda.detect Context management ~~~~~~~~~~~~~~~~~~ CUDA Python functions execute within a CUDA context. Each CUDA device in a system has an associated CUDA context, and Numba presently allows only one context per thread. For further details on CUDA Contexts, refer to the `CUDA Driver API Documentation on Context Management `_ and the `CUDA C Programming Guide Context Documentation `_. CUDA Contexts are instances of the :class:`~numba.cuda.cudadrv.driver.Context` class: .. autoclass:: numba.cuda.cudadrv.driver.Context :members: reset, get_memory_info, push, pop The following functions can be used to get or select the context: .. autofunction:: numba.cuda.current_context .. autofunction:: numba.cuda.require_context The following functions affect the current context: .. autofunction:: numba.cuda.synchronize .. autofunction:: numba.cuda.close Device management ~~~~~~~~~~~~~~~~~ Numba maintains a list of supported CUDA-capable devices: .. attribute:: numba.cuda.gpus An indexable list of supported CUDA devices. This list is indexed by integer device ID. Alternatively, the current device can be obtained: .. function:: numba.cuda.gpus.current Return the currently-selected device. Getting a device through :attr:`numba.cuda.gpus` always provides an instance of :class:`numba.cuda.cudadrv.devices._DeviceContextManager`, which acts as a context manager for the selected device: .. autoclass:: numba.cuda.cudadrv.devices._DeviceContextManager One may also select a context and device or get the current device using the following three functions: .. autofunction:: numba.cuda.select_device .. autofunction:: numba.cuda.get_current_device .. autofunction:: numba.cuda.list_devices The :class:`numba.cuda.cudadrv.driver.Device` class can be used to enquire about the functionality of the selected device: .. class:: numba.cuda.cudadrv.driver.Device The device associated with a particular context. .. attribute:: compute_capability A tuple, *(major, minor)* indicating the supported compute capability. .. attribute:: id The integer ID of the device. .. 
attribute:: name The name of the device (e.g. "GeForce GTX 970"). .. attribute:: uuid The UUID of the device (e.g. "GPU-e6489c45-5b68-3b03-bab7-0e7c8e809643"). .. method:: reset Delete the context for the device. This will destroy all memory allocations, events, and streams created within the context. Compilation ----------- Numba provides an entry point for compiling a Python function to PTX without invoking any of the driver API. This can be useful for: - Generating PTX that is to be inlined into other PTX code (e.g. from outside the Numba / Python ecosystem). - Generating code when there is no device present. - Generating code prior to a fork without initializing CUDA. .. note:: It is the user's responsibility to manage any ABI issues arising from the use of compilation to PTX. .. autofunction:: numba.cuda.compile_ptx The environment variable ``NUMBA_CUDA_DEFAULT_PTX_CC`` can be set to control the default compute capability targeted by ``compile_ptx`` - see :ref:`numba-envvars-gpu-support`. If PTX for the compute capability of the current device is required, the ``compile_ptx_for_current_device`` function can be used: .. autofunction:: numba.cuda.compile_ptx_for_current_device Measurement ----------- .. _cuda-profiling: Profiling ~~~~~~~~~ The NVidia Visual Profiler can be used directly on executing CUDA Python code - it is not a requirement to insert calls to these functions into user code. However, these functions can be used to allow profiling to be performed selectively on specific portions of the code. For further information on profiling, see the `NVidia Profiler User's Guide `_. .. autofunction:: numba.cuda.profile_start .. autofunction:: numba.cuda.profile_stop .. autofunction:: numba.cuda.profiling .. _events: Events ~~~~~~ Events can be used to monitor the progress of execution and to record the timestamps of specific points being reached. Event creation returns immediately, and the created event can be queried to determine if it has been reached. For further information, see the `CUDA C Programming Guide Events section `_. The following functions are used for creating and measuring the time between events: .. autofunction:: numba.cuda.event .. autofunction:: numba.cuda.event_elapsed_time Events are instances of the :class:`numba.cuda.cudadrv.driver.Event` class: .. autoclass:: numba.cuda.cudadrv.driver.Event :members: query, record, synchronize, wait .. _streams: Stream Management ----------------- Streams allow concurrency of execution on a single device within a given context. Queued work items in the same stream execute sequentially, but work items in different streams may execute concurrently. Most operations involving a CUDA device can be performed asynchronously using streams, including data transfers and kernel execution. For further details on streams, see the `CUDA C Programming Guide Streams section `_. Numba defaults to using the legacy default stream as the default stream. The per-thread default stream can be made the default stream by setting the environment variable ``NUMBA_CUDA_PER_THREAD_DEFAULT_STREAM`` to ``1`` (see the :ref:`CUDA Environment Variables section `). Regardless of this setting, the objects representing the legacy and per-thread default streams can be constructed using the functions below. Streams are instances of :class:`numba.cuda.cudadrv.driver.Stream`: .. autoclass:: numba.cuda.cudadrv.driver.Stream :members: synchronize, auto_synchronize, add_callback, async_done To create a new stream: .. 
autofunction:: numba.cuda.stream To get the default stream: .. autofunction:: numba.cuda.default_stream To get the default stream with an explicit choice of whether it is the legacy or per-thread default stream: .. autofunction:: numba.cuda.legacy_default_stream .. autofunction:: numba.cuda.per_thread_default_stream To construct a Numba ``Stream`` object using a stream allocated elsewhere, the ``external_stream`` function is provided. Note that the lifetime of external streams must be managed by the user - Numba will not deallocate an external stream, and the stream must remain valid whilst the Numba ``Stream`` object is in use. .. autofunction:: numba.cuda.external_stream Runtime ------- Numba generally uses the Driver API, but it provides a simple wrapper to the Runtime API so that the version of the runtime in use can be queried. This is accessed through ``cuda.runtime``, which is an instance of the :class:`numba.cuda.cudadrv.runtime.Runtime` class: .. autoclass:: numba.cuda.cudadrv.runtime.Runtime :members: get_version, is_supported_version, supported_versions Whether the current runtime is officially supported and tested with the current version of Numba can also be queried: .. autofunction:: numba.cuda.is_supported_version numba-0.55.1/docs/source/cuda-reference/index.rst000664 000000 000000 00000000164 14174536160 021635 0ustar00rootroot000000 000000 CUDA Python Reference ===================== .. toctree:: host.rst kernel.rst memory.rst libdevice.rst numba-0.55.1/docs/source/cuda-reference/kernel.rst000664 000000 000000 00000045070 14174536160 022013 0ustar00rootroot000000 000000 CUDA Kernel API =============== Kernel declaration ------------------ The ``@cuda.jit`` decorator is used to create a CUDA dispatcher object that can be configured and launched: .. autofunction:: numba.cuda.jit Dispatcher objects ------------------ The usual syntax for configuring a Dispatcher with a launch configuration uses subscripting, with the arguments as follows: .. code-block:: python # func is some function decorated with @cuda.jit func[griddim, blockdim, stream, sharedmem] The ``griddim`` and ``blockdim`` arguments specify the size of the grid and thread blocks, and may be either integers or tuples of length up to 3. The ``stream`` parameter is an optional stream on which the kernel will be launched, and the ``sharedmem`` parameter specifies the size of dynamic shared memory in bytes. Subscripting the Dispatcher returns a configuration object that can be called with the kernel arguments: .. code-block:: python configured = func[griddim, blockdim, stream, sharedmem] configured(x, y, z) However, it is more idiomatic to configure and call the kernel within a single statement: .. code-block:: python func[griddim, blockdim, stream, sharedmem](x, y, z) This is similar to launch configuration in CUDA C/C++: .. code-block:: cuda func<<<griddim, blockdim, sharedmem, stream>>>(x, y, z) .. note:: The order of ``stream`` and ``sharedmem`` is reversed in Numba compared to in CUDA C/C++. Dispatcher objects also provide several utility methods for inspection and creating a specialized instance: .. autoclass:: numba.cuda.compiler.Dispatcher :members: inspect_asm, inspect_llvm, inspect_sass, inspect_types, get_regs_per_thread, specialize, specialized, extensions, forall Intrinsic Attributes and Functions ---------------------------------- The remainder of the attributes and functions in this section may only be called from within a CUDA Kernel. Thread Indexing ~~~~~~~~~~~~~~~ ..
attribute:: numba.cuda.threadIdx The thread indices in the current thread block, accessed through the attributes ``x``, ``y``, and ``z``. Each index is an integer spanning the range from 0 inclusive to the corresponding value of the attribute in :attr:`numba.cuda.blockDim` exclusive. .. attribute:: numba.cuda.blockIdx The block indices in the grid of thread blocks, accessed through the attributes ``x``, ``y``, and ``z``. Each index is an integer spanning the range from 0 inclusive to the corresponding value of the attribute in :attr:`numba.cuda.gridDim` exclusive. .. attribute:: numba.cuda.blockDim The shape of a block of threads, as declared when instantiating the kernel. This value is the same for all threads in a given kernel, even if they belong to different blocks (i.e. each block is "full"). .. attribute:: numba.cuda.gridDim The shape of the grid of blocks, accessed through the attributes ``x``, ``y``, and ``z``. .. attribute:: numba.cuda.laneid The thread index in the current warp, as an integer spanning the range from 0 inclusive to :attr:`numba.cuda.warpsize` exclusive. .. attribute:: numba.cuda.warpsize The size in threads of a warp on the GPU. Currently this is always 32. .. function:: numba.cuda.grid(ndim) Return the absolute position of the current thread in the entire grid of blocks. *ndim* should correspond to the number of dimensions declared when instantiating the kernel. If *ndim* is 1, a single integer is returned. If *ndim* is 2 or 3, a tuple of the given number of integers is returned. Computation of the first integer is as follows:: cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x and is similar for the other two indices, but using the ``y`` and ``z`` attributes. .. function:: numba.cuda.gridsize(ndim) Return the absolute size (or shape) in threads of the entire grid of blocks. *ndim* should correspond to the number of dimensions declared when instantiating the kernel. Computation of the first integer is as follows:: cuda.blockDim.x * cuda.gridDim.x and is similar for the other two indices, but using the ``y`` and ``z`` attributes. Memory Management ~~~~~~~~~~~~~~~~~ .. function:: numba.cuda.shared.array(shape, dtype) Creates an array in the shared memory space of the CUDA kernel with the given ``shape`` and ``dtype``. Returns an array with its content uninitialized. .. note:: All threads in the same thread block see the same array. .. function:: numba.cuda.local.array(shape, dtype) Creates an array in the local memory space of the CUDA kernel with the given ``shape`` and ``dtype``. Returns an array with its content uninitialized. .. note:: Each thread sees a unique array. .. function:: numba.cuda.const.array_like(ary) Copies the ``ary`` into constant memory space on the CUDA kernel at compile time. Returns an array like the ``ary`` argument. .. note:: All threads and blocks see the same array. Synchronization and Atomic Operations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. function:: numba.cuda.atomic.add(array, idx, value) Perform ``array[idx] += value``. Supports int32, int64, float32 and float64 only. The ``idx`` argument can be an integer or a tuple of integer indices for indexing into multi-dimensional arrays. The number of elements in ``idx`` must match the number of dimensions of ``array``. Returns the value of ``array[idx]`` before storing the new value. Behaves like an atomic load. .. function:: numba.cuda.atomic.sub(array, idx, value) Perform ``array[idx] -= value``. Supports int32, int64, float32 and float64 only.
The ``idx`` argument can be an integer or a tuple of integer indices for indexing into multi-dimensional arrays. The number of elements in ``idx`` must match the number of dimensions of ``array``. Returns the value of ``array[idx]`` before storing the new value. Behaves like an atomic load. .. function:: numba.cuda.atomic.and_(array, idx, value) Perform ``array[idx] &= value``. Supports int32, uint32, int64, and uint64 only. The ``idx`` argument can be an integer or a tuple of integer indices for indexing into multi-dimensional arrays. The number of elements in ``idx`` must match the number of dimensions of ``array``. Returns the value of ``array[idx]`` before storing the new value. Behaves like an atomic load. .. function:: numba.cuda.atomic.or_(array, idx, value) Perform ``array[idx] |= value``. Supports int32, uint32, int64, and uint64 only. The ``idx`` argument can be an integer or a tuple of integer indices for indexing into multi-dimensional arrays. The number of elements in ``idx`` must match the number of dimensions of ``array``. Returns the value of ``array[idx]`` before storing the new value. Behaves like an atomic load. .. function:: numba.cuda.atomic.xor(array, idx, value) Perform ``array[idx] ^= value``. Supports int32, uint32, int64, and uint64 only. The ``idx`` argument can be an integer or a tuple of integer indices for indexing into multi-dimensional arrays. The number of elements in ``idx`` must match the number of dimensions of ``array``. Returns the value of ``array[idx]`` before storing the new value. Behaves like an atomic load. .. function:: numba.cuda.atomic.exch(array, idx, value) Perform ``array[idx] = value``. Supports int32, uint32, int64, and uint64 only. The ``idx`` argument can be an integer or a tuple of integer indices for indexing into multi-dimensional arrays. The number of elements in ``idx`` must match the number of dimensions of ``array``. Returns the value of ``array[idx]`` before storing the new value. Behaves like an atomic load. .. function:: numba.cuda.atomic.inc(array, idx, value) Perform ``array[idx] = (0 if array[idx] >= value else array[idx] + 1)``. Supports uint32 and uint64 only. The ``idx`` argument can be an integer or a tuple of integer indices for indexing into multi-dimensional arrays. The number of elements in ``idx`` must match the number of dimensions of ``array``. Returns the value of ``array[idx]`` before storing the new value. Behaves like an atomic load. .. function:: numba.cuda.atomic.dec(array, idx, value) Perform ``array[idx] = (value if (array[idx] == 0) or (array[idx] > value) else array[idx] - 1)``. Supports uint32 and uint64 only. The ``idx`` argument can be an integer or a tuple of integer indices for indexing into multi-dimensional arrays. The number of elements in ``idx`` must match the number of dimensions of ``array``. Returns the value of ``array[idx]`` before storing the new value. Behaves like an atomic load. .. function:: numba.cuda.atomic.max(array, idx, value) Perform ``array[idx] = max(array[idx], value)``. Supports int32, int64, float32 and float64 only. The ``idx`` argument can be an integer or a tuple of integer indices for indexing into multi-dimensional arrays. The number of elements in ``idx`` must match the number of dimensions of ``array``. Returns the value of ``array[idx]`` before storing the new value. Behaves like an atomic load. .. function:: numba.cuda.syncthreads Synchronize all threads in the same thread block.
This function implements the same pattern as barriers in traditional multi-threaded programming: this function waits until all threads in the block call it, at which point it returns control to all its callers. .. function:: numba.cuda.syncthreads_count(predicate) An extension to :attr:`numba.cuda.syncthreads` where the return value is a count of the threads where ``predicate`` is true. .. function:: numba.cuda.syncthreads_and(predicate) An extension to :attr:`numba.cuda.syncthreads` where 1 is returned if ``predicate`` is true for all threads or 0 otherwise. .. function:: numba.cuda.syncthreads_or(predicate) An extension to :attr:`numba.cuda.syncthreads` where 1 is returned if ``predicate`` is true for any thread or 0 otherwise. .. warning:: All syncthreads functions must be called by every thread in the thread-block. Failing to do so may result in undefined behavior. Cooperative Groups ~~~~~~~~~~~~~~~~~~ .. function:: numba.cuda.cg.this_grid() Get the current grid group. :return: The current grid group :rtype: numba.cuda.cg.GridGroup .. class:: numba.cuda.cg.GridGroup A grid group. Users should not construct a GridGroup directly - instead, get the current grid group using :func:`cg.this_grid() `. .. method:: sync() Synchronize the current grid group. Memory Fences ~~~~~~~~~~~~~ The memory fences are used to guarantee that the effects of memory operations are visible to other threads within the same thread-block, the same GPU device, and the same system (across GPUs on global memory). Memory loads and stores are guaranteed to not move across the memory fences by optimization passes. .. warning:: The memory fences are considered to be an advanced API and most use cases should use the thread barrier (e.g. ``syncthreads()``). .. function:: numba.cuda.threadfence A memory fence at device level (within the GPU). .. function:: numba.cuda.threadfence_block A memory fence at thread block level. .. function:: numba.cuda.threadfence_system A memory fence at system level (across GPUs). Warp Intrinsics ~~~~~~~~~~~~~~~ The argument ``membermask`` is a 32 bit integer mask with each bit corresponding to a thread in the warp, with 1 meaning the thread is in the subset of threads within the function call. The ``membermask`` must be all 1 if the GPU compute capability is below 7.x. .. function:: numba.cuda.syncwarp(membermask) Synchronize a masked subset of the threads in a warp. .. function:: numba.cuda.all_sync(membermask, predicate) If the ``predicate`` is true for all threads in the masked warp, then a non-zero value is returned, otherwise 0 is returned. .. function:: numba.cuda.any_sync(membermask, predicate) If the ``predicate`` is true for any thread in the masked warp, then a non-zero value is returned, otherwise 0 is returned. .. function:: numba.cuda.eq_sync(membermask, predicate) If the boolean ``predicate`` is the same for all threads in the masked warp, then a non-zero value is returned, otherwise 0 is returned. .. function:: numba.cuda.ballot_sync(membermask, predicate) Returns a mask of all threads in the warp whose ``predicate`` is true, and are within the given mask. .. function:: numba.cuda.shfl_sync(membermask, value, src_lane) Shuffles ``value`` across the masked warp and returns the ``value`` from ``src_lane``. If this is outside the warp, then the given ``value`` is returned. .. function:: numba.cuda.shfl_up_sync(membermask, value, delta) Shuffles ``value`` across the masked warp and returns the ``value`` from ``laneid - delta``.
If this is outside the warp, then the given ``value`` is returned. .. function:: numba.cuda.shfl_down_sync(membermask, value, delta) Shuffles ``value`` across the masked warp and returns the ``value`` from ``laneid + delta``. If this is outside the warp, then the given ``value`` is returned. .. function:: numba.cuda.shfl_xor_sync(membermask, value, lane_mask) Shuffles ``value`` across the masked warp and returns the ``value`` from ``laneid ^ lane_mask``. .. function:: numba.cuda.match_any_sync(membermask, value, lane_mask) Returns a mask of threads that have the same ``value`` as the given ``value`` from within the masked warp. .. function:: numba.cuda.match_all_sync(membermask, value, lane_mask) Returns a tuple of (mask, pred), where mask is a mask of threads that have the same ``value`` as the given ``value`` from within the masked warp, if they all have the same value, otherwise it is 0; and pred is a boolean of whether or not all threads in the masked warp have the same value. .. function:: numba.cuda.activemask() Returns a 32-bit integer mask of all currently active threads in the calling warp. The Nth bit is set if the Nth lane in the warp is active when activemask() is called. Inactive threads are represented by 0 bits in the returned mask. Threads which have exited the kernel are always marked as inactive. .. function:: numba.cuda.lanemask_lt() Returns a 32-bit integer mask of all lanes (including inactive ones) with ID less than the current lane. Integer Intrinsics ~~~~~~~~~~~~~~~~~~ A subset of the CUDA Math API's integer intrinsics are available. For further documentation, including semantics, please refer to the `CUDA Toolkit documentation `_. .. function:: numba.cuda.popc(x) Returns the number of bits set in ``x``. .. function:: numba.cuda.brev(x) Returns the reverse of the bit pattern of ``x``. For example, ``0b10110110`` becomes ``0b01101101``. .. function:: numba.cuda.clz(x) Returns the number of leading zeros in ``x``. .. function:: numba.cuda.ffs(x) Returns the position of the first (least significant) bit set to 1 in ``x``, where the least significant bit position is 1. ``ffs(0)`` returns 0. Floating Point Intrinsics ~~~~~~~~~~~~~~~~~~~~~~~~~ A subset of the CUDA Math API's floating point intrinsics are available. For further documentation, including semantics, please refer to the `single `_ and `double `_ precision parts of the CUDA Toolkit documentation. .. function:: numba.cuda.fma Perform the fused multiply-add operation. Named after the ``fma`` and ``fmaf`` functions in the C API, but maps to the ``fma.rn.f32`` and ``fma.rn.f64`` (round-to-nearest-even) PTX instructions. .. function:: numba.cuda.cbrt (x) Perform the cube root operation, x ** (1/3). Named after the functions ``cbrt`` and ``cbrtf`` in the C API. Supports float32 and float64 arguments only. 16-bit Floating Point Intrinsics ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The following functions are used to operate on 16-bit floating point operands. These functions return a 16-bit floating point result. .. function:: numba.cuda.fp16.hfma (a, b, c) Perform the fused multiply-add operation ``(a * b) + c`` on 16-bit floating point arguments in round to nearest mode. Maps to the ``fma.rn.f16`` PTX instruction. Returns the 16-bit floating point result of the fused multiply-add. .. function:: numba.cuda.fp16.hadd (a, b) Perform the add operation ``a + b`` on 16-bit floating point arguments in round to nearest mode. Maps to the ``add.f16`` PTX instruction. Returns the 16-bit floating point result of the addition. ..
function:: numba.cuda.fp16.hsub (a, b) Perform the subtract operation ``a - b`` on 16-bit floating point arguments in round to nearest mode. Maps to the ``sub.f16`` PTX instruction. Returns the 16-bit floating point result of the subtraction. .. function:: numba.cuda.fp16.hmul (a, b) Perform the multiply operation ``a * b`` on 16-bit floating point arguments in round to nearest mode. Maps to the ``mul.f16`` PTX instruction. Returns the 16-bit floating point result of the multiplication. .. function:: numba.cuda.fp16.hneg (a) Perform the negation operation ``-a`` on the 16-bit floating point argument. Maps to the ``neg.f16`` PTX instruction. Returns the 16-bit floating point result of the negation. .. function:: numba.cuda.fp16.habs (a) Perform the absolute value operation ``|a|`` on the 16-bit floating point argument. Maps to the ``abs.f16`` PTX instruction. Returns the 16-bit floating point result of the absolute value operation. Control Flow Instructions ~~~~~~~~~~~~~~~~~~~~~~~~~ A subset of CUDA's control flow instructions are directly available as intrinsics. Avoiding branches is a key way to improve CUDA performance, and using these intrinsics means you don't have to rely on the ``nvcc`` optimizer identifying and removing branches. For further documentation, including semantics, please refer to the `relevant CUDA Toolkit documentation `_. .. function:: numba.cuda.selp Select between two expressions, depending on the value of the first argument. Similar to LLVM's ``select`` instruction. Timer Intrinsics ~~~~~~~~~~~~~~~~ .. function:: numba.cuda.nanosleep(ns) Suspends the thread for a sleep duration approximately equal to the delay ``ns``, specified in nanoseconds. numba-0.55.1/docs/source/cuda-reference/libdevice.rst000664 000000 000000 00000001211 14174536160 022450 0ustar00rootroot000000 000000 Libdevice functions =================== All wrapped libdevice functions are listed in this section. All functions in libdevice are wrapped, with the exception of ``__nv_nan`` and ``__nv_nanf``. These functions return a representation of a quiet NaN, but the argument they take (a pointer to an object specifying the representation) is undocumented, and follows an unusual form compared to the rest of libdevice - it is not an output like every other pointer argument. If a NaN is required, one can be obtained in CUDA Python by other means, e.g. ``math.nan``. Wrapped functions ----------------- .. automodule:: numba.cuda.libdevice :members: numba-0.55.1/docs/source/cuda-reference/memory.rst000664 000000 000000 00000001551 14174536160 022037 0ustar00rootroot000000 000000 Memory Management ================= .. autofunction:: numba.cuda.to_device .. autofunction:: numba.cuda.device_array .. autofunction:: numba.cuda.device_array_like .. autofunction:: numba.cuda.pinned_array .. autofunction:: numba.cuda.pinned_array_like .. autofunction:: numba.cuda.mapped_array .. autofunction:: numba.cuda.mapped_array_like .. autofunction:: numba.cuda.managed_array .. autofunction:: numba.cuda.pinned .. autofunction:: numba.cuda.mapped Device Objects -------------- .. autoclass:: numba.cuda.cudadrv.devicearray.DeviceNDArray :members: copy_to_device, copy_to_host, is_c_contiguous, is_f_contiguous, ravel, reshape, split .. autoclass:: numba.cuda.cudadrv.devicearray.DeviceRecord :members: copy_to_device, copy_to_host ..
autoclass:: numba.cuda.cudadrv.devicearray.MappedNDArray :members: copy_to_device, copy_to_host, split numba-0.55.1/docs/source/cuda/000775 000000 000000 00000000000 14174536160 016037 5ustar00rootroot000000 000000 numba-0.55.1/docs/source/cuda/bindings.rst000664 000000 000000 00000002627 14174536160 020375 0ustar00rootroot000000 000000 CUDA Bindings ============= Numba supports two bindings to the CUDA Driver APIs: its own internal bindings based on ctypes, and the official `NVIDIA CUDA Python bindings `_. Functionality is equivalent between the two bindings. The internal bindings are used by default. If the NVIDIA bindings are installed, then they can be used by setting the environment variable ``NUMBA_CUDA_USE_NVIDIA_BINDING`` to ``1`` prior to the import of Numba. Once Numba has been imported, the selected binding cannot be changed. Per-Thread Default Streams -------------------------- Responsibility for handling Per-Thread Default Streams (PTDS) is delegated to the NVIDIA bindings when they are in use. To use PTDS with the NVIDIA bindings, set the environment variable ``CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM`` to ``1`` instead of Numba's environment variable :envvar:`NUMBA_CUDA_PER_THREAD_DEFAULT_STREAM`. .. seealso:: The `Default Stream section `_ in the NVIDIA Bindings documentation. Roadmap ------- In Numba 0.56, the NVIDIA Bindings will be used by default, if they are installed. In future versions of Numba: - The internal bindings will be deprecated. - The internal bindings will be removed. At present, no specific release is planned for the deprecation or removal of the internal bindings. numba-0.55.1/docs/source/cuda/cooperative_groups.rst000664 000000 000000 00000010005 14174536160 022504 0ustar00rootroot000000 000000 ================== Cooperative Groups ================== Supported features ------------------ Numba's Cooperative Groups support presently provides grid groups and grid synchronization, along with cooperative kernel launches. Cooperative groups are supported on Linux, and on Windows for devices in `TCC mode `_. Cooperative Groups also require the CUDA Device Runtime library, ``cudadevrt``, to be available - for conda default channel-installed CUDA toolkit packages, it is only available in versions 10.2 onwards. System-installed toolkits (e.g. from NVIDIA distribution packages or runfiles) all include ``cudadevrt``. Using Grid Groups ----------------- To get the current grid group, use the :meth:`cg.this_grid() ` function: .. code-block:: python g = cuda.cg.this_grid() Synchronizing the grid is done with the :meth:`sync() ` method of the grid group: .. code-block:: python g.sync() Cooperative Launches -------------------- Unlike the CUDA C/C++ API, a cooperative launch is invoked using the same syntax as a normal kernel launch - Numba automatically determines whether a cooperative launch is required based on whether a grid group is synchronized in the kernel. The grid size limit for a cooperative launch is more restrictive than for a normal launch - the grid must be no larger than the maximum number of active blocks on the device on which it is launched. To get the maximum grid size for a cooperative launch of a kernel with a given block size and dynamic shared memory requirement, use the ``max_cooperative_grid_blocks()`` method of kernel overloads: .. automethod:: numba.cuda.compiler._Kernel.max_cooperative_grid_blocks This can be used to ensure that the kernel is launched with no more than the maximum number of blocks.
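For example, the returned limit can be used to clamp a requested grid size before launching (a minimal sketch; ``kernel``, ``argtypes``, ``blockdim``, ``requested_blocks`` and ``args`` are hypothetical placeholders for a real specialized kernel and its launch configuration):

.. code-block:: python

   # Hypothetical setup: ``kernel`` is an @cuda.jit kernel that has already
   # been compiled for the argument types ``argtypes``.
   overload = kernel.overloads[argtypes]
   max_blocks = overload.max_cooperative_grid_blocks(blockdim)
   # Clamp the grid so the cooperative launch cannot exceed the device limit.
   griddim = min(requested_blocks, max_blocks)
   kernel[griddim, blockdim](*args)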
Exceeding the maximum number of blocks for the cooperative launch will result in a ``CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE`` error. Applications and Example ------------------------ Grid group synchronization can be used to implement a global barrier across all threads in the grid - applications of this include a global reduction to a single value, or looping over rows of a large matrix sequentially using the entire grid to operate on column elements in parallel. In the following example, rows are written sequentially by the grid. Each thread in the grid reads a value from the previous row written by its *opposite* thread. A grid sync is needed to ensure that threads in the grid don't run ahead of threads in other blocks, or fail to see updates from their opposite thread. First we'll define our kernel: .. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_cg.py :language: python :caption: from ``test_grid_sync`` of ``numba/cuda/tests/doc_example/test_cg.py`` :start-after: magictoken.ex_grid_sync_kernel.begin :end-before: magictoken.ex_grid_sync_kernel.end :dedent: 8 :linenos: Then create some empty input data and determine the grid and block sizes: .. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_cg.py :language: python :caption: from ``test_grid_sync`` of ``numba/cuda/tests/doc_example/test_cg.py`` :start-after: magictoken.ex_grid_sync_data.begin :end-before: magictoken.ex_grid_sync_data.end :dedent: 8 :linenos: Finally we launch the kernel and print the result: .. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_cg.py :language: python :caption: from ``test_grid_sync`` of ``numba/cuda/tests/doc_example/test_cg.py`` :start-after: magictoken.ex_grid_sync_launch.begin :end-before: magictoken.ex_grid_sync_launch.end :dedent: 8 :linenos: The maximum grid size for ``sequential_rows`` can be enquired using: .. code-block:: python overload = sequential_rows.overloads[(int32[:,::1],)] max_blocks = overload.max_cooperative_grid_blocks(blockdim) print(max_blocks) # 1152 (e.g. on Quadro RTX 8000 with Numba 0.52.1 and CUDA 11.0) numba-0.55.1/docs/source/cuda/cuda_array_interface.rst000664 000000 000000 00000050105 14174536160 022724 0ustar00rootroot000000 000000 .. _cuda-array-interface: ================================ CUDA Array Interface (Version 3) ================================ The *CUDA Array Interface* (or CAI) is created for interoperability between different implementations of CUDA array-like objects in various projects. The idea is borrowed from the `NumPy array interface`_. .. note:: Currently, we only define the Python-side interface. In the future, we may add a C-side interface for efficient exchange of the information in compiled code. Python Interface Specification ============================== .. note:: Experimental feature. Specification may change. The ``__cuda_array_interface__`` attribute returns a dictionary (``dict``) that must contain the following entries: - **shape**: ``(integer, ...)`` A tuple of ``int`` (or ``long``) representing the size of each dimension. - **typestr**: ``str`` The type string. This has the same definition as ``typestr`` in the `numpy array interface`_. - **data**: ``(integer, boolean)`` The **data** is a 2-tuple. The first element is the data pointer as a Python ``int`` (or ``long``). The data must be device-accessible. For zero-size arrays, use ``0`` here. The second element is the read-only flag as a Python ``bool``.
Because the user of the interface may or may not be in the same context, the most common case is to use ``cuPointerGetAttribute`` with ``CU_POINTER_ATTRIBUTE_DEVICE_POINTER`` in the CUDA driver API (or the equivalent CUDA Runtime API) to retrieve a device pointer that is usable in the currently active context. - **version**: ``integer`` An integer for the version of the interface being exported. The current version is *3*. The following are optional entries: - **strides**: ``None`` or ``(integer, ...)`` If **strides** is not given, or it is ``None``, the array is in C-contiguous layout. Otherwise, a tuple of ``int`` (or ``long``) is explicitly given for representing the number of bytes to skip to access the next element at each dimension. - **descr** This is for describing more complicated types. This follows the same specification as in the `numpy array interface`_. - **mask**: ``None`` or object exposing the ``__cuda_array_interface__`` If ``None`` then all values in **data** are valid. All elements of the mask array should be interpreted only as true or not true indicating which elements of this array are valid. This has the same definition as ``mask`` in the `numpy array interface`_. .. note:: Numba does not currently support working with masked CUDA arrays and will raise a ``NotImplementedError`` exception if one is passed to a GPU function. - **stream**: ``None`` or ``integer`` An optional stream upon which synchronization must take place at the point of consumption, either by synchronizing on the stream or enqueuing operations on the data on the given stream. Integer values in this entry are as follows: - ``0``: This is disallowed as it would be ambiguous between ``None`` and the default stream, and also between the legacy and per-thread default streams. Any use case where ``0`` might be given should either use ``None``, ``1``, or ``2`` instead for clarity. - ``1``: The legacy default stream. - ``2``: The per-thread default stream. - Any other integer: a ``cudaStream_t`` represented as a Python integer. When ``None``, no synchronization is required. See the :ref:`cuda-array-interface-synchronization` section below for further details. In a future revision of the interface, this entry may be expanded (or another entry added) so that an event to synchronize on can be specified instead of a stream. .. _cuda-array-interface-synchronization: Synchronization --------------- Definitions ~~~~~~~~~~~ When discussing synchronization, the following definitions are used: - *Producer*: The library / object on which ``__cuda_array_interface__`` is accessed. - *Consumer*: The library / function that accesses the ``__cuda_array_interface__`` of the Producer. - *User Code*: Code that induces a Producer and Consumer to share data through the CAI. - *User*: The person writing or maintaining the User Code. The User may implement User Code without knowledge of the CAI, since the CAI accesses can be hidden from their view. In the following example: .. code-block:: python import cupy from numba import cuda @cuda.jit def add(x, y, out): start = cuda.grid(1) stride = cuda.gridsize(1) for i in range(start, x.shape[0], stride): out[i] = x[i] + y[i] a = cupy.arange(10) b = a * 2 out = cupy.zeros_like(a) add[1, 32](a, b, out) When the ``add`` kernel is launched: - ``a``, ``b``, ``out`` are Producers. - The ``add`` kernel is the Consumer. - The User Code is specifically ``add[1, 32](a, b, out)``. - The author of the code is the User. 
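For illustration, the dictionary exported by a Producer such as ``a`` in the example above might resemble the following (a sketch only - the pointer value is a placeholder, real Producers may include additional entries, and the ``stream`` entry depends on the Producer's state):

.. code-block:: python

   # A sketch of a Producer's __cuda_array_interface__ for a 1D,
   # C-contiguous array of 10 int64 elements. The device pointer value
   # below is purely illustrative.
   {
       'shape': (10,),                    # one dimension, 10 elements
       'typestr': '<i8',                  # little-endian 64-bit integer
       'data': (0x7F0123450000, False),   # (device pointer, read-only flag)
       'version': 3,                      # current interface version
       'strides': None,                   # None implies C-contiguous layout
       'stream': 1,                       # work enqueued on the legacy
                                          # default stream
   }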
Design Motivations ~~~~~~~~~~~~~~~~~~ Elements of the CAI design related to synchronization seek to fulfill these requirements: 1. Producers and Consumers that exchange data through the CAI must be able to do so without data races. 2. Requirement 1 should be met without requiring the user to be aware of any particulars of the CAI - in other words, exchanging data between Producers and Consumers that operate on data asynchronously should be correct by default. - An exception to this requirement is made for Producers and Consumers that explicitly document that the User is required to take additional steps to ensure correctness with respect to synchronization. In this case, Users are required to understand the details of the CUDA Array Interface, and the Producer/Consumer library documentation must specify the steps that Users are required to take. Use of this exception should be avoided where possible, as it is provided for libraries that cannot implement the synchronization semantics without the involvement of the User - for example, those interfacing with third-party libraries oblivious to the CUDA Array Interface. 3. Where the User is aware of the particulars of the CAI and implementation details of the Producer and Consumer, they should be able to, at their discretion, override some of the synchronization semantics of the interface to reduce the synchronization overhead. Overriding synchronization semantics implies that: - The CAI design, and the design and implementation of the Producer and Consumer do not specify or guarantee correctness with respect to data races. - Instead, the User is responsible for ensuring correctness with respect to data races. Interface Requirements ~~~~~~~~~~~~~~~~~~~~~~ The ``stream`` entry enables Producers and Consumers to avoid hazards when exchanging data. Expected behaviour of the Consumer is as follows: * When ``stream`` is not present or is ``None``: - No synchronization is required on the part of the Consumer. - The Consumer may enqueue operations on the underlying data immediately on any stream. * When ``stream`` is an integer, its value indicates the stream on which the Producer may have in-progress operations on the data, and which the Consumer is expected to either: - Synchronize on before accessing the data, or - Enqueue operations in when accessing the data. The Consumer can choose which mechanism to use, with the following considerations: - If the Consumer synchronizes on the provided stream prior to accessing the data, then it must ensure that no computation can take place in the provided stream until its operations in its own choice of stream have taken place. This could be achieved by either: - Placing a wait on an event in the provided stream that occurs once all of the Consumer's operations on the data are completed, or - Avoiding returning control to the user code until after its operations on its own stream have completed. - If the consumer chooses to only enqueue operations on the data in the provided stream, then it may return control to the User code immediately after enqueueing its work, as the work will all be serialized on the exported array's stream. This is sufficient to ensure correctness even if the User code were to induce the Producer to subsequently start enqueueing more work on the same stream. 
* If the User has set the Consumer to ignore CAI synchronization semantics, the Consumer may assume it can operate on the data immediately in any stream with no further synchronization, even if the ``stream`` member has an integer value. When exporting an array through the CAI, Producers must ensure that: * If there is work on the data enqueued in one or more streams, then synchronization on the provided ``stream`` is sufficient to ensure synchronization with all pending work. - If the Producer has no enqueued work, or work only enqueued on the stream identified by ``stream``, then this condition is met. - If the Producer has enqueued work on the data on multiple streams, then it must enqueue events on those streams that follow the enqueued work, and then wait on those events in the provided ``stream``. For example: 1. Work is enqueued by the Producer on streams ``7``, ``9``, and ``15``. 2. Events are then enqueued on each of streams ``7``, ``9``, and ``15``. 3. Producer then tells stream ``3`` to wait on the events from Step 2, and the ``stream`` entry is set to ``3``. * If there is no work enqueued on the data, then the ``stream`` entry may be either ``None``, or not provided. Optionally, to facilitate the User relaxing conformance to synchronization semantics: * Producers may provide a configuration option to always set ``stream`` to ``None``. * Consumers may provide a configuration option to ignore the value of ``stream`` and act as if it were ``None`` or not provided. This elides synchronization on the Producer-provided streams, and allows enqueuing work on streams other than that provided by the Producer. These options should not be set by default in either a Producer or a Consumer. The CAI specification does not prescribe the exact mechanism by which these options are set, or related options that Producers or Consumers might provide to allow the user further control over synchronization behavior. Synchronization in Numba ~~~~~~~~~~~~~~~~~~~~~~~~ Numba is neither strictly a Producer nor a Consumer - it may be used to implement either by a User. In order to facilitate the correct implementation of synchronization semantics, Numba exhibits the following behaviors related to synchronization of the interface: - When Numba acts as a Consumer (for example when an array-like object is passed to a kernel launch): If ``stream`` is an integer, then Numba will immediately synchronize on the provided ``stream``. A Numba :class:`Device Array ` created from an array-like object has its *default stream* set to the provided stream. - When Numba acts as a Producer (when the ``__cuda_array_interface__`` property of a Numba CUDA Array is accessed): If the exported CUDA Array has a *default stream*, then it is given as the ``stream`` entry. Otherwise, ``stream`` is set to ``None``. .. note:: In Numba's terminology, an array's *default stream* is a property specifying the stream that Numba will enqueue asynchronous transfers in if no other stream is provided as an argument to the function invoking the transfer. It is not the same as the `Default Stream `_ in normal CUDA terminology. Numba's synchronization behavior results in the following intended consequences: - Exchanging data either as a Producer or a Consumer will be correct without the need for any further action from the User, provided that the other side of the interaction also follows the CAI synchronization semantics. 
- The User is expected to either:

  - Avoid launching kernels or other operations on streams that are not the default stream for their parameters, or
  - When launching operations on a stream that is not the default stream for a given parameter, insert an event into the stream that they are operating in, and wait on that event in the default stream for the parameter. For an example of this, :ref:`see below <example-multi-streams>`.

The User may override Numba's synchronization behavior by setting the environment variable ``NUMBA_CUDA_ARRAY_INTERFACE_SYNC`` or the config variable ``CUDA_ARRAY_INTERFACE_SYNC`` to ``0`` (see :ref:`GPU Support Environment Variables `). When set, Numba will not synchronize on the streams of imported arrays, and it is the responsibility of the user to ensure correctness with respect to stream synchronization. Synchronization when creating a Numba CUDA Array from an object exporting the CUDA Array Interface may also be elided by passing ``sync=False`` when creating the Numba CUDA Array with :func:`numba.cuda.as_cuda_array` or :func:`numba.cuda.from_cuda_array_interface`.

There is scope for Numba's synchronization implementation to be optimized in the future, by eliding synchronizations when a kernel or driver API operation (e.g. a memcpy or memset) is launched on the same stream as an imported array.

.. _example-multi-streams:

An example launching on an array's non-default stream
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This example shows how to ensure that a Consumer can safely consume an array with a default stream when it is passed to a kernel launched in a different stream.

First we need to import Numba and a consumer library (a fictitious library named ``other_cai_library`` for this example):

.. code-block:: python

   from numba import cuda, int32, void
   import other_cai_library

Now we'll define a kernel - this initializes the elements of the array, setting each entry to its index:

.. code-block:: python

   @cuda.jit(void, int32[::1])
   def initialize_array(x):
       i = cuda.grid(1)
       if i < len(x):
           x[i] = i

Next we will create two streams:

.. code-block:: python

   array_stream = cuda.stream()
   kernel_stream = cuda.stream()

Then create an array with one of the streams as its default stream:

.. code-block:: python

   N = 16384
   x = cuda.device_array(N, stream=array_stream)

Now we launch the kernel in the other stream:

.. code-block:: python

   nthreads = 256
   nblocks = N // nthreads
   initialize_array[nblocks, nthreads, kernel_stream](x)

If we were to pass ``x`` to a Consumer now, there is a risk that it may operate on it in ``array_stream`` whilst the kernel is still running in ``kernel_stream``. To prevent operations in ``array_stream`` starting before the kernel launch is finished, we create an event and wait on it:

.. code-block:: python

   # Create event
   evt = cuda.event()
   # Record the event after the kernel launch in kernel_stream
   evt.record(kernel_stream)
   # Wait for the event in array_stream
   evt.wait(array_stream)

It is now safe for ``other_cai_library`` to consume ``x``:

.. code-block:: python

   other_cai_library.consume(x)

Lifetime management
-------------------

Data
~~~~

Obtaining the value of the ``__cuda_array_interface__`` property of any object has no effect on the lifetime of the object from which it was created. In particular, note that the interface has no slot for the owner of the data. The User code must preserve the lifetime of the object owning the data for as long as the Consumer might use it.
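To illustrate the kind of bug this rule exists to prevent, here is a hedged sketch; the ``consumer`` object and its ``enqueue_work`` and ``wait`` methods are hypothetical stand-ins for any library that accesses the interface asynchronously:

.. code-block:: python

   from numba import cuda

   def unsafe(consumer):
       arr = cuda.device_array(1024)   # the object owning the data
       consumer.enqueue_work(arr)      # hypothetical asynchronous Consumer
       # No reference to ``arr`` survives this function, so the device
       # memory may be freed while the enqueued work still uses it.

   def safe(consumer):
       arr = cuda.device_array(1024)
       consumer.enqueue_work(arr)      # hypothetical asynchronous Consumer
       return arr  # keep the owner alive until the Consumer is done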
Streams
~~~~~~~

Like data, CUDA streams also have a finite lifetime. It is therefore required that a Producer exporting data on the interface with an associated stream ensures that the exported stream's lifetime is equal to or surpasses the lifetime of the object from which the interface was exported.

Lifetime management in Numba
----------------------------

Producing Arrays
~~~~~~~~~~~~~~~~

Numba takes no steps to maintain the lifetime of an object from which the interface is exported - it is the user's responsibility to ensure that the underlying object is kept alive for the duration that the exported interface might be used.

The lifetime of any Numba-managed stream exported on the interface is guaranteed to equal or surpass the lifetime of the underlying object, because the underlying object holds a reference to the stream.

.. note:: Numba-managed streams are those created with ``cuda.default_stream()``, ``cuda.legacy_default_stream()``, or ``cuda.per_thread_default_stream()``. Streams not managed by Numba are created from an external stream with ``cuda.external_stream()``.

Consuming Arrays
~~~~~~~~~~~~~~~~

Numba provides two mechanisms for creating device arrays from objects exporting the CUDA Array Interface. Which to use depends on whether the created device array should maintain the life of the object from which it is created:

- ``as_cuda_array``: This creates a device array that holds a reference to the owning object. As long as a reference to the device array is held, its underlying data will also be kept alive, even if all other references to the original owning object have been dropped.
- ``from_cuda_array_interface``: This creates a device array with no reference to the owning object by default. The owning object, or some other object to be considered the owner, can be passed in the ``owner`` parameter.

The interfaces of these functions are:

.. automethod:: numba.cuda.as_cuda_array

.. automethod:: numba.cuda.from_cuda_array_interface
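The difference between the two mechanisms can be illustrated with a short hedged sketch, reusing ``cupy`` as a Producer as in the earlier example:

.. code-block:: python

   import cupy
   from numba import cuda

   x = cupy.arange(10)

   # Holds a reference to ``x``: the underlying data stays alive for as
   # long as ``view`` is alive, even if ``x`` is deleted.
   view = cuda.as_cuda_array(x)

   # Holds no reference by default; passing ``owner`` ties the lifetime
   # of the data to ``x`` explicitly.
   view2 = cuda.from_cuda_array_interface(x.__cuda_array_interface__,
                                          owner=x)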
Pointer Attributes
------------------

Additional information about the data pointer can be retrieved using ``cuPointerGetAttribute`` or ``cudaPointerGetAttributes``. Such information includes:

- the CUDA context that owns the pointer;
- whether the pointer is host-accessible;
- whether the pointer points to managed memory.

.. _numpy array interface: https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.interface.html#__array_interface__

Differences with CUDA Array Interface (Version 0)
-------------------------------------------------

Version 0 of the CUDA Array Interface did not have the optional **mask** attribute to support masked arrays.

Differences with CUDA Array Interface (Version 1)
-------------------------------------------------

Versions 0 and 1 of the CUDA Array Interface neither clarified the **strides** attribute for C-contiguous arrays nor specified the treatment for zero-size arrays.

Differences with CUDA Array Interface (Version 2)
-------------------------------------------------

Prior versions of the CUDA Array Interface made no statement about synchronization.

Interoperability
----------------

The following Python libraries have adopted the CUDA Array Interface:

- Numba
- `CuPy `_
- `PyTorch `_
- `PyArrow `_
- `mpi4py `_
- `ArrayViews `_
- `JAX `_
- `PyCUDA `_
- `DALI: the NVIDIA Data Loading Library `_:

  - `TensorGPU objects `_ expose the CUDA Array Interface.
  - `The External Source operator `_ consumes objects exporting the CUDA Array Interface.

- The RAPIDS stack:

  - `cuDF `_
  - `cuML `_
  - `cuSignal `_
  - `RMM `_

If your project is not on this list, please feel free to report it on the `Numba issue tracker `_.

numba-0.55.1/docs/source/cuda/cudapysupported.rst000664 000000 000000 00000014154 14174536160 022031 0ustar00rootroot000000 000000 ========================================
Supported Python features in CUDA Python
========================================

This page lists the Python features supported in CUDA Python. This includes all kernel and device functions compiled with ``@cuda.jit`` and other higher-level Numba decorators that target the CUDA GPU.

Language
========

Execution Model
---------------

CUDA Python maps directly to the *single-instruction multiple-thread* (SIMT) execution model of CUDA. Each instruction is implicitly executed by multiple threads in parallel. With this execution model, array expressions are less useful because we don't want multiple threads to perform the same task. Instead, we want threads to perform a task in a cooperative fashion.

For details please consult the `CUDA Programming Guide `_.

Constructs
----------

The following Python constructs are not supported:

* Exception handling (``try .. except``, ``try .. finally``)
* Context management (the ``with`` statement)
* Comprehensions (either list, dict, set or generator comprehensions)
* Generators (any ``yield`` statements)

The ``raise`` statement is supported.

The ``assert`` statement is supported, but only has an effect when ``debug=True`` is passed to the :func:`numba.cuda.jit` decorator. This is similar to the behavior of the ``assert`` keyword in CUDA C/C++, which is ignored unless compiling with device debug turned on.

Printing of strings, integers, and floats is supported, but printing is an asynchronous operation - in order to ensure that all output is printed after a kernel launch, it is necessary to call :func:`numba.cuda.synchronize`. Eliding the call to ``synchronize`` is acceptable, but output from a kernel may appear during other later driver operations (e.g. subsequent kernel launches, memory transfers, etc.), or fail to appear before the program execution completes.

Up to 32 arguments may be passed to the ``print`` function - if more are passed then a format string will be emitted instead and a warning will be produced. This is due to a general limitation in CUDA printing, as outlined in the `section on limitations in printing `_ in the CUDA C++ Programming Guide.

Built-in types
==============

Support for the following built-in types is inherited from CPU nopython mode:

* int
* float
* complex
* bool
* None
* tuple

See :ref:`nopython built-in types `.

There is also some very limited support for character sequences (bytes and unicode strings) used in NumPy arrays. Note that this support can only be used with CUDA 11.2 onwards.
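As a brief illustration of the printing behaviour described above, the following hedged sketch launches a kernel that prints and then synchronizes, so that all output appears before the program continues:

.. code-block:: python

   import numpy as np
   from numba import cuda

   @cuda.jit
   def report(values):
       i = cuda.grid(1)
       if i < values.size:
           # Printing from a kernel is asynchronous
           print('thread', i, 'saw', values[i])

   arr = np.arange(4, dtype=np.float32)
   report[1, 32](arr)
   cuda.synchronize()  # ensure all kernel output has been flushed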
Built-in functions
==================

The following built-in functions are supported:

* :func:`abs`
* :class:`bool`
* :class:`complex`
* :func:`enumerate`
* :class:`float`
* :class:`int`: only the one-argument form
* :func:`len`
* :func:`min`: only the multiple-argument form
* :func:`max`: only the multiple-argument form
* :func:`pow`
* :class:`range`
* :func:`round`
* :func:`zip`

Standard library modules
========================

``cmath``
---------

The following functions from the :mod:`cmath` module are supported:

* :func:`cmath.acos`
* :func:`cmath.acosh`
* :func:`cmath.asin`
* :func:`cmath.asinh`
* :func:`cmath.atan`
* :func:`cmath.atanh`
* :func:`cmath.cos`
* :func:`cmath.cosh`
* :func:`cmath.exp`
* :func:`cmath.isfinite`
* :func:`cmath.isinf`
* :func:`cmath.isnan`
* :func:`cmath.log`
* :func:`cmath.log10`
* :func:`cmath.phase`
* :func:`cmath.polar`
* :func:`cmath.rect`
* :func:`cmath.sin`
* :func:`cmath.sinh`
* :func:`cmath.sqrt`
* :func:`cmath.tan`
* :func:`cmath.tanh`

``math``
--------

The following functions from the :mod:`math` module are supported:

* :func:`math.acos`
* :func:`math.asin`
* :func:`math.atan`
* :func:`math.acosh`
* :func:`math.asinh`
* :func:`math.atanh`
* :func:`math.cos`
* :func:`math.sin`
* :func:`math.tan`
* :func:`math.hypot`
* :func:`math.cosh`
* :func:`math.sinh`
* :func:`math.tanh`
* :func:`math.atan2`
* :func:`math.erf`
* :func:`math.erfc`
* :func:`math.exp`
* :func:`math.expm1`
* :func:`math.fabs`
* :func:`math.frexp`
* :func:`math.ldexp`
* :func:`math.gamma`
* :func:`math.lgamma`
* :func:`math.log`
* :func:`math.log2`
* :func:`math.log10`
* :func:`math.log1p`
* :func:`math.sqrt`
* :func:`math.remainder`: Python 3.7+
* :func:`math.pow`
* :func:`math.ceil`
* :func:`math.floor`
* :func:`math.copysign`
* :func:`math.fmod`
* :func:`math.modf`
* :func:`math.isnan`
* :func:`math.isinf`
* :func:`math.isfinite`

``operator``
------------

The following functions from the :mod:`operator` module are supported:

* :func:`operator.add`
* :func:`operator.and_`
* :func:`operator.eq`
* :func:`operator.floordiv`
* :func:`operator.ge`
* :func:`operator.gt`
* :func:`operator.iadd`
* :func:`operator.iand`
* :func:`operator.ifloordiv`
* :func:`operator.ilshift`
* :func:`operator.imod`
* :func:`operator.imul`
* :func:`operator.invert`
* :func:`operator.ior`
* :func:`operator.ipow`
* :func:`operator.irshift`
* :func:`operator.isub`
* :func:`operator.itruediv`
* :func:`operator.ixor`
* :func:`operator.le`
* :func:`operator.lshift`
* :func:`operator.lt`
* :func:`operator.mod`
* :func:`operator.mul`
* :func:`operator.ne`
* :func:`operator.neg`
* :func:`operator.not_`
* :func:`operator.or_`
* :func:`operator.pos`
* :func:`operator.pow`
* :func:`operator.rshift`
* :func:`operator.sub`
* :func:`operator.truediv`
* :func:`operator.xor`

Numpy support
=============

Due to the CUDA programming model, dynamic memory allocation inside a kernel is inefficient and is often not needed. Numba disallows any memory-allocating features. This disables a large number of NumPy APIs. For best performance, users should write code such that each thread is dealing with a single element at a time.

Supported numpy features:

* accessing `ndarray` attributes `.shape`, `.strides`, `.ndim`, `.size`, etc.
* scalar ufuncs that have equivalents in the `math` module; e.g. ``np.sin(x[0])``, where x is a 1D array.
* indexing and slicing work.

Unsupported numpy features:

* array creation APIs.
* array methods.
* functions that return a new array.
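To make the single-element-per-thread style concrete, here is a small hedged sketch that uses only supported features listed above - attribute access, indexing, and a scalar ufunc with a ``math``-module equivalent:

.. code-block:: python

   import numpy as np
   from numba import cuda

   @cuda.jit
   def sine_kernel(x, out):
       i = cuda.grid(1)
       if i < x.shape[0]:          # ndarray attribute access is supported
           out[i] = np.sin(x[i])   # scalar ufunc with a math equivalent

   x = np.linspace(0, 1, 256)
   out = np.zeros_like(x)
   sine_kernel[1, 256](x, out)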
numba-0.55.1/docs/source/cuda/device-functions.rst000664 000000 000000 00000000603 14174536160 022035 0ustar00rootroot000000 000000 Writing Device Functions
========================

CUDA device functions can only be invoked from within the device (by a kernel or another device function). To define a device function::

    from numba import cuda

    @cuda.jit(device=True)
    def a_device_function(a, b):
        return a + b

Unlike a kernel function, a device function can return a value, like a normal function.

numba-0.55.1/docs/source/cuda/device-management.rst000664 000000 000000 00000004570 14174536160 022150 0ustar00rootroot000000 000000 Device management
=================

For multi-GPU machines, users may want to select which GPU to use. By default the CUDA driver selects the fastest GPU as device 0, which is the default device used by Numba.

The features introduced on this page are generally not of interest unless working with systems hosting/offering more than one CUDA-capable GPU.

Device Selection
----------------

If at all required, device selection must be done before any CUDA feature is used.

::

    from numba import cuda
    cuda.select_device(0)

The device can be closed by:

::

    cuda.close()

Users can then create a new context with another device.

::

    cuda.select_device(1)  # assuming we have 2 GPUs

.. function:: numba.cuda.select_device(device_id)
   :noindex:

   Create a new CUDA context for the selected *device_id*. *device_id* should be the number of the device (starting from 0; the device order is determined by the CUDA libraries). The context is associated with the current thread. Numba currently allows only one context per thread.

   If successful, this function returns a device instance.

.. XXX document device instances?

.. function:: numba.cuda.close
   :noindex:

   Explicitly close all contexts in the current thread.

   .. note::
      Compiled functions are associated with the CUDA context. This makes it not very useful to close and create new devices, though it is certainly useful for choosing which device to use when the machine has multiple GPUs.

The Device List
===============

The Device List is a list of all the GPUs in the system, and can be indexed to obtain a context manager that ensures execution on the selected GPU.

.. attribute:: numba.cuda.gpus
   :noindex:

.. attribute:: numba.cuda.cudadrv.devices.gpus

:py:data:`numba.cuda.gpus` is an instance of the ``_DeviceList`` class, from which the current GPU context can also be retrieved:

.. autoclass:: numba.cuda.cudadrv.devices._DeviceList
   :members: current
   :noindex:

Device UUIDs
============

The UUID of a device (equal to that returned by ``nvidia-smi -L``) is available in the :attr:`uuid ` attribute of a CUDA device object.

For example, to obtain the UUID of the current device:

.. code-block:: python

   dev = cuda.current_context().device
   # prints e.g. "GPU-e6489c45-5b68-3b03-bab7-0e7c8e809643"
   print(dev.uuid)

numba-0.55.1/docs/source/cuda/examples.rst000664 000000 000000 00000010016 14174536160 020405 0ustar00rootroot000000 000000 ========
Examples
========

.. _cuda-matmul:

Matrix multiplication
=====================

First, import the modules needed for this example:

.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_matmul.py
   :language: python
   :caption: from ``test_ex_matmul`` in ``numba/cuda/tests/doc_examples/test_matmul.py``
   :start-after: magictoken.ex_import.begin
   :end-before: magictoken.ex_import.end
   :dedent: 8
   :linenos:

Here is a naïve implementation of matrix multiplication using a CUDA kernel:
.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_matmul.py
   :language: python
   :caption: from ``test_ex_matmul`` in ``numba/cuda/tests/doc_examples/test_matmul.py``
   :start-after: magictoken.ex_matmul.begin
   :end-before: magictoken.ex_matmul.end
   :dedent: 8
   :linenos:

This implementation is straightforward and intuitive but performs poorly, because the same matrix elements will be loaded multiple times from device memory, which is slow (some devices may have transparent data caches, but they may not be large enough to hold the entire inputs at once).

It will be faster if we use a blocked algorithm to reduce accesses to the device memory. CUDA provides a fast :ref:`shared memory <cuda-shared-memory>` for threads in a block to cooperatively compute on a task. The following implements a faster version of the square matrix multiplication using shared memory:

.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_matmul.py
   :language: python
   :caption: from ``test_ex_matmul`` in ``numba/cuda/tests/doc_examples/test_matmul.py``
   :start-after: magictoken.ex_fast_matmul.begin
   :end-before: magictoken.ex_fast_matmul.end
   :dedent: 8
   :linenos:

Because the shared memory is a limited resource, the code preloads a small block at a time from the input arrays. Then, it calls :func:`~numba.cuda.syncthreads` to wait until all threads have finished preloading before doing the computation on the shared memory. It synchronizes again after the computation to ensure all threads have finished with the data in shared memory before overwriting it in the next loop iteration.

An example usage of this function is as follows:

.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_matmul.py
   :language: python
   :caption: from ``test_ex_matmul`` in ``numba/cuda/tests/doc_examples/test_matmul.py``
   :start-after: magictoken.ex_run_fast_matmul.begin
   :end-before: magictoken.ex_run_fast_matmul.end
   :dedent: 8
   :linenos:

This passes a :ref:`CUDA memory check test `, which can help with debugging. Running the code above produces the following output:

.. code-block:: none

   $ python fast_matmul.py
   [[ 6.  6.  6.  6.]
    [22. 22. 22. 22.]
    [38. 38. 38. 38.]
    [54. 54. 54. 54.]]
   [[ 6.  6.  6.  6.]
    [22. 22. 22. 22.]
    [38. 38. 38. 38.]
    [54. 54. 54. 54.]]

.. note:: For high performance matrix multiplication in CUDA, see also the `CuPy implementation `_.

The approach outlined here generalizes to non-square matrix multiplication by adjusting the ``blockspergrid`` variable. Again, here is an example usage:

.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_matmul.py
   :language: python
   :caption: from ``test_ex_matmul`` in ``numba/cuda/tests/doc_examples/test_matmul.py``
   :start-after: magictoken.ex_run_nonsquare.begin
   :end-before: magictoken.ex_run_nonsquare.end
   :dedent: 8
   :linenos:

and the corresponding output:

.. code-block:: none

   $ python nonsquare_matmul.py
   [[ 253.  253.  253.  253.  253.  253.  253.]
    [ 782.  782.  782.  782.  782.  782.  782.]
    [1311. 1311. 1311. 1311. 1311. 1311. 1311.]
    [1840. 1840. 1840. 1840. 1840. 1840. 1840.]
    [2369. 2369. 2369. 2369. 2369. 2369. 2369.]]
   [[ 253.  253.  253.  253.  253.  253.  253.]
    [ 782.  782.  782.  782.  782.  782.  782.]
    [1311. 1311. 1311. 1311. 1311. 1311. 1311.]
    [1840. 1840. 1840. 1840. 1840. 1840. 1840.]
    [2369. 2369. 2369. 2369. 2369. 2369. 2369.]]

numba-0.55.1/docs/source/cuda/external-memory.rst000664 000000 000000 00000033052 14174536160 021724 0ustar00rootroot000000 000000 .. 
_cuda-emm-plugin: ================================================= External Memory Management (EMM) Plugin interface ================================================= The :ref:`CUDA Array Interface ` enables sharing of data between different Python libraries that access CUDA devices. However, each library manages its own memory distinctly from the others. For example: - By default, Numba allocates memory on CUDA devices by interacting with the CUDA driver API to call functions such as ``cuMemAlloc`` and ``cuMemFree``, which is suitable for many use cases. - The RAPIDS libraries (cuDF, cuML, etc.) use the `RAPIDS Memory Manager (RMM) `_ for allocating device memory. - `CuPy `_ includes a `memory pool implementation `_ for both device and pinned memory. When multiple CUDA-aware libraries are used together, it may be preferable for Numba to defer to another library for memory management. The EMM Plugin interface facilitates this, by enabling Numba to use another CUDA-aware library for all allocations and deallocations. An EMM Plugin is used to facilitate the use of an external library for memory management. An EMM Plugin can be a part of an external library, or could be implemented as a separate library. Overview of External Memory Management ====================================== When an EMM Plugin is in use (see :ref:`setting-emm-plugin`), Numba will make memory allocations and deallocations through the Plugin. It will never directly call functions such as ``cuMemAlloc``, ``cuMemFree``, etc. EMM Plugins always take responsibility for the management of device memory. However, not all CUDA-aware libraries also support managing host memory, so a facility for Numba to continue the management of host memory whilst ceding control of device memory to the EMM is provided (see :ref:`host-only-cuda-memory-manager`). Effects on Deallocation Strategies ---------------------------------- Numba's internal :ref:`deallocation-behavior` is designed to increase efficiency by deferring deallocations until a significant quantity are pending. It also provides a mechanism for preventing deallocations entirely during critical sections, using the :func:`~numba.cuda.defer_cleanup` context manager. When an EMM Plugin is in use, the deallocation strategy is implemented by the EMM, and Numba's internal deallocation mechanism is not used. The EMM Plugin could implement: - A similar strategy to the Numba deallocation behaviour, or - Something more appropriate to the plugin - for example, deallocated memory might immediately be returned to a memory pool. The ``defer_cleanup`` context manager may behave differently with an EMM Plugin - an EMM Plugin should be accompanied by documentation of the behaviour of the ``defer_cleanup`` context manager when it is in use. For example, a pool allocator could always immediately return memory to a pool even when the context manager is in use, but could choose not to free empty pools until ``defer_cleanup`` is not in use. Management of other objects --------------------------- In addition to memory, Numba manages the allocation and deallocation of :ref:`events `, :ref:`streams `, and modules (a module is a compiled object, which is generated from ``@cuda.jit``\ -ted functions). The management of events, streams, and modules is unchanged by the use of an EMM Plugin. Asynchronous allocation and deallocation ---------------------------------------- The present EMM Plugin interface does not provide support for asynchronous allocation and deallocation. 
This may be added to a future version of the interface.

Implementing an EMM Plugin
==========================

An EMM Plugin is implemented by deriving from :class:`~numba.cuda.BaseCUDAMemoryManager`. A summary of considerations for the implementation follows:

- Numba instantiates one instance of the EMM Plugin class per context. The context that owns an EMM Plugin object is accessible through ``self.context``, if required.
- The EMM Plugin is transparent to any code that uses Numba - all its methods are invoked by Numba, and never need to be called by code that uses Numba.
- The allocation methods ``memalloc``, ``memhostalloc``, and ``mempin`` should use the underlying library to allocate and/or pin device or host memory, and construct an instance of a :ref:`memory pointer <memory-pointers>` representing the memory to return back to Numba. These methods are always called when the current CUDA context is the context that owns the EMM Plugin instance.
- The ``initialize`` method is called by Numba prior to the first use of the EMM Plugin object for a context. This method should do anything required to prepare the underlying library for allocations in the current context. This method may be called multiple times, and must not invalidate previous state when it is called.
- The ``reset`` method is called when all allocations in the context are to be cleaned up. It may be called even prior to ``initialize``, and an EMM Plugin implementation needs to guard against this.
- To support inter-GPU communication, the ``get_ipc_handle`` method should provide an :class:`~numba.cuda.IpcHandle` for a given :class:`~numba.cuda.MemoryPointer` instance. This method is part of the EMM interface (rather than being handled within Numba) because the base address of the allocation is only known by the underlying library. Closing an IPC handle is handled internally within Numba.
- It is optional to provide memory info from the ``get_memory_info`` method, which provides a count of the total and free memory on the device for the context. It is preferable to implement the method, but this may not be practical for all allocators. If memory info is not provided, this method should raise a :class:`RuntimeError`.
- The ``defer_cleanup`` method should return a context manager that ensures that expensive cleanup operations are avoided whilst it is active. The nuances of this will vary between plugins, so the plugin documentation should include an explanation of how deferring cleanup affects deallocations, and performance in general.
- The ``interface_version`` property is used to ensure that the plugin version matches the interface provided by the version of Numba. At present, this should always be 1.

Full documentation for the base class follows:

.. autoclass:: numba.cuda.BaseCUDAMemoryManager
   :members: memalloc, memhostalloc, mempin, initialize, get_ipc_handle, get_memory_info, reset, defer_cleanup, interface_version
   :member-order: bysource

.. _host-only-cuda-memory-manager:

The Host-Only CUDA Memory Manager
---------------------------------

Some external memory managers will support management of on-device memory but not host memory. For implementing EMM Plugins using one of these memory managers, a partial implementation of a plugin that implements host-side allocation and pinning is provided. To use it, derive from :class:`~numba.cuda.HostOnlyCUDAMemoryManager` instead of :class:`~numba.cuda.BaseCUDAMemoryManager`. Guidelines for using this class are:

- The host-only memory manager implements ``memhostalloc`` and ``mempin`` - the EMM Plugin should still implement ``memalloc``.
- If ``reset`` is overridden, it must also call ``super().reset()`` to allow the host allocations to be cleaned up.
- If ``defer_cleanup`` is overridden, it must hold an active context manager from ``super().defer_cleanup()`` to ensure that host-side cleanup is also deferred.

A sketch of a minimal plugin based on this class follows.
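The external ``fancy_pool`` module and its ``init``, ``allocate`` and ``free`` functions below are hypothetical stand-ins for a real device memory pool library; only the structure of the class reflects the interface described above, and this is a hedged sketch rather than a definitive implementation:

.. code-block:: python

   import ctypes

   from numba.cuda import HostOnlyCUDAMemoryManager, MemoryPointer

   import fancy_pool  # hypothetical external device memory pool


   class FancyPoolMemoryManager(HostOnlyCUDAMemoryManager):
       """Device allocations come from fancy_pool; host-side allocation
       and pinning are inherited from HostOnlyCUDAMemoryManager."""

       def initialize(self):
           # Prepare the pool for use in the current context. Assumed
           # idempotent, as this method may be called multiple times.
           fancy_pool.init()  # hypothetical

       def memalloc(self, size):
           ptr = fancy_pool.allocate(size)  # hypothetical; returns an int
           # The finalizer tells the pool the memory is no longer needed;
           # the pool may defer actually freeing it.
           finalizer = lambda: fancy_pool.free(ptr)  # hypothetical
           return MemoryPointer(self.context, ctypes.c_uint64(ptr), size,
                                finalizer=finalizer)

       def get_memory_info(self):
           # This hypothetical pool cannot report free / total memory.
           raise RuntimeError("get_memory_info is not supported")

       @property
       def interface_version(self):
           return 1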
Documentation for the methods of :class:`~numba.cuda.HostOnlyCUDAMemoryManager` follows:

.. autoclass:: numba.cuda.HostOnlyCUDAMemoryManager
   :members: memhostalloc, mempin, reset, defer_cleanup
   :member-order: bysource

The IPC Handle Mixin
--------------------

An implementation of the ``get_ipc_handle()`` function is provided in the ``GetIpcHandleMixin`` class. This uses the driver API to determine the base address of an allocation for opening an IPC handle. If this implementation is appropriate for an EMM plugin, it can be added by mixing in the ``GetIpcHandleMixin`` class:

.. autoclass:: numba.cuda.GetIpcHandleMixin
   :members: get_ipc_handle

Classes and structures of returned objects
==========================================

This section provides an overview of the classes and structures that need to be constructed by an EMM Plugin.

.. _memory-pointers:

Memory Pointers
---------------

EMM Plugins should construct memory pointer instances that represent their allocations, for return to Numba. The appropriate memory pointer class to use in each method is:

- :class:`~numba.cuda.MemoryPointer`: returned from ``memalloc``
- :class:`~numba.cuda.MappedMemory`: returned from ``memhostalloc`` or ``mempin`` when the host memory is mapped into the device memory space.
- :class:`~numba.cuda.PinnedMemory`: returned from ``memhostalloc`` or ``mempin`` when the host memory is not mapped into the device memory space.

Memory pointers can take a finalizer, which is a function that is called when the buffer is no longer needed. Usually the finalizer will make a call to the memory management library (either internal to Numba, or external if allocated by an EMM Plugin) to inform it that the memory is no longer required, and that it could potentially be freed and/or unpinned. The memory manager may choose to defer actually cleaning up the memory to any later time after the finalizer runs - it is not required to free the buffer immediately.

Documentation for the memory pointer classes follows.

.. autoclass:: numba.cuda.MemoryPointer

The ``AutoFreePointer`` class need not be used directly, but is documented here as it is subclassed by :class:`numba.cuda.MappedMemory`:

.. autoclass:: numba.cuda.cudadrv.driver.AutoFreePointer

.. autoclass:: numba.cuda.MappedMemory

.. autoclass:: numba.cuda.PinnedMemory

Memory Info
-----------

If an implementation of :meth:`~numba.cuda.BaseCUDAMemoryManager.get_memory_info` is to provide a result, then it should return an instance of the ``MemoryInfo`` named tuple:

.. autoclass:: numba.cuda.MemoryInfo

IPC
---

An instance of ``IpcHandle`` is required to be returned from an implementation of :meth:`~numba.cuda.BaseCUDAMemoryManager.get_ipc_handle`:

.. autoclass:: numba.cuda.IpcHandle

Guidance for constructing an IPC handle in the context of implementing an EMM Plugin:

- The ``memory`` parameter passed to the ``get_ipc_handle`` method of an EMM Plugin can be passed as the ``base`` parameter.
- A suitable type for the ``handle`` can be constructed as ``ctypes.c_byte * 64``.
  The data for ``handle`` must be populated using a method for obtaining a CUDA IPC handle appropriate to the underlying library.
- ``size`` should match the size of the original allocation, which can be obtained with ``memory.size`` in ``get_ipc_handle``.
- An appropriate value for ``source_info`` can be created by calling ``self.context.device.get_device_identity()``.
- If the underlying memory does not point to the base of an allocation returned by the CUDA driver or runtime API (e.g. if a pool allocator is in use) then the ``offset`` from the base must be provided.

.. _setting-emm-plugin:

Setting the EMM Plugin
======================

By default, Numba uses its internal memory management - if an EMM Plugin is to be used, it must be configured. There are two mechanisms for configuring the use of an EMM Plugin: an environment variable, and a function.

Environment variable
--------------------

A module name can be provided in the environment variable, ``NUMBA_CUDA_MEMORY_MANAGER``. If this environment variable is set, Numba will attempt to import the module, and use its ``_numba_memory_manager`` global variable as the memory manager class. This is primarily useful for running the Numba test suite with an EMM Plugin, e.g.:

.. code::

   $ NUMBA_CUDA_MEMORY_MANAGER=rmm python -m numba.runtests numba.cuda.tests

Function
--------

The :func:`~numba.cuda.set_memory_manager` function can be used to set the memory manager at runtime. This should be called prior to the initialization of any contexts, as EMM Plugin instances are instantiated along with contexts.

.. autofunction:: numba.cuda.set_memory_manager

Resetting the memory manager
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

It is recommended that the memory manager is set once prior to using any CUDA functionality, and left unchanged for the remainder of execution. It is possible to set the memory manager multiple times, noting the following:

* At the time of their creation, contexts are bound to an instance of a memory manager for their lifetime.
* Changing the memory manager will have no effect on existing contexts - only contexts created after the memory manager was updated will use instances of the new memory manager.
* :func:`numba.cuda.close` can be used to destroy contexts after setting the memory manager so that they get re-created with the new memory manager.

  - This will invalidate any arrays, streams, events, and modules owned by the context.
  - Attempting to use invalid arrays, streams, or events will likely fail with an exception being raised due to a ``CUDA_ERROR_INVALID_CONTEXT`` or ``CUDA_ERROR_CONTEXT_IS_DESTROYED`` return code from a Driver API function.
  - Attempting to use an invalid module will result in similar errors, or in some cases a segmentation fault or access violation.

.. note:: The invalidation of modules means that all functions compiled with ``@cuda.jit`` prior to context destruction will need to be redefined, as the code underlying them will also have been unloaded from the GPU.
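As a hedged illustration of the function-based mechanism, the following sketch installs the hypothetical ``FancyPoolMemoryManager`` plugin sketched earlier, before any CUDA context has been initialized:

.. code-block:: python

   import numpy as np
   from numba import cuda

   # Must be called before any CUDA context is initialized.
   cuda.set_memory_manager(FancyPoolMemoryManager)

   # Device allocations in contexts created from here on are routed
   # through the plugin.
   arr = cuda.to_device(np.arange(16))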
numba-0.55.1/docs/source/cuda/faq.rst000664 000000 000000 00000001521 14174536160 017337 0ustar00rootroot000000 000000 .. _cudafaq:

=================================================
CUDA Frequently Asked Questions
=================================================

nvprof reports "No kernels were profiled"
-----------------------------------------

When using the ``nvprof`` tool to profile Numba-jitted code for the CUDA target, the output contains ``No kernels were profiled`` even though kernels were clearly running - what is going on?

This is quite likely due to the profiling data not being flushed on program exit; see the `NVIDIA CUDA documentation `_ for details. To fix this, simply add a call to ``numba.cuda.profile_stop()`` prior to the exit point in your program (or wherever you want to stop profiling). For more on CUDA profiling support in Numba, see :ref:`cuda-profiling`.

numba-0.55.1/docs/source/cuda/fastmath.rst000664 000000 000000 00000004270 14174536160 020403 0ustar00rootroot000000 000000 .. _cuda-fast-math:

CUDA Fast Math
==============

As noted in :ref:`fast-math`, for certain classes of applications that utilize floating point, strict IEEE-754 conformance is not required. For this subset of applications, performance speedups may be possible.

The CUDA target implements :ref:`fast-math` behavior with two differences.

* First, the ``fastmath`` argument to the :func:`@jit decorator ` is limited to the values ``True`` and ``False``. When ``True``, the following optimizations are enabled:

  - Flushing of denormals to zero.
  - Use of a fast approximation to the square root function.
  - Use of a fast approximation to the division operation.
  - Contraction of multiply and add operations into single fused multiply-add operations.

  See the `documentation for nvvmCompileProgram `_ for more details of these optimizations.

* Second, calls to a subset of math module functions on ``float32`` operands will be implemented using fast approximate implementations from the libdevice library.

  - :func:`math.cos`: Implemented using `__nv_fast_cosf `_.
  - :func:`math.sin`: Implemented using `__nv_fast_sinf `_.
  - :func:`math.tan`: Implemented using `__nv_fast_tanf `_.
  - :func:`math.exp`: Implemented using `__nv_fast_expf `_.
  - :func:`math.log2`: Implemented using `__nv_fast_log2f `_.
  - :func:`math.log10`: Implemented using `__nv_fast_log10f `_.
  - :func:`math.log`: Implemented using `__nv_fast_logf `_.
  - :func:`math.pow`: Implemented using `__nv_fast_powf `_.

numba-0.55.1/docs/source/cuda/index.rst000664 000000 000000 00000000640 14174536160 017700 0ustar00rootroot000000 000000 .. _cuda-index:

Numba for CUDA GPUs
===================

.. toctree::

   overview.rst
   kernels.rst
   memory.rst
   device-functions.rst
   cudapysupported.rst
   fastmath.rst
   intrinsics.rst
   cooperative_groups.rst
   random.rst
   device-management.rst
   examples.rst
   simulator.rst
   reduction.rst
   ufunc.rst
   ipc.rst
   cuda_array_interface.rst
   external-memory.rst
   bindings.rst
   faq.rst

numba-0.55.1/docs/source/cuda/intrinsics.rst000664 000000 000000 00000003253 14174536160 020761 0ustar00rootroot000000 000000 Supported Atomic Operations
===========================

Numba provides access to some of the atomic operations supported in CUDA. Those that are presently implemented are as follows:

.. automodule:: numba.cuda
   :members: atomic
   :noindex:

Example
'''''''

The following code demonstrates the use of :class:`numba.cuda.atomic.max` to find the maximum value in an array. Note that this is not the most efficient way of finding a maximum in this case, but it serves as an example::

    from numba import cuda
    import numpy as np

    @cuda.jit
    def max_example(result, values):
        """Find the maximum value in values and store in result[0]"""
        tid = cuda.threadIdx.x
        bid = cuda.blockIdx.x
        bdim = cuda.blockDim.x
        i = (bid * bdim) + tid
        cuda.atomic.max(result, 0, values[i])

    arr = np.random.rand(16384)
    result = np.zeros(1, dtype=np.float64)

    max_example[256,64](result, arr)
    print(result[0])  # Found using cuda.atomic.max
    print(max(arr))   # Print max(arr) for comparison (should be equal!)

Multi-dimensional arrays are supported by using a tuple of ints for the index::

    @cuda.jit
    def max_example_3d(result, values):
        """
        Find the maximum value in values and store in result[0].
        Both result and values are 3d arrays.
        """
        i, j, k = cuda.grid(3)
        # Atomically store to result[0,1,2] from values[i, j, k]
        cuda.atomic.max(result, (0, 1, 2), values[i, j, k])

    arr = np.random.rand(1000).reshape(10,10,10)
    result = np.zeros((3, 3, 3), dtype=np.float64)
    max_example_3d[(2, 2, 2), (5, 5, 5)](result, arr)
    print(result[0, 1, 2], '==', np.max(arr))
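Other atomic operations follow the same pattern. For instance, here is a hedged sketch using :class:`numba.cuda.atomic.add` to count the elements above a threshold; the kernel and names are illustrative only, not part of the Numba examples::

    import numpy as np
    from numba import cuda

    @cuda.jit
    def count_above(values, threshold, result):
        i = cuda.grid(1)
        if i < values.size:
            if values[i] > threshold:
                # Atomically increment the counter in result[0]
                cuda.atomic.add(result, 0, 1)

    values = np.random.rand(4096)
    result = np.zeros(1, dtype=np.int64)
    count_above[16, 256](values, 0.5, result)
    print(result[0], '==', (values > 0.5).sum())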
numba-0.55.1/docs/source/cuda/ipc.rst000664 000000 000000 00000001520 14174536160 017342 0ustar00rootroot000000 000000 ===================
Sharing CUDA Memory
===================

.. _cuda-ipc-memory:

Sharing between processes
=========================

Export device array to another process
--------------------------------------

A device array can be shared with another process on the same machine using the CUDA IPC API. To do so, use the ``.get_ipc_handle()`` method on the device array to get an ``IpcArrayHandle`` object, which can be transferred to another process.

.. automethod:: numba.cuda.cudadrv.devicearray.DeviceNDArray.get_ipc_handle
   :noindex:

.. autoclass:: numba.cuda.cudadrv.devicearray.IpcArrayHandle
   :members: open, close

Import IPC memory from another process
--------------------------------------

The following function is used to open an IPC handle from another process as a device array.

.. automethod:: numba.cuda.open_ipc_array

numba-0.55.1/docs/source/cuda/kernels.rst000664 000000 000000 00000021251 14174536160 020235 0ustar00rootroot000000 000000 ====================
Writing CUDA Kernels
====================

Introduction
============

CUDA has an execution model unlike the traditional sequential model used for programming CPUs. In CUDA, the code you write will be executed by multiple threads at once (often hundreds or thousands). Your solution will be modeled by defining a thread hierarchy of *grid*, *blocks* and *threads*.

Numba's CUDA support exposes facilities to declare and manage this hierarchy of threads. The facilities are largely similar to those exposed by NVidia's CUDA C language.

Numba also exposes three kinds of GPU memory: global :ref:`device memory <cuda-device-memory>` (the large, relatively slow off-chip memory that's connected to the GPU itself), on-chip :ref:`shared memory <cuda-shared-memory>` and :ref:`local memory <cuda-local-memory>`. For all but the simplest algorithms, it is important that you carefully consider how to use and access memory in order to minimize bandwidth requirements and contention.

Kernel declaration
==================

A *kernel function* is a GPU function that is meant to be called from CPU code (*). This gives it two fundamental characteristics:

* kernels cannot explicitly return a value; all result data must be written to an array passed to the function (if computing a scalar, you will probably pass a one-element array);
* kernels explicitly declare their thread hierarchy when called: i.e. the number of thread blocks and the number of threads per block (note that while a kernel is compiled once, it can be called multiple times with different block sizes or grid sizes).

At first sight, writing a CUDA kernel with Numba looks very much like writing a :term:`JIT function` for the CPU::

    @cuda.jit
    def increment_by_one(an_array):
        """
        Increment all array elements by one.
""" # code elided here; read further for different implementations (*) Note: newer CUDA devices support device-side kernel launching; this feature is called *dynamic parallelism* but Numba does not support it currently) .. _cuda-kernel-invocation: Kernel invocation ================= A kernel is typically launched in the following way:: threadsperblock = 32 blockspergrid = (an_array.size + (threadsperblock - 1)) // threadsperblock increment_by_one[blockspergrid, threadsperblock](an_array) We notice two steps here: * Instantiate the kernel proper, by specifying a number of blocks (or "blocks per grid"), and a number of threads per block. The product of the two will give the total number of threads launched. Kernel instantiation is done by taking the compiled kernel function (here ``increment_by_one``) and indexing it with a tuple of integers. * Running the kernel, by passing it the input array (and any separate output arrays if necessary). Kernels run asynchronously: launches queue their execution on the device and then return immediately. You can use :func:`cuda.synchronize() ` to wait for all previous kernel launches to finish executing. .. note:: Passing an array that resides in host memory will implicitly cause a copy back to the host, which will be synchronous. In this case, the kernel launch will not return until the data is copied back, and therefore appears to execute synchronously. Choosing the block size ----------------------- It might seem curious to have a two-level hierarchy when declaring the number of threads needed by a kernel. The block size (i.e. number of threads per block) is often crucial: * On the software side, the block size determines how many threads share a given area of :ref:`shared memory `. * On the hardware side, the block size must be large enough for full occupation of execution units; recommendations can be found in the `CUDA C Programming Guide`_. Multi-dimensional blocks and grids ---------------------------------- To help deal with multi-dimensional arrays, CUDA allows you to specify multi-dimensional blocks and grids. In the example above, you could make ``blockspergrid`` and ``threadsperblock`` tuples of one, two or three integers. Compared to 1D declarations of equivalent sizes, this doesn't change anything to the efficiency or behaviour of generated code, but can help you write your algorithms in a more natural way. Thread positioning ================== When running a kernel, the kernel function's code is executed by every thread once. It therefore has to know which thread it is in, in order to know which array element(s) it is responsible for (complex algorithms may define more complex responsibilities, but the underlying principle is the same). One way is for the thread to determine its position in the grid and block and manually compute the corresponding array position:: @cuda.jit def increment_by_one(an_array): # Thread id in a 1D block tx = cuda.threadIdx.x # Block id in a 1D grid ty = cuda.blockIdx.x # Block width, i.e. number of threads per block bw = cuda.blockDim.x # Compute flattened index inside the array pos = tx + ty * bw if pos < an_array.size: # Check array boundaries an_array[pos] += 1 .. note:: Unless you are sure the block size and grid size is a divisor of your array size, you **must** check boundaries as shown above. 
:attr:`.threadIdx`, :attr:`.blockIdx`, :attr:`.blockDim` and :attr:`.gridDim` are special objects provided by the CUDA backend for the sole purpose of knowing the geometry of the thread hierarchy and the position of the current thread within that geometry.

These objects can be 1D, 2D or 3D, depending on how the kernel was :ref:`invoked <cuda-kernel-invocation>`. To access the value at each dimension, use the ``x``, ``y`` and ``z`` attributes of these objects, respectively.

.. attribute:: numba.cuda.threadIdx
   :noindex:

   The thread indices in the current thread block. For 1D blocks, the index (given by the ``x`` attribute) is an integer spanning the range from 0 inclusive to :attr:`numba.cuda.blockDim` exclusive. A similar rule exists for each dimension when more than one dimension is used.

.. attribute:: numba.cuda.blockDim
   :noindex:

   The shape of the block of threads, as declared when instantiating the kernel. This value is the same for all threads in a given kernel, even if they belong to different blocks (i.e. each block is "full").

.. attribute:: numba.cuda.blockIdx
   :noindex:

   The block indices in the grid of blocks launched by a kernel. For a 1D grid, the index (given by the ``x`` attribute) is an integer spanning the range from 0 inclusive to :attr:`numba.cuda.gridDim` exclusive. A similar rule exists for each dimension when more than one dimension is used.

.. attribute:: numba.cuda.gridDim
   :noindex:

   The shape of the grid of blocks, i.e. the total number of blocks launched by this kernel invocation, as declared when instantiating the kernel.

Absolute positions
------------------

Simple algorithms will tend to always use thread indices in the same way as shown in the example above. Numba provides additional facilities to automate such calculations:

.. function:: numba.cuda.grid(ndim)
   :noindex:

   Return the absolute position of the current thread in the entire grid of blocks. *ndim* should correspond to the number of dimensions declared when instantiating the kernel. If *ndim* is 1, a single integer is returned. If *ndim* is 2 or 3, a tuple of the given number of integers is returned.

.. function:: numba.cuda.gridsize(ndim)
   :noindex:

   Return the absolute size (or shape) in threads of the entire grid of blocks. *ndim* has the same meaning as in :func:`.grid` above.

With these functions, the incrementation example can become::

    @cuda.jit
    def increment_by_one(an_array):
        pos = cuda.grid(1)
        if pos < an_array.size:
            an_array[pos] += 1

The same example for a 2D array and grid of threads would be::

    @cuda.jit
    def increment_a_2D_array(an_array):
        x, y = cuda.grid(2)
        if x < an_array.shape[0] and y < an_array.shape[1]:
            an_array[x, y] += 1

Note the grid computation when instantiating the kernel must still be done manually, for example::

    threadsperblock = (16, 16)
    blockspergrid_x = math.ceil(an_array.shape[0] / threadsperblock[0])
    blockspergrid_y = math.ceil(an_array.shape[1] / threadsperblock[1])
    blockspergrid = (blockspergrid_x, blockspergrid_y)
    increment_a_2D_array[blockspergrid, threadsperblock](an_array)

Further Reading
---------------

Please refer to the `CUDA C Programming Guide`_ for a detailed discussion of CUDA programming.

.. _CUDA C Programming Guide: http://docs.nvidia.com/cuda/cuda-c-programming-guide

numba-0.55.1/docs/source/cuda/memory.rst000664 000000 000000 00000020234 14174536160 020102 0ustar00rootroot000000 000000 =================
Memory management
=================

.. 
_cuda-device-memory: Data transfer ============= Even though Numba can automatically transfer NumPy arrays to the device, it can only do so conservatively by always transferring device memory back to the host when a kernel finishes. To avoid the unnecessary transfer for read-only arrays, you can use the following APIs to manually control the transfer: .. autofunction:: numba.cuda.device_array :noindex: .. autofunction:: numba.cuda.device_array_like :noindex: .. autofunction:: numba.cuda.to_device :noindex: In addition to the device arrays, Numba can consume any object that implements :ref:`cuda array interface `. These objects also can be manually converted into a Numba device array by creating a view of the GPU buffer using the following APIs: .. autofunction:: numba.cuda.as_cuda_array :noindex: .. autofunction:: numba.cuda.is_cuda_array :noindex: Device arrays ------------- Device array references have the following methods. These methods are to be called in host code, not within CUDA-jitted functions. .. autoclass:: numba.cuda.cudadrv.devicearray.DeviceNDArray :members: copy_to_host, is_c_contiguous, is_f_contiguous, ravel, reshape :noindex: .. note:: DeviceNDArray defines the :ref:`cuda array interface `. Pinned memory ============= .. autofunction:: numba.cuda.pinned :noindex: .. autofunction:: numba.cuda.pinned_array :noindex: .. autofunction:: numba.cuda.pinned_array_like :noindex: Mapped memory ============= .. autofunction:: numba.cuda.mapped :noindex: .. autofunction:: numba.cuda.mapped_array :noindex: .. autofunction:: numba.cuda.mapped_array_like :noindex: Managed memory ============== .. autofunction:: numba.cuda.managed_array :noindex: Streams ======= Streams can be passed to functions that accept them (e.g. copies between the host and device) and into kernel launch configurations so that the operations are executed asynchronously. .. autofunction:: numba.cuda.stream :noindex: .. autofunction:: numba.cuda.default_stream :noindex: .. autofunction:: numba.cuda.legacy_default_stream :noindex: .. autofunction:: numba.cuda.per_thread_default_stream :noindex: .. autofunction:: numba.cuda.external_stream :noindex: CUDA streams have the following methods: .. autoclass:: numba.cuda.cudadrv.driver.Stream :members: synchronize, auto_synchronize :noindex: .. _cuda-shared-memory: Shared memory and thread synchronization ======================================== A limited amount of shared memory can be allocated on the device to speed up access to data, when necessary. That memory will be shared (i.e. both readable and writable) amongst all threads belonging to a given block and has faster access times than regular device memory. It also allows threads to cooperate on a given solution. You can think of it as a manually-managed data cache. The memory is allocated once for the duration of the kernel, unlike traditional dynamic memory management. .. function:: numba.cuda.shared.array(shape, type) :noindex: Allocate a shared array of the given *shape* and *type* on the device. This function must be called on the device (i.e. from a kernel or device function). *shape* is either an integer or a tuple of integers representing the array's dimensions and must be a simple constant expression. *type* is a :ref:`Numba type ` of the elements needing to be stored in the array. The returned array-like object can be read and written to like any normal device array (e.g. through indexing). 
A common pattern is to have each thread populate one element in the shared array and then wait for all threads to finish using :func:`.syncthreads`.

.. function:: numba.cuda.syncthreads()
   :noindex:

   Synchronize all threads in the same thread block. This function implements the same pattern as `barriers `_ in traditional multi-threaded programming: this function waits until all threads in the block call it, at which point it returns control to all its callers.

.. seealso:: :ref:`Matrix multiplication example <cuda-matmul>`.

.. _cuda-local-memory:

Local memory
============

Local memory is an area of memory private to each thread. Using local memory helps allocate some scratchpad area when scalar local variables are not enough. The memory is allocated once for the duration of the kernel, unlike traditional dynamic memory management.

.. function:: numba.cuda.local.array(shape, type)
   :noindex:

   Allocate a local array of the given *shape* and *type* on the device. *shape* is either an integer or a tuple of integers representing the array's dimensions and must be a simple constant expression. *type* is a :ref:`Numba type ` of the elements needing to be stored in the array. The array is private to the current thread. An array-like object is returned which can be read and written to like any standard array (e.g. through indexing).

   .. seealso:: The Local Memory section of `Device Memory Accesses `_ in the CUDA programming guide.

Constant memory
===============

Constant memory is an area of memory that is read only, cached and off-chip; it is accessible by all threads and is host allocated. A method of creating an array in constant memory is through the use of:

.. function:: numba.cuda.const.array_like(arr)
   :noindex:

   Allocate and make accessible an array in constant memory based on array-like *arr*.

.. _deallocation-behavior:

Deallocation Behavior
=====================

This section describes the deallocation behaviour of Numba's internal memory management. If an External Memory Management Plugin is in use (see :ref:`cuda-emm-plugin`), then deallocation behaviour may differ; you may refer to the documentation for the EMM Plugin to understand its deallocation behaviour.

Deallocation of all CUDA resources is tracked on a per-context basis. When the last reference to a device memory allocation is dropped, the underlying memory is scheduled to be deallocated. The deallocation does not occur immediately. It is added to a queue of pending deallocations.

This design has two benefits:

1. Resource deallocation API may cause the device to synchronize, thus breaking any asynchronous execution. Deferring the deallocation can avoid latency in performance-critical code sections.
2. Some deallocation errors may cause all the remaining deallocations to fail. Continued deallocation errors can cause critical errors at the CUDA driver level. In some cases, this could mean a segmentation fault in the CUDA driver. In the worst case, this could cause the system GUI to freeze and recovery might only be possible with a system reset. When an error occurs during a deallocation, the remaining pending deallocations are cancelled. Any deallocation error will be reported. When the process is terminated, the CUDA driver is able to release all resources allocated by the terminated process.

The deallocation queue is flushed automatically as soon as the following events occur:

- An allocation failed due to an out-of-memory error. Allocation is retried after flushing all deallocations.
- The deallocation queue has reached its maximum size, which defaults to 10. The user can override this by setting the environment variable `NUMBA_CUDA_MAX_PENDING_DEALLOCS_COUNT`. For example, `NUMBA_CUDA_MAX_PENDING_DEALLOCS_COUNT=20` increases the limit to 20.
- The maximum accumulated byte size of resources that are pending deallocation is reached. This defaults to 20% of the device memory capacity. The user can override this by setting the environment variable `NUMBA_CUDA_MAX_PENDING_DEALLOCS_RATIO`. For example, `NUMBA_CUDA_MAX_PENDING_DEALLOCS_RATIO=0.5` sets the limit to 50% of the capacity.

Sometimes, it is desired to defer resource deallocation until a code section ends. Most often, users want to avoid any implicit synchronization due to deallocation. This can be done by using the following context manager:

.. autofunction:: numba.cuda.defer_cleanup
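For illustration, a hedged sketch of the intended usage pattern follows; the kernel, array sizes and iteration count are arbitrary:

.. code-block:: python

   import numpy as np
   from numba import cuda

   @cuda.jit
   def scale(x):
       i = cuda.grid(1)
       if i < x.size:
           x[i] *= 2

   arr = cuda.to_device(np.arange(1024 * 1024, dtype=np.float32))

   with cuda.defer_cleanup():
       # Deallocations arising in this block are deferred, avoiding any
       # implicit synchronization inside the critical section.
       for _ in range(10):
           tmp = cuda.device_array_like(arr)  # temporary allocation
           scale[1024, 1024](arr)
   # Pending deallocations may now be flushed.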
  Users can override this limit by setting the environment variable ``NUMBA_CUDA_MAX_PENDING_DEALLOCS_COUNT``. For example, ``NUMBA_CUDA_MAX_PENDING_DEALLOCS_COUNT=20`` increases the limit to 20.
- The maximum accumulated byte size of resources that are pending deallocation is reached. This defaults to 20% of the device memory capacity. Users can override it by setting the environment variable ``NUMBA_CUDA_MAX_PENDING_DEALLOCS_RATIO``. For example, ``NUMBA_CUDA_MAX_PENDING_DEALLOCS_RATIO=0.5`` sets the limit to 50% of the capacity.

Sometimes it is desirable to defer resource deallocation until a code section ends, most often to avoid any implicit synchronization caused by the deallocation. This can be done by using the following context manager:

.. autofunction:: numba.cuda.defer_cleanup

numba-0.55.1/docs/source/cuda/overview.rst

========
Overview
========

Numba supports CUDA GPU programming by directly compiling a restricted subset of Python code into CUDA kernels and device functions following the CUDA execution model. Kernels written in Numba appear to have direct access to NumPy arrays. NumPy arrays are transferred between the CPU and the GPU automatically.

Terminology
===========

Several important terms in the topic of CUDA programming are listed here:

- *host*: the CPU
- *device*: the GPU
- *host memory*: the system main memory
- *device memory*: onboard memory on a GPU card
- *kernel*: a GPU function launched by the host and executed on the device
- *device function*: a GPU function executed on the device which can only be called from the device (i.e. from a kernel or another device function)

Programming model
=================

Most CUDA programming facilities exposed by Numba map directly to the CUDA C language offered by NVIDIA. Therefore, it is recommended you read the official CUDA C programming guide.

Requirements
============

Supported GPUs
--------------

Numba supports CUDA-enabled GPUs with Compute Capability 3.0 or greater. Support for devices with Compute Capability less than 5.3 is deprecated, and will be removed in the next Numba release (0.56).

Devices with Compute Capability 5.3 or greater include (but are not limited to):

- Embedded platforms: NVIDIA Jetson Nano, TX1, TX2, Xavier NX, AGX Xavier.
- Desktop / Server GPUs: All GPUs with Pascal microarchitecture or later. E.g. GTX 10 / 16 series, RTX 20 / 30 series, Quadro P / V / RTX series, RTX A series.
- Laptop GPUs: All GPUs with Pascal microarchitecture or later. E.g. MX series, Quadro P / T series (mobile), RTX 20 / 30 series (mobile), RTX A series (mobile).

Software
--------

Numba aims to support CUDA Toolkit versions released within the last 3 years. An NVIDIA driver sufficient for the toolkit version is also required. Presently:

* 9.2 is the minimum required toolkit version.
* Support for versions less than 10.2 is deprecated, and will be removed in the next Numba release (0.56).
* 11.2 or later is recommended, as it uses an NVVM version based on LLVM 7 (as opposed to 3.4 in earlier releases).

CUDA is supported on 64-bit Linux and Windows.

If you are using Conda, you can install the CUDA toolkit with::

   $ conda install cudatoolkit

If you are not using Conda or if you want to use a different version of CUDA toolkit, the following describes how Numba searches for a CUDA toolkit installation.
.. _cuda-bindings:

CUDA Bindings
~~~~~~~~~~~~~

Numba supports interacting with the CUDA Driver API via the NVIDIA CUDA Python bindings and its own ctypes-based bindings. Functionality is equivalent between the two bindings. The ctypes-based bindings are presently the default, but the NVIDIA bindings will be used by default (if they are available in the environment) in a future Numba release.

You can install the NVIDIA bindings with::

   $ conda install nvidia::cuda-python

if you are using Conda, or::

   $ pip install cuda-python

if you are using pip.

The use of the NVIDIA bindings is enabled by setting the environment variable :envvar:`NUMBA_CUDA_USE_NVIDIA_BINDING` to ``"1"``.

.. _cudatoolkit-lookup:

Setting CUDA Installation Path
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Numba searches for a CUDA toolkit installation in the following order:

1. Conda-installed ``cudatoolkit`` package.
2. Environment variable ``CUDA_HOME``, which points to the directory of the installed CUDA toolkit (e.g. ``/home/user/cuda-10``)
3. System-wide installation at exactly ``/usr/local/cuda`` on Linux platforms. Versioned installation paths (e.g. ``/usr/local/cuda-10.0``) are intentionally ignored. Users can use ``CUDA_HOME`` to select specific versions.

In addition to the CUDA toolkit libraries, which can be installed by conda into an environment or installed system-wide by the `CUDA SDK installer <https://developer.nvidia.com/cuda-downloads>`_, the CUDA target in Numba also requires an up-to-date NVIDIA graphics driver. Updated graphics drivers are also installed by the CUDA SDK installer, so there is no need to do both. Note that on macOS, the CUDA SDK must be installed to get the required driver, and the driver is only supported on macOS prior to 10.14 (Mojave).

If the ``libcuda`` library is in a non-standard location, users can set the environment variable ``NUMBA_CUDA_DRIVER`` to the file path (not the directory path) of the shared library file.

Missing CUDA Features
=====================

Numba does not yet implement all features of CUDA. Some missing features are listed below:

* dynamic parallelism
* texture memory

numba-0.55.1/docs/source/cuda/random.rst

.. _cuda-random:

Random Number Generation
========================

Numba provides a random number generation algorithm that can be executed on the GPU. Due to technical issues with how NVIDIA implemented cuRAND, however, Numba's GPU random number generator is not based on cuRAND. Instead, Numba's GPU RNG is an implementation of the xoroshiro128+ algorithm. The xoroshiro128+ algorithm has a period of ``2**128 - 1``, which is shorter than the period of the XORWOW algorithm used by default in cuRAND, but xoroshiro128+ still passes the BigCrush tests of random number generator quality.

When using any RNG on the GPU, it is important to make sure that each thread has its own RNG state, and that the states have been initialized to produce non-overlapping sequences. The numba.cuda.random module provides a host function to do this, as well as CUDA device functions to obtain uniformly or normally distributed random numbers.

.. note:: Numba (like cuRAND) uses the Box-Muller transform to generate normally distributed random numbers from a uniform generator. However, Box-Muller generates pairs of random numbers, and the current implementation only returns one of them. As a result, generating normally distributed values is half the speed of uniformly distributed values.
.. automodule:: numba.cuda.random
   :members: create_xoroshiro128p_states, init_xoroshiro128p_states, xoroshiro128p_uniform_float32, xoroshiro128p_uniform_float64, xoroshiro128p_normal_float32, xoroshiro128p_normal_float64
   :noindex:

A simple example
''''''''''''''''

Here is a sample program that uses the random number generator::

    from __future__ import print_function, absolute_import

    from numba import cuda
    from numba.cuda.random import create_xoroshiro128p_states, xoroshiro128p_uniform_float32
    import numpy as np

    @cuda.jit
    def compute_pi(rng_states, iterations, out):
        """Estimate pi for each thread by drawing random points."""
        thread_id = cuda.grid(1)

        # Compute pi by drawing random (x, y) points and finding what
        # fraction lie inside a unit circle
        inside = 0
        for i in range(iterations):
            x = xoroshiro128p_uniform_float32(rng_states, thread_id)
            y = xoroshiro128p_uniform_float32(rng_states, thread_id)
            if x**2 + y**2 <= 1.0:
                inside += 1

        out[thread_id] = 4.0 * inside / iterations

    threads_per_block = 64
    blocks = 24
    rng_states = create_xoroshiro128p_states(threads_per_block * blocks, seed=1)
    out = np.zeros(threads_per_block * blocks, dtype=np.float32)

    compute_pi[blocks, threads_per_block](rng_states, 10000, out)
    print('pi:', out.mean())

An example of managing RNG state size and using a 3D grid
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

The number of RNG states scales with the number of threads using the RNG, so it is often better to use strided loops in conjunction with the RNG in order to keep the state size manageable.

In the following example, which initializes a large 3D array with random numbers, using one thread per output element would result in 453,617,100 RNG states. This would take a long time to initialize and poorly utilize the GPU. Instead, it uses a fixed size 3D grid with a total of 2,097,152 (``(16 ** 3) * (8 ** 3)``) threads striding over the output array. The 3D thread indices ``startx``, ``starty``, and ``startz`` are linearized into a 1D index, ``tid``, to index into the 2,097,152 RNG states.

.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_random.py
   :language: python
   :caption: from ``test_ex_3d_grid`` of ``numba/cuda/tests/doc_examples/test_random.py``
   :start-after: magictoken.ex_3d_grid.begin
   :end-before: magictoken.ex_3d_grid.end
   :dedent: 8
   :linenos:

numba-0.55.1/docs/source/cuda/reduction.rst

GPU Reduction
==============

Writing a reduction algorithm for a CUDA GPU can be tricky. Numba provides a ``@reduce`` decorator for converting a simple binary operation into a reduction kernel. An example follows::

    import numpy
    from numba import cuda

    @cuda.reduce
    def sum_reduce(a, b):
        return a + b

    A = (numpy.arange(1234, dtype=numpy.float64)) + 1
    expect = A.sum()      # numpy sum reduction
    got = sum_reduce(A)   # cuda sum reduction
    assert expect == got

Lambda functions can also be used here::

    sum_reduce = cuda.reduce(lambda a, b: a + b)

The Reduce class
----------------

The ``reduce`` decorator creates an instance of the ``Reduce`` class. Currently, ``reduce`` is an alias of ``Reduce``, but this behavior is not guaranteed.

.. autoclass:: numba.cuda.Reduce
   :members: __init__, __call__
   :member-order: bysource
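As a small usage sketch (the operation and the data are hypothetical; the ``init`` keyword is documented on ``__call__``), a ``Reduce`` instance can also be constructed and called directly::

    import numpy as np
    from numba import cuda

    # Equivalent to decorating the lambda with @cuda.reduce
    max_reduce = cuda.Reduce(lambda a, b: max(a, b))

    A = np.random.rand(1024)
    got = max_reduce(A, init=0.0)   # seed the reduction with an initial value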
numba-0.55.1/docs/source/cuda/simulator.rst

.. _simulator:

=============================================
Debugging CUDA Python with the CUDA Simulator
=============================================

Numba includes a CUDA Simulator that implements most of the semantics in CUDA Python using the Python interpreter and some additional Python code. This can be used to debug CUDA Python code, either by adding print statements to your code, or by using the debugger to step through the execution of an individual thread.

The simulator deliberately allows running non-CUDA code like starting a debugger and printing arbitrary expressions for debugging purposes. Therefore, it is best to start from code that compiles for the CUDA target, and then move over to the simulator to investigate issues.

Execution of kernels is performed by the simulator one block at a time. One thread is spawned for each thread in the block, and scheduling of the execution of these threads is left up to the operating system.

Using the simulator
===================

The simulator is enabled by setting the environment variable :envvar:`NUMBA_ENABLE_CUDASIM` to 1 prior to importing Numba. CUDA Python code may then be executed as normal.

The easiest way to use the debugger inside a kernel is to only stop a single thread, otherwise the interaction with the debugger is difficult to handle. For example, the kernel below will stop in the thread ``<<<(3,0,0), (1, 0, 0)>>>``::

    @cuda.jit
    def vec_add(A, B, out):
        x = cuda.threadIdx.x
        bx = cuda.blockIdx.x
        bdx = cuda.blockDim.x
        if x == 1 and bx == 3:
            from pdb import set_trace; set_trace()
        i = bx * bdx + x
        out[i] = A[i] + B[i]

when invoked with a one-dimensional grid and one-dimensional blocks.

Supported features
==================

The simulator aims to provide as complete a simulation of execution on a real GPU as possible - in particular, the following are supported:

* Atomic operations
* Constant memory
* Local memory
* Shared memory: declarations of shared memory arrays must be on separate source lines, since the simulator uses source line information to keep track of allocations of shared memory across threads.
* Mapped arrays.
* Host and device memory operations: copying and setting memory.
* :func:`.syncthreads` is supported - however, in the case where divergent threads enter different :func:`.syncthreads` calls, the launch will not fail, but unexpected behaviour will occur. A future version of the simulator may detect this condition.
* The stream API is supported, but all operations occur sequentially and synchronously, unlike on a real device. Synchronising on a stream is therefore a no-op.
* The event API is also supported, but provides no meaningful timing information.
* Data transfer to and from the GPU - in particular, creating array objects with :func:`.device_array` and :func:`.device_array_like`. The APIs for pinned memory :func:`.pinned` and :func:`.pinned_array` are also supported, but no pinning takes place.
* The driver API implementation of the list of GPU contexts (``cuda.gpus`` and ``cuda.cudadrv.devices.gpus``) is supported, and reports a single GPU context. This context can be closed and reset as the real one would.
* The :func:`.detect` function is supported, and reports one device called `SIMULATOR`.
* Cooperative grids: A cooperative kernel can be launched, but with only one block - the simulator always returns ``1`` from a kernel overload's :meth:`~numba.cuda.compiler._Kernel.max_cooperative_grid_blocks` method.
Some limitations of the simulator include:

* It does not perform type checking/type inference. If any argument types to a jitted function are incorrect, or if the specification of the type of any local variable is incorrect, this will not be detected by the simulator.
* Only one GPU is simulated.
* Multithreaded accesses to a single GPU are not supported, and will result in unexpected behaviour.
* Most of the driver API is unimplemented.
* It is not possible to link PTX code with CUDA Python functions.
* Warps and warp-level operations are not yet implemented.
* Because the simulator executes kernels using the Python interpreter, structured array access by attribute that works with the hardware target may fail in the simulator - see :ref:`structured-array-access`.
* Operations directly against device arrays are only partially supported, that is, testing equality, less than, greater than, and basic mathematical operations are supported, but many other operations, such as the in-place operators and bit operators, are not.
* The :func:`ffs() <numba.cuda.ffs>` function only works correctly for values that can be represented using 32-bit integers.

Obviously, the speed of the simulator is also much lower than that of a real device. It may be necessary to reduce the size of input data and the size of the CUDA grid in order to make debugging with the simulator tractable.

numba-0.55.1/docs/source/cuda/ufunc.rst

CUDA Ufuncs and Generalized Ufuncs
==================================

This page describes the CUDA ufunc-like object.

To support the programming pattern of CUDA programs, CUDA Vectorize and GUVectorize cannot produce a conventional ufunc. Instead, a ufunc-like object is returned. This object is a close analog of, but not fully compatible with, a regular NumPy ufunc. The CUDA ufunc adds support for passing intra-device arrays (already on the GPU device) to reduce traffic over the PCI-express bus. It also accepts a `stream` keyword for launching in asynchronous mode.

Example: Basic Example
------------------------

::

    import math
    from numba import vectorize, cuda
    import numpy as np

    @vectorize(['float32(float32, float32, float32)',
                'float64(float64, float64, float64)'],
               target='cuda')
    def cu_discriminant(a, b, c):
        return math.sqrt(b ** 2 - 4 * a * c)

    N = 10000
    dtype = np.float32

    # prepare the input
    A = np.array(np.random.sample(N), dtype=dtype)
    B = np.array(np.random.sample(N) + 10, dtype=dtype)
    C = np.array(np.random.sample(N), dtype=dtype)

    D = cu_discriminant(A, B, C)

    print(D)  # print result

Example: Calling Device Functions
----------------------------------

All CUDA ufunc kernels have the ability to call other CUDA device functions::

    from numba import vectorize, cuda

    # define a device function
    @cuda.jit('float32(float32, float32, float32)', device=True, inline=True)
    def cu_device_fn(x, y, z):
        return x ** y / z

    # define a ufunc that calls our device function
    @vectorize(['float32(float32, float32, float32)'], target='cuda')
    def cu_ufunc(x, y, z):
        return cu_device_fn(x, y, z)

Generalized CUDA ufuncs
-----------------------

Generalized ufuncs may be executed on the GPU using CUDA, analogous to the CUDA ufunc functionality. This may be accomplished as follows::

    from numba import guvectorize

    @guvectorize(['void(float32[:,:], float32[:,:], float32[:,:])'],
                 '(m,n),(n,p)->(m,p)', target='cuda')
    def matmulcore(A, B, C):
        ...

There are times when the gufunc kernel uses too many of a GPU's resources, which can cause the kernel launch to fail.
The user can explicitly control the maximum size of the thread block by setting the `max_blocksize` attribute on the compiled gufunc object. ::

    from numba import guvectorize

    @guvectorize(..., target='cuda')
    def very_complex_kernel(A, B, C):
        ...

    very_complex_kernel.max_blocksize = 32  # limits to 32 threads per block

.. comment

    Example: A Chunk at a Time
    ---------------------------

    Partitioning your data into chunks allows computation and memory transfer to be overlapped. This can increase the throughput of your ufunc and enables your ufunc to operate on data that is larger than the memory capacity of your GPU. For example:

    ::

        import math
        from numba import vectorize, cuda
        import numpy as np

        # the ufunc kernel
        def discriminant(a, b, c):
            return math.sqrt(b ** 2 - 4 * a * c)

        cu_discriminant = vectorize(['float32(float32, float32, float32)',
                                     'float64(float64, float64, float64)'],
                                    target='cuda')(discriminant)

        N = int(1e+8)
        dtype = np.float32

        # prepare the input
        A = np.array(np.random.sample(N), dtype=dtype)
        B = np.array(np.random.sample(N) + 10, dtype=dtype)
        C = np.array(np.random.sample(N), dtype=dtype)
        D = np.empty(A.shape, dtype=A.dtype)

        # create a CUDA stream
        stream = cuda.stream()

        chunksize = 10 ** 6  # an integer, so np.split receives an int count
        chunkcount = N // chunksize

        # partition numpy arrays into chunks
        # no copying is performed
        sA = np.split(A, chunkcount)
        sB = np.split(B, chunkcount)
        sC = np.split(C, chunkcount)
        sD = np.split(D, chunkcount)

        device_ptrs = []

        with stream.auto_synchronize():
            # every operation in this context will be launched asynchronously
            # by using the CUDA stream

            # for each chunk
            for a, b, c, d in zip(sA, sB, sC, sD):
                # transfer to device
                dA = cuda.to_device(a, stream)
                dB = cuda.to_device(b, stream)
                dC = cuda.to_device(c, stream)
                dD = cuda.to_device(d, stream, copy=False)  # no copying

                # launch kernel
                cu_discriminant(dA, dB, dC, out=dD, stream=stream)

                # retrieve result
                dD.copy_to_host(d, stream)

                # store device pointers to prevent them from freeing before
                # the kernel is scheduled
                device_ptrs.extend([dA, dB, dC, dD])

        # data is ready at this point inside D

numba-0.55.1/docs/source/developer/architecture.rst

.. _architecture:

==================
Numba architecture
==================

Introduction
============

Numba is a compiler for Python bytecode with optional type-specialization.

Suppose you enter a function like this into the standard Python interpreter (henceforward referred to as "CPython")::

    def add(a, b):
        return a + b

The interpreter will immediately parse the function and convert it into a bytecode representation that describes how the CPython interpreter should execute the function at a low level. For the example above, it looks something like this::

    >>> import dis
    >>> dis.dis(add)
    2           0 LOAD_FAST                0 (a)
                3 LOAD_FAST                1 (b)
                6 BINARY_ADD
                7 RETURN_VALUE

CPython uses a stack-based interpreter (much like an HP calculator), so the code first pushes two local variables onto the stack. The ``BINARY_ADD`` opcode pops the top two arguments off the stack and makes a Python C API function call that is equivalent to calling ``a.__add__(b)``. The result is then pushed onto the top of the interpreter stack. Finally, the ``RETURN_VALUE`` opcode returns the value on the top of the stack as the result of the function call.
Numba can take this bytecode and compile it to machine code that performs the same operations as the CPython interpreter, treating ``a`` and ``b`` as generic Python objects. The full semantics of Python are preserved, and the compiled function can be used with any kind of objects that have the add operator defined. When a Numba function is compiled this way, we say that it has been compiled in :term:`object mode`, because the code still manipulates Python objects.

Numba code compiled in object mode is not much faster than executing the original Python function in the CPython interpreter. However, if we specialize the function to only run with certain data types, Numba can generate much shorter and more efficient code that manipulates the data natively without any calls into the Python C API. When code has been compiled for specific data types so that the function body no longer relies on the Python runtime, we say the function has been compiled in :term:`nopython mode`. Numeric code compiled in nopython mode can be hundreds of times faster than the original Python.

Compiler architecture
=====================

Like many compilers, Numba can be conceptually divided into a *frontend* and a *backend*.

The Numba *frontend* comprises the stages which analyze the Python bytecode, translate it to :term:`Numba IR` and perform various transformations and analysis steps on the IR. One of the key steps is :term:`type inference`. The frontend must succeed in typing all variables unambiguously in order for the backend to generate code in :term:`nopython mode`, because the backend uses type information to match appropriate code generators with the values they operate on.

The Numba *backend* walks the Numba IR resulting from the frontend analyses and exploits the type information deduced by the type inference phase to produce the right LLVM code for each encountered operation. After LLVM code is produced, the LLVM library is asked to optimize it and generate native processor code for the final, native function.

There are other pieces besides the compiler frontend and backend, such as the caching machinery for JIT functions. Those pieces are not considered in this document.

Contexts
========

Numba is quite flexible, allowing it to generate code for different hardware architectures like CPUs and GPUs. In order to support these different applications, Numba uses a *typing context* and a *target context*.

A *typing context* is used in the compiler frontend to perform type inference on operations and values in the function. Similar typing contexts could be used for many architectures because, for nearly all cases, type inference is hardware-independent. However, Numba currently has a different typing context for each target.

A *target context* is used to generate the specific instruction sequence required to operate on the Numba types identified during type inference. Target contexts are architecture-specific and are flexible in defining the execution model and available Python APIs. For example, Numba has a "cpu" and a "cuda" context for those two kinds of architecture, and a "parallel" context which produces multithreaded CPU code.

Compiler stages
===============

The :func:`~numba.jit` decorator in Numba ultimately calls ``numba.compiler.compile_extra()`` which compiles the Python function in a multi-stage process, described below.

Stage 1: Analyze bytecode
-------------------------

At the start of compilation, the function bytecode is passed to an instance of the Numba interpreter (``numba.interpreter``).
The interpreter object analyzes the bytecode to find the control flow graph (``numba.controlflow``). The control flow graph (CFG) describes the ways that execution can move from one block to the next inside the function as a result of loops and branches.

The data flow analysis (``numba.dataflow``) takes the control flow graph and traces how values get pushed and popped off the Python interpreter stack for different code paths. This is important for understanding the lifetimes of variables on the stack, which are needed in Stage 2.

If you set the environment variable ``NUMBA_DUMP_CFG`` to 1, Numba will dump the results of the control flow graph analysis to the screen. Our ``add()`` example is pretty boring, since there is only one statement block::

    CFG adjacency lists:
    {0: []}
    CFG dominators:
    {0: set([0])}
    CFG post-dominators:
    {0: set([0])}
    CFG back edges: []
    CFG loops:
    {}
    CFG node-to-loops:
    {0: []}

A function with more complex flow control will have a more interesting control flow graph. This function::

    def doloops(n):
        acc = 0
        for i in range(n):
            acc += 1
            if n == 10:
                break
        return acc

compiles to this bytecode::

      9           0 LOAD_CONST               1 (0)
                  3 STORE_FAST               1 (acc)

     10           6 SETUP_LOOP              46 (to 55)
                  9 LOAD_GLOBAL              0 (range)
                 12 LOAD_FAST                0 (n)
                 15 CALL_FUNCTION            1
                 18 GET_ITER
            >>   19 FOR_ITER                32 (to 54)
                 22 STORE_FAST               2 (i)

     11          25 LOAD_FAST                1 (acc)
                 28 LOAD_CONST               2 (1)
                 31 INPLACE_ADD
                 32 STORE_FAST               1 (acc)

     12          35 LOAD_FAST                0 (n)
                 38 LOAD_CONST               3 (10)
                 41 COMPARE_OP               2 (==)
                 44 POP_JUMP_IF_FALSE       19

     13          47 BREAK_LOOP
                 48 JUMP_ABSOLUTE           19
                 51 JUMP_ABSOLUTE           19
            >>   54 POP_BLOCK

     14     >>   55 LOAD_FAST                1 (acc)
                 58 RETURN_VALUE

The corresponding CFG for this bytecode is::

    CFG adjacency lists:
    {0: [6], 6: [19], 19: [54, 22], 22: [19, 47], 47: [55], 54: [55], 55: []}
    CFG dominators:
    {0: set([0]),
     6: set([0, 6]),
     19: set([0, 6, 19]),
     22: set([0, 6, 19, 22]),
     47: set([0, 6, 19, 22, 47]),
     54: set([0, 6, 19, 54]),
     55: set([0, 6, 19, 55])}
    CFG post-dominators:
    {0: set([0, 6, 19, 55]),
     6: set([6, 19, 55]),
     19: set([19, 55]),
     22: set([22, 55]),
     47: set([47, 55]),
     54: set([54, 55]),
     55: set([55])}
    CFG back edges: [(22, 19)]
    CFG loops:
    {19: Loop(entries=set([6]), exits=set([54, 47]), header=19, body=set([19, 22]))}
    CFG node-to-loops:
    {0: [], 6: [], 19: [19], 22: [19], 47: [], 54: [], 55: []}

The numbers in the CFG refer to the bytecode offsets shown just to the left of the opcode names above.

.. _arch_generate_numba_ir:

Stage 2: Generate the Numba IR
------------------------------

Once the control flow and data analyses are complete, the Numba interpreter can step through the bytecode and translate it into a Numba-internal intermediate representation. This translation process changes the function from a stack machine representation (used by the Python interpreter) to a register machine representation (used by LLVM).

Although the IR is stored in memory as a tree of objects, it can be serialized to a string for debugging. If you set the environment variable ``NUMBA_DUMP_IR`` equal to 1, the Numba IR will be dumped to the screen. For the ``add()`` function described above, the Numba IR looks like::

    label 0:
        a = arg(0, name=a)                       ['a']
        b = arg(1, name=b)                       ['b']
        $0.3 = a + b                             ['$0.3', 'a', 'b']
        del b                                    []
        del a                                    []
        $0.4 = cast(value=$0.3)                  ['$0.3', '$0.4']
        del $0.3                                 []
        return $0.4                              ['$0.4']

The ``del`` instructions are produced by :ref:`live variable analysis`. Those instructions ensure references are not leaked. In :term:`nopython mode`, some objects are tracked by the Numba runtime and some are not.
For tracked objects, a dereference operation is emitted; otherwise, the instruction is a no-op. In :term:`object mode` each variable contains an owned reference to a PyObject.

.. _`rewrite-untyped-ir`:

Stage 3: Rewrite untyped IR
---------------------------

Before running type inference, it may be desired to run certain transformations on the Numba IR. One such example is to detect ``raise`` statements which have an implicitly constant argument, so as to support them in :term:`nopython mode`. Let's say you compile the following function with Numba::

    def f(x):
        if x == 0:
            raise ValueError("x cannot be zero")

If you set the :envvar:`NUMBA_DUMP_IR` environment variable to ``1``, you'll see the IR being rewritten before the type inference phase::

    REWRITING:
        del $0.3                                 []
        $12.1 = global(ValueError: <class 'ValueError'>) ['$12.1']
        $const12.2 = const(str, x cannot be zero) ['$const12.2']
        $12.3 = call $12.1($const12.2)           ['$12.1', '$12.3', '$const12.2']
        del $const12.2                           []
        del $12.1                                []
        raise $12.3                              ['$12.3']
    ____________________________________________________________
        del $0.3                                 []
        $12.1 = global(ValueError: <class 'ValueError'>) ['$12.1']
        $const12.2 = const(str, x cannot be zero) ['$const12.2']
        $12.3 = call $12.1($const12.2)           ['$12.1', '$12.3', '$const12.2']
        del $const12.2                           []
        del $12.1                                []
        raise ('x cannot be zero')               []

.. _arch_type_inference:

Stage 4: Infer types
--------------------

Now that the Numba IR has been generated, type analysis can be performed. The types of the function arguments can be taken either from the explicit function signature given in the ``@jit`` decorator (such as ``@jit('float64(float64, float64)')``), or they can be taken from the types of the actual function arguments if compilation is happening when the function is first called.

The type inference engine is found in ``numba.typeinfer``. Its job is to assign a type to every intermediate variable in the Numba IR. The result of this pass can be seen by setting the :envvar:`NUMBA_DUMP_ANNOTATION` environment variable to 1:

.. code-block:: python

    -----------------------------------ANNOTATION-----------------------------------
    # File: archex.py
    # --- LINE 4 ---

    @jit(nopython=True)

    # --- LINE 5 ---

    def add(a, b):

        # --- LINE 6 ---
        # label 0
        #   a = arg(0, name=a)  :: int64
        #   b = arg(1, name=b)  :: int64
        #   $0.3 = a + b  :: int64
        #   del b
        #   del a
        #   $0.4 = cast(value=$0.3)  :: int64
        #   del $0.3
        #   return $0.4

        return a + b

If type inference fails to find a consistent type assignment for all the intermediate variables, it will label every variable as type ``pyobject`` and fall back to object mode. Type inference can fail when unsupported Python types, language features, or functions are used in the function body.

.. _`rewrite-typed-ir`:

Stage 5a: Rewrite typed IR
--------------------------

This pass's purpose is to perform any high-level optimizations that still require, or could at least benefit from, Numba IR type information.

One example of a problem domain that isn't as easily optimized once lowered is the domain of multidimensional array operations. When Numba lowers an array operation, Numba treats the operation like a full ufunc kernel. During lowering a single array operation, Numba generates an inline broadcasting loop that creates a new result array. Then Numba generates an application loop that applies the operator over the array inputs. Recognizing and rewriting these loops once they are lowered into LLVM is hard, if not impossible.

An example pair of optimizations in the domain of array operators is loop fusion and shortcut deforestation.
When the optimizer recognizes that the output of one array operator is being fed into another array operator, and only to that array operator, it can fuse the two loops into a single loop. The optimizer can further eliminate the temporary array allocated for the initial operation by directly feeding the result of the first operation into the second, skipping the store and load to the intermediate array. This elimination is known as shortcut deforestation. Numba currently uses the rewrite pass to implement these array optimizations. For more information, please consult the ":ref:`case-study-array-expressions`" subsection, later in this document. One can see the result of rewriting by setting the :envvar:`NUMBA_DUMP_IR` environment variable to a non-zero value (such as 1). The following example shows the output of the rewrite pass as it recognizes an array expression consisting of a multiply and add, and outputs a fused kernel as a special operator, :func:`arrayexpr`:: ______________________________________________________________________ REWRITING: a0 = arg(0, name=a0) ['a0'] a1 = arg(1, name=a1) ['a1'] a2 = arg(2, name=a2) ['a2'] $0.3 = a0 * a1 ['$0.3', 'a0', 'a1'] del a1 [] del a0 [] $0.5 = $0.3 + a2 ['$0.3', '$0.5', 'a2'] del a2 [] del $0.3 [] $0.6 = cast(value=$0.5) ['$0.5', '$0.6'] del $0.5 [] return $0.6 ['$0.6'] ____________________________________________________________ a0 = arg(0, name=a0) ['a0'] a1 = arg(1, name=a1) ['a1'] a2 = arg(2, name=a2) ['a2'] $0.5 = arrayexpr(ty=array(float64, 1d, C), expr=('+', [('*', [Var(a0, test.py (14)), Var(a1, test.py (14))]), Var(a2, test.py (14))])) ['$0.5', 'a0', 'a1', 'a2'] del a0 [] del a1 [] del a2 [] $0.6 = cast(value=$0.5) ['$0.5', '$0.6'] del $0.5 [] return $0.6 ['$0.6'] ______________________________________________________________________ Following this rewrite, Numba lowers the array expression into a new ufunc-like function that is inlined into a single loop that only allocates a single result array. .. _`parallel-accelerator`: Stage 5b: Perform Automatic Parallelization ------------------------------------------- This pass is only performed if the ``parallel`` option in the :func:`~numba.jit` decorator is set to ``True``. This pass finds parallelism implicit in the semantics of operations in the Numba IR and replaces those operations with explicitly parallel representations of those operations using a special `parfor` operator. Then, optimizations are performed to maximize the number of parfors that are adjacent to each other such that they can then be fused together into one parfor that takes only one pass over the data and will thus typically have better cache performance. Finally, during lowering, these parfor operators are converted to a form similar to guvectorize to implement the actual parallelism. The automatic parallelization pass has a number of sub-passes, many of which are controllable using a dictionary of options passed via the ``parallel`` keyword argument to :func:`~numba.jit`:: { 'comprehension': True/False, # parallel comprehension 'prange': True/False, # parallel for-loop 'numpy': True/False, # parallel numpy calls 'reduction': True/False, # parallel reduce calls 'setitem': True/False, # parallel setitem 'stencil': True/False, # parallel stencils 'fusion': True/False, # enable fusion or not } The default is set to `True` for all of them. The sub-passes are described in more detail in the following paragraphs. #. 
CFG Simplification Sometimes Numba IR will contain chains of blocks containing no loops which are merged in this sub-pass into single blocks. This sub-pass simplifies subsequent analysis of the IR. #. Numpy canonicalization Some Numpy operations can be written as operations on Numpy objects (e.g. ``arr.sum()``), or as calls to Numpy taking those objects (e.g. ``numpy.sum(arr)``). This sub-pass converts all such operations to the latter form for cleaner subsequent analysis. #. Array analysis A critical requirement for later parfor fusion is that parfors have identical iteration spaces and these iteration spaces typically correspond to the sizes of the dimensions of Numpy arrays. In this sub-pass, the IR is analyzed to determine equivalence classes for the dimensions of Numpy arrays. Consider the example, ``a = b + 1``, where ``a`` and ``b`` are both Numpy arrays. Here, we know that each dimension of ``a`` must have the same equivalence class as the corresponding dimension of ``b``. Typically, routines rich in Numpy operations will enable equivalence classes to be fully known for all arrays created within a function. Array analysis will also reason about size equivalence for slice selection, and boolean array masking (one dimensional only). For example, it is able to infer that ``a[1 : n-1]`` is of the same size as ``b[0 : n-2]``. Array analysis may also insert safety assumptions to ensure pre-conditions related to array sizes are met before an operation can be parallelized. For example, ``np.dot(X, w)`` between a 2-D matrix ``X`` and a 1-D vector ``w`` requires that the second dimension of ``X`` is of the same size as ``w``. Usually this kind of runtime check is automatically inserted, but if array analysis can infer such equivalence, it will skip them. Users can even help array analysis by turning implicit knowledge about array sizes into explicit assertions. For example, in the code below: .. code-block:: python @numba.njit(parallel=True) def logistic_regression(Y, X, w, iterations): assert(X.shape == (Y.shape[0], w.shape[0])) for i in range(iterations): w -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X, w))) - 1.0) * Y), X) return w Making the explicit assertion helps eliminate all bounds checks in the rest of the function. #. ``prange()`` to parfor The use of prange (:ref:`numba-prange`) in a for loop is an explicit indication from the programmer that all iterations of the for loop can execute in parallel. In this sub-pass, we analyze the CFG to locate loops and to convert those loops controlled by a prange object to the explicit `parfor` operator. Each explicit parfor operator consists of: a. A list of loop nest information that describes the iteration space of the parfor. Each entry in the loop nest list contains an indexing variable, the start of the range, the end of the range, and the step value for each iteration. #. An initialization (init) block which contains instructions to be executed one time before the parfor begins executing. #. A loop body comprising a set of basic blocks that correspond to the body of the loop and compute one point in the iteration space. #. The index variables used for each dimension of the iteration space. For parfor `pranges`, the loop nest is a single entry where the start, stop, and step fields come from the specified `prange`. The init block is empty for `prange` parfors and the loop body is the set of blocks in the loop minus the loop header. 
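As an illustrative sketch (a hypothetical function, not drawn from the Numba sources), a loop of the following shape is converted by this sub-pass into a single-entry loop-nest parfor, with ``acc`` recognized as a reduction variable::

    import numpy as np
    from numba import njit, prange

    @njit(parallel=True)
    def scaled_sum(a):
        acc = 0.0
        # explicit parallel loop: iterations may run on different threads
        for i in prange(a.shape[0]):
            acc += 2.0 * a[i]
        return acc

    scaled_sum(np.ones(1000))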
With parallelization on, array comprehensions (:ref:`pysupported-comprehension`) will also be translated to prange so as to run in parallel. This behavior can be disabled by setting ``parallel={'comprehension': False}``.

Likewise, the overall `prange` to `parfor` translation can be disabled by setting ``parallel={'prange': False}``, in which case `prange` is treated the same as `range`.

#. Numpy to parfor

   In this sub-pass, Numpy functions such as ``ones``, ``zeros``, ``dot``, most of the random number generating functions, arrayexprs (from Section :ref:`rewrite-typed-ir`), and Numpy reductions are converted to parfors. Generally, this conversion creates the loop nest list, whose length is equal to the number of dimensions of the left-hand side of the assignment instruction in the IR. The number and size of the dimensions of the left-hand-side array is taken from the array analysis information generated in sub-pass 3 above. An instruction to create the result Numpy array is generated and stored in the new parfor's init block. A basic block is created for the loop body and an instruction is generated and added to the end of that block to store the result of the computation into the array at the current point in the iteration space. The result stored into the array depends on the operation that is being converted. For example, for ``ones``, the value stored is a constant 1. For calls to generate a random array, the value comes from a call to the same random number function but with the size parameter dropped and therefore returning a scalar. For arrayexpr operators, the arrayexpr tree is converted to Numba IR and the value at the root of that expression tree is used to write into the output array. The translation from Numpy functions and arrayexpr operators to `parfor` can be disabled by setting ``parallel={'numpy': False}``.

   For reductions, the loop nest list is similarly created using the array analysis information for the array being reduced. In the init block, the initial value is assigned to the reduction variable. The loop body consists of a single block in which the next value in the iteration space is fetched and the reduction operation is applied to that value and the current reduction value, and the result is stored back into the reduction value. The translation of reduction functions to `parfor` can be disabled by setting ``parallel={'reduction': False}``.

   Setting the :envvar:`NUMBA_DEBUG_ARRAY_OPT_STATS` environment variable to 1 will show some statistics about parfor conversions in general.

#. Setitem to parfor

   Setting a range of array elements using a slice or boolean array selection can also run in parallel. A statement such as ``A[P] = B[Q]`` (or a simpler case ``A[P] = c``, where ``c`` is a scalar) is translated to `parfor` if one of the following conditions is met:

   a. ``P`` and ``Q`` are slices or multi-dimensional selectors involving scalars and slices, and ``A[P]`` and ``B[Q]`` are considered size equivalent by array analysis. Only 2-value slices/ranges are supported; a 3-value slice with a step will not be translated to `parfor`.
   #. ``P`` and ``Q`` are the same boolean array.

   This translation can be disabled by setting ``parallel={'setitem': False}``.

#. Simplification

   Performs a copy propagation and dead code elimination pass.

#. Fusion

   This sub-pass first processes each basic block and does a reordering of the instructions within the block, with the goal of pushing parfors lower in the block and lifting non-parfors towards the start of the block.
In practice, this approach does a good job of getting parfors adjacent to each other in the IR, which enables more parfors to then be fused. During parfor fusion, each basic block is repeatedly scanned until no further fusion is possible. During this scan, each set of adjacent instructions is considered. Adjacent instructions are fused together if:

a. they are both parfors
#. the parfors' loop nests are the same size and the array equivalence classes for each dimension of the loop nests are the same, and
#. the first parfor does not create a reduction variable used by the second parfor.

The two parfors are fused together by adding the second parfor's init block to the first's, merging the two parfors' loop bodies together and replacing the instances of the second parfor's loop index variables in the second parfor's body with the loop index variables for the first parfor.

Fusion can be disabled by setting ``parallel={'fusion': False}``.

Setting the :envvar:`NUMBA_DEBUG_ARRAY_OPT_STATS` environment variable to 1 will show some statistics about parfor fusions.

#. Push call objects and compute parfor parameters

   In the lowering phase described in Section :ref:`lowering`, each parfor becomes a separate function executed in parallel in ``guvectorize`` (:ref:`guvectorize`) style. Since parfors may use variables defined previously in a function, when those parfors become separate functions, those variables must be passed to the parfor function as parameters. In this sub-pass, a use-def scan is made over each parfor body and liveness information is used to determine which variables are used but not defined by the parfor. That list of variables is stored here in the parfor for use during lowering. Function variables are a special case in this process since function variables cannot be passed to functions compiled in nopython mode. Instead, for function variables, this sub-pass pushes the assignment instruction to the function variable into the parfor body so that those do not need to be passed as parameters.

To see the intermediate IR between the above sub-passes and other debugging information, set the :envvar:`NUMBA_DEBUG_ARRAY_OPT` environment variable to 1.
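For reference, the IR dumps in this section and in Section :ref:`rewrite-typed-ir` correspond to an array-expression function of roughly the following shape (a reconstruction for illustration only; the function name and call are hypothetical, not the actual test file used to produce the dumps)::

    import numpy as np
    from numba import njit

    @njit(parallel=True)
    def axpy_like(a0, a1, a2):
        # A multiply feeding an add over whole arrays: recognized as a
        # single fused array expression, then converted to a parfor.
        return a0 * a1 + a2

    axpy_like(np.ones(10), np.ones(10), np.ones(10))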
For the example in Section :ref:`rewrite-typed-ir`, the following IR with a parfor is generated during this stage:: ______________________________________________________________________ label 0: a0 = arg(0, name=a0) ['a0'] a0_sh_attr0.0 = getattr(attr=shape, value=a0) ['a0', 'a0_sh_attr0.0'] $consta00.1 = const(int, 0) ['$consta00.1'] a0size0.2 = static_getitem(value=a0_sh_attr0.0, index_var=$consta00.1, index=0) ['$consta00.1', 'a0_sh_attr0.0', 'a0size0.2'] a1 = arg(1, name=a1) ['a1'] a1_sh_attr0.3 = getattr(attr=shape, value=a1) ['a1', 'a1_sh_attr0.3'] $consta10.4 = const(int, 0) ['$consta10.4'] a1size0.5 = static_getitem(value=a1_sh_attr0.3, index_var=$consta10.4, index=0) ['$consta10.4', 'a1_sh_attr0.3', 'a1size0.5'] a2 = arg(2, name=a2) ['a2'] a2_sh_attr0.6 = getattr(attr=shape, value=a2) ['a2', 'a2_sh_attr0.6'] $consta20.7 = const(int, 0) ['$consta20.7'] a2size0.8 = static_getitem(value=a2_sh_attr0.6, index_var=$consta20.7, index=0) ['$consta20.7', 'a2_sh_attr0.6', 'a2size0.8'] ---begin parfor 0--- index_var = parfor_index.9 LoopNest(index_variable=parfor_index.9, range=0,a0size0.2,1 correlation=5) init block: $np_g_var.10 = global(np: ) ['$np_g_var.10'] $empty_attr_attr.11 = getattr(attr=empty, value=$np_g_var.10) ['$empty_attr_attr.11', '$np_g_var.10'] $np_typ_var.12 = getattr(attr=float64, value=$np_g_var.10) ['$np_g_var.10', '$np_typ_var.12'] $0.5 = call $empty_attr_attr.11(a0size0.2, $np_typ_var.12, kws=(), func=$empty_attr_attr.11, vararg=None, args=[Var(a0size0.2, test2.py (7)), Var($np_typ_var.12, test2.py (7))]) ['$0.5', '$empty_attr_attr.11', '$np_typ_var.12', 'a0size0.2'] label 1: $arg_out_var.15 = getitem(value=a0, index=parfor_index.9) ['$arg_out_var.15', 'a0', 'parfor_index.9'] $arg_out_var.16 = getitem(value=a1, index=parfor_index.9) ['$arg_out_var.16', 'a1', 'parfor_index.9'] $arg_out_var.14 = $arg_out_var.15 * $arg_out_var.16 ['$arg_out_var.14', '$arg_out_var.15', '$arg_out_var.16'] $arg_out_var.17 = getitem(value=a2, index=parfor_index.9) ['$arg_out_var.17', 'a2', 'parfor_index.9'] $expr_out_var.13 = $arg_out_var.14 + $arg_out_var.17 ['$arg_out_var.14', '$arg_out_var.17', '$expr_out_var.13'] $0.5[parfor_index.9] = $expr_out_var.13 ['$0.5', '$expr_out_var.13', 'parfor_index.9'] ----end parfor 0---- $0.6 = cast(value=$0.5) ['$0.5', '$0.6'] return $0.6 ['$0.6'] ______________________________________________________________________ .. _`lowering`: Stage 6a: Generate nopython LLVM IR ----------------------------------- If type inference succeeds in finding a Numba type for every intermediate variable, then Numba can (potentially) generate specialized native code. This process is called :term:`lowering`. The Numba IR tree is translated into LLVM IR by using helper classes from `llvmlite `_. The machine-generated LLVM IR can seem unnecessarily verbose, but the LLVM toolchain is able to optimize it quite easily into compact, efficient code. The basic lowering algorithm is generic, but the specifics of how particular Numba IR nodes are translated to LLVM instructions is handled by the target context selected for compilation. The default target context is the "cpu" context, defined in ``numba.targets.cpu``. The LLVM IR can be displayed by setting the :envvar:`NUMBA_DUMP_LLVM` environment variable to 1. For the "cpu" context, our ``add()`` example would look like: .. 
code-block:: llvm

    define i32 @"__main__.add$1.int64.int64"(i64* %"retptr",
                                             {i8*, i32}** %"excinfo",
                                             i8* %"env",
                                             i64 %"arg.a", i64 %"arg.b")
    {
    entry:
      %"a" = alloca i64
      %"b" = alloca i64
      %"$0.3" = alloca i64
      %"$0.4" = alloca i64
      br label %"B0"
    B0:
      store i64 %"arg.a", i64* %"a"
      store i64 %"arg.b", i64* %"b"
      %".8" = load i64* %"a"
      %".9" = load i64* %"b"
      %".10" = add i64 %".8", %".9"
      store i64 %".10", i64* %"$0.3"
      %".12" = load i64* %"$0.3"
      store i64 %".12", i64* %"$0.4"
      %".14" = load i64* %"$0.4"
      store i64 %".14", i64* %"retptr"
      ret i32 0
    }

The post-optimization LLVM IR can be output by setting :envvar:`NUMBA_DUMP_OPTIMIZED` to 1. The optimizer shortens the code generated above quite significantly:

.. code-block:: llvm

    define i32 @"__main__.add$1.int64.int64"(i64* nocapture %retptr,
                                             { i8*, i32 }** nocapture readnone %excinfo,
                                             i8* nocapture readnone %env,
                                             i64 %arg.a, i64 %arg.b)
    {
    entry:
      %.10 = add i64 %arg.b, %arg.a
      store i64 %.10, i64* %retptr, align 8
      ret i32 0
    }

If created during :ref:`parallel-accelerator`, parfor operations are lowered in the following manner. First, instructions in the parfor's init block are lowered into the existing function using the normal lowering code. Second, the loop body of the parfor is turned into a separate GUFunc. Third, code is emitted for the current function to call the parallel GUFunc.

To create a GUFunc from the parfor body, the signature of the GUFunc is created by taking the parfor parameters as identified in step 9 of Stage :ref:`parallel-accelerator` and adding to that a special `schedule` parameter, across which the GUFunc will be parallelized. The schedule parameter is in effect a static schedule mapping portions of the parfor iteration space to Numba threads, and so the length of the schedule array is the same as the number of configured Numba threads. To make this process easier and somewhat less dependent on changes to Numba IR, this stage creates a Python function as text that contains the parameters to the GUFunc and iteration code that takes the current schedule entry and loops through the specified portion of the iteration space. In the body of that loop, a special sentinel is inserted for subsequent easy location. This code that handles the processing of the iteration space is then ``eval``'ed into existence and the Numba compiler's run_frontend function is called to generate IR. That IR is scanned to locate the sentinel and the sentinel is replaced with the loop body of the parfor. Then, the process of creating the parallel GUFunc is completed by compiling this merged IR with the Numba compiler's ``compile_ir`` function.

To call the parallel GUFunc, the static schedule must be created. Code is inserted to call a function named ``do_scheduling``. This function is called with the size of each of the parfor's dimensions and the number `N` of configured Numba threads (:envvar:`NUMBA_NUM_THREADS`). The ``do_scheduling`` function will divide the iteration space into N approximately equal sized regions (linear for 1D, rectangular for 2D, or hyperrectangles for 3+D) and the resulting schedule is passed to the parallel GUFunc. The number of threads dedicated to a given dimension of the full iteration space is roughly proportional to the ratio of the size of the given dimension to the sum of the sizes of all the dimensions of the iteration space.

Parallel reductions are not natively provided by GUFuncs, but the parfor lowering strategy allows us to use GUFuncs in a way that reductions can be performed in parallel.
To accomplish this, for each reduction variable computed by a parfor, the parallel GUFunc and the code that calls it are modified to make the scalar reduction variable into an array of reduction variables whose length is equal to the number of Numba threads. In addition, the GUFunc still contains a scalar version of the reduction variable that is updated by the parfor body during each iteration. One time at the end of the GUFunc this local reduction variable is copied into the reduction array. In this way, false sharing of the reduction array is prevented. Code is also inserted into the main function after the parallel GUFunc has returned that does a reduction across this smaller reduction array and this final reduction value is then stored into the original scalar reduction variable. The GUFunc corresponding to the example from Section :ref:`parallel-accelerator` can be seen below:: ______________________________________________________________________ label 0: sched.29 = arg(0, name=sched) ['sched.29'] a0 = arg(1, name=a0) ['a0'] a1 = arg(2, name=a1) ['a1'] a2 = arg(3, name=a2) ['a2'] _0_5 = arg(4, name=_0_5) ['_0_5'] $3.1.24 = global(range: ) ['$3.1.24'] $const3.3.21 = const(int, 0) ['$const3.3.21'] $3.4.23 = getitem(value=sched.29, index=$const3.3.21) ['$3.4.23', '$const3.3.21', 'sched.29'] $const3.6.28 = const(int, 1) ['$const3.6.28'] $3.7.27 = getitem(value=sched.29, index=$const3.6.28) ['$3.7.27', '$const3.6.28', 'sched.29'] $const3.8.32 = const(int, 1) ['$const3.8.32'] $3.9.31 = $3.7.27 + $const3.8.32 ['$3.7.27', '$3.9.31', '$const3.8.32'] $3.10.36 = call $3.1.24($3.4.23, $3.9.31, kws=[], func=$3.1.24, vararg=None, args=[Var($3.4.23, (2)), Var($3.9.31, (2))]) ['$3.1.24', '$3.10.36', '$3.4.23', '$3.9.31'] $3.11.30 = getiter(value=$3.10.36) ['$3.10.36', '$3.11.30'] jump 1 [] label 1: $28.2.35 = iternext(value=$3.11.30) ['$28.2.35', '$3.11.30'] $28.3.25 = pair_first(value=$28.2.35) ['$28.2.35', '$28.3.25'] $28.4.40 = pair_second(value=$28.2.35) ['$28.2.35', '$28.4.40'] branch $28.4.40, 2, 3 ['$28.4.40'] label 2: $arg_out_var.15 = getitem(value=a0, index=$28.3.25) ['$28.3.25', '$arg_out_var.15', 'a0'] $arg_out_var.16 = getitem(value=a1, index=$28.3.25) ['$28.3.25', '$arg_out_var.16', 'a1'] $arg_out_var.14 = $arg_out_var.15 * $arg_out_var.16 ['$arg_out_var.14', '$arg_out_var.15', '$arg_out_var.16'] $arg_out_var.17 = getitem(value=a2, index=$28.3.25) ['$28.3.25', '$arg_out_var.17', 'a2'] $expr_out_var.13 = $arg_out_var.14 + $arg_out_var.17 ['$arg_out_var.14', '$arg_out_var.17', '$expr_out_var.13'] _0_5[$28.3.25] = $expr_out_var.13 ['$28.3.25', '$expr_out_var.13', '_0_5'] jump 1 [] label 3: $const44.1.33 = const(NoneType, None) ['$const44.1.33'] $44.2.39 = cast(value=$const44.1.33) ['$44.2.39', '$const44.1.33'] return $44.2.39 ['$44.2.39'] ______________________________________________________________________ Stage 6b: Generate object mode LLVM IR -------------------------------------- If type inference fails to find Numba types for all values inside a function, the function will be compiled in object mode. The generated LLVM will be significantly longer, as the compiled code will need to make calls to the `Python C API `_ to perform basically all operations. The optimized LLVM for our example ``add()`` function is: .. 
code-block:: llvm @PyExc_SystemError = external global i8 @".const.Numba_internal_error:_object_mode_function_called_without_an_environment" = internal constant [73 x i8] c"Numba internal error: object mode function called without an environment\00" @".const.name_'a'_is_not_defined" = internal constant [24 x i8] c"name 'a' is not defined\00" @PyExc_NameError = external global i8 @".const.name_'b'_is_not_defined" = internal constant [24 x i8] c"name 'b' is not defined\00" define i32 @"__main__.add$1.pyobject.pyobject"(i8** nocapture %retptr, { i8*, i32 }** nocapture readnone %excinfo, i8* readnone %env, i8* %arg.a, i8* %arg.b) { entry: %.6 = icmp eq i8* %env, null br i1 %.6, label %entry.if, label %entry.endif, !prof !0 entry.if: ; preds = %entry tail call void @PyErr_SetString(i8* @PyExc_SystemError, i8* getelementptr inbounds ([73 x i8]* @".const.Numba_internal_error:_object_mode_function_called_without_an_environment", i64 0, i64 0)) ret i32 -1 entry.endif: ; preds = %entry tail call void @Py_IncRef(i8* %arg.a) tail call void @Py_IncRef(i8* %arg.b) %.21 = icmp eq i8* %arg.a, null br i1 %.21, label %B0.if, label %B0.endif, !prof !0 B0.if: ; preds = %entry.endif tail call void @PyErr_SetString(i8* @PyExc_NameError, i8* getelementptr inbounds ([24 x i8]* @".const.name_'a'_is_not_defined", i64 0, i64 0)) tail call void @Py_DecRef(i8* null) tail call void @Py_DecRef(i8* %arg.b) ret i32 -1 B0.endif: ; preds = %entry.endif %.30 = icmp eq i8* %arg.b, null br i1 %.30, label %B0.endif1, label %B0.endif1.1, !prof !0 B0.endif1: ; preds = %B0.endif tail call void @PyErr_SetString(i8* @PyExc_NameError, i8* getelementptr inbounds ([24 x i8]* @".const.name_'b'_is_not_defined", i64 0, i64 0)) tail call void @Py_DecRef(i8* %arg.a) tail call void @Py_DecRef(i8* null) ret i32 -1 B0.endif1.1: ; preds = %B0.endif %.38 = tail call i8* @PyNumber_Add(i8* %arg.a, i8* %arg.b) %.39 = icmp eq i8* %.38, null br i1 %.39, label %B0.endif1.1.if, label %B0.endif1.1.endif, !prof !0 B0.endif1.1.if: ; preds = %B0.endif1.1 tail call void @Py_DecRef(i8* %arg.a) tail call void @Py_DecRef(i8* %arg.b) ret i32 -1 B0.endif1.1.endif: ; preds = %B0.endif1.1 tail call void @Py_DecRef(i8* %arg.b) tail call void @Py_DecRef(i8* %arg.a) tail call void @Py_IncRef(i8* %.38) tail call void @Py_DecRef(i8* %.38) store i8* %.38, i8** %retptr, align 8 ret i32 0 } declare void @PyErr_SetString(i8*, i8*) declare void @Py_IncRef(i8*) declare void @Py_DecRef(i8*) declare i8* @PyNumber_Add(i8*, i8*) The careful reader might notice several unnecessary calls to ``Py_IncRef`` and ``Py_DecRef`` in the generated code. Currently Numba isn't able to optimize those away. Object mode compilation will also attempt to identify loops which can be extracted and statically-typed for "nopython" compilation. This process is called *loop-lifting*, and results in the creation of a hidden nopython mode function just containing the loop which is then called from the original function. Loop-lifting helps improve the performance of functions that need to access uncompilable code (such as I/O or plotting code) but still contain a time-intensive section of compilable code. Stage 7: Compile LLVM IR to machine code ---------------------------------------- In both :term:`object mode` and :term:`nopython mode`, the generated LLVM IR is compiled by the LLVM JIT compiler and the machine code is loaded into memory. 
A Python wrapper is also created (defined in ``numba.dispatcher.Dispatcher``) which can do the dynamic dispatch to the correct version of the compiled function if multiple type specializations were generated (for example, for both ``float32`` and ``float64`` versions of the same function). The machine assembly code generated by LLVM can be dumped to the screen by setting the :envvar:`NUMBA_DUMP_ASSEMBLY` environment variable to 1: .. code-block:: gas .globl __main__.add$1.int64.int64 .align 16, 0x90 .type __main__.add$1.int64.int64,@function __main__.add$1.int64.int64: addq %r8, %rcx movq %rcx, (%rdi) xorl %eax, %eax retq The assembly output will also include the generated wrapper function that translates the Python arguments to native data types. numba-0.55.1/docs/source/developer/caching.rst000664 000000 000000 00000010016 14174536160 021234 0ustar00rootroot000000 000000 .. _developer-caching: ================ Notes on Caching ================ Numba supports caching of compiled functions into the filesystem for future use of the same functions. The Implementation ================== Caching is done by saving the compiled *object code*, the ELF object of the executable code. By using the *object code*, cached functions have minimal overhead because no compilation is needed. The cached data is saved under the cache directory (see :envvar:`NUMBA_CACHE_DIR`). The index of the cache is stored in a ``.nbi`` file, with one index per function, and it lists all the overloaded signatures compiled for the function. The *object code* is stored in files with an ``.nbc`` extension, one file per overload. The data in both files is serialized with :mod:`pickle`. .. note:: On Python <=3.7, Numba extends ``pickle`` using the pure-Python pickler. To use the faster C pickler, install ``pickle5`` from ``pip``. ``pickle5`` backports Python 3.8 pickler features. Requirements for Cacheability ----------------------------- Developers should note the requirements of a function to permit it to be cached, to ensure that the features they are working on are compatible with caching. Requirements for a cacheable function: - The LLVM module must be *self-contained*, meaning that it cannot rely on other compiled units without linking to them. - The only allowed external symbols are from the :ref:`NRT ` or other common symbols from system libraries (i.e. libc and libm). Debugging notes: - Look for the usage of ``inttoptr`` in the LLVM IR or ``target_context.add_dynamic_addr()`` in the lowering code in Python. They indicate potential usage of a runtime address. Not all uses are problematic and some are necessary. Only the conversion of constant integers into pointers will affect caching. - Misuse of a dynamic address or dynamic symbols will likely result in a segfault. - Linking order matters because unused symbols are dropped after linking. Linking should start from the leaf nodes of the dependency graph. Features Compatible with Caching -------------------------------- The following features are explicitly verified to work with caching: - ufuncs and gufuncs for the ``cpu`` and ``parallel`` targets - parallel accelerator features (i.e. ``parallel=True``) Caching Limitations ------------------- This is a list of known limitations of the cache: - Cache invalidation fails to recognize changes in symbols defined in a different file. - Global variables are treated as constants. The cache will remember the value of the global variable used at compilation.
On cache load, the cached function will not rebind to the new value of the global variable. .. _cache-sharing: Cache Sharing ------------- It is safe to share and reuse the contents of the cache directory on a different machine. The cache remembers the CPU model and the available CPU features during compilation. If the CPU model and the CPU features do not match exactly, the cache contents will not be considered. (Also see :envvar:`NUMBA_CPU_NAME`.) If the cache directory is shared on a network filesystem, concurrent read/write of the cache is safe only if the file-replacement operation is atomic for the filesystem. Numba always writes to a unique temporary file first and then replaces the target cache file path with the temporary file. Numba is tolerant of lost cache files and lost cache entries. .. _cache-clearing: Cache Clearing -------------- The cache is invalidated when the corresponding source file is modified. However, it is sometimes necessary to clear the cache directory manually. For instance, changes in the compiler will not be recognized because the source files are not modified. To clear the cache, the cache directory can simply be removed. Removing the cache directory while a Numba application is running may cause an ``OSError`` exception to be raised at the compilation site. Related Environment Variables ----------------------------- See :ref:`env-vars for caching `. numba-0.55.1/docs/source/developer/compiler_pass_example.py000664 000000 000000 00000006303 14174536160 024037 0ustar00rootroot000000 000000 def ex_compiler_pass(): # magictoken.ex_compiler_pass.begin from numba import njit from numba.core import ir from numba.core.compiler import CompilerBase, DefaultPassBuilder from numba.core.compiler_machinery import FunctionPass, register_pass from numba.core.untyped_passes import IRProcessing from numbers import Number # Register this pass with the compiler framework, declare that it will not # mutate the control flow graph and that it is not an analysis_only pass (it # potentially mutates the IR). @register_pass(mutates_CFG=False, analysis_only=False) class ConstsAddOne(FunctionPass): _name = "consts_add_one" # the common name for the pass def __init__(self): FunctionPass.__init__(self) # implement method to do the work, "state" is the internal compiler # state from the CompilerBase instance. def run_pass(self, state): func_ir = state.func_ir # get the FunctionIR object mutated = False # used to record whether this pass mutates the IR # walk the blocks for blk in func_ir.blocks.values(): # find the assignment nodes in the block and walk them for assgn in blk.find_insts(ir.Assign): # if an assignment's value is an ir.Const if isinstance(assgn.value, ir.Const): const_val = assgn.value # if the value of the ir.Const is a Number if isinstance(const_val.value, Number): # then add one! const_val.value += 1 mutated |= True return mutated # return True if the IR was mutated, False if not. # magictoken.ex_compiler_pass.end # magictoken.ex_compiler_defn.begin class MyCompiler(CompilerBase): # custom compiler extends from CompilerBase def define_pipelines(self): # define a new set of pipelines (just one in this case) and for ease # base it on an existing pipeline from the DefaultPassBuilder, # namely the "nopython" pipeline pm = DefaultPassBuilder.define_nopython_pipeline(self.state) # Add the new pass to run after IRProcessing pm.add_pass_after(ConstsAddOne, IRProcessing) # finalize pm.finalize() # return as an iterable, any number of pipelines may be defined!
return [pm] # magictoken.ex_compiler_defn.end # magictoken.ex_compiler_call.begin @njit(pipeline_class=MyCompiler) # JIT compile using the custom compiler def foo(x): a = 10 b = 20.2 c = x + a + b return c print(foo(100)) # 100 + 10 + 20.2 (+ 1 + 1), extra + 1 + 1 from the rewrite! # magictoken.ex_compiler_call.end # magictoken.ex_compiler_timings.begin compile_result = foo.overloads[foo.signatures[0]] nopython_times = compile_result.metadata['pipeline_times']['nopython'] for k in nopython_times.keys(): if ConstsAddOne._name in k: print(nopython_times[k]) # magictoken.ex_compiler_timings.end assert foo(100) == 132.2 ex_compiler_pass() numba-0.55.1/docs/source/developer/contributing.rst000664 000000 000000 00000044472 14174536160 022364 0ustar00rootroot000000 000000 Contributing to Numba ===================== We welcome people who want to make contributions to Numba, big or small! Even simple documentation improvements are encouraged. If you have questions, don't hesitate to ask them (see below). Communication ------------- Real-time Chat '''''''''''''' Numba uses Gitter for public real-time chat. To help improve the signal-to-noise ratio, we have two channels: * `numba/numba `_: General Numba discussion, questions, and debugging help. * `numba/numba-dev `_: Discussion of PRs, planning, release coordination, etc. Both channels are public, but we may ask that discussions on numba-dev move to the numba channel. This is simply to ensure that numba-dev is easy for core developers to keep up with. Note that the Github issue tracker is the best place to report bugs. Bug reports in chat are difficult to track and likely to be lost. Forum ..... Numba uses Discourse as a forum for longer running threads such as design discussions and roadmap planning. There are various categories available and it can be reached at: `numba.discourse.group `_. Weekly Meetings ''''''''''''''' The core Numba developers have a weekly video conference to discuss roadmap, feature planning, and outstanding issues. These meetings are entirely public, details are posted on `numba.discourse.group Announcements `_ and everyone is welcome to join the discussion. Minutes will be taken and will be posted to the `Numba wiki `_. .. _report-numba-bugs: Bug tracker '''''''''''' We use the `Github issue tracker `_ to track both bug reports and feature requests. If you report an issue, please include specifics: * what you are trying to do; * which operating system you have and which version of Numba you are running; * how Numba is misbehaving, e.g. the full error traceback, or the unexpected results you are getting; * as far as possible, a code snippet that allows full reproduction of your problem. Getting set up -------------- If you want to contribute, we recommend you fork our `Github repository `_, then create a branch representing your work. When your work is ready, you should submit it as a pull request from the Github interface. If you want, you can submit a pull request even when you haven't finished working. This can be useful to gather feedback, or to stress your changes against the :ref:`continuous integration ` platform. In this case, please prepend ``[WIP]`` to your pull request's title. .. _buildenv: Build environment ''''''''''''''''' Numba has a number of dependencies (mostly `NumPy `_ and `llvmlite `_) with non-trivial build instructions. 
Unless you want to build those dependencies yourself, we recommend you use `conda `_ to create a dedicated development environment and install precompiled versions of those dependencies there. First add the Anaconda Cloud ``numba`` channel so as to get development builds of the llvmlite library:: $ conda config --add channels numba Then create an environment with the right dependencies:: $ conda create -n numbaenv python=3.8 llvmlite numpy scipy jinja2 cffi .. note:: This installs an environment based on Python 3.8, but you can of course choose another version supported by Numba. To test additional features, you may also need to install ``tbb`` and/or ``llvm-openmp`` and ``intel-openmp``. To activate the environment for the current shell session:: $ conda activate numbaenv .. note:: These instructions are for a standard Linux shell. You may need to adapt them for other platforms. Once the environment is activated, you have a dedicated Python with the required dependencies:: $ python Python 3.8.5 (default, Sep 4 2020, 07:30:14) [GCC 7.3.0] :: Anaconda, Inc. on linux Type "help", "copyright", "credits" or "license" for more information. >>> import llvmlite >>> llvmlite.__version__ '0.35.0' Building Numba '''''''''''''' For a convenient development workflow, we recommend you build Numba inside its source checkout:: $ git clone git://github.com/numba/numba.git $ cd numba $ python setup.py build_ext --inplace This assumes you have a working C compiler and runtime on your development system. You will have to run this command again whenever you modify C files inside the Numba source tree. The ``build_ext`` command in Numba's setup also accepts the following arguments: - ``--noopt``: This disables optimization when compiling Numba's CPython extensions, which makes debugging them much easier. Recommended in conjunction with the standard ``build_ext`` option ``--debug``. - ``--werror``: Compiles Numba's CPython extensions with the ``-Werror`` flag. - ``--wall``: Compiles Numba's CPython extensions with the ``-Wall`` flag. Note that Numba's CI and the conda recipe for Linux build with the ``--werror`` and ``--wall`` flags, so any contributions that change the CPython extensions should be tested with these flags too. Running tests ''''''''''''' Numba is validated using a test suite comprised of various kind of tests (unit tests, functional tests). The test suite is written using the standard :py:mod:`unittest` framework. The tests can be executed via ``python -m numba.runtests``. If you are running Numba from a source checkout, you can type ``./runtests.py`` as a shortcut. Various options are supported to influence test running and reporting. Pass ``-h`` or ``--help`` to get a glimpse at those options. Examples: * to list all available tests:: $ python -m numba.runtests -l * to list tests from a specific (sub-)suite:: $ python -m numba.runtests -l numba.tests.test_usecases * to run those tests:: $ python -m numba.runtests numba.tests.test_usecases * to run all tests in parallel, using multiple sub-processes:: $ python -m numba.runtests -m * For a detailed list of all options:: $ python -m numba.runtests -h The numba test suite can take a long time to complete. 
When you want to avoid the long wait, it is useful to focus on the failing tests first with the following test runner options: * The ``--failed-first`` option is added to capture the list of failed tests and to re-execute them first:: $ python -m numba.runtests --failed-first -m -v -b * The ``--last-failed`` option is used with ``--failed-first`` to execute the previously failed tests only:: $ python -m numba.runtests --last-failed -m -v -b When debugging, it is useful to turn on logging. Numba logs using the standard ``logging`` module. One can use the standard ways (i.e. ``logging.basicConfig``) to configure the logging behavior. To enable logging in the test runner, there is a ``--log`` flag for convenience:: $ python -m numba.runtests --log To enable :ref:`runtime type-checking `, set the environment variable ``NUMBA_USE_TYPEGUARD=1`` and use `runtests.py` from the source root instead. For example:: $ NUMBA_USE_TYPEGUARD=1 python runtests.py Development rules ----------------- Code reviews '''''''''''' Any non-trivial change should go through a code review by one or several of the core developers. The recommended process is to submit a pull request on GitHub. A code review should try to assess the following criteria: * general design and correctness * code structure and maintainability * coding conventions * docstrings, comments * test coverage Coding conventions '''''''''''''''''' All Python code should follow :pep:`8`. Our C code doesn't have a well-defined coding style (would it be nice to follow :pep:`7`?). Code and documentation should generally fit within 80 columns, for maximum readability with all existing tools (such as code review UIs). Numba uses `Flake8 `_ to ensure a consistent Python code format throughout the project. ``flake8`` can be installed with ``pip`` or ``conda`` and then run from the root of the Numba repository:: flake8 numba Optionally, you may wish to set up `pre-commit hooks `_ to automatically run ``flake8`` when you make a git commit. This can be done by installing ``pre-commit``:: pip install pre-commit and then running:: pre-commit install from the root of the Numba repository. Now ``flake8`` will be run each time you commit changes. You can skip this check with ``git commit --no-verify``. Numba has started the process of using `type hints `_ in its code base. This will be a gradual process of extending the number of files that use type hints, as well as going from voluntary to mandatory type hints for new features. `Mypy `_ is used for automated static checking. At the moment, only certain files are checked by mypy. The list can be found in ``mypy.ini``. When making changes to those files, it is necessary to add the required type hints such that mypy tests will pass. Only in exceptional circumstances should ``type: ignore`` comments be used. If you are contributing a new feature, we encourage you to use type hints, even if the file is not currently in the checklist. If you want to contribute type hints to enable a new file to be in the checklist, please add the file to the ``files`` variable in ``mypy.ini``, and decide what level of compliance you are targeting. Level 3 is basic static checks, while levels 2 and 1 represent stricter checking. The levels are described in detail in ``mypy.ini``. There is potential for confusion between the Numba module ``typing`` and the Python built-in module ``typing`` used for type hints, as well as between Numba types---such as ``Dict`` or ``Literal``---and ``typing`` types of the same name.
To mitigate the risk of confusion we use a naming convention by which objects of the built-in ``typing`` module are imported with an ``pt`` prefix. For example, ``typing.Dict`` is imported as ``from typing import Dict as ptDict``. Stability ''''''''' The repository's ``master`` branch is expected to be stable at all times. This translates into the fact that the test suite passes without errors on all supported platforms (see below). This also means that a pull request also needs to pass the test suite before it is merged in. .. _platform_support: Platform support '''''''''''''''' Every commit to the master branch is automatically tested on all of the platforms Numba supports. This includes ARMv8, POWER8, and NVIDIA GPUs. The build system however is internal to Anaconda, so we also use `Azure `_ to provide public continuous integration information for as many combinations as can be supported by the service. Azure CI automatically tests all pull requests on Windows, OS X and Linux, as well as a sampling of different Python and NumPy versions. If you see problems on platforms you are unfamiliar with, feel free to ask for help in your pull request. The Numba core developers can help diagnose cross-platform compatibility issues. Also see the :ref:`continuous integration ` section on how public CI is implemented. .. _continuous_integration_testing: Continuous integration testing '''''''''''''''''''''''''''''' The Numba test suite causes CI systems a lot of grief: #. It's huge, 9000+ tests. #. In part because of 1. and that compilers are pretty involved, the test suite takes a long time to run. #. There's sections of the test suite that are deliberately designed to stress systems almost to the point of failure (tests which concurrently compile and execute with threads and fork processes etc). #. The combination of things that Numba has to test well exceeds the capacity of any public CI system, (Python versions x NumPy versions x Operating systems x Architectures x feature libraries (e.g. SVML) x threading backends (e.g. OpenMP, TBB)) and then there's CUDA too and all its version variants. As a result of the above, public CI is implemented as follows: #. The combination of OS x Python x NumPy x Various Features in the testing matrix is designed to give a good indicative result for whether "this pull request is probably ok". #. When public CI runs it: #. Looks for files that contain tests that have been altered by the proposed change and runs these on the whole testing matrix. #. Runs a subset of the test suite on each part of the testing matrix. i.e. slice the test suite up by the number of combinations in the testing matrix and each combination runs one chunk. This is done for speed, because public CI cannot cope with the load else. If a Pull Request (PR) changes CUDA code or will affect the CUDA target, it needs to be run on `gpuCI `_. This can be triggered by one of the Numba maintainers commenting ``run gpuCI tests`` on the PR discussion. This runs the CUDA testsuite with various CUDA toolkit versions on Linux, to provide some initial confidence in the correctness of the changes with respect to CUDA. Following approval, the PR will also be run on Numba's build farm to test other configurations with CUDA (including Windows, which is not tested by gpuCI). If the PR is not CUDA-related but makes changes to something that the core developers consider risky, then it will also be run on the Numba farm just to make sure. 
The Numba project's private build and test farm will actually exercise all the applicable tests on all the combinations noted above on real hardware! .. _type_anno_check: Type annotation and runtime type checking ''''''''''''''''''''''''''''''''''''''''' Numba is slowly gaining type annotations. To facilitate the review of pull requests that are incrementally adding type annotations, the test suite uses `typeguard`_ to perform runtime type checking. This helps verify the validity of type annotations. To enable runtime type checking in the test suite, users can use `runtests.py`_ in the source root as the test runner and set environment variable ``NUMBA_USE_TYPEGUARD=1``. For example:: $ NUMBA_USE_TYPEGUARD=1 python runtests.py numba.tests Things that help with pull requests ''''''''''''''''''''''''''''''''''' Even with the mitigating design above public CI can get overloaded which causes a backlog of builds. It's therefore really helpful when opening pull requests if you can limit the frequency of pushing changes. Ideally, please squash commits to reduce the number of patches and/or push as infrequently as possible. Also, once a pull request review has started, please don't rebase/force push/squash or do anything that rewrites history of the reviewed code as GitHub cannot track this and it makes it very hard for reviewers to see what has changed. The core developers thank everyone for their cooperation with the above! Why is my pull request/issue seemingly being ignored? ''''''''''''''''''''''''''''''''''''''''''''''''''''' Numba is an open source project and like many similar projects it has limited resources. As a result, it is unfortunately necessary for the core developers to associate a priority with issues/pull requests (PR). A great way to move your issue/PR up the priority queue is to help out somewhere else in the project so as to free up core developer time. Examples of ways to help: * Perform an initial review on a PR. This often doesn't require compiler engineering knowledge and just involves checking that the proposed patch is of good quality, fixes the problem/implements the feature, is well tested and documented. * Debug an issue, there are numerous issues which `"need triage" `_ which essentially involves debugging the reported problem. Even if you cannot get right to the bottom of a problem, leaving notes about what was discovered for someone else is also helpful. * Answer questions/provide help for users on `discourse `_ and/or `gitter.im `_. The core developers thank everyone for their understanding with the above! Documentation ------------- The Numba documentation is split over two repositories: * This documentation is in the ``docs`` directory inside the `Numba repository `_. * The `Numba homepage `_ has its sources in a separate repository at https://github.com/numba/numba-webpage Main documentation '''''''''''''''''' This documentation is under the ``docs`` directory of the `Numba repository`_. It is built with `Sphinx `_ and `numpydoc `_, which are available using conda or pip; i.e. ``conda install sphinx numpydoc``. 
To build the documentation, you need the bootstrap theme:: $ pip install sphinx_bootstrap_theme You can edit the source files under ``docs/source/``, after which you can build and check the documentation:: $ make html $ open _build/html/index.html Core developers can upload this documentation to the Numba website at https://numba.pydata.org by using the ``gh-pages.py`` script under ``docs``:: $ python gh-pages.py version # version can be 'dev' or '0.16' etc then verify the repository under the ``gh-pages`` directory and use ``git push``. Web site homepage ''''''''''''''''' The Numba homepage on https://numba.pydata.org can be fetched from here: https://github.com/numba/numba-webpage After pushing documentation to a new version, core developers will want to update the website. Some notable files: * ``index.rst`` # Update main page * ``_templates/sidebar_versions.html`` # Update sidebar links * ``doc.rst`` # Update after adding a new version for numba docs * ``download.rst`` # Update after uploading a new numba version to PyPI After updating run:: $ make html and check out ``_build/html/index.html``. To push updates to the Web site:: $ python _scripts/gh-pages.py then verify the repository under the ``gh-pages`` directory. Make sure the ``CNAME`` file is present and contains a single line for ``numba.pydata.org``. Finally, use ``git push`` to update the website. .. _typeguard: https://typeguard.readthedocs.io/en/latest/ .. _runtests.py: https://github.com/numba/numba/blob/master/runtests.py numba-0.55.1/docs/source/developer/custom_pipeline.rst000664 000000 000000 00000015647 14174536160 023052 0ustar00rootroot000000 000000 .. _arch-pipeline: ======================== Customizing the Compiler ======================== .. warning:: The custom pipeline feature is for expert use only. Modifying the compiler behavior can invalidate internal assumptions in the numba source code. Library developers looking for a way to extend or modify the compiler behavior can do so by defining a custom compiler that inherits from ``numba.compiler.CompilerBase``. The default Numba compiler is defined as ``numba.compiler.Compiler``, implementing the ``.define_pipelines()`` method, which adds the *nopython-mode*, *object-mode* and *interpreted-mode* pipelines. For convenience these three pipelines are defined in ``numba.compiler.DefaultPassBuilder`` by the methods: * ``.define_nopython_pipeline()`` * ``.define_objectmode_pipeline()`` * ``.define_interpreted_pipeline()`` respectively. To use a custom subclass of ``CompilerBase``, supply it as the ``pipeline_class`` keyword argument to the ``@jit`` and ``@generated_jit`` decorators. By doing so, the effect of the custom pipeline is limited to the function being decorated. Implementing a compiler pass ---------------------------- Numba makes it possible to implement a new compiler pass and does so through the use of an API similar to that of LLVM. The following demonstrates the basic process involved. Compiler pass classes ##################### All passes must inherit from ``numba.compiler_machinery.CompilerPass``; commonly used subclasses are: * ``numba.compiler_machinery.FunctionPass`` for describing a pass that operates on a function-at-once level and may mutate the IR state. * ``numba.compiler_machinery.AnalysisPass`` for describing a pass that performs analysis only. * ``numba.compiler_machinery.LoweringPass`` for describing a pass that performs lowering only.
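Before working through the full example below, the choice of base class can be illustrated with a minimal skeleton for an analysis-only pass (a sketch only: the pass name, class name and block-walking logic are illustrative and are not part of the example developed in this document):

.. code-block:: python

    from numba.core import ir
    from numba.core.compiler_machinery import AnalysisPass, register_pass

    # analysis_only=True declares that this pass never mutates the IR.
    @register_pass(mutates_CFG=False, analysis_only=True)
    class CountConsts(AnalysisPass):
        _name = "count_consts"  # illustrative pass name

        def __init__(self):
            AnalysisPass.__init__(self)

        def run_pass(self, state):
            # Walk the function IR and count constant assignments.
            count = 0
            for blk in state.func_ir.blocks.values():
                for assgn in blk.find_insts(ir.Assign):
                    if isinstance(assgn.value, ir.Const):
                        count += 1
            print(count, "constant assignments found")
            return False  # analysis only: the IR was not mutated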
In this example a new compiler pass will be implemented that will rewrite all ``ir.Const(x)`` nodes, where ``x`` is a subclass of ``numbers.Number``, such that the value of x is incremented by one. There is no use for this pass other than to serve as a pedagogical vehicle! The ``numba.compiler_machinery.FunctionPass`` is appropriate for the suggested pass behavior and so is the base class of the new pass. Further, a ``run_pass`` method is defined to do the work (this method is abstract, all compiler passes must implement it). First the new class: .. literalinclude:: compiler_pass_example.py :language: python :dedent: 4 :start-after: magictoken.ex_compiler_pass.begin :end-before: magictoken.ex_compiler_pass.end Note also that the class must be registered with Numba's compiler machinery using ``@register_pass``. This in part is to allow the declaration of whether the pass mutates the control flow graph and whether it is an analysis only pass. Next, define a new compiler based on the existing ``numba.compiler.CompilerBase``. The compiler pipeline is defined through the use of an existing pipeline and the new pass declared above is added to be run after the ``IRProcessing`` pass. .. literalinclude:: compiler_pass_example.py :language: python :dedent: 4 :start-after: magictoken.ex_compiler_defn.begin :end-before: magictoken.ex_compiler_defn.end Finally update the ``@njit`` decorator at the call site to make use of the newly defined compilation pipeline. .. literalinclude:: compiler_pass_example.py :language: python :dedent: 4 :start-after: magictoken.ex_compiler_call.begin :end-before: magictoken.ex_compiler_call.end Debugging compiler passes ------------------------- Observing IR Changes #################### It is often useful to be able to see the changes a pass makes to the IR. Numba conveniently permits this through the use of the environment variable :envvar:`NUMBA_DEBUG_PRINT_AFTER`. In the case of the above pass, running the example code with ``NUMBA_DEBUG_PRINT_AFTER="ir_processing,consts_add_one"`` gives: .. code-block:: none :emphasize-lines: 4, 7, 24, 27 ----------------------------nopython: ir_processing----------------------------- label 0: x = arg(0, name=x) ['x'] $const0.1 = const(int, 10) ['$const0.1'] a = $const0.1 ['$const0.1', 'a'] del $const0.1 [] $const0.2 = const(float, 20.2) ['$const0.2'] b = $const0.2 ['$const0.2', 'b'] del $const0.2 [] $0.5 = x + a ['$0.5', 'a', 'x'] del x [] del a [] $0.7 = $0.5 + b ['$0.5', '$0.7', 'b'] del b [] del $0.5 [] c = $0.7 ['$0.7', 'c'] del $0.7 [] $0.9 = cast(value=c) ['$0.9', 'c'] del c [] return $0.9 ['$0.9'] ----------------------------nopython: consts_add_one---------------------------- label 0: x = arg(0, name=x) ['x'] $const0.1 = const(int, 11) ['$const0.1'] a = $const0.1 ['$const0.1', 'a'] del $const0.1 [] $const0.2 = const(float, 21.2) ['$const0.2'] b = $const0.2 ['$const0.2', 'b'] del $const0.2 [] $0.5 = x + a ['$0.5', 'a', 'x'] del x [] del a [] $0.7 = $0.5 + b ['$0.5', '$0.7', 'b'] del b [] del $0.5 [] c = $0.7 ['$0.7', 'c'] del $0.7 [] $0.9 = cast(value=c) ['$0.9', 'c'] del c [] return $0.9 ['$0.9'] Note the change in the values in the ``const`` nodes. Pass execution times #################### Numba has built-in support for timing all compiler passes, the execution times are stored in the metadata associated with a compilation result. This demonstrates one way of accessing this information based on the previously defined function, ``foo``: .. 
literalinclude:: compiler_pass_example.py :language: python :dedent: 4 :start-after: magictoken.ex_compiler_timings.begin :end-before: magictoken.ex_compiler_timings.end the output of which is, for example:: pass_timings(init=1.914000677061267e-06, run=4.308700044930447e-05, finalize=1.7400006981915794e-06) This displays the pass initialization, run and finalization times in seconds. numba-0.55.1/docs/source/developer/debugging.rst000664 000000 000000 00000016776 14174536160 021606 0ustar00rootroot000000 000000 .. _developer-debugging: ================== Notes on Debugging ================== This section describes techniques that can be useful in debugging the compilation and execution of generated code. .. seealso:: :ref:`debugging-jit-compiled-code` Memcheck -------- Memcheck_ is a memory error detector implemented using Valgrind_. It is useful for detecting memory errors in compiled code, particularly out-of-bounds accesses and use-after-free errors. Buggy or miscompiled native code can generate these kinds of errors. The `Memcheck documentation `_ explains its usage; here, we discuss only the specifics of using it with Numba. .. _Memcheck: https://valgrind.org/docs/manual/mc-manual.html .. _Valgrind: https://valgrind.org/ The Python interpreter and some of the libraries used by Numba can generate false positives with Memcheck - see `this section of the manual `_ for more information on why false positives occur. The false positives can make it difficult to determine when an actual error has occurred, so it is helpful to suppress known false positives. This can be done by supplying a suppressions file, which instructs Memcheck to ignore errors that match the suppressions defined in it. The CPython source distribution includes a suppressions file, in the file ``Misc/valgrind-python.supp``. Using this file prevents a lot of spurious errors generated by Python's memory allocation implementation. Additionally, the Numba repository includes a suppressions file in ``contrib/valgrind-numba.supp``. .. note:: It is important to use the suppressions files from the versions of the Python interpreter and Numba that you are using - these files evolve over time, so non-current versions can fail to suppress some errors, or erroneously suppress actual errors. To run the Python interpreter under Memcheck with both suppressions files, it is invoked with the following command:: valgrind --tool=memcheck \ --suppressions=${CPYTHON_SRC_DIR}/Misc/valgrind-python.supp \ --suppressions=${NUMBA_SRC_DIR}/contrib/valgrind-numba.supp \ python ${PYTHON_ARGS} where ``${CPYTHON_SRC_DIR}`` is set to the location of the CPython source distribution, ``${NUMBA_SRC_DIR}`` is the location of the Numba source directory, and ``${PYTHON_ARGS}`` are the arguments to the Python interpreter. If there are errors, then messages describing them will be printed to standard error.
An example of an error is:: ==77113== at 0x24169A: PyLong_FromLong (longobject.c:251) ==77113== by 0x241881: striter_next (bytesobject.c:3084) ==77113== by 0x2D3C95: _PyEval_EvalFrameDefault (ceval.c:2809) ==77113== by 0x21B499: _PyEval_EvalCodeWithName (ceval.c:3930) ==77113== by 0x26B436: _PyFunction_FastCallKeywords (call.c:433) ==77113== by 0x2D3605: call_function (ceval.c:4616) ==77113== by 0x2D3605: _PyEval_EvalFrameDefault (ceval.c:3124) ==77113== by 0x21B977: _PyEval_EvalCodeWithName (ceval.c:3930) ==77113== by 0x21C2A4: _PyFunction_FastCallDict (call.c:376) ==77113== by 0x2D5129: do_call_core (ceval.c:4645) ==77113== by 0x2D5129: _PyEval_EvalFrameDefault (ceval.c:3191) ==77113== by 0x21B499: _PyEval_EvalCodeWithName (ceval.c:3930) ==77113== by 0x26B436: _PyFunction_FastCallKeywords (call.c:433) ==77113== by 0x2D46DA: call_function (ceval.c:4616) ==77113== by 0x2D46DA: _PyEval_EvalFrameDefault (ceval.c:3139) ==77113== ==77113== Use of uninitialised value of size 8 The traceback provided only outlines the C call stack, which can make it difficult to determine what the Python interpreter was doing at the time of the error. One can learn more about the state of the stack by looking at the backtrace in the `GNU Debugger (GDB) `_. Launch ``valgrind`` with an additional argument, ``--vgdb-error=0`` and attach to the process using GDB as instructed by the output. Once an error is encountered, GDB will stop at the error and the stack can be inspected. GDB does provide support for backtracing through the Python stack, but this requires symbols which may not be easily available in your Python distribution. In this case, it is still possible to determine some information about what was happening in Python, but this depends on examining the backtrace closely. For example, in a backtrace corresponding to the above error, we see items such as: .. code-block:: #18 0x00000000002722da in slot_tp_call ( self=<_wrap_impl(_callable=<_wrap_missing_loc(func=) at remote 0x1d200bd0>, _imp=, _context=, , , , , , , , , , , , , , , , , , , , , ], attributes=[, , , , , , identified_types={}) at remote 0xbb5add0>, name='cuconstRecAlign$7', data_layout='e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64', scope=, triple='nvptx64-nvidia-cuda', globals={'_ZN08NumbaEnv5numba4cuda5tests6cudapy13test_constmem19cuconstRecAlign$247E5ArrayIdLi1E1C7mutable7ali...(truncated), kwds=0x0) We can see some of the arguments, in particular the names of the compiled functions, e.g:: _ZN5numba4cuda5tests6cudapy13test_constmem19cuconstRecAlign$247E5ArrayIdLi1E1C7mutable7alignedE5ArrayIdLi1E1C7mutable7alignedE5ArrayIdLi1E1C7mutable7alignedE5ArrayIdLi1E1C7mutable7alignedE5ArrayIdLi1E1C7mutable7alignedE We can run this through ``c++filt`` to see a more human-readable representation:: numba::cuda::tests::cudapy::test_constmem::cuconstRecAlign$247( Array, Array, Array, Array, Array) which is the fully qualified name of a jitted function and the types with which it was called. numba-0.55.1/docs/source/developer/dispatching.rst000664 000000 000000 00000027230 14174536160 022143 0ustar00rootroot000000 000000 ======================= Polymorphic dispatching ======================= Functions compiled using :func:`~numba.jit` or :func:`~numba.vectorize` are open-ended: they can be called with many different input types and have to select (possibly compile on-the-fly) the right low-level specialization. We hereby explain how this mechanism is implemented. 
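As a small concrete illustration of this open-endedness (a sketch; the inferred types shown in the comments assume a typical 64-bit platform):

.. code-block:: python

    from numba import njit

    @njit
    def add(x, y):
        return x + y

    add(1, 2)      # compiles an (int64, int64) specialization
    add(1.0, 2.5)  # compiles a (float64, float64) specialization

    # The dispatcher now holds two specializations and must select
    # between them (or compile a new one) on every subsequent call.
    print(add.signatures)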
Requirements ============ JIT-compiled functions can take several arguments and each of them is taken into account when selecting a specialization. Thus it is a form of multiple dispatch, more complex than single dispatch. Each argument weighs in the selection based on its :ref:`Numba type `. Numba types are often more granular than Python types: for example, Numba types Numpy arrays differently depending on their dimensionality and their layout (C-contiguous, etc.). Once a Numba type is inferred for each argument, a specialization must be chosen amongst the available ones; or, if no suitable specialization is found, a new one must be compiled. This is not a trivial decision: there can be multiple specializations compatible with a given concrete signature (for example, say a two-argument function has compiled specializations for ``(float64, float64)`` and ``(complex64, complex64)``, and it is called with ``(float32, float32)``). Therefore, there are two crucial steps in the dispatch mechanism: 1. infer the Numba types of the concrete arguments 2. select the best available specialization (or choose to compile a new one) for the inferred Numba types Compile-time vs. run-time ------------------------- This document discusses dispatching when it is done at runtime, i.e. when a JIT-compiled function is called from pure Python. In that context, performance is important. To stay in the realm of normal function call overhead in Python, the overhead of dispatching should stay under a microsecond. Of course, *the faster the better*... When a JIT-compiled function is called from another JIT-compiled function (in :term:`nopython mode`), the polymorphism is resolved at compile-time, using a non-performance critical mechanism, bearing zero runtime performance overhead. .. note:: In practice, the performance-critical parts described here are coded in C. Type resolution =============== The first step is therefore to infer, at call-time, a Numba type for each of the function's concrete arguments. Given the finer granularity of Numba types compared to Python types, one cannot simply look up an object's class and key a dictionary with it to obtain the corresponding Numba type. Instead, there is a machinery to inspect the object and, based on its Python type, query various properties to infer the appropriate Numba type. This can be more or less complex: for example, a Python ``int`` argument will always infer to a Numba ``intp`` (a pointer-sized integer), but a Python ``tuple`` argument can infer to multiple Numba types (depending on the tuple's size and the concrete type of each of its elements). The Numba type system is high-level and written in pure Python; there is a pure Python machinery, based on a generic function, to do said inference (in :mod:`numba.typing.typeof`). That machinery is used for compile-time inference, e.g. on constants. Unfortunately, it is too slow for run-time value-based dispatching. It is only used as a fallback for rarely used (or difficult to infer) types, and exhibits multiple-microsecond overhead. Typecodes --------- The Numba type system is really too high-level to be manipulated efficiently from C code. Therefore, the C dispatching layer uses another representation based on integer typecodes. Each Numba type gets a unique integer typecode when constructed; also, an interning system ensures no two instances of the same type are created.
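Both the granularity of Numba types discussed under *Type resolution* and the interning just mentioned can be observed from pure Python through ``numba.typeof`` (a sketch; the printed type names assume a typical 64-bit platform):

.. code-block:: python

    import numpy as np
    from numba import typeof

    print(typeof(1))                   # int64
    print(typeof(np.zeros((2, 3))))    # array(float64, 2d, C)
    print(typeof(np.zeros((2, 3)).T))  # array(float64, 2d, F): layout matters

    # Distinct objects of the same dtype, dimensionality and layout resolve
    # to equal Numba types, backed by a single interned typecode.
    print(typeof(np.ones((2, 3))) == typeof(np.zeros((2, 3))))  # True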
The dispatching layer is therefore able to *eschew* the overhead of the Numba type system by working with simple integer typecodes, amenable to well-known optimizations (fast hash tables, etc.). The goal of the type resolution step becomes: infer a Numba *typecode* for each of the function's concrete arguments. Ideally, it doesn't deal with Numba types anymore... Hard-coded fast paths --------------------- Although the integer typecodes eschew the abstraction and object-orientation overhead of the type system, they still have the same conceptual complexity. Therefore, an important technique to speed up inference is to first go through checks for the most important types, and hard-code a fast resolution for each of them. Several types benefit from such an optimization, notably: * basic Python scalars (``bool``, ``int``, ``float``, ``complex``); * basic Numpy scalars (the various kinds of integer, floating-point, complex numbers); * Numpy arrays of certain dimensionalities and basic element types. Each of those fast paths ideally uses a hard-coded result value or a direct table lookup after a few simple checks. However, we can't apply that technique to all argument types; there would be an explosion of ad-hoc internal caches, and it would become difficult to maintain. Besides, the recursive application of hard-coded fast paths would not necessarily combine into a low overhead (in the nested tuple case, for example). Fingerprint-based typecode cache -------------------------------- For not-so-trivial types (imagine a tuple, or a Numpy ``datetime64`` array, for example), the hard-coded fast paths don't match. Another, more generic mechanism then kicks in. The principle here is to examine each argument value, as the pure Python machinery would do, and to describe its Numba type unambiguously. The difference is that *we don't actually compute a Numba type*. Instead, we compute a simple bytestring, a low-level possible denotation of that Numba type: a *fingerprint*. The fingerprint format is designed to be short and extremely simple to compute from C code (in practice, it has a bytecode-like format). Once the fingerprint is computed, it is looked up in a cache mapping fingerprints to typecodes. The cache is a hash table, and the lookup is fast thanks to the fingerprints being generally very short (rarely more than 20 bytes). If the cache lookup fails, the typecode must first be computed using the slow pure Python machinery. Luckily, this would only happen once: on subsequent calls, the cached typecode would be returned for the given fingerprint. In rare cases, a fingerprint cannot be computed efficiently. This is the case for some types which cannot be easily inspected from C: for example ``cffi`` function pointers. Then, the slow pure Python machinery is invoked at each function call with such an argument. .. note:: Two fingerprints may denote a single Numba type. This does not make the mechanism incorrect; it only creates more cache entries. Summary ------- Type resolution of a function argument involves the following mechanisms in order: * Try a few hard-coded fast paths, for common simple types. * If the above failed, compute a fingerprint for the argument and look up its typecode in a cache. * If all the above failed, invoke the pure Python machinery which will determine a Numba type for the argument (and look up its typecode). Specialization selection ======================== At the previous step, an integer typecode has been determined for each concrete argument to the JIT-compiled function.
Now it remains to match that concrete signature against each of the available specializations for the function. There can be three outcomes: * There is a satisfying best match: the corresponding specialization is then invoked (it will handle argument unboxing and other details). * There is a tie between two or more "best matches": an exception is raised, refusing to solve the ambiguity. * There is no satisfying match: a new specialization is compiled tailored for the concrete argument types that were inferred. The selection works by looping over all available specializations, and computing the compatibility of each concrete argument type with the corresponding type in the specialization's intended signature. Specifically, we are interested in: 1. Whether the concrete argument type is allowed to convert implicitly to the specialization's argument type; 2. If so, at what semantic (user-visible) cost the conversion comes. Implicit conversion rules ------------------------- There are five possible kinds of implicit conversion from a source type to a destination type (note this is an asymmetric relationship): 1. *exact match*: the two types are identical; this is the ideal case, since the specialization would behave exactly as intended; 2. *same-kind promotion*: the two types belong to the same "kind" (for example ``int32`` and ``int64`` are two integer types), and the source type can be converted losslessly to the destination type (e.g. from ``int32`` to ``int64``, but not the reverse); 3. *safe conversion*: the two types belong to different kinds, but the source type can be reasonably converted to the destination type (e.g. from ``int32`` to ``float64``, but not the reverse); 4. *unsafe conversion*: a conversion is available from the source type to the destination type, but it may lose precision, magnitude, or another desirable quality. 5. *no conversion*: there is no correct or reasonably efficient way to convert between the two types (for example between an ``int64`` and a ``datetime64``, or a C-contiguous array and a Fortran-contiguous array). When a specialization is examined, the latter two cases eliminate it from the final choice: i.e. when at least one argument has *no conversion* or only an *unsafe conversion* to the signature's argument type. .. note:: However, if the function is compiled with explicit signatures in the :func:`~numba.jit` call (and therefore it is not allowed to compile new specializations), *unsafe conversion* is allowed. Candidates and best match ------------------------- If a specialization is not eliminated by the rule above, it enters the list of *candidates* for the final choice. Those candidates are ranked by an ordered 4-uple of integers: ``(number of unsafe conversions, number of safe conversions, number of same-kind promotions, number of exact matches)`` (note the sum of the tuple's elements is equal to the number of arguments). The best match is then the #1 result in sorted ascending order, thereby preferring exact matches over promotions, promotions over safe conversions, safe conversions over unsafe conversions. Implementation -------------- The above-described mechanism works on integer typecodes, not on Numba types. It uses an internal hash table storing the possible conversion kind for each pair of compatible types. 
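To make the ranking rule under *Candidates and best match* concrete, the two-argument example given earlier can be worked through in plain Python (the conversion counts are illustrative bookkeeping, not Numba's internal API):

.. code-block:: python

    # Specializations exist for (float64, float64) and (complex64, complex64);
    # the call is made with (float32, float32). Candidates are ranked by the
    # ascending 4-tuple (unsafe, safe, same-kind promotions, exact matches).
    candidates = {
        "(float64, float64)": (0, 0, 2, 0),      # two same-kind promotions
        "(complex64, complex64)": (0, 2, 0, 0),  # two safe conversions
    }
    best = min(candidates, key=candidates.get)
    print(best)  # (float64, float64): promotions beat safe conversions

In the real implementation, the per-argument conversion kinds that feed such counts come from the internal hash table described in this section.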
The internal hash table is in part built at startup (for built-in trivial types such as ``int32``, ``int64`` etc.), in part filled dynamically (for arbitrarily complex types such as array types: for example to allow using a C-contiguous 2D array where a function expects a non-contiguous 2D array). Summary ------- Selecting the right specialization involves the following steps: * Examine each available specialization and match it against the concrete argument types. * Eliminate any specialization where at least one argument doesn't offer sufficient compatibility. * If there are remaining candidates, choose the best one in terms of preserving the types' semantics. Miscellaneous ============= Some `benchmarks of dispatch performance `_ exist in the `Numba benchmarks `_ repository. Some unit tests of specific aspects of the machinery are available in :mod:`numba.tests.test_typeinfer` and :mod:`numba.tests.test_typeof`. Higher-level dispatching tests are in :mod:`numba.tests.test_dispatcher`. numba-0.55.1/docs/source/developer/environment.rst000664 000000 000000 00000004247 14174536160 022205 0ustar00rootroot000000 000000 ================== Environment Object ================== The Environment object (Env) is used to maintain references to Python objects that are needed to support compiled functions for both object-mode and nopython-mode. In nopython-mode, the Env is used for: * Storing pyobjects for reconstruction from native values, such as: * for printing native values of NumPy arrays; * for returning or yielding native values back to the interpreter. In object-mode, the Env is used for: * storing constant values referenced in the code. * storing a reference to the function's global dictionary to load global values. The Implementation ================== The Env is implemented in two parts. In ``_dynfunc.c``, the Env is defined as ``EnvironmentObject``, a Python C-extension type. In ``lowering.py``, the ``EnvironmentObject`` (exported as ``_dynfunc.Environment``) is extended to support the operations needed at lowering. Serialization ------------- The Env supports being pickled. Compilation cache files and ahead-of-time compiled modules serialize all the used Envs for recreation at runtime. Usage ----- At the start of the lowering for a function or a generator, an Env is created. Throughout the compilation, the Env is mutated to attach additional information. The compiled code references an Env via a global variable in the emitted LLVM IR. The global variable is zero-initialized with "common" linkage, which is the default linkage for C global values. The use of this linkage allows multiple definitions of the global variable to be merged into a single definition when the modules are linked together. The name of the global variable is computed from the name of the function (see ``FunctionDescriptor.env_name`` and ``.get_env_name()`` of the target context). The Env is initialized when the compiled function is loaded. The JIT engine finds the address of the associated global variable for the Env and stores the address of the Env into it. For cached functions, the same process applies. For ahead-of-time compiled functions, the module initializer in the generated library is responsible for initializing the global variables of all the Envs in the module. numba-0.55.1/docs/source/developer/event_api.rst000664 000000 000000 00000000103 14174536160 021610 0ustar00rootroot000000 000000 Event API ========= ..
automodule:: numba.core.event :members:numba-0.55.1/docs/source/developer/generators.rst000664 000000 000000 00000026074 14174536160 022024 0ustar00rootroot000000 000000 .. _arch-generators: =================== Notes on generators =================== Numba recently gained support for compiling generator functions. This document explains some of the implementation choices. Terminology =========== For clarity, we distinguish between *generator functions* and *generators*. A generator function is a function containing one or several ``yield`` statements. A generator (sometimes also called "generator iterator") is the return value of a generator function; it resumes execution inside its frame each time :py:func:`next` is called. A *yield point* is the place where a ``yield`` statement is called. A *resumption point* is the place just after a *yield point* where execution is resumed when :py:func:`next` is called again. Function analysis ================= Suppose we have the following simple generator function:: def gen(x, y): yield x + y yield x - y Here is its CPython bytecode, as printed out using :py:func:`dis.dis`:: 7 0 LOAD_FAST 0 (x) 3 LOAD_FAST 1 (y) 6 BINARY_ADD 7 YIELD_VALUE 8 POP_TOP 8 9 LOAD_FAST 0 (x) 12 LOAD_FAST 1 (y) 15 BINARY_SUBTRACT 16 YIELD_VALUE 17 POP_TOP 18 LOAD_CONST 0 (None) 21 RETURN_VALUE When compiling this function with :envvar:`NUMBA_DUMP_IR` set to 1, the following information is printed out:: ----------------------------------IR DUMP: gen---------------------------------- label 0: x = arg(0, name=x) ['x'] y = arg(1, name=y) ['y'] $0.3 = x + y ['$0.3', 'x', 'y'] $0.4 = yield $0.3 ['$0.3', '$0.4'] del $0.4 [] del $0.3 [] $0.7 = x - y ['$0.7', 'x', 'y'] del y [] del x [] $0.8 = yield $0.7 ['$0.7', '$0.8'] del $0.8 [] del $0.7 [] $const0.9 = const(NoneType, None) ['$const0.9'] $0.10 = cast(value=$const0.9) ['$0.10', '$const0.9'] del $const0.9 [] return $0.10 ['$0.10'] ------------------------------GENERATOR INFO: gen------------------------------- generator state variables: ['$0.3', '$0.7', 'x', 'y'] yield point #1: live variables = ['x', 'y'], weak live variables = ['$0.3'] yield point #2: live variables = [], weak live variables = ['$0.7'] What does it mean? The first part is the Numba IR, as already seen in :ref:`arch_generate_numba_ir`. We can see the two yield points (``yield $0.3`` and ``yield $0.7``). The second part shows generator-specific information. To understand it we have to understand what suspending and resuming a generator means. When suspending a generator, we are not merely returning a value to the caller (the operand of the ``yield`` statement). We also have to save the generator's *current state* in order to resume execution. In trivial use cases, perhaps the CPU's register values or stack slots would be preserved until the next call to next(). However, any non-trivial case will hopelessly clobber those values, so we have to save them in a well-defined place. What are the values we need to save? Well, in the context of the Numba Intermediate Representation, we must save all *live variables* at each yield point. These live variables are computed thanks to the control flow graph. Once live variables are saved and the generator is suspended, resuming the generator simply involves the inverse operation: the live variables are restored from the saved generator state. .. note:: It is the same analysis which helps insert Numba ``del`` instructions where appropriate. 
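The suspend/resume bookkeeping enabled by this analysis can be modelled in pure Python (a sketch only; the actual data layout and the resume-index conventions are described in the following sections, and the names used here are purely illustrative):

.. code-block:: python

    class GenState:
        # Models the generator structure for gen(x, y): a resume index
        # plus storage for the saved arguments / live variables.
        def __init__(self, x, y):
            self.resume_index = 0  # 0 = not started, -1 = exhausted
            self.x, self.y = x, y

    def gen_next(state):
        # Models the trampoline at the start of the compiled next().
        if state.resume_index == 0:    # first call
            state.resume_index = 1
            return state.x + state.y   # first yield point
        elif state.resume_index == 1:  # resume after the first yield
            state.resume_index = 2
            return state.x - state.y   # second yield point
        state.resume_index = -1        # fell off the end: exhausted
        raise StopIteration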
Let's go over the generator info again:: generator state variables: ['$0.3', '$0.7', 'x', 'y'] yield point #1: live variables = ['x', 'y'], weak live variables = ['$0.3'] yield point #2: live variables = [], weak live variables = ['$0.7'] Numba has computed the union of all live variables (denoted as "state variables"). This will help define the layout of the :ref:`generator structure `. Also, for each yield point, we have computed two sets of variables: * the *live variables* are the variables which are used by code following the resumption point (i.e. after the ``yield`` statement) * the *weak live variables* are variables which are del'ed immediately after the resumption point; they have to be saved in :term:`object mode`, to ensure proper reference cleanup .. _generator-structure: The generator structure ======================= Layout ------ Function analysis helps us gather enough information to define the layout of the generator structure, which will store the entire execution state of a generator. Here is a sketch of the generator structure's layout, in pseudo-code:: struct gen_struct_t { int32_t resume_index; struct gen_args_t { arg_0_t arg0; arg_1_t arg1; ... arg_N_t argN; } struct gen_state_t { state_0_t state_var0; state_1_t state_var1; ... state_N_t state_varN; } } Let's describe those fields in order. * The first member, the *resume index*, is an integer telling the generator at which resumption point execution must resume. By convention, it can have two special values: 0 means execution must start at the beginning of the generator (i.e. the first time :py:func:`next` is called); -1 means the generator is exhausted and resumption must immediately raise StopIteration. Other values indicate the yield point's index starting from 1 (corresponding to the indices shown in the generator info above). * The second member, the *arguments structure* is read-only after it is first initialized. It stores the values of the arguments the generator function was called with. In our example, these are the values of ``x`` and ``y``. * The third member, the *state structure*, stores the live variables as computed above. Concretely, our example's generator structure (assuming the generator function is called with floating-point numbers) is then:: struct gen_struct_t { int32_t resume_index; struct gen_args_t { double arg0; double arg1; } struct gen_state_t { double $0.3; double $0.7; double x; double y; } } Note that here, saving ``x`` and ``y`` is redundant: Numba isn't able to recognize that the state variables ``x`` and ``y`` have the same value as ``arg0`` and ``arg1``. Allocation ---------- How does Numba ensure the generator structure is preserved long enough? There are two cases: * When a Numba-compiled generator function is called from a Numba-compiled function, the structure is allocated on the stack by the callee. In this case, generator instantiation is practically costless. * When a Numba-compiled generator function is called from regular Python code, a CPython-compatible wrapper is instantiated that has the right amount of allocated space to store the structure, and whose :c:member:`~PyTypeObject.tp_iternext` slot is a wrapper around the generator's native code. Compiling to native code ======================== When compiling a generator function, three native functions are actually generated by Numba: * An initialization function. 
This is the function corresponding to the generator function itself: it receives the function arguments and stores them inside the generator structure (which is passed by pointer). It also initializes the *resume index* to 0, indicating that the generator hasn't started yet. * A next() function. This is the function called to resume execution inside the generator. Its single argument is a pointer to the generator structure and it returns the next yielded value (or a special exit code if the generator is exhausted, allowing a quick check when called from Numba-compiled functions). * An optional finalizer. In object mode, this function ensures that all live variables stored in the generator state are decref'ed, even if the generator is destroyed without having been exhausted. The next() function ------------------- The next() function is the least straightforward of the three native functions. It starts with a trampoline which dispatches execution to the right resume point depending on the *resume index* stored in the generator structure. Here is what the start of the function may look like in our example: .. code-block:: llvm define i32 @"__main__.gen.next"( double* nocapture %retptr, { i8*, i32 }** nocapture readnone %excinfo, i8* nocapture readnone %env, { i32, { double, double }, { double, double, double, double } }* nocapture %arg.gen) { entry: %gen.resume_index = getelementptr { i32, { double, double }, { double, double, double, double } }* %arg.gen, i64 0, i32 0 %.47 = load i32* %gen.resume_index, align 4 switch i32 %.47, label %stop_iteration [ i32 0, label %B0 i32 1, label %generator_resume1 i32 2, label %generator_resume2 ] ; rest of the function snipped (uninteresting stuff trimmed from the LLVM IR to make it more readable) We recognize the pointer to the generator structure in ``%arg.gen``. The trampoline switch has three targets (one for each *resume index* 0, 1 and 2), and a fallback target label named ``stop_iteration``. Label ``B0`` represents the function's start, ``generator_resume1`` (resp. ``generator_resume2``) is the resumption point after the first (resp. second) yield point. After generation by LLVM, the whole native assembly code for this function may look like this (on x86-64): .. code-block:: asm .globl __main__.gen.next .align 16, 0x90 __main__.gen.next: movl (%rcx), %eax cmpl $2, %eax je .LBB1_5 cmpl $1, %eax jne .LBB1_2 movsd 40(%rcx), %xmm0 subsd 48(%rcx), %xmm0 movl $2, (%rcx) movsd %xmm0, (%rdi) xorl %eax, %eax retq .LBB1_5: movl $-1, (%rcx) jmp .LBB1_6 .LBB1_2: testl %eax, %eax jne .LBB1_6 movsd 8(%rcx), %xmm0 movsd 16(%rcx), %xmm1 movaps %xmm0, %xmm2 addsd %xmm1, %xmm2 movsd %xmm1, 48(%rcx) movsd %xmm0, 40(%rcx) movl $1, (%rcx) movsd %xmm2, (%rdi) xorl %eax, %eax retq .LBB1_6: movl $-3, %eax retq Note the function returns 0 to indicate a value is yielded, -3 to indicate StopIteration. ``%rcx`` points to the start of the generator structure, where the resume index is stored. numba-0.55.1/docs/source/developer/hashing.rst000664 000000 000000 00000005016 14174536160 021265 0ustar00rootroot000000 000000 ================ Notes on Hashing ================ Numba supports the built-in :func:`hash` and does so by simply calling the :func:`__hash__` member function on the supplied argument. This makes it trivial to add hash support for new types: all that is required is to apply the extension API :func:`overload_method` decorator to register a function computing the hash value against the type's :func:`__hash__` method.
For example:: from numba.extending import overload_method @overload_method(myType, '__hash__') def myType_hash_overload(obj): # implementation details The Implementation ================== The implementation of the Numba hashing functions strictly follows that of Python 3. The only exception to this is that for hashing Unicode and bytes (for content longer than ``sys.hash_info.cutoff``) the only supported algorithm is ``siphash24`` (default in CPython 3). As a result Numba will match Python 3 hash values for all supported types under the default conditions described. Unicode hash cache differences ------------------------------ Both Numba and CPython Unicode string internal representations have a ``hash`` member for the purposes of caching the string's hash value. This member is always checked before computing a hash value, since simply providing the value from the cache is considerably cheaper. The Numba Unicode string hash caching implementation behaves in a similar way to CPython's. The only notable behavioral change (and its only impact is a minor potential change in performance) is that Numba always computes and caches the hash for Unicode strings created in ``nopython mode`` at the time they are boxed for reuse in Python; this is too eager in some cases in comparison to CPython, which may delay hashing a new Unicode string depending on the creation method. It should also be noted that Numba copies the ``hash`` member of the CPython internal representation for Unicode strings when unboxing them to its own representation so as not to recompute the hash of a string that already has a hash value associated with it. The accommodation of ``PYTHONHASHSEED`` --------------------------------------- The ``PYTHONHASHSEED`` environment variable can be used to seed the CPython hashing algorithms for, e.g., the purposes of reproducibility. The Numba hashing implementation directly reads the CPython hashing algorithms' internal state and as a result the influence of ``PYTHONHASHSEED`` is replicated in Numba's hashing implementations. numba-0.55.1/docs/source/developer/index.rst000664 000000 000000 00000001012 14174536160 020745 0ustar00rootroot000000 000000 .. _developer-manual: Developer Manual ================ .. toctree:: :maxdepth: 2 contributing.rst release.rst repomap.rst architecture.rst dispatching.rst generators.rst numba-runtime.rst rewrites.rst live_variable_analysis.rst listings.rst stencil.rst custom_pipeline.rst inlining.rst environment.rst hashing.rst caching.rst threading_implementation.rst literal.rst llvm_timings.rst debugging.rst event_api.rst target_extension.rst roadmap.rst numba-0.55.1/docs/source/developer/inline_example.py000664 000000 000000 00000004320 14174536160 022452 0ustar00rootroot000000 000000 from numba import njit import numba from numba.core import ir @njit(inline='never') def never_inline(): return 100 @njit(inline='always') def always_inline(): return 200 def sentinel_cost_model(expr, caller_info, callee_info): # this cost model will return True (i.e.
do inlining) if either: # a) the callee IR contains an `ir.Const(37)` # b) the caller IR contains an `ir.Const(13)` logically prior to the call # site # check the callee for blk in callee_info.blocks.values(): for stmt in blk.body: if isinstance(stmt, ir.Assign): if isinstance(stmt.value, ir.Const): if stmt.value.value == 37: return True # check the caller before_expr = True for blk in caller_info.blocks.values(): for stmt in blk.body: if isinstance(stmt, ir.Assign): if isinstance(stmt.value, ir.Expr): if stmt.value == expr: before_expr = False if isinstance(stmt.value, ir.Const): if stmt.value.value == 13: return True & before_expr return False @njit(inline=sentinel_cost_model) def maybe_inline1(): # Will not inline based on the callee IR with the declared cost model # The following is ir.Const(300). return 300 @njit(inline=sentinel_cost_model) def maybe_inline2(): # Will inline based on the callee IR with the declared cost model # The following is ir.Const(37). return 37 @njit def foo(): a = never_inline() # will never inline b = always_inline() # will always inline # will not inline as the function does not contain a magic constant known to # the cost model, and the IR up to the call site does not contain a magic # constant either d = maybe_inline1() # declare this magic constant to trigger inlining of maybe_inline1 in a # subsequent call magic_const = 13 # will inline due to above constant declaration e = maybe_inline1() # will inline as the maybe_inline2 function contains a magic constant known # to the cost model c = maybe_inline2() return a + b + c + d + e + magic_const foo() numba-0.55.1/docs/source/developer/inline_overload_example.py000664 000000 000000 00000003435 14174536160 024353 0ustar00rootroot000000 000000 import numba from numba.extending import overload from numba import njit, types def bar(x): """A function stub to overload""" pass @overload(bar, inline='always') def ol_bar_tuple(x): # An overload that will always inline, there is a type guard so that this # only applies to UniTuples. if isinstance(x, types.UniTuple): def impl(x): return x[0] return impl def cost_model(expr, caller, callee): # Only inline if the type of the argument is an Integer return isinstance(caller.typemap[expr.args[0].name], types.Integer) @overload(bar, inline=cost_model) def ol_bar_scalar(x): # An overload that will inline based on a cost model, it only applies to # scalar values in the numerical domain as per the type guard on Number if isinstance(x, types.Number): def impl(x): return x + 1 return impl @njit def foo(): # This will resolve via `ol_bar_tuple` as the argument is a types.UniTuple # instance. It will always be inlined as specified in the decorator for this # overload. a = bar((1, 2, 3)) # This will resolve via `ol_bar_scalar` as the argument is a types.Number # instance, hence the cost_model will be used to determine whether to # inline. # The function will be inlined as the value 100 is an IntegerLiteral which # is an instance of a types.Integer as required by the cost_model function. b = bar(100) # This will also resolve via `ol_bar_scalar` as the argument is a # types.Number instance, again the cost_model will be used to determine # whether to inline. # The function will not be inlined as the complex value is not an instance # of a types.Integer as required by the cost_model function. 
c = bar(300j) return a + b + c foo() numba-0.55.1/docs/source/developer/inlining.rst000664 000000 000000 00000031456 14174536160 021460 0ustar00rootroot000000 000000 ================= Notes on Inlining ================= There are occasions where it is useful to be able to inline a function at its call site, at the Numba IR level of representation. Decorators such as :func:`numba.jit`, :func:`numba.extending.overload` and :func:`register_jitable` support the keyword argument ``inline`` to facilitate this behaviour. When attempting to inline at this level, it is important to understand what purpose this serves and what effect this will have. In contrast to the inlining performed by LLVM, which is aimed at improving performance, the main reason to inline at the Numba IR level is to allow type inference to cross function boundaries. As an example, consider the following snippet: .. code:: python from numba import njit @njit def bar(a): a.append(10) @njit def foo(): z = [] bar(z) foo() This will fail to compile and run, because the type of ``z`` cannot be inferred as it will only be refined within ``bar``. If we now add ``inline='always'`` to the decorator for ``bar``, the snippet will compile and run. This is because inlining the call to ``a.append(10)`` will mean that ``z`` will be refined to hold integers and so type inference will succeed. So, to recap, inlining at the Numba IR level is unlikely to have a performance benefit, whereas inlining at the LLVM level stands a better chance. The ``inline`` keyword argument can be one of three values: * The string ``'never'``; this is the default and results in the function not being inlined under any circumstances. * The string ``'always'``; this results in the function being inlined at all call sites. * A python function that takes three arguments. The first argument is always the ``ir.Expr`` node that is the ``call`` requesting the inline; this is present to allow the function to make call contextually aware decisions. The second and third arguments are: * In the case of an untyped inline, i.e. that which occurs when using the :func:`numba.jit` family of decorators, both arguments are ``numba.ir.FunctionIR`` instances. The second argument corresponds to the IR of the caller, the third to the IR of the callee. * In the case of a typed inline, i.e. that which occurs when using :func:`numba.extending.overload`, both arguments are instances of a ``namedtuple`` with fields (corresponding to their standard use in the compiler internals): * ``func_ir`` - the function's Numba IR. * ``typemap`` - the function's type map. * ``calltypes`` - the call types of any calls in the function. * ``signature`` - the function's signature. The second argument holds the information from the caller, the third holds the information from the callee. In all cases the function should return True to inline and return False to not inline, thus permitting custom inlining rules (a typical use might be a cost model). * Recursive functions with ``inline='always'`` will result in a non-terminating compilation. If you wish to avoid this, supply a function to limit the recursion depth (see below). .. note:: No guarantee is made about the order in which functions are assessed for inlining or about the order in which they are inlined. Example using :func:`numba.jit` =============================== An example of using all three options to ``inline`` in the :func:`numba.njit` decorator: ..
literalinclude:: inline_example.py which produces the following when executed (with a print of the IR after the legalization pass, enabled via the environment variable ``NUMBA_DEBUG_PRINT_AFTER="ir_legalization"``): .. code-block:: none :emphasize-lines: 2, 3, 9, 16, 17, 21, 22, 26, 35 label 0: $0.1 = global(never_inline: CPUDispatcher()) ['$0.1'] $0.2 = call $0.1(func=$0.1, args=[], kws=(), vararg=None) ['$0.1', '$0.2'] del $0.1 [] a = $0.2 ['$0.2', 'a'] del $0.2 [] $0.3 = global(always_inline: CPUDispatcher()) ['$0.3'] del $0.3 [] $const0.1.0 = const(int, 200) ['$const0.1.0'] $0.2.1 = $const0.1.0 ['$0.2.1', '$const0.1.0'] del $const0.1.0 [] $0.4 = $0.2.1 ['$0.2.1', '$0.4'] del $0.2.1 [] b = $0.4 ['$0.4', 'b'] del $0.4 [] $0.5 = global(maybe_inline1: CPUDispatcher()) ['$0.5'] $0.6 = call $0.5(func=$0.5, args=[], kws=(), vararg=None) ['$0.5', '$0.6'] del $0.5 [] d = $0.6 ['$0.6', 'd'] del $0.6 [] $const0.7 = const(int, 13) ['$const0.7'] magic_const = $const0.7 ['$const0.7', 'magic_const'] del $const0.7 [] $0.8 = global(maybe_inline1: CPUDispatcher()) ['$0.8'] del $0.8 [] $const0.1.2 = const(int, 300) ['$const0.1.2'] $0.2.3 = $const0.1.2 ['$0.2.3', '$const0.1.2'] del $const0.1.2 [] $0.9 = $0.2.3 ['$0.2.3', '$0.9'] del $0.2.3 [] e = $0.9 ['$0.9', 'e'] del $0.9 [] $0.10 = global(maybe_inline2: CPUDispatcher()) ['$0.10'] del $0.10 [] $const0.1.4 = const(int, 37) ['$const0.1.4'] $0.2.5 = $const0.1.4 ['$0.2.5', '$const0.1.4'] del $const0.1.4 [] $0.11 = $0.2.5 ['$0.11', '$0.2.5'] del $0.2.5 [] c = $0.11 ['$0.11', 'c'] del $0.11 [] $0.14 = a + b ['$0.14', 'a', 'b'] del b [] del a [] $0.16 = $0.14 + c ['$0.14', '$0.16', 'c'] del c [] del $0.14 [] $0.18 = $0.16 + d ['$0.16', '$0.18', 'd'] del d [] del $0.16 [] $0.20 = $0.18 + e ['$0.18', '$0.20', 'e'] del e [] del $0.18 [] $0.22 = $0.20 + magic_const ['$0.20', '$0.22', 'magic_const'] del magic_const [] del $0.20 [] $0.23 = cast(value=$0.22) ['$0.22', '$0.23'] del $0.22 [] return $0.23 ['$0.23'] Things to note in the above: 1. The call to the function ``never_inline`` remains as a call. 2. The ``always_inline`` function has been inlined, note its ``const(int, 200)`` in the caller body. 3. There is a call to ``maybe_inline1`` before the ``const(int, 13)`` declaration, the cost model prevented this from being inlined. 4. After the ``const(int, 13)`` the subsequent call to ``maybe_inline1`` has been inlined as shown by the ``const(int, 300)`` in the caller body. 5. The function ``maybe_inline2`` has been inlined as demonstrated by ``const(int, 37)`` in the caller body. 6. That dead code elimination has not been performed and as a result there are superfluous statements present in the IR. Example using :func:`numba.extending.overload` ============================================== An example of using inlining with the :func:`numba.extending.overload` decorator. It is most interesting to note that if a function is supplied as the argument to ``inline`` a lot more information is available via the supplied function arguments for use in decision making. Also that different ``@overload`` s can have different inlining behaviours, with multiple ways to achieve this: .. literalinclude:: inline_overload_example.py which produces the following when executed (with a print of the IR after the legalization pass, enabled via the environment variable ``NUMBA_DEBUG_PRINT_AFTER="ir_legalization"``): .. 
code-block:: none :emphasize-lines: 2, 3, 4, 5, 6, 15, 16, 17, 18, 19, 20, 21, 22, 28, 29, 30 label 0: $const0.2 = const(tuple, (1, 2, 3)) ['$const0.2'] x.0 = $const0.2 ['$const0.2', 'x.0'] del $const0.2 [] $const0.2.2 = const(int, 0) ['$const0.2.2'] $0.3.3 = getitem(value=x.0, index=$const0.2.2) ['$0.3.3', '$const0.2.2', 'x.0'] del x.0 [] del $const0.2.2 [] $0.4.4 = $0.3.3 ['$0.3.3', '$0.4.4'] del $0.3.3 [] $0.3 = $0.4.4 ['$0.3', '$0.4.4'] del $0.4.4 [] a = $0.3 ['$0.3', 'a'] del $0.3 [] $const0.5 = const(int, 100) ['$const0.5'] x.5 = $const0.5 ['$const0.5', 'x.5'] del $const0.5 [] $const0.2.7 = const(int, 1) ['$const0.2.7'] $0.3.8 = x.5 + $const0.2.7 ['$0.3.8', '$const0.2.7', 'x.5'] del x.5 [] del $const0.2.7 [] $0.4.9 = $0.3.8 ['$0.3.8', '$0.4.9'] del $0.3.8 [] $0.6 = $0.4.9 ['$0.4.9', '$0.6'] del $0.4.9 [] b = $0.6 ['$0.6', 'b'] del $0.6 [] $0.7 = global(bar: ) ['$0.7'] $const0.8 = const(complex, 300j) ['$const0.8'] $0.9 = call $0.7($const0.8, func=$0.7, args=[Var($const0.8, inline_overload_example.py (56))], kws=(), vararg=None) ['$0.7', '$0.9', '$const0.8'] del $const0.8 [] del $0.7 [] c = $0.9 ['$0.9', 'c'] del $0.9 [] $0.12 = a + b ['$0.12', 'a', 'b'] del b [] del a [] $0.14 = $0.12 + c ['$0.12', '$0.14', 'c'] del c [] del $0.12 [] $0.15 = cast(value=$0.14) ['$0.14', '$0.15'] del $0.14 [] return $0.15 ['$0.15'] Things to note in the above: 1. The first highlighted section is the always inlined overload for the ``UniTuple`` argument type. 2. The second highlighted section is the overload for the ``Number`` argument type that has been inlined as the cost model function decided to do so as the argument was an ``Integer`` type instance. 3. The third highlighted section is the overload for the ``Number`` argument type that has not inlined as the cost model function decided to reject it as the argument was an ``Complex`` type instance. 4. That dead code elimination has not been performed and as a result there are superfluous statements present in the IR. Using a function to limit the inlining depth of a recursive function ==================================================================== When using recursive inlines, you can terminate the compilation by using a cost model. .. code:: python from numba import njit import numpy as np class CostModel(object): def __init__(self, max_inlines): self._count = 0 self._max_inlines = max_inlines def __call__(self, expr, caller, callee): ret = self._count < self._max_inlines self._count += 1 return ret @njit(inline=CostModel(3)) def factorial(n): if n <= 0: return 1 return n * factorial(n - 1) factorial(5) numba-0.55.1/docs/source/developer/listings.rst000664 000000 000000 00000001301 14174536160 021471 0ustar00rootroot000000 000000 Listings ======== This shows listings from compiler internal registries (e.g. lowering definitions). The information is provided as developer reference. When possible, links to source code are provided via github links. New style listings ------------------ The following listings are generated from ``numba.help.inspector.write_listings()``. Users can run ``python -m numba.help.inspector --format=rst `` to recreate the the documentation. .. toctree:: :maxdepth: 2 autogen_builtins_listing.rst autogen_math_listing.rst autogen_cmath_listing.rst autogen_numpy_listing.rst Old style listings ------------------ .. toctree:: :maxdepth: 2 autogen_lower_listing.rst numba-0.55.1/docs/source/developer/literal.rst000664 000000 000000 00000004420 14174536160 021276 0ustar00rootroot000000 000000 .. 
_developer-literally: ====================== Notes on Literal Types ====================== .. note:: This document describes an advanced feature designed to overcome some limitations of the compilation mechanism relating to types. Some features need to specialize based on the literal value during compilation to produce type-stable code necessary for successful compilation in Numba. This can be achieved by propagating the literal value through the type system. Numba recognizes inline literal values as :class:`numba.types.Literal`. For example:: def foo(x): a = 123 return bar(x, a) Numba will infer the type of ``a`` as ``Literal[int](123)``. The definition of ``bar()`` can subsequently specialize its implementation knowing that the second argument is an ``int`` with the value ``123``. ``Literal`` Type ---------------- Classes and methods related to the ``Literal`` type. .. autoclass:: numba.types.Literal .. autofunction:: numba.types.literal .. autofunction:: numba.types.unliteral .. autofunction:: numba.types.maybe_literal Specifying for Literal Typing ----------------------------- To specify a value as a ``Literal`` type in code scheduled for JIT compilation, use the following function: .. autofunction:: numba.literally Code Example ~~~~~~~~~~~~ .. literalinclude:: ../../../numba/tests/doc_examples/test_literally_usage.py :language: python :caption: from ``test_literally_usage`` of ``numba/tests/doc_examples/test_literally_usage.py`` :start-after: magictoken.ex_literally_usage.begin :end-before: magictoken.ex_literally_usage.end :dedent: 4 :linenos: Internal Details ~~~~~~~~~~~~~~~~ Internally, the compiler raises a ``ForceLiteralArgs`` exception to signal the dispatcher to wrap specified arguments using the ``Literal`` type. .. autoclass:: numba.errors.ForceLiteralArg :members: __init__, combine, __or__ Inside Extensions ----------------- ``@overload`` extensions can use ``literally`` inside the implementation body like in normal jit-code. Explicit handling of literal requirements is possible through use of the following: .. autoclass:: numba.extending.SentryLiteralArgs :members: .. autoclass:: numba.extending.BoundLiteralArgs :members: .. autofunction:: numba.extending.sentry_literal_args numba-0.55.1/docs/source/developer/live_variable_analysis.rst000664 000000 000000 00000006345 14174536160 024357 0ustar00rootroot000000 000000 .. _live variable analysis: ====================== Live Variable Analysis ====================== (Related issue https://github.com/numba/numba/pull/1611) Numba uses reference-counting for garbage collection, a technique that requires cooperation by the compiler. The Numba IR encodes the location where a decref must be inserted. These locations are determined by live variable analysis. The corresponding source code is the ``_insert_var_dels()`` method in https://github.com/numba/numba/blob/master/numba/interpreter.py. Under Python semantics, once a variable is defined inside a function, it is alive until it is explicitly deleted or the function scope ends. However, Numba analyzes the code to determine the minimum bound of the lifetime of each variable from its definition and usages during compilation. As soon as a variable is unreachable, a ``del`` instruction is inserted at the closest basic block (either at the start of the next block(s) or at the end of the current block). This means variables can be released earlier than in regular Python code. The behavior of the live variable analysis affects memory usage of the compiled code.
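As an illustration, the early release can be observed with the same IR-dumping technique used elsewhere in this manual. The snippet below is a sketch (the function and variable names are hypothetical); the dumped IR shows ``del`` instructions placed at the last use of each variable rather than at the end of the function:

.. code-block:: python

   import os
   os.environ["NUMBA_DUMP_IR"] = "1"  # set before importing numba

   import numpy as np
   from numba import njit

   @njit
   def total(arr):
       t = arr.sum()   # `t` is only used on the following line ...
       u = t + 1.0     # ... so a `del t` appears right after this use,
       return u        # earlier than CPython would release it

   total(np.ones(4))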
Internally, Numba does not differentiate between temporary variables and user variables. Since each operation generates at least one temporary variable, a function can accumulate a high number of temporary variables if they are not released as soon as possible. Our generator implementation can benefit from early releasing of variables, which reduces the size of the state to suspend at each yield point. Notes on behavior of the live variable analysis ================================================ Variable deleted before definition ----------------------------------- (Related issue: https://github.com/numba/numba/pull/1738) When a variable's lifetime is confined within the loop body (its definition and usage do not escape the loop body), like: .. code-block:: python def f(arr): # BB 0 res = 0 # BB 1 for i in (0, 1): # BB 2 t = arr[i] if t[i] > 1: # BB 3 res += t[i] # BB 4 return res Variable ``t`` is never referenced outside of the loop. A ``del`` instruction is emitted for ``t`` at the head of the loop (BB 1), before the variable is defined. The reason is obvious once we know the control flow graph:: +------------------------------> BB4 | | BB 0 --> BB 1 --> BB 2 ---> BB 3 ^ | | | V V +---------------------+ Variable ``t`` is defined in BB 2. In BB 2, the evaluation of ``t[i] > 1`` uses ``t``, which is the last use if execution takes the false branch and goes back to BB 1. In BB 3, ``t`` is only used in ``res += t[i]``, which is the last use if execution takes the true branch. Because BB 3, an outgoing branch of BB 2, uses ``t``, ``t`` must be deleted at the common predecessor. The closest point is BB 1, which does not have ``t`` defined from the incoming edge of BB 0. Alternatively, if ``t`` were deleted at BB 4, we would still have to delete the variable before its definition because BB 4 can be executed without executing the loop body (BB 2 and BB 3), where the variable is defined. numba-0.55.1/docs/source/developer/llvm_timings.rst000664 000000 000000 00000007706 14174536160 022354 0ustar00rootroot000000 000000 .. _developer-llvm-timings: ==================== Notes on timing LLVM ==================== Getting LLVM Pass Timings ------------------------- The dispatcher stores LLVM pass timings in the dispatcher object metadata under the ``llvm_pass_timings`` key when :envvar:`NUMBA_LLVM_PASS_TIMINGS` is enabled or ``numba.config.LLVM_PASS_TIMINGS`` is set to truthy. The timings information contains details on how much time has been spent in each pass. The pass timings are also grouped by their purpose. For example, there will be pass timings for function-level pre-optimizations, module-level optimizations, and object code generation. Code Example ~~~~~~~~~~~~ .. literalinclude:: ../../../numba/tests/doc_examples/test_llvm_pass_timings.py :language: python :caption: from ``test_pass_timings`` of ``numba/tests/doc_examples/test_llvm_pass_timings.py`` :start-after: magictoken.ex_llvm_pass_timings.begin :end-before: magictoken.ex_llvm_pass_timings.end :dedent: 16 :linenos: Example output: ..
code-block:: text Printing pass timings for JITCodeLibrary('DocsLLVMPassTimings.test_pass_timings..foo') Total time: 0.0376 == #0 Function passes on '_ZN5numba5tests12doc_examples22test_llvm_pass_timings19DocsLLVMPassTimings17test_pass_timings12$3clocals$3e7foo$241Ex' Percent: 4.8% Total 0.0018s Top timings: 0.0015s ( 81.6%) SROA #3 0.0002s ( 9.3%) Early CSE #2 0.0001s ( 4.0%) Simplify the CFG #9 0.0000s ( 1.5%) Prune NRT refops #4 0.0000s ( 1.1%) Post-Dominator Tree Construction #5 == #1 Function passes on '_ZN7cpython5numba5tests12doc_examples22test_llvm_pass_timings19DocsLLVMPassTimings17test_pass_timings12$3clocals$3e7foo$241Ex' Percent: 0.8% Total 0.0003s Top timings: 0.0001s ( 30.4%) Simplify the CFG #10 0.0001s ( 24.1%) Early CSE #3 0.0001s ( 17.8%) SROA #4 0.0000s ( 8.8%) Prune NRT refops #5 0.0000s ( 5.6%) Post-Dominator Tree Construction #6 == #2 Function passes on 'cfunc._ZN5numba5tests12doc_examples22test_llvm_pass_timings19DocsLLVMPassTimings17test_pass_timings12$3clocals$3e7foo$241Ex' Percent: 0.5% Total 0.0002s Top timings: 0.0001s ( 27.7%) Early CSE #4 0.0001s ( 26.8%) Simplify the CFG #11 0.0000s ( 13.8%) Prune NRT refops #6 0.0000s ( 7.4%) Post-Dominator Tree Construction #7 0.0000s ( 6.7%) Dominator Tree Construction #29 == #3 Module passes (cheap optimization for refprune) Percent: 3.7% Total 0.0014s Top timings: 0.0007s ( 52.0%) Combine redundant instructions 0.0001s ( 5.4%) Function Integration/Inlining 0.0001s ( 4.9%) Prune NRT refops #2 0.0001s ( 4.8%) Natural Loop Information 0.0001s ( 4.6%) Post-Dominator Tree Construction #2 == #4 Module passes (full optimization) Percent: 43.9% Total 0.0165s Top timings: 0.0032s ( 19.5%) Combine redundant instructions #9 0.0022s ( 13.5%) Combine redundant instructions #7 0.0010s ( 6.1%) Induction Variable Simplification 0.0008s ( 4.8%) Unroll loops #2 0.0007s ( 4.5%) Loop Vectorization == #5 Finalize object Percent: 46.3% Total 0.0174s Top timings: 0.0060s ( 34.6%) X86 DAG->DAG Instruction Selection #2 0.0019s ( 11.0%) Greedy Register Allocator #2 0.0013s ( 7.4%) Machine Instruction Scheduler #2 0.0012s ( 7.1%) Loop Strength Reduction 0.0004s ( 2.3%) Induction Variable Users API for custom analysis ~~~~~~~~~~~~~~~~~~~~~~~ It is possible to get more details than the summary text in the above example. The pass timings are stored in a :class:`numba.misc.llvm_pass_timings.PassTimingsCollection`, which contains methods for accessing the individual records for each pass. .. autoclass:: numba.misc.llvm_pass_timings.PassTimingsCollection :members: get_total_time, list_longest_first, summary, __getitem__, __len__ .. autoclass:: numba.misc.llvm_pass_timings.ProcessedPassTimings :members: get_raw_data, get_total_time, list_records, list_top, summary .. autoclass:: numba.misc.llvm_pass_timings.PassTimingRecord numba-0.55.1/docs/source/developer/numba-runtime.rst000664 000000 000000 00000016662 14174536160 022430 0ustar00rootroot000000 000000 .. _arch-numba-runtime: ====================== Notes on Numba Runtime ====================== The *Numba Runtime (NRT)* provides the language runtime to the *nopython mode* Python subset. NRT is a standalone C library with a Python binding. This allows :term:`NPM` runtime features to be used without the GIL. Currently, the only language feature implemented in NRT is memory management. Memory Management ================= NRT implements memory management for :term:`NPM` code. It uses *atomic reference counting* for thread-safe, deterministic memory management.
NRT maintains a separate ``MemInfo`` structure for storing information about each allocation. Cooperating with CPython ------------------------ For NRT to cooperate with CPython, the NRT python binding provides adaptors for converting python objects that export a memory region. When such an object is used as an argument to a :term:`NPM` function, a new ``MemInfo`` is created and it acquires a reference to the Python object. When a :term:`NPM` value is returned to the Python interpreter, the associated ``MemInfo`` (if any) is checked. If the ``MemInfo`` references a Python object, the underlying Python object is released and returned instead. Otherwise, the ``MemInfo`` is wrapped in a Python object and returned. Additional processing may be required depending on the type. The current implementation supports NumPy arrays and any buffer-exporting types. Compiler-side Cooperation ------------------------- NRT reference counting requires the compiler to emit incref/decref operations according to the usage. When the reference count drops to zero, the compiler must call the destructor routine in NRT. .. _nrt-refct-opt-pass: Optimizations ------------- The compiler is allowed to emit incref/decref operations naively. It relies on an optimization pass to remove redundant reference count operations. A new optimization pass was implemented in version 0.52.0 to remove reference count operations that fall into the following four categories of control-flow structure---per basic-block, diamond, fanout, fanout+raise. See the documentation for :envvar:`NUMBA_LLVM_REFPRUNE_FLAGS` for their descriptions. The old optimization pass runs at block level to avoid control flow analysis. It depends on the LLVM function optimization pass to simplify the control flow, perform stack-to-register conversion, and simplify instructions. It works by matching and removing incref and decref pairs within each block. The old pass can be enabled by setting :envvar:`NUMBA_LLVM_REFPRUNE_PASS` to `0`. Important assumptions --------------------- Both the old (pre-0.52.0) and the new (post-0.52.0) optimization passes assume that the only function that can consume a reference is ``NRT_decref``. It is important that there are no other functions that will consume references. Since the passes operate on LLVM IR, the "functions" here refer to any callee in a LLVM call instruction. To summarize, all functions exposed to the refcount optimization pass **must not** consume counted references unless done so via ``NRT_decref``. Quirks of the old optimization pass ----------------------------------- Since the pre-0.52.0 `refcount optimization pass `_ requires the LLVM function optimization pass, the pass works on the LLVM IR as text. The optimized IR is then materialized again as a new LLVM in-memory bitcode object. Debugging Leaks --------------- To debug reference leaks in NRT MemInfo, each MemInfo python object has a ``.refcount`` attribute for inspection. To get the MemInfo from a ndarray allocated by NRT, use the ``.base`` attribute. To debug memory leaks in NRT, the ``numba.core.runtime.rtsys`` defines ``.get_allocation_stats()``. It returns a namedtuple containing the number of allocations and deallocations since the start of the program. Checking that the allocation and deallocation counters match is the simplest way to know if the NRT is leaking. Debugging Leaks in C -------------------- The start of `numba/core/runtime/nrt.h `_ has these lines: ..
code-block:: C /* Debugging facilities - enabled at compile-time */ /* #undef NDEBUG */ #if 0 # define NRT_Debug(X) X #else # define NRT_Debug(X) if (0) { X; } #endif Undefining NDEBUG (uncomment the ``#undef NDEBUG`` line) enables the assertion check in NRT. Enabling NRT_Debug (replace ``#if 0`` with ``#if 1``) turns on debug print inside NRT. Recursion Support ================= During the compilation of a pair of mutually recursive functions, one of the functions will contain unresolved symbol references since the compiler handles one function at a time. The memory for the unresolved symbols is allocated and initialized to the address of the *unresolved symbol abort* function (``nrt_unresolved_abort``) just before the machine code is generated by LLVM. These symbols are tracked and resolved as new functions are compiled. If a bug prevents the resolution of these symbols, the abort function will be called, raising a ``RuntimeError`` exception. The *unresolved symbol abort* function is defined in the NRT with a zero-argument signature. The caller is safe to call it with an arbitrary number of arguments. Therefore, it is safe to use it in place of the intended callee. Using the NRT from C code ========================= Externally compiled C code should use the ``NRT_api_functions`` struct as a function table to access the NRT API. The struct is defined in :ghfile:`numba/core/runtime/nrt_external.h`. Users can use the utility function ``numba.extending.include_path()`` to determine the include directory for Numba provided C headers. .. literalinclude:: ../../../numba/core/runtime/nrt_external.h :language: C :caption: `numba/core/runtime/nrt_external.h` Inside Numba compiled code, the ``numba.core.unsafe.nrt.NRT_get_api()`` intrinsic can be used to obtain a pointer to the ``NRT_api_functions``. Here is an example that uses ``nrt_external.h``: .. code-block:: C #include <stdlib.h> #include "numba/core/runtime/nrt_external.h" void my_dtor(void *ptr) { free(ptr); } NRT_MemInfo* my_allocate(NRT_api_functions *nrt) { /* heap allocate some memory */ void * data = malloc(10); /* wrap the allocated memory; yield a new reference */ NRT_MemInfo *mi = nrt->manage_memory(data, my_dtor); /* acquire reference */ nrt->acquire(mi); /* release reference */ nrt->release(mi); return mi; } It is important to ensure that the NRT is initialized prior to making calls to it; calling ``numba.core.runtime.nrt.rtsys.initialize(context)`` from Python will have the desired effect. Similarly the code snippet: .. code-block:: Python from numba.core.registry import cpu_target # Get the CPU target singleton cpu_target.target_context # Access the target_context property to initialize will achieve the same specifically for Numba's CPU target (the default). Failure to initialize the NRT will result in access violations as function pointers for various internal atomic operations will be missing in the ``NRT_MemSys`` struct. Future Plan =========== The plan for NRT is to make a standalone shared library that can be linked to Numba compiled code, including use within the Python interpreter and without the Python interpreter. To make that work, we will be doing some refactoring: * numba :term:`NPM` code references statically compiled code in "helperlib.c". Those functions should be moved to NRT.
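As a practical complement to the *Debugging Leaks* section above, the following sketch shows the counter-matching check in action. It assumes the statistics namedtuple exposes ``alloc`` and ``free`` fields; treat the exact field names as illustrative:

.. code-block:: python

   import numpy as np
   from numba import njit
   from numba.core.runtime import rtsys

   @njit
   def work(n):
       acc = 0.0
       for i in range(n):
           tmp = np.ones(3)   # an NRT-managed allocation per iteration
           acc += tmp[0]      # `tmp` is released at the end of each iteration
       return acc

   work(1)  # compile first, so compilation-time allocations are excluded

   before = rtsys.get_allocation_stats()
   work(100)
   after = rtsys.get_allocation_stats()

   # If the NRT is not leaking, the allocations made by the call are
   # matched by an equal number of deallocations.
   assert after.alloc - before.alloc == after.free - before.free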
numba-0.55.1/docs/source/developer/release.rst000664 000000 000000 00000004403 14174536160 021263 0ustar00rootroot000000 000000 Numba Release Process ===================== The goal of the Numba release process -- from a high level perspective -- is to publish source and binary artifacts that correspond to a given version number. This usually involves a sequence of individual tasks that must be performed in the correct order and with diligence. Numba and llvmlite are commonly released in lockstep since there is usually a one-to-one mapping between a Numba version and a corresponding llvmlite version. This section contains various notes and templates that can be used to create a Numba release checklist on the Numba Github issue tracker. This is an aid for the maintainers during the release process and helps to ensure that all tasks are completed in the correct order and that no tasks are accidentally omitted. If new or additional items do appear during release, please do remember to add them to the checklist templates. Also note that the release process itself is always a work in progress. This means that some of the information here may be outdated. If you notice this please do remember to submit a pull-request to update this document. All release checklists are available as Github issue templates. To create a new release checklist simply open a new issue and select the correct template. Primary Release Candidate Checklist ----------------------------------- This is for the first/primary release candidate of a minor release, i.e. the first release of every series. It is special, because during this release, the release branch will have to be created. Release candidate indexing begins at 1. .. literalinclude:: ../../../.github/ISSUE_TEMPLATE/first_rc_checklist.md :language: md :lines: 9- `Open a primary release checklist `_. Subsequent Release Candidates, Final Releases and Patch Releases ---------------------------------------------------------------- Releases subsequent to the first release in a series usually involve a series of cherry-picks; the recipe is therefore slightly different. .. literalinclude:: ../../../.github/ISSUE_TEMPLATE/sub_rc_checklist.md :language: md :lines: 9- `Open a subsequent release checklist `_. numba-0.55.1/docs/source/developer/repomap.rst000664 000000 000000 00000063143 14174536160 021304 0ustar00rootroot000000 000000 A Map of the Numba Repository ============================= The Numba repository is quite large and, due to its age, has functionality spread around many locations. To help orient developers, this document will try to summarize where different categories of functionality can be found. Support Files ------------- Build and Packaging ''''''''''''''''''' - :ghfile:`setup.py` - Standard Python distutils/setuptools script - :ghfile:`MANIFEST.in` - Distutils packaging instructions - :ghfile:`requirements.txt` - Pip package requirements, not used by conda - :ghfile:`versioneer.py` - Handles automatic setting of version in installed package from git tags - :ghfile:`.flake8` - Preferences for code formatting. Files should be fixed and removed from the exception list as time allows. - :ghfile:`.pre-commit-config.yaml` - Configuration file for pre-commit hooks. - :ghfile:`.readthedocs.yml` - Configuration file for Read the Docs. - :ghfile:`buildscripts/condarecipe.local` - Conda build recipe - :ghfile:`buildscripts/condarecipe_clone_icc_rt` - Recipe to build a standalone icc_rt package.
Continuous Integration '''''''''''''''''''''' - :ghfile:`azure-pipelines.yml` - Azure Pipelines CI config (active: Win/Mac/Linux) - :ghfile:`buildscripts/azure/` - Azure Pipeline configuration for specific platforms - :ghfile:`buildscripts/appveyor/` - Appveyor build scripts - :ghfile:`buildscripts/incremental/` - Generic scripts for building Numba on various CI systems - :ghfile:`codecov.yml` - Codecov.io coverage reporting Documentation ''''''''''''' - :ghfile:`LICENSE` - License for Numba - :ghfile:`LICENSES.third-party` - License for third party code vendored into Numba - :ghfile:`README.rst` - README for repo, also uploaded to PyPI - :ghfile:`CONTRIBUTING.md` - Documentation on how to contribute to project (out of date, should be updated to point to Sphinx docs) - :ghfile:`CHANGE_LOG` - History of Numba releases, also directly embedded into Sphinx documentation - :ghfile:`docs/` - Documentation source - :ghfile:`docs/_templates/` - Directory for templates (to override defaults with Sphinx theme) - :ghfile:`docs/Makefile` - Used to build Sphinx docs with ``make`` - :ghfile:`docs/source` - ReST source for Numba documentation - :ghfile:`docs/_static/` - Static CSS and image assets for Numba docs - :ghfile:`docs/gh-pages.py` - Utility script to update Numba docs (stored as gh-pages) - :ghfile:`docs/make.bat` - Not used (remove?) - :ghfile:`docs/requirements.txt` - Pip package requirements for building docs with Read the Docs. - :ghfile:`numba/scripts/generate_lower_listing.py` - Dump all registered implementations decorated with ``@lower*`` for reference documentation. Currently misses implementations from the higher level extension API. Numba Source Code ----------------- Numba ships with both the source code and tests in one package. - :ghfile:`numba/` - all of the source code and tests Public API '''''''''' These define aspects of the public Numba interface. - :ghfile:`numba/core/decorators.py` - User-facing decorators for compiling regular functions on the CPU - :ghfile:`numba/core/extending.py` - Public decorators for extending Numba (``overload``, ``intrinsic``, etc) - :ghfile:`numba/experimental/structref.py` - Public API for defining a mutable struct - :ghfile:`numba/core/ccallback.py` - ``@cfunc`` decorator for compiling functions to a fixed C signature. Used to make callbacks. - :ghfile:`numba/np/ufunc/decorators.py` - ufunc/gufunc compilation decorators - :ghfile:`numba/core/config.py` - Numba global config options and environment variable handling - :ghfile:`numba/core/annotations` - Gathering and printing type annotations of Numba IR - :ghfile:`numba/core/annotations/pretty_annotate.py` - Code highlighting of Numba functions and types (both ANSI terminal and HTML) - :ghfile:`numba/core/event.py` - A simple event system for applications to listen to specific compiler events. Dispatching ''''''''''' - :ghfile:`numba/core/dispatcher.py` - Dispatcher objects are compiled functions produced by ``@jit``. A dispatcher has different implementations for different type signatures. - :ghfile:`numba/_dispatcher.cpp` - C++ dispatcher implementation (for speed on common data types) - :ghfile:`numba/core/retarget.py` - Support for dispatcher objects to switch target via a specific with-context. 
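The per-signature specialization performed by :ghfile:`numba/core/dispatcher.py` (see above) can be observed from pure Python. A brief sketch using only the public API:

.. code-block:: python

   from numba import njit

   @njit
   def add(a, b):
       return a + b

   add(1, 2)      # compiles an integer specialization
   add(1.0, 2.5)  # compiles a second, floating-point specialization

   # The dispatcher records one compiled implementation per type signature,
   # e.g. [(int64, int64), (float64, float64)] on a 64-bit machine.
   print(add.signatures)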
Compiler Pipeline ''''''''''''''''' - :ghfile:`numba/core/compiler.py` - Compiler pipelines and flags - :ghfile:`numba/core/errors.py` - Numba exception and warning classes - :ghfile:`numba/core/ir.py` - Numba IR data structure objects - :ghfile:`numba/core/bytecode.py` - Bytecode parsing and function identity (??) - :ghfile:`numba/core/interpreter.py` - Translate Python interpreter bytecode to Numba IR - :ghfile:`numba/core/analysis.py` - Utility functions to analyze Numba IR (variable lifetime, prune branches, etc) - :ghfile:`numba/core/dataflow.py` - Dataflow analysis for Python bytecode (used in analysis.py) - :ghfile:`numba/core/controlflow.py` - Control flow analysis of Numba IR and Python bytecode - :ghfile:`numba/core/typeinfer.py` - Type inference algorithm - :ghfile:`numba/core/transforms.py` - Numba IR transformations - :ghfile:`numba/core/rewrites` - Rewrite passes used by compiler - :ghfile:`numba/core/rewrites/__init__.py` - Loads all rewrite passes so they are put into the registry - :ghfile:`numba/core/rewrites/registry.py` - Registry object for collecting rewrite passes - :ghfile:`numba/core/rewrites/ir_print.py` - Write print() calls into special print nodes in the IR - :ghfile:`numba/core/rewrites/static_raise.py` - Converts exceptions with static arguments into a special form that can be lowered - :ghfile:`numba/core/rewrites/static_getitem.py` - Rewrites getitem and setitem with constant arguments to allow type inference - :ghfile:`numba/core/rewrites/static_binop.py` - Rewrites binary operations (specifically ``**``) with constant arguments so faster code can be generated - :ghfile:`numba/core/inline_closurecall.py` - Inlines body of closure functions to call site. Support for array comprehensions, reduction inlining, and stencil inlining. - :ghfile:`numba/core/postproc.py` - Postprocessor for Numba IR that computes variable lifetime, inserts del operations, and handles generators - :ghfile:`numba/core/lowering.py` - General implementation of lowering Numba IR to LLVM :ghfile:`numba/core/environment.py` - Runtime environment object - :ghfile:`numba/core/withcontexts.py` - General scaffolding for implementing context managers in nopython mode, and the objectmode context manager - :ghfile:`numba/core/pylowering.py` - Lowering of Numba IR in object mode - :ghfile:`numba/core/pythonapi.py` - LLVM IR code generation to interface with CPython API - :ghfile:`numba/core/targetconfig.py` - Utils for target configurations such as compiler flags. Type Management ''''''''''''''' - :ghfile:`numba/core/typeconv/` - Implementation of type casting and type signature matching in both C++ and Python - :ghfile:`numba/capsulethunk.h` - Used by typeconv - :ghfile:`numba/core/types/` - definition of the Numba type hierarchy, used everywhere in compiler to select implementations - :ghfile:`numba/core/consts.py` - Constant inference (used to make constant values available during codegen when possible) - :ghfile:`numba/core/datamodel` - LLVM IR representations of data types in different contexts - :ghfile:`numba/core/datamodel/models.py` - Models for most standard types - :ghfile:`numba/core/datamodel/registry.py` - Decorator to register new data models - :ghfile:`numba/core/datamodel/packer.py` - Pack typed values into a data structure - :ghfile:`numba/core/datamodel/testing.py` - Data model tests (this should move??) 
- :ghfile:`numba/core/datamodel/manager.py` - Map types to data models Compiled Extensions ''''''''''''''''''' Numba uses a small amount of compiled C/C++ code for core functionality, like dispatching and type matching where performance matters, and it is more convenient to encapsulate direct interaction with CPython APIs. - :ghfile:`numba/_arraystruct.h` - Struct for holding NumPy array attributes. Used in helperlib and the Numba Runtime. - :ghfile:`numba/_helperlib.c` - C functions required by Numba compiled code at runtime. Linked into ahead-of-time compiled modules - :ghfile:`numba/_helpermod.c` - Python extension module with pointers to functions from ``_helperlib.c`` and ``_npymath_exports.c`` - :ghfile:`numba/_npymath_exports.c` - Export function pointer table to NumPy C math functions - :ghfile:`numba/_dynfuncmod.c` - Python extension module exporting _dynfunc.c functionality - :ghfile:`numba/_dynfunc.c` - C level Environment and Closure objects (keep in sync with numba/target/base.py) - :ghfile:`numba/mathnames.h` - Macros for defining names of math functions - :ghfile:`numba/_pymodule.h` - C macros for Python 2/3 portable naming of C API functions - :ghfile:`numba/mviewbuf.c` - Handles Python memoryviews - :ghfile:`numba/_typeof.{h,c}` - C implementation of type fingerprinting, used by dispatcher - :ghfile:`numba/_numba_common.h` - Portable C macro for marking symbols that can be shared between object files, but not outside the library. Misc Support '''''''''''' - :ghfile:`numba/_version.py` - Updated by versioneer - :ghfile:`numba/core/runtime` - Language runtime. Currently manages reference-counted memory allocated on the heap by Numba-compiled functions - :ghfile:`numba/core/ir_utils.py` - Utility functions for working with Numba IR data structures - :ghfile:`numba/core/cgutils.py` - Utility functions for generating common code patterns in LLVM IR - :ghfile:`numba/core/utils.py` - Python 2 backports of Python 3 functionality (also imports local copy of ``six``) - :ghfile:`numba/core/overload_glue.py` - Functions for wrapping split typing and lowering API use cases into overloads. - :ghfile:`numba/misc/appdirs.py` - Vendored package for determining application config directories on every platform - :ghfile:`numba/core/compiler_lock.py` - Global compiler lock because Numba's usage of LLVM is not thread-safe - :ghfile:`numba/misc/special.py` - Python stub implementations of special Numba functions (prange, gdb*) - :ghfile:`numba/core/itanium_mangler.py` - Python implementation of Itanium C++ name mangling - :ghfile:`numba/misc/findlib.py` - Helper function for locating shared libraries on all platforms - :ghfile:`numba/core/debuginfo.py` - Helper functions to construct LLVM IR debug info - :ghfile:`numba/core/unsafe/refcount.py` - Read reference count of object - :ghfile:`numba/core/unsafe/eh.py` - Exception handling helpers - :ghfile:`numba/core/unsafe/nrt.py` - Numba runtime (NRT) helpers - :ghfile:`numba/cpython/unsafe/tuple.py` - Replace a value in a tuple slot - :ghfile:`numba/np/unsafe/ndarray.py` - NumPy array helpers - :ghfile:`numba/core/unsafe/bytes.py` - Copying and dereferencing data from void pointers - :ghfile:`numba/misc/dummyarray.py` - Used by GPU backends to hold array information on the host, but not the data. 
- :ghfile:`numba/core/callwrapper.py` - Handles argument unboxing and releasing the GIL when moving from Python to nopython mode - :ghfile:`numba/np/numpy_support.py` - Helper functions for working with NumPy and translating Numba types to and from NumPy dtypes. - :ghfile:`numba/core/tracing.py` - Decorator for tracing Python calls and emitting log messages - :ghfile:`numba/core/funcdesc.py` - Classes for describing function metadata (used in the compiler) - :ghfile:`numba/core/sigutils.py` - Helper functions for parsing and normalizing Numba type signatures - :ghfile:`numba/core/serialize.py` - Support for pickling compiled functions - :ghfile:`numba/core/caching.py` - Disk cache for compiled functions - :ghfile:`numba/np/npdatetime.py` - Helper functions for implementing NumPy datetime64 support - :ghfile:`numba/misc/llvm_pass_timings.py` - Helper to record timings of LLVM passes. - :ghfile:`numba/cloudpickle` - Vendored cloudpickle subpackage Core Python Data Types '''''''''''''''''''''' - :ghfile:`numba/_hashtable.{h,c}` - Adaptation of the Python 3.7 hash table implementation - :ghfile:`numba/cext/dictobject.{h,c}` - C level implementation of typed dictionary - :ghfile:`numba/typed/dictobject.py` - Nopython mode wrapper for typed dictionary - :ghfile:`numba/cext/listobject.{h,c}` - C level implementation of typed list - :ghfile:`numba/typed/listobject.py` - Nopython mode wrapper for typed list - :ghfile:`numba/typed/typedobjectutils.py` - Common utilities for typed dictionary and list - :ghfile:`numba/cpython/unicode.py` - Unicode strings (Python 3.5 and later) - :ghfile:`numba/typed` - Python interfaces to statically typed containers - :ghfile:`numba/typed/typeddict.py` - Python interface to typed dictionary - :ghfile:`numba/typed/typedlist.py` - Python interface to typed list - :ghfile:`numba/experimental/jitclass` - Implementation of experimental JIT compilation of Python classes - :ghfile:`numba/core/generators.py` - Support for lowering Python generators Math '''' - :ghfile:`numba/_random.c` - Reimplementation of NumPy / CPython random number generator - :ghfile:`numba/_lapack.c` - Wrappers for calling BLAS and LAPACK functions (requires SciPy) ParallelAccelerator ''''''''''''''''''' Code transformation passes that extract parallelizable code from a function and convert it into multithreaded gufunc calls. - :ghfile:`numba/parfors/parfor.py` - General ParallelAccelerator - :ghfile:`numba/parfors/parfor_lowering.py` - gufunc lowering for ParallelAccelerator - :ghfile:`numba/parfors/array_analysis.py` - Array analysis passes used in ParallelAccelerator Stencil ''''''' Implementation of ``@stencil``: - :ghfile:`numba/stencils/stencil.py` - Stencil function decorator (implemented without ParallelAccelerator) - :ghfile:`numba/stencils/stencilparfor.py` - ParallelAccelerator implementation of stencil Debugging Support ''''''''''''''''' - :ghfile:`numba/misc/gdb_hook.py` - Hooks to jump into GDB from nopython mode - :ghfile:`numba/misc/cmdlang.gdb` - Commands to setup GDB for setting explicit breakpoints from Python Type Signatures (CPU) ''''''''''''''''''''' Some (usually older) Numba supported functionality separates the declaration of allowed type signatures from the definition of implementations. This package contains registries of type signatures that must be matched during type inference. 
- :ghfile:`numba/core/typing` - Type signature module - :ghfile:`numba/core/typing/templates.py` - Base classes for type signature templates - :ghfile:`numba/core/typing/cmathdecl.py` - Python complex math (``cmath``) module - :ghfile:`numba/core/typing/bufproto.py` - Interpreting objects supporting the buffer protocol - :ghfile:`numba/core/typing/mathdecl.py` - Python ``math`` module - :ghfile:`numba/core/typing/listdecl.py` - Python lists - :ghfile:`numba/core/typing/builtins.py` - Python builtin global functions and operators - :ghfile:`numba/core/typing/randomdecl.py` - Python and NumPy ``random`` modules - :ghfile:`numba/core/typing/setdecl.py` - Python sets - :ghfile:`numba/core/typing/npydecl.py` - NumPy ndarray (and operators), NumPy functions - :ghfile:`numba/core/typing/arraydecl.py` - Python ``array`` module - :ghfile:`numba/core/typing/context.py` - Implementation of typing context (class that collects methods used in type inference) - :ghfile:`numba/core/typing/collections.py` - Generic container operations and namedtuples - :ghfile:`numba/core/typing/ctypes_utils.py` - Typing ctypes-wrapped function pointers - :ghfile:`numba/core/typing/enumdecl.py` - Enum types - :ghfile:`numba/core/typing/cffi_utils.py` - Typing of CFFI objects - :ghfile:`numba/core/typing/typeof.py` - Implementation of typeof operations (maps Python object to Numba type) - :ghfile:`numba/core/typing/asnumbatype.py` - Implementation of ``as_numba_type`` operations (maps Python types to Numba type) - :ghfile:`numba/core/typing/npdatetime.py` - Datetime dtype support for NumPy arrays Target Implementations (CPU) '''''''''''''''''''''''''''' Implementations of Python / NumPy functions and some data models. These modules are responsible for generating LLVM IR during lowering. Note that some of these modules do not have counterparts in the typing package because newer Numba extension APIs (like overload) allow typing and implementation to be specified together. - :ghfile:`numba/core/cpu.py` - Context for code gen on CPU - :ghfile:`numba/core/base.py` - Base class for all target contexts - :ghfile:`numba/core/codegen.py` - Driver for code generation - :ghfile:`numba/core/boxing.py` - Boxing and unboxing for most data types - :ghfile:`numba/core/intrinsics.py` - Utilities for converting LLVM intrinsics to other math calls - :ghfile:`numba/core/callconv.py` - Implements different calling conventions for Numba-compiled functions - :ghfile:`numba/core/options.py` - Container for options that control lowering - :ghfile:`numba/core/optional.py` - Special type representing value or ``None`` - :ghfile:`numba/core/registry.py` - Registry object for collecting implementations for a specific target - :ghfile:`numba/core/imputils.py` - Helper functions for lowering - :ghfile:`numba/core/externals.py` - Registers external C functions needed to link generated code - :ghfile:`numba/core/fastmathpass.py` - Rewrite pass to add fastmath attributes to function call sites and binary operations - :ghfile:`numba/core/removerefctpass.py` - Rewrite pass to remove unnecessary incref/decref pairs - :ghfile:`numba/core/descriptors.py` - empty base class for all target descriptors (is this needed?) 
- :ghfile:`numba/cpython/builtins.py` - Python builtin functions and operators - :ghfile:`numba/cpython/cmathimpl.py` - Python complex math module - :ghfile:`numba/cpython/enumimpl.py` - Enum objects - :ghfile:`numba/cpython/hashing.py` - Hashing algorithms - :ghfile:`numba/cpython/heapq.py` - Python ``heapq`` module - :ghfile:`numba/cpython/iterators.py` - Iterable data types and iterators - :ghfile:`numba/cpython/listobj.py` - Python lists - :ghfile:`numba/cpython/mathimpl.py` - Python ``math`` module - :ghfile:`numba/cpython/numbers.py` - Numeric values (int, float, etc.) - :ghfile:`numba/cpython/printimpl.py` - Print function - :ghfile:`numba/cpython/randomimpl.py` - Python and NumPy ``random`` modules - :ghfile:`numba/cpython/rangeobj.py` - Python ``range`` objects - :ghfile:`numba/cpython/slicing.py` - Slice objects, and index calculations used in slicing - :ghfile:`numba/cpython/setobj.py` - Python set type - :ghfile:`numba/cpython/tupleobj.py` - Tuples (statically typed as immutable struct) - :ghfile:`numba/misc/cffiimpl.py` - CFFI functions - :ghfile:`numba/misc/quicksort.py` - Quicksort implementation used with list and array objects - :ghfile:`numba/misc/mergesort.py` - Mergesort implementation used with array objects - :ghfile:`numba/np/arraymath.py` - Math operations on arrays (both Python and NumPy) - :ghfile:`numba/np/arrayobj.py` - Array operations (both NumPy and buffer protocol) - :ghfile:`numba/np/linalg.py` - NumPy linear algebra operations - :ghfile:`numba/np/npdatetime.py` - NumPy datetime operations - :ghfile:`numba/np/npyfuncs.py` - Kernels used in generating some NumPy ufuncs - :ghfile:`numba/np/npyimpl.py` - Implementations of most NumPy ufuncs - :ghfile:`numba/np/polynomial.py` - ``numpy.roots`` function - :ghfile:`numba/np/ufunc_db.py` - Big table mapping types to ufunc implementations Ufunc Compiler and Runtime '''''''''''''''''''''''''' - :ghfile:`numba/np/ufunc` - ufunc compiler implementation - :ghfile:`numba/np/ufunc/_internal.{h,c}` - Python extension module with helper functions that use CPython & NumPy C API - :ghfile:`numba/np/ufunc/_ufunc.c` - Used by ``_internal.c`` - :ghfile:`numba/np/ufunc/deviceufunc.py` - Custom ufunc dispatch for non-CPU targets - :ghfile:`numba/np/ufunc/gufunc_scheduler.{h,cpp}` - Schedule work chunks to threads - :ghfile:`numba/np/ufunc/dufunc.py` - Special ufunc that can compile new implementations at call time - :ghfile:`numba/np/ufunc/ufuncbuilder.py` - Top-level orchestration of ufunc/gufunc compiler pipeline - :ghfile:`numba/np/ufunc/sigparse.py` - Parser for generalized ufunc indexing signatures - :ghfile:`numba/np/ufunc/parallel.py` - Codegen for ``parallel`` target - :ghfile:`numba/np/ufunc/array_exprs.py` - Rewrite pass for turning array expressions in regular functions into ufuncs - :ghfile:`numba/np/ufunc/wrappers.py` - Wrap scalar function kernel with loops - :ghfile:`numba/np/ufunc/workqueue.{h,c}` - Threading backend based on pthreads/Windows threads and queues - :ghfile:`numba/np/ufunc/omppool.cpp` - Threading backend based on OpenMP - :ghfile:`numba/np/ufunc/tbbpool.cpp` - Threading backend based on TBB Unit Tests (CPU) '''''''''''''''' CPU unit tests (GPU target unit tests are listed in later sections). - :ghfile:`runtests.py` - Convenience script that launches the test runner and turns on full compiler tracebacks - :ghfile:`run_coverage.py` - Runs test suite with coverage tracking enabled - :ghfile:`.coveragerc` - Coverage.py configuration - :ghfile:`numba/runtests.py` - Entry point to unittest runner
- :ghfile:`numba/testing/_runtests.py` - Implementation of custom test runner command line interface - :ghfile:`numba/tests/test_*` - Test cases - :ghfile:`numba/tests/*_usecases.py` - Python functions compiled by some unit tests - :ghfile:`numba/tests/support.py` - Helper functions for testing and special TestCase implementation - :ghfile:`numba/tests/dummy_module.py` - Module used in ``test_dispatcher.py`` - :ghfile:`numba/tests/npyufunc` - ufunc / gufunc compiler tests - :ghfile:`numba/testing` - Support code for testing - :ghfile:`numba/testing/loader.py` - Find tests on disk - :ghfile:`numba/testing/notebook.py` - Support for testing notebooks - :ghfile:`numba/testing/main.py` - Numba test runner Command Line Utilities '''''''''''''''''''''' - :ghfile:`bin/numba` - Command line stub, delegates to main in ``numba_entry.py`` - :ghfile:`numba/misc/numba_entry.py` - Main function for ``numba`` command line tool - :ghfile:`numba/pycc` - Ahead of time compilation of functions to shared library extension - :ghfile:`numba/pycc/__init__.py` - Main function for ``pycc`` command line tool - :ghfile:`numba/pycc/cc.py` - User-facing API for tagging functions to compile ahead of time - :ghfile:`numba/pycc/compiler.py` - Compiler pipeline for creating standalone Python extension modules - :ghfile:`numba/pycc/llvm_types.py` - Aliases to LLVM data types used by ``compiler.py`` - :ghfile:`numba/pycc/pycc` - Stub to call main function. Is this still used? - :ghfile:`numba/pycc/modulemixin.c` - C file compiled into every compiled extension. Pulls in C source from Numba core that is needed to make extension standalone - :ghfile:`numba/pycc/platform.py` - Portable interface to platform-specific compiler toolchains - :ghfile:`numba/pycc/decorators.py` - Deprecated decorators for tagging functions to compile. Use ``cc.py`` instead. CUDA GPU Target ''''''''''''''' Note that the CUDA target does reuse some parts of the CPU target. - :ghfile:`numba/cuda/` - The implementation of the CUDA (NVIDIA GPU) target and associated unit tests - :ghfile:`numba/cuda/decorators.py` - Compiler decorators for CUDA kernels and device functions - :ghfile:`numba/cuda/dispatcher.py` - Dispatcher for CUDA JIT functions - :ghfile:`numba/cuda/printimpl.py` - Special implementation of device printing - :ghfile:`numba/cuda/libdevice.py` - Registers libdevice functions - :ghfile:`numba/cuda/kernels/` - Custom kernels for reduction and transpose - :ghfile:`numba/cuda/device_init.py` - Initializes the CUDA target when imported - :ghfile:`numba/cuda/compiler.py` - Compiler pipeline for CUDA target - :ghfile:`numba/cuda/intrinsic_wrapper.py` - CUDA device intrinsics (shuffle, ballot, etc.) - :ghfile:`numba/cuda/initialize.py` - Deferred initialization of the CUDA device and subsystem.
Called only when user imports ``numba.cuda`` - :ghfile:`numba/cuda/simulator_init.py` - Initializes the CUDA simulator subsystem (only when user requests it with env var) - :ghfile:`numba/cuda/random.py` - Implementation of random number generator - :ghfile:`numba/cuda/api.py` - User-facing APIs imported into ``numba.cuda.*`` - :ghfile:`numba/cuda/stubs.py` - Python placeholders for functions that can only be used in GPU device code - :ghfile:`numba/cuda/simulator/` - Simulate execution of CUDA kernels in Python interpreter - :ghfile:`numba/cuda/vectorizers.py` - Subclasses of ufunc/gufunc compilers for CUDA - :ghfile:`numba/cuda/args.py` - Management of kernel arguments, including host<->device transfers - :ghfile:`numba/cuda/target.py` - Typing and target contexts for GPU - :ghfile:`numba/cuda/cudamath.py` - Type signatures for math functions in CUDA Python - :ghfile:`numba/cuda/errors.py` - Validation of kernel launch configuration - :ghfile:`numba/cuda/nvvmutils.py` - Helper functions for generating NVVM-specific IR - :ghfile:`numba/cuda/testing.py` - Support code for creating CUDA unit tests and capturing standard out - :ghfile:`numba/cuda/cudadecl.py` - Type signatures of CUDA API (threadIdx, blockIdx, atomics) in Python on GPU - :ghfile:`numba/cuda/cudaimpl.py` - Implementations of CUDA API functions on GPU - :ghfile:`numba/cuda/codegen.py` - Code generator object for CUDA target - :ghfile:`numba/cuda/cudadrv/` - Wrapper around CUDA driver API - :ghfile:`numba/cuda/tests/` - CUDA unit tests, skipped when CUDA is not detected - :ghfile:`numba/cuda/tests/cudasim/` - Tests of CUDA simulator - :ghfile:`numba/cuda/tests/nocuda/` - Tests for NVVM functionality when CUDA not present - :ghfile:`numba/cuda/tests/cudapy/` - Tests of compiling Python functions for GPU - :ghfile:`numba/cuda/tests/cudadrv/` - Tests of Python wrapper around CUDA API numba-0.55.1/docs/source/developer/rewrites.rst000664 000000 000000 00000041725 14174536160 021517 0ustar00rootroot000000 000000 ===================================================== Using the Numba Rewrite Pass for Fun and Optimization ===================================================== Overview ======== This section introduces intermediate representation (IR) rewrites, and how they can be used to implement optimizations. As discussed earlier in ":ref:`rewrite-typed-ir`", rewriting the Numba IR allows us to perform optimizations that would be much more difficult to perform at the lower LLVM level. Similar to the Numba type and lowering subsystems, the rewrite subsystem is user extensible. This extensibility affords Numba the possibility of supporting a wide variety of domain-specific optimizations (DSOs). The remaining subsections detail the mechanics of implementing a rewrite, registering a rewrite with the rewrite registry, and provide examples of adding new rewrites, as well as internals of the array expression optimization pass. We conclude by reviewing some use cases exposed in the examples, as well as reviewing any points where developers should take care. Rewriting Passes ================ Rewriting passes have a simple :func:`~Rewrite.match` and :func:`~Rewrite.apply` interface. The division between matching and rewriting follows how one would define a term rewrite in declarative domain-specific languages (DSLs).
In such DSLs, one may write a rewrite as follows::

    <match> => <replacement>

The ``<match>`` and ``<replacement>`` symbols represent IR term expressions, where the left-hand side presents a pattern to match, and the right-hand side an IR term constructor to build upon matching. Whenever the rewrite matches an IR pattern, any free variables in the left-hand side are bound within a custom environment. When applied, the rewrite uses the pattern matching environment to bind any free variables in the right-hand side. As Python is not commonly used in a declarative capacity, Numba uses object state to handle the transfer of information between the matching and application steps. The :class:`Rewrite` Base Class ------------------------------- .. class:: Rewrite The :class:`Rewrite` class simply defines an abstract base class for Numba rewrites. Developers should define rewrites as subclasses of this base type, overloading the :func:`~Rewrite.match` and :func:`~Rewrite.apply` methods. .. attribute:: pipeline The pipeline attribute contains the :class:`numba.compiler.Pipeline` instance that is currently compiling the function under consideration for rewriting. .. method:: __init__(self, pipeline, *args, **kws) The base constructor for rewrites simply stashes its arguments into attributes of the same name. Unless being used in debugging or testing, rewrites should only be constructed by the :class:`RewriteRegistry` in the :func:`RewriteRegistry.apply` method, and the construction interface should remain stable (though the pipeline will commonly contain just about everything there is to know). .. method:: match(self, func_ir, block, typemap, callmap) The :func:`~Rewrite.match` method takes four arguments other than *self*: * *func_ir*: This is an instance of :class:`numba.ir.FunctionIR` for the function being rewritten. * *block*: This is an instance of :class:`numba.ir.Block`. The matching method should iterate over the instructions contained in the :attr:`numba.ir.Block.body` member. * *typemap*: This is a Python :class:`dict` instance mapping from symbol names in the IR, represented as strings, to Numba types. * *callmap*: This is another :class:`dict` instance mapping from calls, represented as :class:`numba.ir.Expr` instances, to their corresponding call site type signatures, represented as a :class:`numba.typing.templates.Signature` instance. The :func:`~Rewrite.match` method should return a :class:`bool` result. A :obj:`True` result should indicate that one or more matches were found, and the :func:`~Rewrite.apply` method will return a new replacement :class:`numba.ir.Block` instance. A :obj:`False` result should indicate that no matches were found, and subsequent calls to :func:`~Rewrite.apply` will return undefined or invalid results. .. method:: apply(self) The :func:`~Rewrite.apply` method should only be invoked following a successful call to :func:`~Rewrite.match`. This method takes no additional parameters other than *self*, and should return a replacement :class:`numba.ir.Block` instance. As mentioned above, the behavior of calling :func:`~Rewrite.apply` is undefined unless :func:`~Rewrite.match` has already been called and returned :obj:`True`. Subclassing :class:`Rewrite` ---------------------------- Before going into the expectations for the overloaded methods any :class:`Rewrite` subclass must have, let's step back a minute to review what is taking place here. By providing an extensible compiler, Numba opens itself to user-defined code generators which may be incomplete, or worse, incorrect.
When a code generator goes awry, it can cause abnormal program behavior or early termination. User-defined rewrites add a new level of complexity because they must not only generate correct code, but the code they generate should ensure that the compiler does not get stuck in a match/apply loop. Non-termination by the compiler will directly lead to non-termination of user function calls. There are several ways to help ensure that a rewrite terminates: * *Typing*: A rewrite should generally attempt to decompose composite types, and avoid composing new types. If the rewrite is matching a specific type, changing expression types to a lower-level type will ensure they will no longer match after the rewrite is applied. * *Special instructions*: A rewrite may synthesize custom operators or use special functions in the target IR. This technique again generates code that is no longer within the domain of the original match, and the rewrite will terminate. In the ":ref:`case-study-array-expressions`" subsection, below, we'll see how the array expression rewriter uses both of these techniques. Overloading :func:`Rewrite.match` --------------------------------- Every rewrite developer should seek to have their implementation of :func:`~Rewrite.match` return a :obj:`False` value as quickly as possible. Numba is a just-in-time compiler, and adding compilation time ultimately adds to the user's run time. When a rewrite returns :obj:`False` for a given block, the registry will no longer process that block with that rewrite, and the compiler is that much closer to proceeding to lowering. This need for timeliness has to be balanced against collecting the necessary information to make a match for a rewrite. Rewrite developers should be comfortable adding dynamic attributes to their subclasses, and then having these new attributes guide construction of the replacement basic block. Overloading :func:`Rewrite.apply` ----------------------------------- The :func:`~Rewrite.apply` method should return a replacement :class:`numba.ir.Block` instance to replace the basic block that contained a match for the rewrite. As mentioned above, the IR built by :func:`~Rewrite.apply` methods should preserve the semantics of the user's code, but also seek to avoid generating another match for the same rewrite or set of rewrites. The Rewrite Registry ==================== When you want to include a rewrite in the rewrite pass, you should register it with the rewrite registry. The :mod:`numba.rewrites` module provides both the abstract base class and a class decorator for hooking into the Numba rewrite subsystem. The following illustrates a stub definition of a new rewrite::

    from numba import rewrites

    @rewrites.register_rewrite
    class MyRewrite(rewrites.Rewrite):

        def match(self, block, typemap, calltypes):
            raise NotImplementedError("FIXME")

        def apply(self):
            raise NotImplementedError("FIXME")

Developers should note that using the class decorator as shown above will register a rewrite at import time. It is the developer's responsibility to ensure their extensions are loaded before compilation starts. .. _`case-study-array-expressions`: Case study: Array Expressions ============================= This subsection looks at the array expression rewriter in more depth. The array expression rewriter, and most of its support functionality, are found in the :mod:`numba.npyufunc.array_exprs` module. The rewriting pass itself is implemented in the :class:`RewriteArrayExprs` class.
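To make the discussion concrete, here is a small sketch of the kind of user code this pass targets; the two elementwise array operations below are fused into a single ``arrayexpr`` IR node that lowers to one loop::

    import numpy as np
    from numba import njit

    @njit
    def f(a, b):
        # ``2.0 * a`` and ``(...) + b`` start out as separate array
        # operations in the IR; the rewrite fuses them.
        return 2.0 * a + b

    f(np.ones(10), np.ones(10))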
In addition to the rewriter, the :mod:`~numba.npyufunc.array_exprs` module includes a function for lowering array expressions, :func:`~numba.npyufunc.array_exprs._lower_array_expr`. The overall optimization process is as follows: * :func:`RewriteArrayExprs.match`: The rewrite pass looks for two or more array operations that form an array expression. * :func:`RewriteArrayExprs.apply`: Once an array expression is found, the rewriter replaces the individual array operations with a new kind of IR expression, the ``arrayexpr``. * :func:`numba.npyufunc.array_exprs._lower_array_expr`: During lowering, the code generator calls :func:`~numba.npyufunc.array_exprs._lower_array_expr` whenever it finds an ``arrayexpr`` IR expression. More details on each step of the optimization are given below. The :func:`RewriteArrayExprs.match` method ------------------------------------------ The array expression optimization pass starts by looking for array operations, including calls to supported :class:`~numpy.ufunc`\'s and user-defined :class:`~numba.DUFunc`\'s. Numba IR follows the conventions of a static single assignment (SSA) language, meaning that the search for array operators begins with looking for assignment instructions. When the rewriting pass calls the :func:`RewriteArrayExprs.match` method, it first checks to see if it can trivially reject the basic block. If the method determines the block to be a candidate for matching, it sets up the following state variables in the rewrite object: * *crnt_block*: The current basic block being matched. * *typemap*: The *typemap* for the function being matched. * *matches*: A list of variable names that reference array expressions. * *array_assigns*: A map from assignment variable names to the actual assignment instructions that define the given variable. * *const_assigns*: A map from assignment variable names to the constant valued expression that defines the constant variable. At this point, the match method iterates over the assignment instructions in the input basic block. For each assignment instruction, the matcher looks for one of two things: * Array operations: If the right-hand side of the assignment instruction is an expression, and the result of that expression is an array type, the matcher checks to see if the expression is either a known array operation, or a call to a universal function. If an array operator is found, the matcher stores the left-hand variable name and the whole instruction in the *array_assigns* member. Finally, the matcher tests to see if any operands of the array operation have also been identified as targets of other array operations. If one or more operands are also targets of array operations, then the matcher will also append the left-hand side variable name to the *matches* member. * Constants: Constants (even scalars) can be operands to array operations. Without worrying about the constant being a part of an array expression, the matcher stores constant names and values in the *const_assigns* member. The end of the matching method simply checks for a non-empty *matches* list, returning :obj:`True` if there were one or more matches, and :obj:`False` when *matches* is empty. The :func:`RewriteArrayExprs.apply` method ------------------------------------------ When one or more matching array expressions are found by :func:`RewriteArrayExprs.match`, the rewriting pass will call :func:`RewriteArrayExprs.apply`. The apply method works in two passes.
The first pass iterates over the matches found, and builds a map from instructions in the old basic block to new instructions in the new basic block. The second pass iterates over the instructions in the old basic block, copying instructions that are not changed by the rewrite, and replacing or deleting instructions that were identified by the first pass. The :func:`RewriteArrayExprs._handle_matches` method implements the first pass of the code generation portion of the rewrite. For each match, this method builds a special IR expression that contains an expression tree for the array expression. To compute the leaves of the expression tree, the :func:`~RewriteArrayExprs._handle_matches` method iterates over the operands of the identified root operation. If the operand is another array operation, it is translated into an expression sub-tree. If the operand is a constant, :func:`~RewriteArrayExprs._handle_matches` copies the constant value. Otherwise, the operand is marked as being used by an array expression. As the method builds array expression nodes, it builds a map from old instructions to new instructions (*replace_map*), as well as sets of variables that may have moved (*used_vars*), and variables that should be removed altogether (*dead_vars*). These three data structures are returned to the calling :func:`RewriteArrayExprs.apply` method. The remaining part of the :func:`RewriteArrayExprs.apply` method iterates over the instructions in the old basic block. For each instruction, this method either replaces, deletes, or duplicates that instruction based on the results of :func:`RewriteArrayExprs._handle_matches`. The following list describes how the optimization handles individual instructions: * When an instruction is an assignment, :func:`~RewriteArrayExprs.apply` checks to see if it is in the replacement instruction map. When an assignment instruction is found in the instruction map, :func:`~RewriteArrayExprs.apply` must then check to see if the replacement instruction is also in the replacement map. The optimizer continues this check until it either arrives at a :obj:`None` value or an instruction that isn't in the replacement map. Instructions that have a replacement that is :obj:`None` are deleted. Instructions that have a non-:obj:`None` replacement are replaced. Assignment instructions not in the replacement map are appended to the new basic block with no changes made. * When the instruction is a delete instruction, the rewrite checks to see if it deletes a variable that may still be used by a later array expression, or if it deletes a dead variable. Delete instructions for used variables are added to a map of deferred delete instructions that :func:`~RewriteArrayExprs.apply` uses to move them past any uses of that variable. The loop copies delete instructions for non-dead variables, and ignores delete instructions for dead variables (effectively removing them from the basic block). * All other instructions are appended to the new basic block. Finally, the :func:`~RewriteArrayExprs.apply` method returns the new basic block for lowering. The :func:`~numba.npyufunc.array_exprs._lower_array_expr` function ------------------------------------------------------------------ If we left things at just the rewrite, then the lowering stage of the compiler would fail, complaining it doesn't know how to lower ``arrayexpr`` operations. We start by hooking a lowering function into the target context whenever the :class:`RewriteArrayExprs` class is instantiated by the compiler.
This hook causes the lowering pass to call :func:`~numba.npyufunc.array_exprs._lower_array_expr` whenever it encounters an ``arrayexpr`` operator. This function has two steps: * Synthesize a Python function that implements the array expression: This new Python function essentially behaves like a NumPy :class:`~numpy.ufunc`, returning the result of the expression on scalar values in the broadcasted array arguments. The lowering function accomplishes this by translating from the array expression tree into a Python AST. * Compile the synthetic Python function into a kernel: At this point, the lowering function relies on existing code for lowering ufunc and DUFunc kernels, calling :func:`numba.targets.numpyimpl.numpy_ufunc_kernel` after defining how to lower calls to the synthetic function. The end result is similar to loop lifting in Numba's object mode. Conclusions and Caveats ======================= We have seen how to implement rewrites in Numba, starting with the interface, and ending with an actual optimization. The key points of this section are: * When writing a good plug-in, the matcher should try to get a go/no-go result as soon as possible. * The rewrite application portion can be more computationally expensive, but should still generate code that won't cause infinite loops in the compiler. * We use object state to communicate any results of matching to the rewrite application pass. numba-0.55.1/docs/source/developer/roadmap.rst000664 000000 000000 00000006115 14174536160 021270 0ustar00rootroot000000 000000 ===================== Numba Project Roadmap ===================== .. note:: This page was last revised in *December 2018*. This roadmap is for informational purposes only. Priorities and resources change, so we may choose to reorder or abandon things on this list. Additionally, the further out items are, the less concrete they will be. If you have an interest in working on one of these items, please open an issue where we can discuss the design and approach first. Short Term: 2019H1 ================== * Container improvements: * Numba dictionary support * Refactor lists to follow new container best practices. See the discussion in `issue 3546 `_.
* Deprecate Python 2.7 support * Improve caching: * Full support for functions compiled with ParallelAccelerator * Safe caching of generated functions (eval of strings) * Expire cache when any function in call chain (even in other files) changes * Process for distributing pre-populated cache * Continue to improve usability and debugging: * Trap more unsupported features earlier in pipeline (especially things that parfors can’t handle) * Error messages * Diagnostic tools for debugging and understanding performance * Better on-boarding for new users and contributors (revise docs, more examples) * Begin refactoring existing features that cause common bug reports: * Enhance description of interfaces provided by Numba functions to give more type information * Convert older Numba function implementations to use public extension mechanisms * More unit testing and modularization of ParallelAccelerator passes Medium Term: 2019H2 =================== * Unify dispatch of regular functions, ufuncs, and gufuncs * Declare Numba 1.0 with stable interfaces * Continue to improve usability and debugging (see above) * Continue refactoring Numba internals to solve common bug reports (see above) * JIT class review and improvement * Improve compilation speed * Improve memory management of Numba-allocated memory * Better support for writing code transformation passes * Make caching and parallel execution features opt-out instead of opt-in * Add heuristic to determine if parfor passes will be beneficial Long Term: 2020 and beyond ========================== * Unify GPU backends (share more code and interfaces) * Improve ahead of time compilation (for low-powered devices) * Improve cross-language connections (C++, JVM?, Julia?, R?): * Call Numba from other languages * Call from Numba into other languages * Better support for "hybrid" CPU/GPU/TPU/etc programming * Partial / deferred compilation of functions * Foster integration of Numba into core PyData packages: * scipy/scikit-learn/scikit-image/pandas * More support for efforts to put Numba into other applications (databases, etc) for compiling user-defined functions * More support for usage of Numba as a “compiler toolkit” to create custom compilers (like HPAT, automatic differentiation of functions, etc) * Investigate AST-based Numba frontend in addition to existing bytecode-based frontendnumba-0.55.1/docs/source/developer/stencil.rst000664 000000 000000 00000017660 14174536160 021313 0ustar00rootroot000000 000000 .. Copyright (c) 2017 Intel Corporation SPDX-License-Identifier: BSD-2-Clause .. _arch-stencil: ================= Notes on stencils ================= Numba provides the :ref:`@stencil decorator ` to represent stencil computations. This document explains how this feature is implemented in the different modes available in Numba. Currently, calls to the stencil from non-jitted code are supported, as are calls from jitted code, either with or without the :ref:`parallel=True ` option. The stencil decorator ===================== The stencil decorator itself just returns a ``StencilFunc`` object. This object encapsulates the original stencil kernel function as specified in the program and the options passed to the stencil decorator. Also of note is that after the first compilation of the stencil, the computed neighborhood of the stencil is stored in the ``StencilFunc`` object in the ``neighborhood`` attribute.
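For orientation, a minimal stencil definition and call looks like the following sketch (a 2D averaging kernel; the relative indices address neighbors of the element the output is being computed for)::

    import numpy as np
    from numba import stencil

    @stencil
    def kernel(a):
        # Relative indexing: a[0, -1] is the element to the left, etc.
        return 0.25 * (a[0, 1] + a[1, 0] + a[0, -1] + a[-1, 0])

    result = kernel(np.arange(25.0).reshape(5, 5))

Calling ``kernel`` from regular Python code goes through the ``__call__`` path described in the next section.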
Handling the three modes ======================== As mentioned above, Numba supports the calling of stencils from inside or outside a ``@jit`` compiled function, with or without the :ref:`parallel=True ` option. Outside jit context ------------------- ``StencilFunc`` overrides the ``__call__`` method so that calls to ``StencilFunc`` objects execute the stencil::

    def __call__(self, *args, **kwargs):
        result = kwargs.get('out')

        new_stencil_func = self._stencil_wrapper(result, None, *args)

        if result is None:
            return new_stencil_func.entry_point(*args)
        else:
            return new_stencil_func.entry_point(*args, result)

First, the presence of the optional :ref:`out ` parameter is checked. If it is present then the output array is stored in ``result``. Then, the call to ``_stencil_wrapper`` generates the stencil function given the result and argument types and finally the generated stencil function is executed and its result returned. Jit without ``parallel=True`` ----------------------------- When constructed, a ``StencilFunc`` inserts itself into the typing context's set of user functions and provides the ``_type_me`` callback. In this way, the standard Numba compiler is able to determine the output type and signature of a ``StencilFunc``. Each ``StencilFunc`` maintains a cache of previously seen combinations of input argument types and keyword types. If previously seen, the ``StencilFunc`` returns the computed signature. If not previously computed, the ``StencilFunc`` computes the return type of the stencil by running the Numba compiler frontend on the stencil kernel and then performing type inference on the :term:`Numba IR` (IR) to get the scalar return type of the kernel. From that, a NumPy array type is constructed whose element type matches that scalar return type. After computing the signature of the stencil for a previously unseen combination of input and keyword types, the ``StencilFunc`` then :ref:`creates the stencil function ` itself. ``StencilFunc`` then installs the new stencil function's definition in the target context so that jitted code is able to call it. Thus, in this mode, the generated stencil function is a stand-alone function called like a normal function from within jitted code. Jit with ``parallel=True`` -------------------------- When calling a ``StencilFunc`` from a jitted context with ``parallel=True``, a separate stencil function as generated by :ref:`arch-stencil-create-function` is not used. Instead, `parfors` (:ref:`parallel-accelerator`) are created within the current function that implements the stencil. This code again starts with the stencil kernel and does a similar kernel size computation, but then, rather than using standard Python looping syntax, creates corresponding `parfors` so that the execution of the stencil takes place in parallel. The stencil to `parfor` translations can also be selectively disabled by setting ``parallel={'stencil': False}``, among other sub-options described in :ref:`parallel-accelerator`. .. _arch-stencil-create-function: Creating the stencil function ============================= Conceptually, a stencil function is created from the user-specified stencil kernel by adding looping code around the kernel, transforming the relative kernel indices into absolute array indices based on the loop indices, and replacing the kernel's ``return`` statement with a statement to assign the computed value into the output array.
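As a rough illustration, for a 1D kernel that computes ``0.5 * (a[-1] + a[1])``, the generated stencil function is conceptually equivalent to the following hand-written sketch (not the actual generated code)::

    import numpy as np

    def stencil_func(a):
        out = np.zeros(a.shape)  # allocated here when ``out`` is not supplied
        # Loop bounds derived from the kernel size leave the boundary
        # of the output untouched.
        for i in range(1, a.shape[0] - 1):
            # Relative indices become offsets from the loop index, and the
            # kernel's ``return`` becomes an assignment into the output.
            out[i] = 0.5 * (a[i - 1] + a[i + 1])
        return out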
To accomplish this transformation, first, a copy of the stencil kernel IR is created so that subsequent modifications of the IR for different stencil signatures will not affect each other. Then, an approach similar to how GUFuncs are created for `parfors` is employed. In a text buffer, a Python function is created with a unique name. The input array parameter is added to the function definition and if the ``out`` argument type is present then an ``out`` parameter is added to the stencil function definition. If the ``out`` argument is not present then first an output array is created with ``numpy.zeros`` having the same shape as the input array. The kernel is then analyzed to compute the stencil size and the shape of the boundary (or the ``neighborhood`` stencil decorator argument is used for this purpose if present). Then, one ``for`` loop for each dimension of the input array is added to the stencil function definition. The range of each loop is controlled by the stencil kernel size previously computed so that the boundary of the output image is not modified but instead left as is. The body of the innermost ``for`` loop is a single ``sentinel`` statement that is easily recognized in the IR. A call to ``exec`` with the text buffer is used to force the stencil function into existence and an ``eval`` is used to get access to the corresponding function on which ``run_frontend`` is used to get the stencil function IR. Various renaming and relabeling is performed on the stencil function IR and the kernel IR so that the two can be combined without conflict. The relative indices in the kernel IR (i.e., ``getitem`` calls) are replaced with expressions where the corresponding loop index variables are added to the relative indices. The ``return`` statement in the kernel IR is replaced with a ``setitem`` for the corresponding element in the output array. The stencil function IR is then scanned for the sentinel and the sentinel replaced with the modified kernel IR. Next, ``compile_ir`` is used to compile the combined stencil function IR. The resulting compile result is cached in the ``StencilFunc`` so that other calls to the same stencil do not need to undertake this process again. Exceptions raised ================= Various checks are performed during stencil compilation to make sure that user-specified options do not conflict with each other or with other runtime parameters. For example, if the user has manually specified a ``neighborhood`` to the stencil decorator, the length of that neighborhood must match the dimensionality of the input array. If this is not the case, a ``ValueError`` is raised. If the neighborhood has not been specified then it must be inferred and a requirement to infer the kernel is that all indices are constant integers. If they are not, a ``ValueError`` is raised indicating that kernel indices may not be non-constant. Finally, the stencil implementation detects the output array type by running Numba type inference on the stencil kernel. If the return type of this kernel does not match the type of the value passed to the ``cval`` stencil decorator option then a ``ValueError`` is raised. numba-0.55.1/docs/source/developer/target_extension.rst000664 000000 000000 00000004774 14174536160 023230 0ustar00rootroot000000 000000 ========================== Notes on Target Extensions ========================== .. warning:: All features and APIs described in this page are in-development and may change at any time without deprecation notices being issued.
Inheriting compiler flags from the caller ========================================= Compiler flags, i.e. options such as ``fastmath`` and ``nrt`` in ``@jit(nrt=True, fastmath=True)``, are specified per-function but their effects are not well-defined---some flags affect the entire callgraph, some flags affect only the current function. Sometimes it is necessary for callees to inherit flags from the caller; for example the ``fastmath`` flag should be infectious. To address the problem, the following are needed: 1. Better definitions for the semantics of compiler flags. Preferably, all flags should limit their effect to the current function. (TODO) 2. Allow compiler flags to be inherited from the caller. (Done) 3. Consider compiler flags in function resolution. (TODO) :class:`numba.core.targetconfig.ConfigStack` is used to propagate the compiler flags throughout the compiler. At the start of the compilation, the flags are pushed into the ``ConfigStack``, which maintains a thread-local stack for the compilation. Thus, callees can check the flags in the caller. .. autoclass:: numba.core.targetconfig.ConfigStack :members: Compiler flags -------------- `Compiler flags`_ are defined as a subclass of ``TargetConfig``: .. _Compiler flags: https://github.com/numba/numba/blob/7e8538140ce3f8d01a5273a39233b5481d8b20b1/numba/core/compiler.py#L39 .. autoclass:: numba.core.targetconfig.TargetConfig :members: These are internal compiler flags and they are different from the user-facing options used in the jit decorators. Internally, `the user-facing options are mapped to the internal compiler flags `_ by :class:`numba.core.options.TargetOptions`. Each target can override the default compiler flags and control the flag inheritance in ``TargetOptions.finalize``. `The CPU target overrides it. `_ .. autoclass:: numba.core.options.TargetOptions :members: finalize In :meth:`numba.core.options.TargetOptions.finalize`, use :meth:`numba.core.targetconfig.TargetConfig.inherit_if_not_set` to request a compiler flag from the caller if it is not set for the current function. numba-0.55.1/docs/source/developer/threading_implementation.rst000664 000000 000000 00000026334 14174536160 024724 0ustar00rootroot000000 000000 ========================================= Notes on Numba's threading implementation ========================================= The execution of the work presented by the Numba ``parallel`` targets is undertaken by the Numba threading layer. Practically, the "threading layer" is a Numba built-in library that can perform the required concurrent execution. At the time of writing there are three threading layers available, each implemented via a different lower level native threading library. More information on the threading layers and appropriate selection of a threading layer for a given application/system can be found in the :ref:`threading layer documentation `. The pertinent information to note for the following sections is that the function in the threading library that performs the parallel execution is the ``parallel_for`` function. The job of this function is to both orchestrate and execute the parallel tasks. The relevant source files referenced in this document are - ``numba/np/ufunc/tbbpool.cpp`` - ``numba/np/ufunc/omppool.cpp`` - ``numba/np/ufunc/workqueue.c`` These files contain the TBB, OpenMP, and workqueue threadpool implementations, respectively.
Each includes the functions ``set_num_threads()``, ``get_num_threads()``, and ``get_thread_id()``, as well as the relevant logic for thread masking in their respective schedulers. Note that the basic thread local variable logic is duplicated in each of these files, and not shared between them. - ``numba/np/ufunc/parallel.py`` This file contains the Python and JIT compatible wrappers for ``set_num_threads()``, ``get_num_threads()``, and ``get_thread_id()``, as well as the code that loads the above libraries into Python and launches the threadpool. - ``numba/parfors/parfor_lowering.py`` This file contains the main logic for generating code for the parallel backend. The thread mask is accessed in this file in the code that generates scheduler code, and passed to the relevant backend scheduler function (see below). Thread masking -------------- As part of its design, Numba never launches new threads beyond the threads that are launched initially with ``numba.np.ufunc.parallel._launch_threads()`` when the first parallel execution is run. This is due to the way threads were already implemented in Numba prior to thread masking being implemented. This restriction was kept to keep the design simple, although it could be removed in the future. Consequently, it's possible to programmatically set the number of threads, but only to less than or equal to the total number that have already been launched. This is done by "masking" out unused threads, causing them to do no work. For example, on a 16 core machine, if the user were to call ``set_num_threads(4)``, Numba would always have 16 threads present, but 12 of them would sit idle for parallel computations. A further call to ``set_num_threads(16)`` would cause those same threads to do work in later computations. :ref:`Thread masking ` was added to make it possible for a user to programmatically alter the number of threads performing work in the threading layer. Thread masking proved challenging to implement as it required the development of a programming model that is suitable for users, easy to reason about, and could be implemented safely, with consistent behavior across the various threading layers. Programming model ~~~~~~~~~~~~~~~~~ The programming model chosen is similar to that found in OpenMP. The reasons for this choice were that it is familiar to a lot of users, restricted in scope and also simple. The number of threads in use is specified by calling ``set_num_threads`` and the number of threads in use can be queried by calling ``get_num_threads``. These two functions are synonymous with their OpenMP counterparts (with the above restriction that the mask must be less than or equal to the number of launched threads). The execution semantics are also similar to OpenMP in that once a parallel region is launched, altering the thread mask has no impact on the currently executing region, but will have an impact on parallel regions executed subsequently. The Implementation ~~~~~~~~~~~~~~~~~~ So as to place no further restrictions on user code other than those that already existed in the threading layer libraries, careful consideration of the design of thread masking was required. The "thread mask" cannot be stored in a global value as concurrent use of the threading layer may result in classic forms of race conditions on the value itself. Numerous designs were discussed involving various types of mutex on such a global value, all of which were eventually broken through thought experiment alone.
It eventually transpired that, following some OpenMP implementations, the "thread mask" is best implemented as a ``thread local``. This means each thread that executes a Numba parallel function will have a thread local storage (TLS) slot that contains the value of the thread mask to use when scheduling threads in the ``parallel_for`` function. The above notion of TLS use for a thread mask is relatively easy to implement: ``get_num_threads`` and ``set_num_threads`` simply need to address the TLS slot in a given threading layer. This also means that the execution schedule for a parallel region can be derived from a run time call to ``get_num_threads``. This is achieved via a well-known and relatively easy to implement pattern: registering a ``C`` library function and wrapping it in the internal Numba implementation. In addition to satisfying the original upfront thread masking requirements, a few more complicated scenarios needed consideration as follows. Nested parallelism ****************** In all threading layers a "main thread" will invoke the ``parallel_for`` function and then in the parallel region, depending on the threading layer, some number of additional threads will assist in doing the actual work. If the work contains a call to another parallel function (i.e. nested parallelism) it is necessary for the thread making the call to know what the "thread mask" of the main thread is so that it can propagate it into the ``parallel_for`` call it makes when executing the nested parallel function. The implementation of this behavior is threading layer specific but the general principle is for the "main thread" to always "send" the value of the thread mask from its TLS slot to all threads in the threading layer that are active in the parallel region. These active threads then update their TLS slots with this value prior to performing any work. The net result of this implementation detail is that: * thread masks correctly propagate into nested functions * it's still possible for each thread in a parallel region to safely have a different mask with which to call nested functions, if it's not set explicitly then the inherited mask from the "main thread" is used * threading layers which have dynamic scheduling with threads potentially joining and leaving the active pool during a ``parallel_for`` execution are successfully accommodated * any "main thread" thread mask is entirely decoupled from the in-flux nature of the thread masks of the threads in the active thread pool Python threads independently invoking parallel functions ******************************************************** The threading layer launch sequence is heavily guarded to ensure that the launch is both thread and process safe and run once per process. In a system with numerous Python ``threading`` module threads all using Numba, the first thread through the launch sequence will get its thread mask set appropriately, but no further threads can run the launch sequence. This means that other threads will need their initial thread mask set some other way. This is achieved when ``get_num_threads`` is called and no thread mask is present; in this case, the thread mask will be set to the default. In the implementation, "no thread mask is present" is represented by the value ``-1`` and the "default thread mask" (unset) is represented by the value ``0``.
The implementation also immediately calls ``set_num_threads(NUMBA_NUM_THREADS)`` after doing this, so if either ``-1`` or ``0`` is encountered as a result from ``get_num_threads()`` it indicates a bug in the above processes. OS ``fork()`` calls ******************* The use of TLS was also in part driven by Linux (the most popular platform for Numba use by far) having a ``fork(2, 3P)`` call that will do TLS propagation into child processes, see ``clone(2)``\ 's ``CLONE_SETTLS``. Thread ID ********* A private ``get_thread_id()`` function was added to each threading backend, which returns a unique ID for each thread. This can be accessed from Python by ``numba.np.ufunc.parallel._get_thread_id()`` (it can also be used inside a JIT compiled function). The thread ID function is useful for testing that the thread masking behavior is correct, but it should not be used outside of the tests. For example, one can call ``set_num_threads(4)`` and then collect all unique ``_get_thread_id()``\ s in a parallel region to verify that only 4 threads are run. Caveats ~~~~~~~ Some caveats to be aware of when testing thread masking: - The TBB backend may choose to schedule fewer than the given mask number of threads. Thus a test such as the one described above may return fewer than 4 unique threads. - The workqueue backend is not threadsafe, so attempts to do multithreading nested parallelism with it may result in deadlocks or other undefined behavior. The workqueue backend will raise a SIGABRT signal if it detects nested parallelism. - Certain backends may reuse the main thread for computation, but this behavior shouldn't be relied upon (for instance, if propagating exceptions). Use in Code Generation ~~~~~~~~~~~~~~~~~~~~~~ The general pattern for using ``get_num_threads`` in code generation is as follows (``builder`` and ``context`` are assumed to be provided by the surrounding lowering code):

.. code:: python

    import llvmlite.llvmpy.core as lc
    from numba.core import cgutils, types

    get_num_threads = cgutils.get_or_insert_function(
        builder.module,
        lc.Type.function(lc.Type.int(types.intp.bitwidth), []),
        name="get_num_threads")

    num_threads = builder.call(get_num_threads, [])

    with cgutils.if_unlikely(builder, builder.icmp_signed(
            '<=', num_threads, num_threads.type(0))):
        cgutils.printf(builder, "num_threads: %d\n", num_threads)
        context.call_conv.return_user_exc(
            builder, RuntimeError,
            ("Invalid number of threads. "
             "This likely indicates a bug in Numba.",))

    # Pass num_threads through to the appropriate backend function here

See the code in ``numba/parfors/parfor_lowering.py``. The guard against ``num_threads`` being <= 0 is not strictly necessary, but it can protect against accidentally incorrect behavior in case the thread masking logic contains a bug. The ``num_threads`` variable should be passed through to the appropriate backend function, such as ``do_scheduling`` or ``parallel_for``. If it's used in some way other than passing it through to the backend function, the above considerations should be taken into account to ensure the use of the ``num_threads`` variable is safe. It would probably be better to keep such logic in the threading backends, rather than trying to do it in code generation. numba-0.55.1/docs/source/extending/000775 000000 000000 00000000000 14174536160 017110 5ustar00rootroot000000 000000 numba-0.55.1/docs/source/extending/entrypoints.rst000664 000000 000000 00000005325 14174536160 022245 0ustar00rootroot000000 000000 Registering Extensions with Entry Points ======================================== Often, third-party packages will have a user-facing API as well as define extensions to the Numba compiler.
In those situations, the new types and overloads can be registered with Numba when the package is imported by the user. However, there are situations where a Numba extension would not normally be imported directly by the user, but must still be registered with the Numba compiler. An example of this is the `numba-scipy `_ package, which adds support for some SciPy functions to Numba. The end user does not need to ``import numba_scipy`` to enable compiler support for SciPy; the extension only needs to be installed in the Python environment. Numba discovers extensions using the `entry points `_ feature of ``setuptools``. This allows a Python package to register an initializer function that will be called before ``numba`` compiles for the first time. The delay ensures that the cost of importing extensions is deferred until it is necessary. Adding Support for the "Init" Entry Point ----------------------------------------- A package can register an initialization function with Numba by adding the ``entry_points`` argument to the ``setup()`` function call in ``setup.py``:

.. code-block:: python

    setup(
        ...,
        entry_points={
            "numba_extensions": [
                "init = numba_scipy:_init_extension",
            ],
        },
        ...
    )

Numba currently only looks for the ``init`` entry point in the ``numba_extensions`` group. The entry point should be a function (any name, as long as it matches what is listed in ``setup.py``) that takes no arguments, and the return value is ignored. This function should register types, overloads, or call other Numba extension APIs. The order of initialization of extensions is undefined. Testing your Entry Point ------------------------ Numba loads all entry points when the first function is compiled. To test your entry point, it is not sufficient to just ``import numba``; you have to define and run a small function, like this:

.. code-block:: python

    import numba; numba.njit(lambda x: x + 1)(123)

It is not necessary to import your module: entry points are identified by the ``entry_points.txt`` file in your library's ``*.egg-info`` directory. The ``setup.py build`` command does not create eggs, but ``setup.py sdist`` (for testing in a local directory) and ``setup.py install`` do. All entry points registered in eggs that are on the Python path are loaded. Be sure to check for stale ``entry_points.txt`` when debugging. numba-0.55.1/docs/source/extending/high-level.rst000664 000000 000000 00000021013 14174536160 021663 0ustar00rootroot000000 000000 .. _high-level-extending: High-level extension API ======================== This extension API is exposed through the :mod:`numba.extending` module. To aid debugging extensions to Numba, it's recommended to set the following environment variable::

    NUMBA_CAPTURED_ERRORS="new_style"

This makes it easy to differentiate between errors in implementation and acceptable errors that can take part in e.g. type inference. For more information see :envvar:`NUMBA_CAPTURED_ERRORS`. Implementing functions ---------------------- The ``@overload`` decorator allows you to implement arbitrary functions for use in :term:`nopython mode` functions. The function decorated with ``@overload`` is called at compile-time with the *types* of the function's runtime arguments. It should return a callable representing the *implementation* of the function for the given types. The returned implementation is compiled by Numba as if it were a normal function decorated with ``@jit``. Additional options to ``@jit`` can be passed as a dictionary using the ``jit_options`` argument.
For example, let's pretend Numba doesn't support the :func:`len` function on tuples yet. Here is how to implement it using ``@overload``::

    from numba import types
    from numba.extending import overload

    @overload(len)
    def tuple_len(seq):
        if isinstance(seq, types.BaseTuple):
            n = len(seq)
            def len_impl(seq):
                return n
            return len_impl

You might wonder, what happens if :func:`len()` is called with something other than a tuple? If a function decorated with ``@overload`` doesn't return anything (i.e. returns None), other definitions are tried until one succeeds. Therefore, multiple libraries may overload :func:`len()` for different types without conflicting with each other. Implementing methods -------------------- The ``@overload_method`` decorator similarly allows implementing a method on a type well-known to Numba. .. autofunction:: numba.core.extending.overload_method Implementing classmethods ------------------------- The ``@overload_classmethod`` decorator similarly allows implementing a classmethod on a type well-known to Numba. .. autofunction:: numba.core.extending.overload_classmethod Implementing attributes ----------------------- The ``@overload_attribute`` decorator allows implementing a data attribute (or property) on a type. Only reading the attribute is possible; writable attributes are only supported through the :ref:`low-level API `. The following example implements the :attr:`~numpy.ndarray.nbytes` attribute on NumPy arrays::

    @overload_attribute(types.Array, 'nbytes')
    def array_nbytes(arr):
        def get(arr):
            return arr.size * arr.itemsize
        return get

.. _cython-support: Importing Cython Functions -------------------------- The function ``get_cython_function_address`` obtains the address of a C function in a Cython extension module. The address can be used to access the C function via a :func:`ctypes.CFUNCTYPE` callback, thus allowing use of the C function inside a Numba jitted function. For example, suppose that you have the file ``foo.pyx``::

    from libc.math cimport exp

    cdef api double myexp(double x):
        return exp(x)

You can access ``myexp`` from Numba in the following way::

    import ctypes
    from numba.extending import get_cython_function_address

    addr = get_cython_function_address("foo", "myexp")
    functype = ctypes.CFUNCTYPE(ctypes.c_double, ctypes.c_double)
    myexp = functype(addr)

The function ``myexp`` can now be used inside jitted functions, for example::

    @njit
    def double_myexp(x):
        return 2*myexp(x)

One caveat is that if your function uses Cython's fused types, then the function's name will be mangled. To find out the mangled name of your function you can check the extension module's ``__pyx_capi__`` attribute. Implementing intrinsics ----------------------- The ``@intrinsic`` decorator is used for marking a function *func* as typing and implementing the function in ``nopython`` mode using the `llvmlite IRBuilder API `_. This is an escape hatch for expert users to build custom LLVM IR that will be inlined into the caller; there is no safety net! The first argument to *func* is the typing context. The rest of the arguments correspond to the types of the arguments of the decorated function. These arguments are also used as the formal arguments of the decorated function. If *func* has the signature ``foo(typing_context, arg0, arg1)``, the decorated function will have the signature ``foo(arg0, arg1)``. The return value of *func* should be a 2-tuple of the expected type signature and a code-generation function that will be passed to :func:`~numba.targets.imputils.lower_builtin`.
Implementing intrinsics
-----------------------

The ``@intrinsic`` decorator is used for marking a function *func* as both typing and implementing a function in ``nopython`` mode using the `llvmlite IRBuilder API `_. This is an escape hatch for expert users to build custom LLVM IR that will be inlined into the caller; there is no safety net!

The first argument to *func* is the typing context. The rest of the arguments correspond to the types of the arguments of the decorated function. These arguments are also used as the formal arguments of the decorated function. If *func* has the signature ``foo(typing_context, arg0, arg1)``, the decorated function will have the signature ``foo(arg0, arg1)``.

The return value of *func* should be a 2-tuple of the expected type signature and a code-generation function that will be passed to :func:`~numba.targets.imputils.lower_builtin`. For an unsupported operation, return ``None``. Here is an example that casts any integer to a byte pointer::

    from numba import types
    from numba.extending import intrinsic

    @intrinsic
    def cast_int_to_byte_ptr(typingctx, src):
        # check for accepted types
        if isinstance(src, types.Integer):
            # create the expected type signature
            result_type = types.CPointer(types.uint8)
            sig = result_type(types.uintp)
            # defines the custom code generation
            def codegen(context, builder, signature, args):
                # llvm IRBuilder code here
                [src] = args
                rtype = signature.return_type
                llrtype = context.get_value_type(rtype)
                return builder.inttoptr(src, llrtype)
            return sig, codegen

It may be used as follows::

    from numba import njit

    @njit('void(int64)')
    def foo(x):
        y = cast_int_to_byte_ptr(x)

    foo.inspect_types()

and the output of ``.inspect_types()`` demonstrates the cast (note the ``uint8*``)::

    def foo(x):
        #   x = arg(0, name=x)  :: int64
        #   $0.1 = global(cast_int_to_byte_ptr: <intrinsic cast_int_to_byte_ptr>) :: Function(<intrinsic cast_int_to_byte_ptr>)
        #   $0.3 = call $0.1(x, func=$0.1, args=[Var(x, check_intrin.py (24))], kws=(), vararg=None)  :: (uint64,) -> uint8*
        #   del x
        #   del $0.1
        #   y = $0.3  :: uint8*
        #   del y
        #   del $0.3
        #   $const0.4 = const(NoneType, None)  :: none
        #   $0.5 = cast(value=$const0.4)  :: none
        #   del $const0.4
        #   return $0.5

        y = cast_int_to_byte_ptr(x)

Implementing mutable structures
-------------------------------

.. warning:: This is an experimental feature, the API may change without warning.

The ``numba.experimental.structref`` module provides utilities for defining mutable pass-by-reference structures, a ``StructRef``. The following example demonstrates how to define a basic mutable structure:

Defining a StructRef
''''''''''''''''''''

.. literalinclude:: ../../../numba/tests/doc_examples/test_structref_usage.py
   :language: python
   :caption: from ``numba/tests/doc_examples/test_structref_usage.py``
   :start-after: magictoken.ex_structref_type_definition.begin
   :end-before: magictoken.ex_structref_type_definition.end
   :dedent: 0
   :linenos:

The following demonstrates using the above mutable struct definition:

.. literalinclude:: ../../../numba/tests/doc_examples/test_structref_usage.py
   :language: python
   :caption: from ``test_type_definition`` of ``numba/tests/doc_examples/test_structref_usage.py``
   :start-after: magictoken.ex_structref_type_definition_test.begin
   :end-before: magictoken.ex_structref_type_definition_test.end
   :dedent: 8
   :linenos:

Defining a method on StructRef
''''''''''''''''''''''''''''''

Methods and attributes can be attached using ``@overload_*`` as shown in the previous sections. The following demonstrates the use of ``@overload_method`` to insert a method for instances of ``MyStructType``:

.. literalinclude:: ../../../numba/tests/doc_examples/test_structref_usage.py
   :language: python
   :caption: from ``test_overload_method`` of ``numba/tests/doc_examples/test_structref_usage.py``
   :start-after: magictoken.ex_structref_method.begin
   :end-before: magictoken.ex_structref_method.end
   :dedent: 8
   :linenos:

``numba.experimental.structref`` API Reference
''''''''''''''''''''''''''''''''''''''''''''''

.. automodule:: numba.experimental.structref
   :members:

Determining if a function is already wrapped by a ``jit`` family decorator
--------------------------------------------------------------------------

The following function is provided for this purpose.

.. automethod:: numba.extending.is_jitted
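A short sketch of how it behaves (``add_one`` is a hypothetical example function):

.. code-block:: python

    from numba import njit
    from numba.extending import is_jitted

    @njit
    def add_one(x):
        return x + 1

    assert is_jitted(add_one)          # Dispatcher objects are detected
    assert not is_jitted(lambda x: x)  # plain Python functions are not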
numba-0.55.1/docs/source/extending/index.rst000664 000000 000000 00000001622 14174536160 020752 0ustar00rootroot000000 000000 Extending Numba
===============

.. module:: numba.extending

This chapter describes how to extend Numba to make it recognize and support additional operations, functions or types. Numba provides two categories of APIs to this end:

* The high-level APIs provide abstracted entry points which are sufficient for simple uses. They require little knowledge of Numba's internal compilation chain.

* The low-level APIs reflect Numba's internal compilation chain and allow flexible interaction with its various layers, but require more effort and experience with Numba internals.

It may be helpful for readers of this chapter to also read some of the documents in the :doc:`developer manual <../developer/index>`, especially the :doc:`architecture document <../developer/architecture>`.

.. toctree::

   high-level.rst
   low-level.rst
   interval-example.rst
   overloading-guide.rst
   entrypoints.rst

numba-0.55.1/docs/source/extending/interval-example.rst000664 000000 000000 00000030527 14174536160 023126 0ustar00rootroot000000 000000 Example: an interval type
=========================

We will extend the Numba frontend to support a class that it does not currently support so as to allow:

* Passing an instance of the class to a Numba function
* Accessing attributes of the class in a Numba function
* Constructing and returning a new instance of the class from a Numba function

(all the above in :term:`nopython mode`)

We will mix APIs from the :ref:`high-level extension API <high-level-extending>` and the :ref:`low-level extension API <low-level-extending>`, depending on what is available for a given task.

The starting point for our example is the following pure Python class::

    class Interval(object):
        """
        A half-open interval on the real number line.
        """
        def __init__(self, lo, hi):
            self.lo = lo
            self.hi = hi

        def __repr__(self):
            return 'Interval(%f, %f)' % (self.lo, self.hi)

        @property
        def width(self):
            return self.hi - self.lo

Extending the typing layer
""""""""""""""""""""""""""

Creating a new Numba type
-------------------------

As the ``Interval`` class is not known to Numba, we must create a new Numba type to represent instances of it. Numba does not deal with Python types directly: it has its own type system that allows a different level of granularity as well as various meta-information not available with regular Python types.

We first create a type class ``IntervalType`` and, since we don't need the type to be parametric, we instantiate a single type instance ``interval_type``::

    from numba import types

    class IntervalType(types.Type):
        def __init__(self):
            super(IntervalType, self).__init__(name='Interval')

    interval_type = IntervalType()

Type inference for Python values
--------------------------------

In itself, creating a Numba type doesn't do anything. We must teach Numba how to infer some Python values as instances of that type. In this example, it is trivial: any instance of the ``Interval`` class should be treated as belonging to the type ``interval_type``::

    from numba.extending import typeof_impl

    @typeof_impl.register(Interval)
    def typeof_interval(val, c):
        return interval_type

Function arguments and global values will thus be recognized as belonging to ``interval_type`` whenever they are instances of ``Interval``.

Type inference for Python annotations
-------------------------------------

While ``typeof`` is used to infer the Numba type of Python objects, ``as_numba_type`` is used to infer the Numba type of Python types. For simple cases, we can register that the Python type ``Interval`` corresponds with the Numba type ``interval_type``::

    from numba.extending import as_numba_type

    as_numba_type.register(Interval, interval_type)
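Since ``as_numba_type`` is itself callable, the registration can be checked directly (a quick sketch):

.. code-block:: python

    # resolves the Python class to the Numba type registered above
    assert as_numba_type(Interval) == interval_type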
Note that ``as_numba_type`` is only used to infer types from type annotations at compile time. The ``typeof`` registry above is used to infer the type of objects at runtime.

Type inference for operations
-----------------------------

We want to be able to construct interval objects from Numba functions, so we must teach Numba to recognize the two-argument ``Interval(lo, hi)`` constructor. The arguments should be floating-point numbers::

    from numba.extending import type_callable

    @type_callable(Interval)
    def type_interval(context):
        def typer(lo, hi):
            if isinstance(lo, types.Float) and isinstance(hi, types.Float):
                return interval_type
        return typer

The :func:`type_callable` decorator specifies that the decorated function should be invoked when running type inference for the given callable object (here the ``Interval`` class itself). The decorated function must simply return a typer function that will be called with the argument types. The reason for this seemingly convoluted setup is for the typer function to have *exactly* the same signature as the typed callable. This allows handling keyword arguments correctly.

The *context* argument received by the decorated function is useful in more sophisticated cases where computing the callable's return type requires resolving other types.

Extending the lowering layer
""""""""""""""""""""""""""""

We have finished teaching Numba about our type inference additions. We must now teach Numba how to actually generate code and data for the new operations.

Defining the data model for native intervals
--------------------------------------------

As a general rule, :term:`nopython mode` does not work on Python objects as they are generated by the CPython interpreter. The representations used by the interpreter are far too inefficient for fast native code. Each type supported in :term:`nopython mode` therefore has to define a tailored native representation, also called a *data model*.

A common case is an immutable struct-like data model, akin to a C ``struct``. Our interval datatype conveniently falls in that category, and here is a possible data model for it::

    from numba.extending import models, register_model

    @register_model(IntervalType)
    class IntervalModel(models.StructModel):
        def __init__(self, dmm, fe_type):
            members = [
                ('lo', types.float64),
                ('hi', types.float64),
                ]
            models.StructModel.__init__(self, dmm, fe_type, members)

This instructs Numba that values of type ``IntervalType`` (or any instance thereof) are represented as a structure of two fields ``lo`` and ``hi``, each of them a double-precision floating-point number (``types.float64``).

.. note:: Mutable types need more sophisticated data models to be able to persist their values after modification. They typically cannot be stored and passed on the stack or in registers the way immutable types are.

Exposing data model attributes
------------------------------

We want the data model attributes ``lo`` and ``hi`` to be exposed under the same names for use in Numba functions. Numba provides a convenience function to do exactly that::

    from numba.extending import make_attribute_wrapper

    make_attribute_wrapper(IntervalType, 'lo', 'lo')
    make_attribute_wrapper(IntervalType, 'hi', 'hi')

This will expose the attributes in read-only mode.
As mentioned above, writable attributes don't fit in this model.

Exposing a property
-------------------

As the ``width`` property is computed rather than stored in the structure, we cannot simply expose it like we did for ``lo`` and ``hi``. We have to re-implement it explicitly::

    from numba.extending import overload_attribute

    @overload_attribute(IntervalType, "width")
    def get_width(interval):
        def getter(interval):
            return interval.hi - interval.lo
        return getter

You might ask why we didn't need to expose a type inference hook for this attribute. The answer is that ``@overload_attribute`` is part of the high-level API: it combines type inference and code generation in a single API.

Implementing the constructor
----------------------------

Now we want to implement the two-argument ``Interval`` constructor::

    from numba.extending import lower_builtin
    from numba.core import cgutils

    @lower_builtin(Interval, types.Float, types.Float)
    def impl_interval(context, builder, sig, args):
        typ = sig.return_type
        lo, hi = args
        interval = cgutils.create_struct_proxy(typ)(context, builder)
        interval.lo = lo
        interval.hi = hi
        return interval._getvalue()

There is a bit more going on here. ``@lower_builtin`` decorates the implementation of the given callable or operation (here the ``Interval`` constructor) for some specific argument types. This allows defining type-specific implementations of a given operation, which is important for heavily overloaded functions such as :func:`len`.

``types.Float`` is the class of all floating-point types (``types.float64`` is an instance of ``types.Float``). It is generally more future-proof to match argument types on their class rather than on specific instances (however, when *returning* a type -- chiefly during the type inference phase -- you must usually return a type instance).

``cgutils.create_struct_proxy()`` and ``interval._getvalue()`` are a bit of boilerplate due to how Numba passes values around. Values are passed as instances of :class:`llvmlite.ir.Value`, which can be too limited: LLVM structure values especially are quite low-level. A struct proxy is a temporary wrapper around an LLVM structure value that makes it easy to get or set members of the structure. The ``_getvalue()`` call simply gets the LLVM value out of the wrapper.

Boxing and unboxing
-------------------

If you try to use an ``Interval`` instance at this point, you'll certainly get the error *"cannot convert Interval to native value"*. This is because Numba doesn't yet know how to make a native interval value from a Python ``Interval`` instance. Let's teach it how::

    from numba.extending import unbox, NativeValue

    @unbox(IntervalType)
    def unbox_interval(typ, obj, c):
        """
        Convert an Interval object to a native interval structure.
        """
        lo_obj = c.pyapi.object_getattr_string(obj, "lo")
        hi_obj = c.pyapi.object_getattr_string(obj, "hi")
        interval = cgutils.create_struct_proxy(typ)(c.context, c.builder)
        interval.lo = c.pyapi.float_as_double(lo_obj)
        interval.hi = c.pyapi.float_as_double(hi_obj)
        c.pyapi.decref(lo_obj)
        c.pyapi.decref(hi_obj)
        is_error = cgutils.is_not_null(c.builder, c.pyapi.err_occurred())
        return NativeValue(interval._getvalue(), is_error=is_error)

*Unbox* is the other name for "convert a Python object to a native value" (it fits the idea of a Python object as a sophisticated box containing a simple native value). The function returns a ``NativeValue`` object which gives its caller access to the computed native value, the error bit and possibly other information.
The snippet above makes abundant use of the ``c.pyapi`` object, which gives access to a subset of the `Python interpreter's C API `_. Note the use of ``c.pyapi.err_occurred()`` to detect any errors that may have happened when unboxing the object (try passing ``Interval('a', 'b')`` for example).

We also want to do the reverse operation, called *boxing*, so as to return interval values from Numba functions::

    from numba.extending import box

    @box(IntervalType)
    def box_interval(typ, val, c):
        """
        Convert a native interval structure to an Interval object.
        """
        interval = cgutils.create_struct_proxy(typ)(c.context, c.builder, value=val)
        lo_obj = c.pyapi.float_from_double(interval.lo)
        hi_obj = c.pyapi.float_from_double(interval.hi)
        class_obj = c.pyapi.unserialize(c.pyapi.serialize_object(Interval))
        res = c.pyapi.call_function_objargs(class_obj, (lo_obj, hi_obj))
        c.pyapi.decref(lo_obj)
        c.pyapi.decref(hi_obj)
        c.pyapi.decref(class_obj)
        return res

Using it
""""""""

:term:`nopython mode` functions are now able to make use of Interval objects and the various operations you have defined on them. You can try for example the following functions::

    from numba import jit

    @jit(nopython=True)
    def inside_interval(interval, x):
        return interval.lo <= x < interval.hi

    @jit(nopython=True)
    def interval_width(interval):
        return interval.width

    @jit(nopython=True)
    def sum_intervals(i, j):
        return Interval(i.lo + j.lo, i.hi + j.hi)

Conclusion
""""""""""

We have shown how to do the following tasks:

* Define a new Numba type class by subclassing the ``Type`` class
* Define a singleton Numba type instance for a non-parametric type
* Teach Numba how to infer the Numba type of Python values of a certain class, using ``typeof_impl.register``
* Teach Numba how to infer the Numba type of the Python type itself, using ``as_numba_type.register``
* Define the data model for a Numba type using ``StructModel`` and ``register_model``
* Implement a boxing function for a Numba type using the ``@box`` decorator
* Implement an unboxing function for a Numba type using the ``@unbox`` decorator and the ``NativeValue`` class
* Type and implement a callable using the ``@type_callable`` and ``@lower_builtin`` decorators
* Expose a read-only structure attribute using the ``make_attribute_wrapper`` convenience function
* Implement a read-only property using the ``@overload_attribute`` decorator

numba-0.55.1/docs/source/extending/low-level.rst000664 000000 000000 00000017765 14174536160 021560 0ustar00rootroot000000 000000 .. _low-level-extending:

Low-level extension API
=======================

This extension API is available through the :mod:`numba.extending` module. It allows you to hook directly into the Numba compilation chain. As such, it distinguishes between several compilation phases:

* The :term:`typing` phase deduces the types of variables in a compiled function by looking at the operations performed.

* The :term:`lowering` phase converts high-level Python operations into low-level LLVM code. This phase exploits the typing information derived by the typing phase.

* *Boxing* and *unboxing* convert Python objects into native values, and vice-versa. They occur at the boundaries of calling a Numba function from the Python interpreter.

Typing
------

.. XXX the API described here can be insufficient for some use cases.
   Should we describe the whole templates menagerie?

Type inference -- or simply *typing* -- is the process of assigning Numba types to all values involved in a function, so as to enable efficient code generation.
Broadly speaking, typing comes in two flavours: typing plain Python *values* (e.g. function arguments or global variables) and typing *operations* (or *functions*) on known value types.

.. decorator:: typeof_impl.register(cls)

   Register the decorated function as typing Python values of class *cls*. The decorated function will be called with the signature ``(val, c)`` where *val* is the Python value being typed and *c* is a context object.

.. decorator:: type_callable(func)

   Register the decorated function as typing the callable *func*. *func* can be either an actual Python callable or a string denoting an operation internally known to Numba (for example ``'getitem'``). The decorated function is called with a single *context* argument and must return a typer function. The typer function should have the same signature as the function being typed, and it is called with the Numba *types* of the function arguments; it should return either the Numba type of the function's return value, or ``None`` if inference failed.

.. function:: as_numba_type.register(py_type, numba_type)

   Register that the Python type *py_type* corresponds with the Numba type *numba_type*. This can be used to register a new type or overwrite the existing default (e.g. to treat ``float`` as ``numba.float32`` instead of ``numba.float64``).

.. decorator:: as_numba_type.register

   Register the decorated function as a type inference function used by ``as_numba_type`` when trying to infer the Numba type of a Python type. The decorated function is called with a single *py_type* argument and returns either a corresponding Numba type, or None if it cannot infer that *py_type*.

Lowering
--------

The following decorators all take a type specification of some kind. A type specification is usually a type class (such as ``types.Float``) or a specific type instance (such as ``types.float64``). Some values have a special meaning:

* ``types.Any`` matches any type; this allows doing your own dispatching inside the implementation

* ``types.VarArg(...)`` matches any number of arguments of the given type; it can only appear as the last type specification when describing a function's arguments.

A *context* argument in the following APIs is a target context providing various utility methods for code generation (such as creating a constant, converting from one type to another, looking up the implementation of a specific function, etc.). A *builder* argument is a :class:`llvmlite.ir.IRBuilder` instance for the LLVM code being generated.

A *signature* is an object specifying the concrete type of an operation. The ``args`` attribute of the signature is a tuple of the argument types. The ``return_type`` attribute of the signature is the type that the operation should return.

.. note:: Numba always reasons on Numba types, but the values being passed around during lowering are LLVM values: they don't hold the required type information, which is why Numba types are passed explicitly too. LLVM has its own, very low-level type system: you can access the LLVM type of a value by looking up its ``.type`` attribute.

Native operations
'''''''''''''''''

.. decorator:: lower_builtin(func, typespec, ...)

   Register the decorated function as implementing the callable *func* for the arguments described by the given Numba *typespecs*. As with :func:`type_callable`, *func* can be either an actual Python callable or a string denoting an operation internally known to Numba (for example ``'getitem'``).
   The decorated function is called with four arguments ``(context, builder, sig, args)``. ``sig`` is the concrete signature the callable is being invoked with. ``args`` is a tuple of the values of the arguments the callable is being invoked with; each value in ``args`` corresponds to a type in ``sig.args``. The function must return a value compatible with the type ``sig.return_type``.

.. decorator:: lower_getattr(typespec, name)

   Register the decorated function as implementing the attribute *name* of the given *typespec*. The decorated function is called with four arguments ``(context, builder, typ, value)``. *typ* is the concrete type the attribute is being looked up on. *value* is the value the attribute is being looked up on.

.. decorator:: lower_getattr_generic(typespec)

   Register the decorated function as a fallback for attribute lookup on a given *typespec*. Any attribute that does not have a corresponding :func:`lower_getattr` declaration will go through :func:`lower_getattr_generic`. The decorated function is called with five arguments ``(context, builder, typ, value, name)``. *typ* and *value* are as in :func:`lower_getattr`. *name* is the name of the attribute being looked up.

.. decorator:: lower_cast(fromspec, tospec)

   Register the decorated function as converting from types described by *fromspec* to types described by *tospec*. The decorated function is called with five arguments ``(context, builder, fromty, toty, value)``. *fromty* and *toty* are the concrete types being converted from and to, respectively. *value* is the value being converted. The function must return a value compatible with the type ``toty``.

Constants
'''''''''

.. decorator:: lower_constant(typespec)

   Register the decorated function as implementing the creation of constants for the Numba *typespec*. The decorated function is called with four arguments ``(context, builder, ty, pyval)``. *ty* is the concrete type to create a constant for. *pyval* is the Python value to convert into an LLVM constant. The function must return a value compatible with the type ``ty``.

Boxing and unboxing
'''''''''''''''''''

In these functions, *c* is a convenience object with several attributes:

* its ``context`` attribute is a target context as above
* its ``builder`` attribute is a :class:`llvmlite.ir.IRBuilder` as above
* its ``pyapi`` attribute is an object giving access to a subset of the `Python interpreter's C API `_

An object, as opposed to a native value, is a ``PyObject *`` pointer. Such pointers can be produced or processed by the methods in the ``pyapi`` object.

.. decorator:: box(typespec)

   Register the decorated function as boxing values matching the *typespec*. The decorated function is called with three arguments ``(typ, val, c)``. *typ* is the concrete type being boxed. *val* is the value being boxed. The function should return a Python object, or NULL to signal an error.

.. decorator:: unbox(typespec)

   Register the decorated function as unboxing values matching the *typespec*. The decorated function is called with three arguments ``(typ, obj, c)``. *typ* is the concrete type being unboxed. *obj* is the Python object (a ``PyObject *`` pointer, in C terms) being unboxed. The function should return a ``NativeValue`` object giving the unboxing result value and an optional error bit.
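Tying a few of these APIs together, here is a hedged sketch of a ``lower_constant`` implementation for the ``IntervalType`` from the interval example. It assumes the two-``float64`` struct data model registered there, that ``lower_constant`` is importable from ``numba.core.imputils``, and that ``cgutils.pack_struct`` is available to assemble the LLVM struct value:

.. code-block:: python

    from numba.core import cgutils, types
    from numba.core.imputils import lower_constant

    @lower_constant(IntervalType)
    def constant_interval(context, builder, ty, pyval):
        # freeze the Python Interval's fields into LLVM constants
        lo = context.get_constant(types.float64, pyval.lo)
        hi = context.get_constant(types.float64, pyval.hi)
        # assemble them following the (lo, hi) struct data model
        return cgutils.pack_struct(builder, (lo, hi))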
numba-0.55.1/docs/source/extending/mynorm.py000664 000000 000000 00000004420 14174536160 021003 0ustar00rootroot000000 000000 import numpy as np

from numba import njit, types
from numba.extending import overload, register_jitable
from numba.core.errors import TypingError

import scipy.linalg


@register_jitable
def _oneD_norm_2(a):
    # re-usable implementation of the 2-norm
    val = np.abs(a)
    return np.sqrt(np.sum(val * val))


@overload(scipy.linalg.norm)
def jit_norm(a, ord=None):
    if isinstance(ord, types.Optional):
        ord = ord.type
    # Reject non integer, floating-point or None types for ord
    if not isinstance(ord, (types.Integer, types.Float, types.NoneType)):
        raise TypingError("'ord' must be either integer or floating-point")
    # Reject non-ndarray types
    if not isinstance(a, types.Array):
        raise TypingError("Only accepts NumPy ndarray")
    # Reject ndarrays with non integer or floating-point dtype
    if not isinstance(a.dtype, (types.Integer, types.Float)):
        raise TypingError("Only integer and floating point types accepted")
    # Reject ndarrays with unsupported dimensionality
    if not (0 <= a.ndim <= 2):
        raise TypingError('3D and beyond are not allowed')
    # Implementation for scalars/0d-arrays
    elif a.ndim == 0:
        def _norm_zero_d(a, ord=None):
            # the norm of a scalar is its absolute value
            return abs(a.item())
        return _norm_zero_d
    # Implementation for vectors
    elif a.ndim == 1:
        def _oneD_norm_x(a, ord=None):
            if ord == 2 or ord is None:
                return _oneD_norm_2(a)
            elif ord == np.inf:
                return np.max(np.abs(a))
            elif ord == -np.inf:
                return np.min(np.abs(a))
            elif ord == 0:
                return np.sum(a != 0)
            elif ord == 1:
                return np.sum(np.abs(a))
            else:
                return np.sum(np.abs(a)**ord)**(1. / ord)
        return _oneD_norm_x
    # Implementation for matrices
    elif a.ndim == 2:
        def _two_D_norm_2(a, ord=None):
            return _oneD_norm_2(a.ravel())
        return _two_D_norm_2


if __name__ == "__main__":
    @njit
    def use(a, ord=None):
        # simple test function to check that the overload works
        return scipy.linalg.norm(a, ord)

    # spot check for vectors
    a = np.arange(10)
    print(use(a))
    print(scipy.linalg.norm(a))

    # spot check for matrices
    b = np.arange(9).reshape((3, 3))
    print(use(b))
    print(scipy.linalg.norm(b))

numba-0.55.1/docs/source/extending/overloading-guide.rst000664 000000 000000 00000021016 14174536160 023246 0ustar00rootroot000000 000000 .. _overloading-guide:

==============================
A guide to using ``@overload``
==============================

As mentioned in the :ref:`high-level extension API <high-level-extending>`, you can use the ``@overload`` decorator to create a Numba implementation of a function that can be used in :term:`nopython mode` functions. A common use case is to re-implement NumPy functions so that they can be called in ``@jit`` decorated code. This section discusses how and when to use the ``@overload`` decorator and what contributing such a function to the Numba code base might entail. This should help you get started when needing to use the ``@overload`` decorator or when attempting to contribute new functions to Numba itself.

The ``@overload`` decorator and its variants are useful when you have a third-party library that you do not control and you wish to provide Numba compatible implementations for specific functions from that library.

Concrete Example
================

Let's assume that you are working on a minimization algorithm that makes use of |scipy.linalg.norm|_ to find different vector norms and the `Frobenius norm `_ for matrices. You know that only integer and real numbers will be involved.
(While this may sound like an artificial example, especially because a Numba implementation of ``numpy.linalg.norm`` exists, it is largely pedagogical and serves to illustrate how and when to use ``@overload``.)

.. |scipy.linalg.norm| replace:: ``scipy.linalg.norm``
.. _scipy.linalg.norm: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.norm.html

The skeleton might look something like this::

    def algorithm():
        # setup
        v = ...
        while True:
            # take a step
            d = scipy.linalg.norm(v)
            if d < tolerance:
                break

Now, let's further assume that you have heard of Numba and you now wish to use it to accelerate your function. However, after adding the ``@jit(nopython=True)`` decorator, Numba complains that ``scipy.linalg.norm`` isn't supported. From looking at the documentation, you realize that a norm is probably fairly easy to implement using NumPy. A good starting point is the following template.

.. literalinclude:: template.py

After some deliberation and tinkering, you end up with the following code:

.. literalinclude:: mynorm.py

As you can see, the implementation only supports what you need right now:

* Only integer and floating-point types are supported
* All vector norms
* Only the Frobenius norm for matrices
* Code sharing between vector and matrix implementations using ``@register_jitable``.
* Norms are implemented using NumPy syntax. (This is possible because Numba is very aware of NumPy and many functions are supported.)

So what actually happens here? The ``overload`` decorator registers a suitable implementation for ``scipy.linalg.norm`` in case a call to this is encountered in code that is being JIT-compiled, for example when you decorate your ``algorithm`` function with ``@jit(nopython=True)``. In that case, the function ``jit_norm`` will be called with the currently encountered types and will then return either ``_oneD_norm_x`` in the vector case or ``_two_D_norm_2`` in the matrix case.

You can download the example code here: :download:`mynorm.py <./mynorm.py>`

Implementing ``@overload`` for NumPy functions
==============================================

Numba supports NumPy through the provision of ``@jit`` compatible re-implementations of NumPy functions. In such cases ``@overload`` is a very convenient option for writing such implementations, however there are a few additional things to watch out for.

* The Numba implementation should match the NumPy implementation as closely as feasible with respect to accepted types, arguments, raised exceptions and algorithmic complexity (Big-O / Landau order).

* When implementing supported argument types, bear in mind that, due to duck typing, NumPy does tend to accept a multitude of argument types beyond NumPy arrays, such as scalars, lists, tuples, sets, iterators, generators, etc. You will need to account for that during type inference and subsequently as part of the tests.

* A NumPy function may return a scalar, array or a data structure which matches one of its inputs; you need to be aware of type unification problems and dispatch to appropriate implementations. For example, |np.corrcoef|_ may return an array or a scalar depending on its inputs.

.. |np.corrcoef| replace:: ``np.corrcoef``
.. _np.corrcoef: https://docs.scipy.org/doc/numpy/reference/generated/numpy.corrcoef.html

* If you are implementing a new function, you should always update the `documentation `_. The sources can be found in ``docs/source/reference/numpysupported.rst``. Be sure to mention any limitations that your implementation has, e.g. no support for the ``axis`` keyword.
* When writing tests for the functionality itself, it's useful to include handling of non-finite values, arrays with different shapes and layouts, complex inputs, scalar inputs, inputs with types for which support is not documented (e.g. a function which the NumPy docs say requires a float or int input might also 'work' if given a bool or complex input).

* When writing tests for exceptions, for example if adding tests to ``numba/tests/test_np_functions.py``, you may encounter the following error message:

  .. code::

      ======================================================================
      FAIL: test_foo (numba.tests.test_np_functions.TestNPFunctions)
      ----------------------------------------------------------------------
      Traceback (most recent call last):
      File "<path>/numba/numba/tests/support.py", line 645, in tearDown
          self.memory_leak_teardown()
      File "<path>/numba/numba/tests/support.py", line 619, in memory_leak_teardown
          self.assert_no_memory_leak()
      File "<path>/numba/numba/tests/support.py", line 628, in assert_no_memory_leak
          self.assertEqual(total_alloc, total_free)
      AssertionError: 36 != 35

  This occurs because raising exceptions from jitted code leads to reference leaks. Ideally, you will place all exception testing in a separate test method and then add a call in each test to ``self.disable_leak_check()`` to disable the leak-check (inherit from ``numba.tests.support.TestCase`` to make that available).

* For many of the functions that are available in NumPy, there are corresponding methods defined on the NumPy ``ndarray`` type. For example, the function ``repeat`` is available as a NumPy module level function and a member function on the ``ndarray`` class.

  .. code:: python

      import numpy as np
      a = np.arange(10)
      # function
      np.repeat(a, 10)
      # method
      a.repeat(10)

  Once you have written the function implementation, you can easily use ``@overload_method`` and reuse it. Just be sure to check that NumPy doesn't diverge in the implementations of its function/method. As an example, the ``repeat`` function/method:

  .. code:: python

      @extending.overload_method(types.Array, 'repeat')
      def array_repeat(a, repeats):
          def array_repeat_impl(a, repeats):
              # np.repeat has already been overloaded
              return np.repeat(a, repeats)

          return array_repeat_impl

* If you need to create ancillary functions, for example to re-use a small utility function or to split your implementation across functions for the sake of readability, you can make use of the ``@register_jitable`` decorator. This will make those functions available from within your ``@jit`` and ``@overload`` decorated functions.

* The Numba continuous integration (CI) setup tests a wide variety of NumPy versions; you'll sometimes be alerted to a change in behaviour from some previous NumPy version. If you can find supporting evidence in the NumPy change log / repository, then you'll need to decide whether to create branches and attempt to replicate the logic across versions, or use a version gate (with associated wording in the documentation) to advertise that Numba replicates NumPy from some particular version onwards.

* You can look at the Numba source code for inspiration, many of the overloaded NumPy functions and methods are in ``numba/np/arrayobj.py``. Below, you will find a list of implementations to look at that are well implemented in terms of accepted types and test coverage:

* ``np.repeat``
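A quick sketch exercising both the function and the method form under ``@njit`` (relying on Numba's existing support for ``np.repeat``):

.. code:: python

    import numpy as np
    from numba import njit

    @njit
    def use_repeat(a):
        # module-level function and ndarray method respectively
        return np.repeat(a, 2), a.repeat(2)

    print(use_repeat(np.arange(3)))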
numba-0.55.1/docs/source/extending/template.py000664 000000 000000 00000001722 14174536160 021277 0ustar00rootroot000000 000000 # Declare that function `myfunc` is going to be overloaded (have a
# substitutable Numba implementation)
@overload(myfunc)
# Define the overload function with formal arguments
# these arguments must be matched in the inner function implementation
def jit_myfunc(arg0, arg1, arg2, ...):
    # This scope is for typing, access is available to the *type* of all
    # arguments. This information can be used to change the behaviour of the
    # implementing function and check that the types are actually supported
    # by the implementation.

    print(arg0)  # this will show the Numba type of arg0

    # This is the definition of the function that implements the `myfunc` work.
    # It does whatever algorithm is needed to implement myfunc.
    def myfunc_impl(arg0, arg1, arg2, ...):  # match arguments to jit_myfunc
        # < Implementation goes here >
        return  # whatever needs to be returned by the algorithm

    # return the implementation
    return myfunc_impl

numba-0.55.1/docs/source/glossary.rst000664 000000 000000 00000011013 14174536160 017514 0ustar00rootroot000000 000000 Glossary
========

.. glossary::

   ahead-of-time compilation
   AOT compilation
   AOT
      Compilation of a function in a separate step before running the program code, producing an on-disk binary object which can be distributed independently. This is the traditional kind of compilation known in languages such as C, C++ or Fortran.

   bytecode
   Python bytecode
      The original form in which Python functions are executed. Python bytecode describes a stack-machine executing abstract (untyped) operations using operands from both the function stack and the execution environment (e.g. global variables).

   compile-time constant
      An expression whose value Numba can infer and freeze at compile-time. Global variables and closure variables are compile-time constants.

   just-in-time compilation
   JIT compilation
   JIT
      Compilation of a function at execution time, as opposed to :term:`ahead-of-time compilation`.

   JIT function
      Shorthand for "a function :term:`JIT-compiled ` with Numba using the :ref:`@jit ` decorator."

   loop-lifting
   loop-jitting
      A feature of compilation in :term:`object mode` where a loop can be automatically extracted and compiled in :term:`nopython mode`. This allows functions with operations unsupported in nopython mode to see significant performance improvements if they contain loops with only nopython-supported operations.

   lowering
      The act of translating :term:`Numba IR` into LLVM IR. The term "lowering" stems from the fact that LLVM IR is low-level and machine-specific while Numba IR is high-level and abstract.

   NPM
   nopython mode
      A Numba compilation mode that generates code that does not access the Python C API. This compilation mode produces the highest performance code, but requires that the native types of all values in the function can be :term:`inferred `. Unless otherwise instructed, the ``@jit`` decorator will automatically fall back to :term:`object mode` if nopython mode cannot be used.

   Numba IR
   Numba intermediate representation
      A representation of a piece of Python code which is more amenable to analysis and transformations than the original Python :term:`bytecode`.

   object mode
      A Numba compilation mode that generates code that handles all values as Python objects and uses the Python C API to perform all operations on those objects.
      Code compiled in object mode will often run no faster than Python interpreted code, unless the Numba compiler can take advantage of :term:`loop-jitting`.

   ``OptionalType``
      An ``OptionalType`` is effectively a type union of a ``type`` and ``None``. They typically occur in practice due to a variable being set to ``None`` and then in a branch the variable being set to some other value. It's often not possible at compile time to determine if the branch will execute, so to permit :term:`type inference` to complete, the type of the variable becomes the union of a ``type`` (from the value) and ``None``, i.e. ``OptionalType(type)``.

   type inference
      The process by which Numba determines the specialized types of all values within a function being compiled. Type inference can fail if arguments or globals have Python types unknown to Numba, or if functions are used that are not recognized by Numba. Successful type inference is a prerequisite for compilation in :term:`nopython mode`.

   typing
      The act of running :term:`type inference` on a value or operation.

   ufunc
      A NumPy `universal function `_. Numba can create new compiled ufuncs with the :ref:`@vectorize ` decorator.

   reflection
      In numba, when a mutable container is passed as an argument to a nopython function from the Python interpreter, the container object and all its contained elements are converted into nopython values. To match the semantics of Python, any mutation on the container inside the nopython function must be visible in the Python interpreter. To do so, Numba must update the container and its elements and convert them back into Python objects during the transition back into the interpreter. Not to be confused with Python's "reflection" in the context of binary operators (see https://docs.python.org/3.5/reference/datamodel.html).

numba-0.55.1/docs/source/index.rst000664 000000 000000 00000001425 14174536160 016766 0ustar00rootroot000000 000000 .. Numba documentation master file, created by
   sphinx-quickstart on Tue Dec 30 11:55:40 2014.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

Numba documentation
===================

This is the Numba documentation. Unless you are already acquainted with Numba, we suggest you start with the :doc:`User manual <user/index>`.

.. toctree::
   :caption: For all users
   :maxdepth: 2

   user/index.rst
   reference/index.rst

.. toctree::
   :caption: For CUDA users
   :maxdepth: 2

   cuda/index.rst
   cuda-reference/index.rst

.. toctree::
   :caption: For advanced users & developers
   :maxdepth: 2

   extending/index.rst
   developer/index.rst
   proposals/index.rst
   glossary.rst
   release-notes.rst

numba-0.55.1/docs/source/proposals/000775 000000 000000 00000000000 14174536160 017145 5ustar00rootroot000000 000000 numba-0.55.1/docs/source/proposals/cfunc.rst000664 000000 000000 00000012321 14174536160 020774 0ustar00rootroot000000 000000 ============================
NBEP 4: Defining C callbacks
============================

:Author: Antoine Pitrou
:Date: April 2016
:Status: Draft

Interfacing with some native libraries (for example written in C or C++) can necessitate writing native callbacks to provide business logic to the library. Some Python-facing libraries may also provide the alternative of passing a ctypes-wrapped native callback instead of a Python callback for better performance. A simple example is the ``scipy.integrate`` package, where the user passes the function to be integrated as a callback.
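For instance, with a plain Python callback every evaluation point crosses back into the interpreter (a sketch using the same integrand as the example below):

.. code-block:: python

    from scipy import integrate

    def integrand(x):
        return 1 / x

    # quad calls integrand() many times through the Python layer,
    # which is the overhead this proposal aims to remove
    result, abserr = integrate.quad(integrand, 1.0, 2.0)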
Users of those libraries may want to benefit from the performance advantage of running purely native code, while writing their code in Python. This proposal outlines a scheme to provide such a functionality in Numba.

Basic usage
===========

We propose adding a new decorator, ``@cfunc``, importable from the main package. This decorator allows defining a callback as in the following example::

    from numba import cfunc
    from numba.types import float64

    # A callback with the C signature `double(double)`

    @cfunc(float64(float64), nopython=True)
    def integrand(x):
        return 1 / x

The ``@cfunc`` decorator returns a "C function" object holding the resources necessary to run the given compiled function (for example its LLVM module). This object has several attributes and methods:

* the ``ctypes`` attribute is a ctypes function object representing the native function.

* the ``address`` attribute is the address of the native function code, as an integer (note this can also be computed from the ``ctypes`` attribute).

* the ``native_name`` attribute is the symbol under which the function can be looked up inside the current process.

* the ``inspect_llvm()`` method returns the IR for the LLVM module in which the function is compiled. It is expected that the ``native_name`` attribute corresponds to the function's name in the LLVM IR.

The general signature of the decorator is ``cfunc(signature, **options)``.

The ``signature`` must specify the argument types and return type of the function using Numba types. In contrast to ``@jit``, the return type cannot be omitted.

The ``options`` are keyword-only parameters specifying compilation options. We are expecting that the standard ``@jit`` options (``nopython``, ``forceobj``, ``cache``) can be made to work with ``@cfunc``.

Calling from Numba-compiled functions
-------------------------------------

While the intended use is to pass a callback's address to foreign C code expecting a function pointer, it should be made possible to call the C callback from a Numba-compiled function.

Passing array data
==================

Native platform ABIs as used by C or C++ don't have the notion of a shaped array as in Numpy. One common solution is to pass a raw data pointer and one or several size arguments (depending on dimensionality). Numba must provide a way to rebuild an array view of this data inside the callback.

::

    from numba import cfunc, carray
    from numba.types import float64, CPointer, void, intp

    # A callback with the C signature `void(double *, double *, size_t)`

    @cfunc(void(CPointer(float64), CPointer(float64), intp))
    def invert(in_ptr, out_ptr, n):
        in_ = carray(in_ptr, (n,))
        out = carray(out_ptr, (n,))
        for i in range(n):
            out[i] = 1 / in_[i]

The ``carray`` function takes ``(pointer, shape, dtype)`` arguments (``dtype`` being optional) and returns a C-layout array view over the data *pointer*, with the given *shape* and *dtype*. *pointer* must be a ctypes pointer object (not a Python integer). The array's dimensionality corresponds to the *shape* tuple's length. If *dtype* is not given, the array's dtype corresponds to the *pointer*'s pointee type.

The ``farray`` function is similar except that it returns an F-layout array view.

Error handling
==============

There is no standard mechanism in C for error reporting. Unfortunately, Numba currently doesn't handle ``try..except`` blocks, which makes it more difficult for the user to implement the required error reporting scheme.
The current stance of this proposal is to let users guard against invalid arguments where necessary, and do whatever is required to inform the caller of the error.

Based on user feedback, we can later add support for some error reporting schemes, such as returning an integer error code depending on whether an exception was raised, or setting ``errno``.

Deferred topics
===============

Ahead-of-Time compilation
-------------------------

This proposal doesn't make any provision for AOT compilation of C callbacks. It would probably necessitate a separate API (a new method on the ``numba.pycc.CC`` object), and the implementation would require exposing a subset of the C function object's functionality from the compiled C extension module.

Opaque data pointers
--------------------

Some libraries allow passing an opaque data pointer (``void *``) to a user-provided callback, to provide any required context for execution of the callback. Taking advantage of this functionality would require adding specific support in Numba, for example the ability to do generic conversion from ``types.voidptr`` and to take the address of a Python-facing ``jitclass`` instance.

numba-0.55.1/docs/source/proposals/extension-points.rst000664 000000 000000 00000032030 14174536160 023223 0ustar00rootroot000000 000000 ========================
NBEP 2: Extension points
========================

:Author: Antoine Pitrou
:Date: July 2015
:Status: Draft

Implementing new types or functions in Numba requires hooking into various mechanisms along the compilation chain (and potentially outside of it). This document aims, first, at examining the current ways of doing so and, second, at making proposals to make extending easier.

If some of the proposals are implemented, we should first strive to use and exercise them internally, before exposing the APIs to the public.

.. note:: This document doesn't cover CUDA or any other non-CPU backend.

High-level API
==============

There is currently no high-level API, making some use cases more complicated than they should be.

Proposed changes
----------------

Dedicated module
''''''''''''''''

We propose the addition of a ``numba.extending`` module exposing the main APIs useful for extending Numba.

Implementing a function
'''''''''''''''''''''''

We propose the addition of a ``@overload`` decorator allowing the implementation of a given function for use in :term:`nopython mode`. The overloading function has the same formal signature as the implemented function, and receives the actual argument types. It should return a Python function implementing the overloaded function for the given types.

The following example implements :func:`numpy.where` with this approach.

.. literalinclude:: np-where-override.py

It is also possible to implement functions already known to Numba, to support additional types. The following example implements the built-in function :func:`len` for tuples with this approach::

    @overload(len)
    def tuple_len(x):
        if isinstance(x, types.BaseTuple):
            # The tuple length is known at compile-time, so simply reify it
            # as a constant.
            n = len(x)
            def len_impl(x):
                return n
            return len_impl

Implementing an attribute
'''''''''''''''''''''''''

We propose the addition of a ``@overload_attribute`` decorator allowing the implementation of an attribute getter for use in :term:`nopython mode`.

The following example implements the ``.nbytes`` attribute on Numpy arrays::

    @overload_attribute(types.Array, 'nbytes')
    def array_nbytes(arr):
        def get(arr):
            return arr.size * arr.itemsize
        return get
.. note:: The overload_attribute() signature allows for expansion to also define setters and deleters, by letting the decorated function return a ``getter, setter, deleter`` tuple instead of a single ``getter``.

Implementing a method
'''''''''''''''''''''

We propose the addition of a ``@overload_method`` decorator allowing the implementation of an instance method for use in :term:`nopython mode`.

The following example implements the ``.take()`` method on Numpy arrays::

    @overload_method(types.Array, 'take')
    def array_take(arr, indices):
        if isinstance(indices, types.Array):
            def take_impl(arr, indices):
                n = indices.shape[0]
                res = np.empty(n, arr.dtype)
                for i in range(n):
                    res[i] = arr[indices[i]]
                return res
            return take_impl

Exposing a structure member
'''''''''''''''''''''''''''

We propose the addition of a ``make_attribute_wrapper()`` function exposing an internal field as a visible read-only attribute, for those types backed by a ``StructModel`` data model.

For example, assuming ``PdIndexType`` is the Numba type of pandas indices, here is how to expose the underlying Numpy array as a ``._data`` attribute::

    @register_model(PdIndexType)
    class PdIndexModel(models.StructModel):
        def __init__(self, dmm, fe_type):
            members = [
                ('values', fe_type.as_array),
                ]
            models.StructModel.__init__(self, dmm, fe_type, members)

    make_attribute_wrapper(PdIndexType, 'values', '_data')

Typing
======

Numba types
-----------

Numba's standard types are declared in :mod:`numba.types`. To declare a new type, one subclasses the base :class:`Type` class or one of its existing abstract subclasses, and implements the required functionality.

Proposed changes
''''''''''''''''

No change required.

Type inference on values
------------------------

Values of a new type need to be type-inferred if they can appear as function arguments or constants. The core machinery is in :mod:`numba.typing.typeof`.

In the common case where some Python class or classes map exclusively to the new type, one can extend a generic function to dispatch on said classes, e.g.::

    from numba.typing.typeof import typeof_impl

    @typeof_impl.register(MyClass)
    def _typeof_myclass(val, c):
        if "some condition":
            return MyType(...)

The ``typeof_impl`` specialization must return a Numba type instance, or None if the value failed typing. (When one controls the class being type-inferred, an alternative to ``typeof_impl`` is to define a ``_numba_type_`` property on the class.)

In the rarer case where the new type can denote various Python classes that are impossible to enumerate, one must insert a manual check in the fallback implementation of the ``typeof_impl`` generic function.

Proposed changes
''''''''''''''''

Allow people to define a generic hook without monkeypatching the fallback implementation.

Fast path for type inference on function arguments
--------------------------------------------------

Optionally, one may want to allow a new type to participate in the fast type resolution (written in C code) to minimize function call overhead when a JIT-compiled function is called with the new type. One must then insert the required checks and implementation in the ``_typeof.c`` file, presumably inside the ``compute_fingerprint()`` function.

Proposed changes
''''''''''''''''

None. Adding generic hooks to C code embedded in a C Python extension is too delicate a change.

Type inference on operations
----------------------------

Values resulting from various operations (function calls, operators, etc.) are typed using a set of helpers called "templates".
One can define a new template by subclassing one of the existing base classes and implementing the desired inference mechanism. The template is explicitly registered with the type inference machinery using a decorator.

The :class:`ConcreteTemplate` base class allows one to define inference as a set of supported signatures for a given operation. The following example types the modulo operator::

    @builtin
    class BinOpMod(ConcreteTemplate):
        key = "%"
        cases = [signature(op, op, op)
                 for op in sorted(types.signed_domain)]
        cases += [signature(op, op, op)
                  for op in sorted(types.unsigned_domain)]
        cases += [signature(op, op, op)
                  for op in sorted(types.real_domain)]

(note that type *instances* are used in the signatures, severely limiting the amount of genericity that can be expressed)

The :class:`AbstractTemplate` base class allows one to define inference programmatically, giving it full flexibility. Here is a simplistic example of how tuple indexing (i.e. the ``__getitem__`` operator) can be expressed::

    @builtin
    class GetItemUniTuple(AbstractTemplate):
        key = "getitem"

        def generic(self, args, kws):
            tup, idx = args
            if isinstance(tup, types.UniTuple) and isinstance(idx, types.Integer):
                return signature(tup.dtype, tup, idx)

The :class:`AttributeTemplate` base class allows typing the attributes and methods of a given type. Here is an example, typing the ``.real`` and ``.imag`` attributes of complex numbers::

    @builtin_attr
    class ComplexAttribute(AttributeTemplate):
        key = types.Complex

        def resolve_real(self, ty):
            return ty.underlying_float

        def resolve_imag(self, ty):
            return ty.underlying_float

.. note:: :class:`AttributeTemplate` only works for getting attributes. Setting an attribute's value is hardcoded in :mod:`numba.typeinfer`.

The :class:`CallableTemplate` base class offers an easier way to parse flexible function signatures, by letting one define a callable that has the same definition as the function being typed. For example, here is how one could hypothetically type Python's ``sorted`` function if Numba supported lists::

    @builtin
    class Sorted(CallableTemplate):
        key = sorted

        def generic(self):
            def typer(iterable, key=None, reverse=None):
                if reverse is not None and not isinstance(reverse, types.Boolean):
                    return
                if key is not None and not isinstance(key, types.Callable):
                    return
                if not isinstance(iterable, types.Iterable):
                    return
                return types.List(iterable.iterator_type.yield_type)

            return typer

(note you can return just the function's return type instead of the full signature)

Proposed changes
''''''''''''''''

The naming of the various decorators is quite vague and confusing. We propose renaming ``@builtin`` to ``@infer``, ``@builtin_attr`` to ``@infer_getattr`` and ``builtin_global`` to ``infer_global``.

The two-step declaration for global values is a bit verbose; we propose simplifying it by allowing the use of ``infer_global`` as a decorator::

    @infer_global(len)
    class Len(AbstractTemplate):
        key = len

        def generic(self, args, kws):
            assert not kws
            (val,) = args
            if isinstance(val, (types.Buffer, types.BaseTuple)):
                return signature(types.intp, val)

The class-based API can feel clumsy; we can add a functional API for some of the template kinds:
.. code-block:: python

   @type_callable(sorted)
   def type_sorted(context):
       def typer(iterable, key=None, reverse=None):
           # [same function as above]

       return typer

Code generation
===============

Concrete representation of values of a Numba type
-------------------------------------------------

Any concrete Numba type must be able to be represented in LLVM form (for variable storage, argument passing, etc.). One defines that representation by implementing a datamodel class and registering it with a decorator. Datamodel classes for standard types are defined in :mod:`numba.datamodel.models`.

Proposed changes
''''''''''''''''

No change required.

Conversion between types
------------------------

Implicit conversion between Numba types is currently implemented as a monolithic sequence of choices and type checks in the :meth:`BaseContext.cast` method. To add a new implicit conversion, one appends a type-specific check in that method.

Boolean evaluation is a special case of implicit conversion (the destination type being :class:`types.Boolean`).

.. note:: Explicit conversion is seen as a regular operation, e.g. a constructor call.

Proposed changes
''''''''''''''''

Add a generic function for implicit conversion, with multiple dispatch based on the source and destination types. Here is an example showing how to write a float-to-integer conversion::

    @lower_cast(types.Float, types.Integer)
    def float_to_integer(context, builder, fromty, toty, val):
        lty = context.get_value_type(toty)
        if toty.signed:
            return builder.fptosi(val, lty)
        else:
            return builder.fptoui(val, lty)

Implementation of an operation
------------------------------

Other operations are implemented and registered using a set of generic functions and decorators. For example, here is how lookup of the ``.ndim`` attribute on Numpy arrays is implemented::

    @builtin_attr
    @impl_attribute(types.Kind(types.Array), "ndim", types.intp)
    def array_ndim(context, builder, typ, value):
        return context.get_constant(types.intp, typ.ndim)

And here is how calling ``len()`` on a tuple value is implemented::

    @builtin
    @implement(types.len_type, types.Kind(types.BaseTuple))
    def tuple_len(context, builder, sig, args):
        tupty, = sig.args
        retty = sig.return_type
        return context.get_constant(retty, len(tupty.types))

Proposed changes
''''''''''''''''

Review and streamline the API. Drop the requirement to write ``types.Kind(...)`` explicitly. Remove the separate ``@implement`` decorator and rename ``@builtin`` to ``@lower_builtin``, ``@builtin_attr`` to ``@lower_getattr``, etc.

Add decorators to implement ``setattr()`` operations, named ``@lower_setattr`` and ``@lower_setattr_generic``.

Conversion from / to Python objects
-----------------------------------

Some types need to be converted from or to Python objects, if they can be passed as function arguments or returned from a function. The corresponding boxing and unboxing operations are implemented using a generic function. The implementations for standard Numba types are in :mod:`numba.targets.boxing`. For example, here is the boxing implementation for a boolean value::

    @box(types.Boolean)
    def box_bool(c, typ, val):
        longval = c.builder.zext(val, c.pyapi.long)
        return c.pyapi.bool_from_long(longval)

Proposed changes
''''''''''''''''

Change the implementation signature from ``(c, typ, val)`` to ``(typ, val, c)``, to match the one chosen for the ``typeof_impl`` generic function.

numba-0.55.1/docs/source/proposals/external-memory-management.rst000664 000000 000000 00000104756 14174536160 025146 0ustar00rootroot000000 000000
.. _nbep-7:

===============================================
NBEP 7: CUDA External Memory Management Plugins
===============================================

:Author: Graham Markall, NVIDIA
:Contributors: Thomson Comer, Peter Entschev, Leo Fang, John Kirkham, Keith Kraus
:Date: March 2020
:Status: Final

Background and goals
--------------------

The :ref:`CUDA Array Interface ` enables sharing of data between different
Python libraries that access CUDA devices. However, each library manages its
own memory distinctly from the others. For example:

* `Numba `_ internally manages memory for the creation of device and mapped
  host arrays.
* `The RAPIDS libraries `_ (cuDF, cuML, etc.) use the `Rapids Memory
  Manager `_ for allocating device memory.
* `CuPy `_ includes a `memory pool implementation `_ for both device and
  pinned memory.

The goal of this NBEP is to describe a plugin interface that enables Numba's
internal memory management to be replaced with an external memory manager by
the user. When the plugin interface is in use, Numba no longer directly
allocates or frees any memory when creating arrays, but instead requests
allocations and frees through the external manager.

Requirements
------------

Provide an *External Memory Manager (EMM)* interface in Numba.

* When the EMM is in use, Numba will make all memory allocation using the
  EMM. It will never directly call functions such as ``cuMemAlloc``,
  ``cuMemFree``, etc.
* When not using an *External Memory Manager (EMM)*, Numba's present
  behaviour is unchanged (at the time of writing, the current version is
  the 0.48 release).

If an EMM is to be used, it will entirely replace Numba's internal memory
management for the duration of program execution. An interface for setting
the memory manager will be provided.

Device vs. Host memory
^^^^^^^^^^^^^^^^^^^^^^

An EMM will always take responsibility for the management of device memory.
However, not all CUDA memory management libraries also support managing host
memory, so a facility for Numba to continue the management of host memory
whilst ceding control of device memory to the EMM will be provided.

Deallocation strategies
^^^^^^^^^^^^^^^^^^^^^^^

Numba's internal memory management uses a :ref:`deallocation strategy `
designed to increase efficiency by deferring deallocations until a
significant quantity are pending. It also provides a mechanism for
preventing deallocations entirely during critical sections, using the
:func:`~numba.cuda.defer_cleanup` context manager.

* When the EMM is not in use, the deallocation strategy and operation of
  ``defer_cleanup`` remain unchanged.
* When the EMM is in use, the deallocation strategy is implemented by the
  EMM, and Numba's internal deallocation mechanism is not used. For example:

  * A similar strategy to Numba's could be implemented by the EMM, or
  * Deallocated memory might immediately be returned to a memory pool.

* The ``defer_cleanup`` context manager may behave differently with an EMM -
  an EMM should be accompanied by documentation of the behaviour of the
  ``defer_cleanup`` context manager when it is in use.

  * For example, a pool allocator could always immediately return memory to
    a pool even when the context manager is in use, but could choose not to
    free empty pools until ``defer_cleanup`` is not in use (see the sketch
    below).
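To make the pool-allocator example concrete, the following minimal sketch
shows one way such a policy could be structured. It is not part of the
proposal; the class and its bookkeeping are illustrative assumptions only:

.. code-block:: python

    from contextlib import contextmanager

    class PoolAllocatorCleanupSketch:
        """Illustrative only: frees always return memory to the pool, but
        empty pool blocks are only released outside defer_cleanup."""

        def __init__(self):
            self._empty_blocks = []   # stand-in for a real pool's blocks
            self._defer_depth = 0     # supports nested defer_cleanup calls

        def free(self, block):
            self._empty_blocks.append(block)  # memory goes straight back
            if self._defer_depth == 0:
                self._release_empty_blocks()

        def _release_empty_blocks(self):
            # A real pool would return whole empty blocks to the driver here.
            self._empty_blocks.clear()

        @contextmanager
        def defer_cleanup(self):
            self._defer_depth += 1
            try:
                yield
            finally:
                self._defer_depth -= 1
                if self._defer_depth == 0:
                    self._release_empty_blocks()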
Management of other objects
^^^^^^^^^^^^^^^^^^^^^^^^^^^

In addition to memory, Numba manages the allocation and deallocation of
:ref:`events `, :ref:`streams `, and modules (a module is a compiled object,
which is generated from ``@cuda.jit``-ted functions). The management of
streams, events, and modules should be unchanged by the presence or absence
of an EMM.

Asynchronous allocation / deallocation
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

An asynchronous memory manager might provide the facility for an allocation
or free to take a CUDA stream and execute asynchronously. For freeing, this
is unlikely to cause issues since it operates at a layer beneath Python, but
for allocations this could be problematic if the user tries to then launch a
kernel on the default stream from this asynchronous memory allocation.

The interface described in this proposal will not be required to support
asynchronous allocation and deallocation, and as such these use cases will
not be considered further. However, nothing in this proposal should preclude
the straightforward addition of asynchronous operations in future versions
of the interface.

Non-requirements
^^^^^^^^^^^^^^^^

In order to minimise complexity and constrain this proposal to a reasonable
scope, the following will not be supported:

* Using different memory manager implementations for different contexts. All
  contexts will use the same memory manager implementation - either the
  Numba internal implementation or an external implementation.
* Changing the memory manager once execution has begun. It is not practical
  to change the memory manager and retain all allocations. Cleaning up the
  entire state and then changing to a different memory allocator (rather
  than starting a new process) appears to be a rather niche use case.
* Any changes to the ``__cuda_array_interface__`` to further define its
  semantics, e.g. for acquiring / releasing memory as discussed in
  `Numba Issue #4886 `_ - these are independent, and can be addressed as
  part of separate proposals.
* Managed memory / UVM is not supported. At present Numba does not support
  UVM - see `Numba Issue #4362 `_ for discussion of support.

Interface for Plugin developers
-------------------------------

New classes and functions will be added to ``numba.cuda.cudadrv.driver``:

* ``BaseCUDAMemoryManager`` and ``HostOnlyCUDAMemoryManager``: base classes
  for EMM plugin implementations.
* ``set_memory_manager``: a function for registering an external memory
  manager with Numba.

These will be exposed through the public API, in the ``numba.cuda`` module.
Additionally, some classes that are already part of the `driver` module will
be exposed as part of the public API:

* ``MemoryPointer``: used to encapsulate information about a pointer to
  device memory.
* ``MappedMemory``: used to hold information about host memory that is
  mapped into the device address space (a subclass of ``MemoryPointer``).
* ``PinnedMemory``: used to hold information about host memory that is
  pinned (a subclass of ``mviewbuf.MemAlloc``, a class internal to Numba).

As an alternative to calling the ``set_memory_manager`` function, an
environment variable can be used to set the memory manager. The value of the
environment variable should be the name of the module containing the memory
manager in its global scope, named ``_numba_memory_manager``:

.. code-block::

    export NUMBA_CUDA_MEMORY_MANAGER="<module>"

When this variable is set, Numba will automatically use the memory manager
from the specified module.
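For illustration, a module usable with this environment variable might look
like the following sketch; ``MyEMMPlugin`` and the module layout are
hypothetical, not part of the proposal:

.. code-block:: python

    # my_emm_module.py - usable as NUMBA_CUDA_MEMORY_MANAGER="my_emm_module"
    from mylib.plugin import MyEMMPlugin  # hypothetical EMM plugin class

    # Numba looks this name up in the module's global scope:
    _numba_memory_manager = MyEMMPlugin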
When the environment variable is set, calls to ``set_memory_manager`` will
issue a warning, but otherwise be ignored.

Plugin Base Classes
^^^^^^^^^^^^^^^^^^^

An EMM plugin is implemented by inheriting from the
``BaseCUDAMemoryManager`` class, which is defined as:

.. code-block:: python

    class BaseCUDAMemoryManager(object, metaclass=ABCMeta):
        @abstractmethod
        def memalloc(self, size):
            """
            Allocate on-device memory in the current context.

            Arguments:

            - `size`: Size of allocation in bytes

            Returns: a `MemoryPointer` to the allocated memory.
            """

        @abstractmethod
        def memhostalloc(self, size, mapped, portable, wc):
            """
            Allocate pinned host memory.

            Arguments:

            - `size`: Size of the allocation in bytes
            - `mapped`: Whether the allocated memory should be mapped into
              the CUDA address space.
            - `portable`: Whether the memory will be considered pinned by
              all contexts, and not just the calling context.
            - `wc`: Whether to allocate the memory as write-combined.

            Returns a `MappedMemory` or `PinnedMemory` instance that owns
            the allocated memory, depending on whether the region was mapped
            into device memory.
            """

        @abstractmethod
        def mempin(self, owner, pointer, size, mapped):
            """
            Pin a region of host memory that is already allocated.

            Arguments:

            - `owner`: An object owning the memory - e.g. a `DeviceNDArray`.
            - `pointer`: The pointer to the beginning of the region to pin.
            - `size`: The size of the region to pin.
            - `mapped`: Whether the region should also be mapped into device
              memory.

            Returns a `MappedMemory` or `PinnedMemory` instance that refers
            to the allocated memory, depending on whether the region was
            mapped into device memory.
            """

        @abstractmethod
        def initialize(self):
            """
            Perform any initialization required for the EMM plugin to be
            ready to use.
            """

        @abstractmethod
        def get_memory_info(self):
            """
            Returns (free, total) memory in bytes in the context
            """

        @abstractmethod
        def get_ipc_handle(self, memory):
            """
            Return an `IpcHandle` from a GPU allocation.

            Arguments:

            - `memory`: A `MemoryPointer` for which the IPC handle should be
              created.
            """

        @abstractmethod
        def reset(self):
            """
            Clear up all memory allocated in this context.
            """

        @abstractmethod
        def defer_cleanup(self):
            """
            Returns a context manager that ensures the implementation of
            deferred cleanup whilst it is active.
            """

        @property
        @abstractmethod
        def interface_version(self):
            """
            Returns an integer specifying the version of the EMM Plugin
            interface supported by the plugin implementation. Should always
            return 1 for implementations described in this proposal.
            """

All of the methods of an EMM plugin are called from within Numba - they
never need to be invoked directly by a Numba user.

The ``initialize`` method is called by Numba prior to any memory allocations
being requested. This gives the EMM an opportunity to initialize any data
structures, etc., that it needs for its normal operations. The method may be
called multiple times during the lifetime of the program - subsequent calls
should not invalidate or reset the state of the EMM.

The ``memalloc``, ``memhostalloc``, and ``mempin`` methods are called when
Numba requires an allocation of device or host memory, or pinning of host
memory. Device memory should always be allocated in the current context.

``get_ipc_handle`` is called when an IPC handle for an array is required.
Note that there is no method for closing an IPC handle - this is because the
``IpcHandle`` object constructed by ``get_ipc_handle`` contains a
``close()`` method as part of its definition in Numba, which closes the
handle by calling ``cuIpcCloseMemHandle``.
It is expected that this is sufficient for general use cases, so no facility
for customising the closing of IPC handles is provided by the EMM Plugin
interface.

``get_memory_info`` may be called at any time after ``initialize``.

``reset`` is called as part of resetting a context. Numba does not normally
call reset spontaneously, but it may be called at the behest of the user.
Calls to ``reset`` may even occur before ``initialize`` is called, so the
plugin should be robust against this occurrence.

``defer_cleanup`` is called when the ``numba.cuda.defer_cleanup`` context
manager is used from user code.

``interface_version`` is called by Numba when the memory manager is set, to
ensure that the version of the interface implemented by the plugin is
compatible with the version of Numba in use.

Representing pointers
^^^^^^^^^^^^^^^^^^^^^

Device Memory
~~~~~~~~~~~~~

The ``MemoryPointer`` class is used to represent a pointer to memory. Whilst
there are various details of its implementation, the only aspect relevant to
EMM plugin development is its initialization. The ``__init__`` method has
the following interface:

.. code-block:: python

    class MemoryPointer:
        def __init__(self, context, pointer, size, owner=None, finalizer=None):

* ``context``: The context in which the pointer was allocated.
* ``pointer``: A ``ctypes`` pointer (e.g. ``ctypes.c_uint64``) holding the
  address of the memory.
* ``size``: The size of the allocation in bytes.
* ``owner``: The owner is sometimes set by the internals of the class, or
  used for Numba's internal memory management, but need not be provided by
  the writer of an EMM plugin - the default of ``None`` should always
  suffice.
* ``finalizer``: A method that is called when the last reference to the
  ``MemoryPointer`` object is released. Usually this will make a call to the
  external memory management library to inform it that the memory is no
  longer required, and that it could potentially be freed (though the EMM is
  not required to free it immediately).

Host Memory
~~~~~~~~~~~

Memory mapped into the CUDA address space (which is created when the
``memhostalloc`` or ``mempin`` methods are called with ``mapped=True``) is
managed using the ``MappedMemory`` class:

.. code-block:: python

    class MappedMemory(AutoFreePointer):
        def __init__(self, context, pointer, size, owner, finalizer=None):

* ``context``: The context in which the pointer was allocated.
* ``pointer``: A ``ctypes`` pointer (e.g. ``ctypes.c_void_p``) holding the
  address of the allocated memory.
* ``size``: The size of the allocated memory in bytes.
* ``owner``: A Python object that owns the memory, e.g. a ``DeviceNDArray``
  instance.
* ``finalizer``: A method that is called when the last reference to the
  ``MappedMemory`` object is released. For example, this method could call
  ``cuMemFreeHost`` on the pointer to deallocate the memory immediately.

Note that the inheritance from ``AutoFreePointer`` is an implementation
detail and need not concern the developer of an EMM plugin -
``MemoryPointer`` is higher in the MRO of ``MappedMemory``.

Memory that is only in the host address space and has been pinned is
represented with the ``PinnedMemory`` class:

.. code-block:: python

    class PinnedMemory(mviewbuf.MemAlloc):
        def __init__(self, context, pointer, size, owner, finalizer=None):

* ``context``: The context in which the pointer was allocated.
* ``pointer``: A ``ctypes`` pointer (e.g. ``ctypes.c_void_p``) holding the
  address of the pinned memory.
* ``size``: The size of the pinned region in bytes.
* ``owner``: A Python object that owns the memory, e.g. a ``DeviceNDArray``
  instance.
* ``finalizer``: A method that is called when the last reference to the
  ``PinnedMemory`` object is released. This method could e.g. call
  ``cuMemHostUnregister`` on the pointer to unpin the memory immediately.

Providing device memory management only
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Some external memory managers will support management of on-device memory
but not host memory. To make it easy to implement an EMM plugin using one of
these managers, Numba will provide a memory manager class with
implementations of the ``memhostalloc`` and ``mempin`` methods. An abridged
definition of this class follows:

.. code-block:: python

    class HostOnlyCUDAMemoryManager(BaseCUDAMemoryManager):
        # Unimplemented methods:
        #
        # - memalloc
        # - get_memory_info

        def memhostalloc(self, size, mapped, portable, wc):
            # Implemented.

        def mempin(self, owner, pointer, size, mapped):
            # Implemented.

        def initialize(self):
            # Implemented.
            #
            # Must be called by any subclass when its initialize() method is
            # called.

        def reset(self):
            # Implemented.
            #
            # Must be called by any subclass when its reset() method is
            # called.

        def defer_cleanup(self):
            # Implemented.
            #
            # Must be called by any subclass when its defer_cleanup() method
            # is called.

A class can subclass the ``HostOnlyCUDAMemoryManager`` and then it only
needs to add implementations of methods for on-device memory. Any subclass
must observe the following rules:

* If the subclass implements ``__init__``, then it must also call
  ``HostOnlyCUDAMemoryManager.__init__``, as this is used to initialize some
  of its data structures (``self.allocations`` and ``self.deallocations``).
* The subclass must implement ``memalloc`` and ``get_memory_info``.
* The ``initialize`` and ``reset`` methods perform initialisation of
  structures used by the ``HostOnlyCUDAMemoryManager``.

  * If the subclass has nothing to do on initialisation (possibly) or reset
    (unlikely) then it need not implement these methods.
  * However, if it does implement these methods then it must also call the
    methods from ``HostOnlyCUDAMemoryManager`` in its own implementations.

* Similarly if ``defer_cleanup`` is implemented, it should enter the context
  provided by ``HostOnlyCUDAMemoryManager.defer_cleanup()`` prior to
  ``yield``-ing (or in the ``__enter__`` method) and release it prior to
  exiting (or in the ``__exit__`` method).

Import order
^^^^^^^^^^^^

The order in which Numba and the library implementing an EMM Plugin are
imported should not matter. For example, if ``rmm`` were to implement and
register an EMM Plugin, then:

.. code-block:: python

    from numba import cuda
    import rmm

and

.. code-block:: python

    import rmm
    from numba import cuda

are equivalent - this is because Numba does not initialize CUDA or allocate
any memory until the first call to a CUDA function - neither instantiating
and registering an EMM plugin, nor importing ``numba.cuda`` causes a call to
a CUDA function.

Numba as a Dependency
^^^^^^^^^^^^^^^^^^^^^

Adding the implementation of an EMM Plugin to a library naturally makes
Numba a dependency of the library where it may not have been previously. In
order to make the dependency optional, if this is desired, one might
conditionally instantiate and register the EMM Plugin like:
.. code-block:: python

    try:
        import numba
        from mylib.numba_utils import MyNumbaMemoryManager
        numba.cuda.cudadrv.driver.set_memory_manager(MyNumbaMemoryManager)
    except ImportError:
        print("Numba not importable - not registering EMM Plugin")

so that ``mylib.numba_utils``, which contains the implementation of the EMM
Plugin, is only imported if Numba is already present. If Numba is not
available, then ``mylib.numba_utils`` (which necessarily imports ``numba``),
will never be imported.

It is recommended that any library with an EMM Plugin includes at least some
environments with Numba for testing with the EMM Plugin in use, as well as
some environments without Numba, to avoid introducing an accidental Numba
dependency.

Example implementation - A RAPIDS Memory Manager (RMM) Plugin
-------------------------------------------------------------

An implementation of an EMM plugin within the `Rapids Memory Manager (RMM) `_
is sketched out in this section. This is intended to show an overview of the
implementation in order to support the descriptions above and to illustrate
how the plugin interface can be used - different choices may be made for a
production-ready implementation.

The plugin implementation consists of additions to `python/rmm/rmm.py `_:

.. code-block:: python

    # New imports:
    from contextlib import contextmanager

    # RMM already has Numba as a dependency, so these imports need not be
    # guarded by a check for the presence of numba.
    from numba.cuda import (HostOnlyCUDAMemoryManager, MemoryPointer,
                            IpcHandle, set_memory_manager)

    # New class implementing the EMM Plugin:
    class RMMNumbaManager(HostOnlyCUDAMemoryManager):
        def memalloc(self, size):
            # Allocates device memory using RMM functions. The finalizer for
            # the allocated memory calls back to RMM to free the memory.
            stream = 0  # Allocate on the default stream.
            addr = librmm.rmm_alloc(size, stream)
            ctx = cuda.current_context()
            ptr = ctypes.c_uint64(int(addr))
            finalizer = _make_finalizer(addr, stream)
            return MemoryPointer(ctx, ptr, size, finalizer=finalizer)

        def get_ipc_handle(self, memory):
            """
            Get an IPC handle for the memory with offset modified by the RMM
            memory pool.
            """
            # This implementation provides a functional implementation and
            # illustrates what get_ipc_handle needs to do, but it is not a
            # very "clean" implementation, and it relies on borrowing bits
            # of Numba internals to initialise ipchandle.
            #
            # A more polished implementation might make use of additional
            # functions in the RMM C++ layer for initialising IPC handles,
            # and not use any Numba internals.
            ipchandle = (ctypes.c_byte * 64)()  # IPC handle is 64 bytes
            cuda.cudadrv.memory.driver_funcs.cuIpcGetMemHandle(
                ctypes.byref(ipchandle),
                memory.owner.handle,
            )
            source_info = cuda.current_context().device.get_device_identity()
            ptr = memory.device_ctypes_pointer.value
            offset = librmm.rmm_getallocationoffset(ptr, 0)
            return IpcHandle(memory, ipchandle, memory.size, source_info,
                             offset=offset)

        def get_memory_info(self):
            # Returns a tuple of (free, total) using RMM functionality.
            return get_info()  # Function defined in rmm.py

        def initialize(self):
            # Nothing required to initialize RMM here, but this method is
            # added to illustrate that the super() method should also be
            # called.
            super().initialize()

        @contextmanager
        def defer_cleanup(self):
            # Does nothing to defer cleanup - a full implementation may
            # choose to implement a different policy.
            with super().defer_cleanup():
                yield

        @property
        def interface_version(self):
            # As required by the specification
            return 1

    # The existing _make_finalizer function is used by RMMNumbaManager:

    def _make_finalizer(handle, stream):
        """
        Factory to make the finalizer function.
        We need to bind *handle* and *stream* into the actual finalizer,
        which takes no args.
        """
        def finalizer():
            """
            Invoked when the MemoryPointer is freed
            """
            librmm.rmm_free(handle, stream)

        return finalizer

    # Utility function to register `RMMNumbaManager` as an EMM:
    def use_rmm_for_numba():
        set_memory_manager(RMMNumbaManager)

    # To support `NUMBA_CUDA_MEMORY_MANAGER=rmm`:
    _numba_memory_manager = RMMNumbaManager

Example usage
^^^^^^^^^^^^^

A simple example that configures Numba to use RMM for memory management and
creates a device array is as follows:

.. code-block:: python

    # example.py
    import rmm
    import numpy as np

    from numba import cuda

    rmm.use_rmm_for_numba()

    a = np.zeros(10)
    d_a = cuda.to_device(a)
    del(d_a)
    print(rmm.csv_log())

Running this should result in output similar to the following:

.. code-block::

    Event Type,Device ID,Address,Stream,Size (bytes),Free Memory,Total Memory,Current Allocs,Start,End,Elapsed,Location
    Alloc,0,0x7fae06600000,0,80,0,0,1,1.10549,1.1074,0.00191666,/numba/numba/cuda/cudadrv/driver.py:683
    Free,0,0x7fae06600000,0,0,0,0,0,1.10798,1.10921,0.00122238,/numba/numba/utils.py:678

Note that there is some scope for improvement in RMM for detecting the line
number at which the allocation / free occurred, but this is outside the
scope of the example in this proposal.

Setting the memory manager through the environment
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Rather than calling ``rmm.use_rmm_for_numba()`` in the example above, the
memory manager could also be set to use RMM globally with an environment
variable, so the Python interpreter is invoked to run the example as:

.. code-block::

    NUMBA_CUDA_MEMORY_MANAGER="rmm.RMMNumbaManager" python example.py

Numba internal changes
----------------------

This section is intended primarily for Numba developers - those with an
interest in the external interface for implementing EMM plugins may choose
to skip over this section.

Current model / implementation
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

At present, memory management is implemented in the
:class:`~numba.cuda.cudadrv.driver.Context` class. It maintains lists of
allocations and deallocations:

* ``allocations`` is a ``numba.core.utils.UniqueDict``, created at context
  creation time.
* ``deallocations`` is an instance of the ``_PendingDeallocs`` class, and is
  created when ``Context.prepare_for_use()`` is called.

These are used to track allocations and deallocations of:

* Device memory
* Pinned memory
* Mapped memory
* Streams
* Events
* Modules

The ``_PendingDeallocs`` class implements the deferred deallocation strategy
- cleanup functions (such as ``cuMemFree``) for the items above are added to
its list of pending deallocations by the finalizers of objects representing
allocations. These finalizers are run when the objects owning them are
garbage-collected by the Python interpreter. When the addition of a new
cleanup function to the deallocation list causes the number or size of
pending deallocations to exceed a configured ratio, the ``_PendingDeallocs``
object runs deallocators for all items it knows about and then clears its
internal pending list.

See :ref:`deallocation-behavior` for more details of this implementation.
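The bookkeeping just described can be pictured with the following simplified
sketch; the names and the flush condition are simplifications for
illustration, not the actual ``_PendingDeallocs`` implementation:

.. code-block:: python

    class PendingDeallocsSketch:
        """Simplified model of the deferred deallocation list."""

        def __init__(self, capacity, ratio=0.2):
            self._cleanups = []        # pending (dtor, handle, size) items
            self._size = 0             # total bytes awaiting deallocation
            self._capacity = capacity  # e.g. total device memory in bytes
            self._ratio = ratio        # flush threshold as a fraction

        def add_item(self, dtor, handle, size):
            # Called from the finalizers of allocation-owning objects.
            self._cleanups.append((dtor, handle, size))
            self._size += size
            if self._size > self._capacity * self._ratio:
                self.clear()

        def clear(self):
            # Run every pending deallocator (e.g. cuMemFree), then reset.
            for dtor, handle, _ in self._cleanups:
                dtor(handle)
            self._cleanups.clear()
            self._size = 0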
Proposed changes
^^^^^^^^^^^^^^^^

This section outlines the major changes that will be made to support the EMM
plugin interface - there will be various small changes to other parts of
Numba that will be required in order to adapt to these changes; an
exhaustive list of these is not provided.

Context changes
~~~~~~~~~~~~~~~

The ``numba.cuda.cudadrv.driver.Context`` class will no longer directly
allocate and free memory. Instead, the context will hold a reference to a
memory manager instance, and its memory allocation methods will call into
the memory manager, e.g.:

.. code-block:: python

    def memalloc(self, size):
        return self.memory_manager.memalloc(size)

    def memhostalloc(self, size, mapped=False, portable=False, wc=False):
        return self.memory_manager.memhostalloc(size, mapped, portable, wc)

    def mempin(self, owner, pointer, size, mapped=False):
        if mapped and not self.device.CAN_MAP_HOST_MEMORY:
            raise CudaDriverError("%s cannot map host memory" % self.device)
        return self.memory_manager.mempin(owner, pointer, size, mapped)

    def prepare_for_use(self):
        self.memory_manager.initialize()

    def get_memory_info(self):
        return self.memory_manager.get_memory_info()

    def get_ipc_handle(self, memory):
        return self.memory_manager.get_ipc_handle(memory)

    def reset(self):
        # ... Already-extant reset logic, plus:
        self.memory_manager.reset()

The ``memory_manager`` member is initialised when the context is created.

The ``memunpin`` method (not shown above but currently exists in the
``Context`` class) has never been implemented - it presently raises a
``NotImplementedError``. This method is arguably un-needed - pinned memory
is immediately unpinned by its finalizer, and unpinning before a finalizer
runs would invalidate the state of ``PinnedMemory`` objects for which
references are still held. It is proposed that this is removed when making
the other changes to the ``Context`` class.

The ``Context`` class will still instantiate ``self.allocations`` and
``self.deallocations`` as before - these will still be used by the context
to manage the allocations and deallocations of events, streams, and modules,
which are not handled by the EMM plugin.

New components of the ``driver`` module
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* ``BaseCUDAMemoryManager``: An abstract class, as defined in the plugin
  interface above.
* ``HostOnlyCUDAMemoryManager``: A subclass of ``BaseCUDAMemoryManager``,
  with the logic from ``Context.memhostalloc`` and ``Context.mempin`` moved
  into it. This class will also create its own ``allocations`` and
  ``deallocations`` members, similarly to how the ``Context`` class creates
  them. These are used to manage the allocations and deallocations of pinned
  and mapped host memory.
* ``NumbaCUDAMemoryManager``: A subclass of ``HostOnlyCUDAMemoryManager``,
  which also contains an implementation of ``memalloc`` based on that
  presently existing in the ``Context`` class. This is the default memory
  manager, and its use preserves the behaviour of Numba prior to the
  addition of the EMM plugin interface - that is, all memory allocation and
  deallocation for Numba arrays is handled within Numba.

  * This class shares the ``allocations`` and ``deallocations`` members with
    its parent class ``HostOnlyCUDAMemoryManager``, and it uses these for
    the management of device memory that it allocates.

* The ``set_memory_manager`` function, which sets a global pointing to the
  memory manager class. This global initially holds
  ``NumbaCUDAMemoryManager`` (the default); a sketch follows this list.
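A minimal sketch of how that global might be managed follows; the variable
name and default handling are assumptions for illustration, not the actual
internals of the driver module:

.. code-block:: python

    # Module-level state in numba.cuda.cudadrv.driver (sketch only):
    _memory_manager = None  # treated as NumbaCUDAMemoryManager when unset

    def set_memory_manager(mm_plugin):
        """Configure the class used to build each context's memory manager."""
        global _memory_manager
        _memory_manager = mm_plugin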
Staged IPC ~~~~~~~~~~ Staged IPC should not take ownership of the memory that it allocates. When the default internal memory manager is in use, the memory allocated for the staging array is already owned. When an EMM plugin is in use, it is not legitimate to take ownership of the memory. This change can be made by applying the following small patch, which has been tested to have no effect on the CUDA test suite: .. code-block:: diff diff --git a/numba/cuda/cudadrv/driver.py b/numba/cuda/cudadrv/driver.py index 7832955..f2c1352 100644 --- a/numba/cuda/cudadrv/driver.py +++ b/numba/cuda/cudadrv/driver.py @@ -922,7 +922,11 @@ class _StagedIpcImpl(object): with cuda.gpus[srcdev.id]: impl.close() - return newmem.own() + return newmem Testing ~~~~~~~ Alongside the addition of appropriate tests for new functionality, there will be some refactoring of existing tests required, but these changes are not substantial. Tests of the deallocation strategy (e.g. ``TestDeallocation``\ , ``TestDeferCleanup``\ ) will need to be modified to ensure that they are examining the correct set of deallocations. When an EMM plugin is in use, they will need to be skipped. Prototyping / experimental implementation ----------------------------------------- Some prototype / experimental implementations have been produced to guide the designs presented in this document. The current implementations can be found in: * Numba branch: https://github.com/gmarkall/numba/tree/grm-numba-nbep-7. * RMM branch: https://github.com/gmarkall/rmm/tree/grm-numba-nbep-7. * CuPy implementation: https://github.com/gmarkall/nbep-7/blob/master/nbep7/cupy_mempool.py - uses an unmodified CuPy. * See `CuPy memory management docs `_. Current implementation status ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RMM Plugin ~~~~~~~~~~ For a minimal example, a simple allocation and free using RMM works as expected. For the example code (similar to the RMM example above): .. code-block:: python import rmm import numpy as np from numba import cuda rmm.use_rmm_for_numba() a = np.zeros(10) d_a = cuda.to_device(a) del(d_a) print(rmm.csv_log()) We see the following output: .. code-block:: Event Type,Device ID,Address,Stream,Size (bytes),Free Memory,Total Memory,Current Allocs,Start,End,Elapsed,Location Alloc,0,0x7f96c7400000,0,80,0,0,1,1.13396,1.13576,0.00180059,/numba/numba/cuda/cudadrv/driver.py:686 Free,0,0x7f96c7400000,0,0,0,0,0,1.13628,1.13723,0.000956004,/numba/numba/utils.py:678 This output is similar to the expected output from the example usage presented above (though note that the pointer addresses and timestamps vary compared to the example), and provides some validation of the example use case. CuPy Plugin ~~~~~~~~~~~ .. code-block:: python from nbep7.cupy_mempool import use_cupy_mm_for_numba import numpy as np from numba import cuda use_cupy_mm_for_numba() a = np.zeros(10) d_a = cuda.to_device(a) del(d_a) The prototype CuPy plugin has somewhat primitive logging, so we see the output: .. code-block:: Allocated 80 bytes at 7f004d400000 Freeing 80 bytes at 7f004d400000 Numba CUDA Unit tests ^^^^^^^^^^^^^^^^^^^^^ As well as providing correct execution of a simple example, all relevant Numba CUDA unit tests also pass with the prototype branch, for both the internal memory manager and the RMM EMM Plugin. RMM ~~~ The unit test suite can be run with the RMM EMM Plugin with: .. code-block:: NUMBA_CUDA_MEMORY_MANAGER=rmm python -m numba.runtests numba.cuda.tests A summary of the unit test suite output is: .. 
code-block:: Ran 564 tests in 142.211s OK (skipped=11) When running with the built-in Numba memory management, the output is: .. code-block:: Ran 564 tests in 133.396s OK (skipped=5) i.e. the changes for using an external memory manager do not break the built-in Numba memory management. There are an additional 6 skipped tests, from: * ``TestDeallocation``\ : skipped as it specifically tests Numba's internal deallocation strategy. * ``TestDeferCleanup``\ : skipped as it specifically tests Numba's implementation of deferred cleanup. * ``TestCudaArrayInterface.test_ownership``\ : skipped as Numba does not own memory when an EMM Plugin is used, but ownership is assumed by this test case. CuPy ~~~~ The test suite can be run with the CuPy plugin using: .. code-block:: NUMBA_CUDA_MEMORY_MANAGER=nbep7.cupy_mempool python -m numba.runtests numba.cuda.tests This plugin implementation is presently more primitive than the RMM implementation, and results in some errors with the unit test suite: .. code-block:: Ran 564 tests in 111.699s FAILED (errors=8, skipped=11) The 8 errors are due to a lack of implementation of ``get_ipc_handle`` in the CuPy EMM Plugin implementation. It is expected that this implementation will be re-visited and completed so that CuPy can be used stably as an allocator for Numba in the future. numba-0.55.1/docs/source/proposals/index.rst000664 000000 000000 00000001605 14174536160 021010 0ustar00rootroot000000 000000 =========================== Numba Enhancement Proposals =========================== Numba Enhancement Proposals (not really abbreviated "NEPs", since "NEP" is already taken by the Numpy project) describe proposed changes to Numba. They are modeled on Python Enhancement Proposals (PEPs) and Numpy Enhancement Proposals, and are typically written up when important changes (behavioural changes, feature additions...) to Numba are proposed. This page provides an overview of all proposals, making only a distinction between the ones that have been implemented and those that have not been implemented. Implemented proposals --------------------- .. toctree:: :maxdepth: 1 integer-typing.rst external-memory-management.rst Other proposals --------------- .. toctree:: :maxdepth: 1 extension-points.rst jit-classes.rst cfunc.rst type-inference.rst typing_recursion.rst numba-0.55.1/docs/source/proposals/integer-typing.rst000664 000000 000000 00000017273 14174536160 022656 0ustar00rootroot000000 000000 .. _nbep-1: ================================= NBEP 1: Changes in integer typing ================================= :Author: Antoine Pitrou :Date: July 2015 :Status: Final Current semantics ================= Type inference of integers in Numba currently has some subtleties and some corner cases. The simple case is when some variable has an obvious Numba type (for example because it is the result of a constructor call to a Numpy scalar type such as ``np.int64``). That case suffers no ambiguity. The less simple case is when a variable doesn't bear such explicit information. This can happen because it is inferred from a built-in Python ``int`` value, or from an arithmetic operation between two integers, or other cases yet. Then Numba has a number of rules to infer the resulting Numba type, especially its signedness and bitwidth. Currently, the generic case could be summarized as: *start small, grow bigger as required*. Concretely: 1. 
Each constant or pseudo-constant is inferred using the *smallest signed
   integer type* that can correctly represent it (or, possibly, ``uint64``
   for positive integers between ``2**63`` and ``2**64 - 1``).

2. The result of an operation is typed so as to ensure safe representation
   in the face of overflow and other magnitude increases (for example,
   ``int32 + int32`` would be typed ``int64``).

3. As an exception, a Python ``int`` used as function argument is always
   typed ``intp``, a pointer-size integer. This is to avoid the
   proliferation of compiled specializations, as otherwise various integer
   bitwidths in input arguments may produce multiple signatures.

.. note::
   The second rule above (the "respect magnitude increases" rule)
   reproduces Numpy's behaviour with arithmetic on scalar values. Numba,
   however, has different implementation and performance constraints than
   Numpy scalars.

   It is worth noting, by the way, that Numpy arrays do not implement said
   rule (i.e. ``array(int32) + array(int32)`` is typed ``array(int32)``,
   not ``array(int64)``). Probably because this makes performance more
   controllable.

This has several non-obvious side-effects:

1. It is difficult to predict the precise type of a value inside a
   function, after several operations. The basic operands in an expression
   tree may for example be ``int8`` but the end result may be ``int64``.
   Whether this is desirable or not is an open question; it is good for
   correctness, but potentially bad for performance.

2. In trying to follow the correctness over predictability rule, some
   values can actually leave the integer realm. For example,
   ``int64 + uint64`` is typed ``float64`` in order to avoid magnitude
   losses (but incidentally will lose precision on large integer
   values...), again following Numpy's semantics for scalars. This is
   usually not intended by the user.

3. More complicated scenarios can produce unexpected errors at the type
   unification stage. An example is at `Github issue 1299 `_, the gist of
   which is reproduced here::

      @jit(nopython=True)
      def f():
          variable = 0
          for i in range(1):
              variable = variable + 1
          return np.arange(variable)

   At the time of this writing, this fails compiling, on a 64-bit system,
   with the error::

      numba.errors.TypingError: Failed at nopython (nopython frontend)
      Can't unify types of variable '$48.4': $48.4 := {array(int32, 1d, C),
      array(int64, 1d, C)}

   People familiar with Numba's type unification system can understand why.
   But the user is caught in mystery.

Proposal: predictable width-conserving typing
=============================================

We propose to turn the current typing philosophy on its head. Instead of
"*start small and grow as required*", we propose "*start big and keep the
width unchanged*".

Concretely:

1. The typing of Python ``int`` values used as function arguments doesn't
   change, as it works satisfyingly and doesn't surprise the user.

2. The typing of integer *constants* (and pseudo-constants) changes to
   match the typing of integer arguments. That is, every non-explicitly
   typed integer constant is typed ``intp``, the pointer-sized integer;
   except for the rare cases where ``int64`` (on 32-bit systems) or
   ``uint64`` is required.

3. Operations on integers promote bitwidth to ``intp``, if smaller,
   otherwise they don't promote. For example, on a 32-bit machine,
   ``int8 + int8`` is typed ``int32``, as is ``int32 + int32``. However,
   ``int64 + int64`` is typed ``int64``.

4. Furthermore, mixed operations between signed and unsigned fall back to
   signed, while following the same bitwidth rule.
For example, on a 32-bit machine, ``int8 + uint16`` is typed ``int32``, as is ``uint32 + int32``. Proposal impact =============== Semantics --------- With this proposal, the semantics become clearer. Regardless of whether the arguments and constants of a function were explicitly typed or not, the results of various expressions at any point in the function have easily predictable types. When using built-in Python ``int``, the user gets acceptable magnitude (32 or 64 bits depending on the system's bitness), and the type remains the same across all computations. When explicitly using smaller bitwidths, intermediate results don't suffer from magnitude loss, since their bitwidth is promoted to ``intp``. There is also less potential for annoyances with the type unification system as demonstrated above. The user would have to force several different types to be faced with such an error. One potential cause for concern is the discrepancy with Numpy's scalar semantics; but at the same time this brings Numba scalar semantics closer to array semantics (both Numba's and Numpy's), which seems a desirable outcome as well. It is worth pointing out that some sources of integer numbers, such as the ``range()`` built-in, always yield 32-bit integers or larger. This proposal could be an opportunity to standardize them on ``intp``. Performance ----------- Except in trivial cases, it seems unlikely that the current "best fit" behaviour for integer constants really brings a performance benefit. After all, most integers in Numba code would either be stored in arrays (with well-known types, chosen by the user) or be used as indices, where a ``int8`` is highly unlikely to fare better than a ``intp`` (actually, it may be worse, if LLVM isn't able to optimize away the required sign-extension). As a side note, the default use of ``intp`` rather than ``int64`` ensures that 32-bit systems won't suffer from poor arithmetic performance. Implementation -------------- Optimistically, this proposal may simplify some Numba internals a bit. Or, at least, it doesn't threaten to make them significantly more complicated. Limitations ----------- This proposal doesn't really solve the combination of signed and unsigned integers. It is geared mostly at solving the bitwidth issues, which are a somewhat common cause of pain for users. Unsigned integers are in practice very uncommon in Numba-compiled code, except when explicitly asked for, and therefore much less of a pain point. On the bitwidth front, 32-bit systems could still show discrepancies based on the values of constants: if a constant is too large to fit in 32 bits, it is typed ``int64``, which propagates through other computations. This would be a reminiscence of the current behaviour, but rarer and much more controlled still. Long-term horizon ----------------- While we believe this proposal makes Numba's behaviour more regular and more predictable, it also pulls it further from general compatibility with pure Python semantics, where users can assume arbitrary-precision integers without any truncation issues. numba-0.55.1/docs/source/proposals/jit-classes.rst000664 000000 000000 00000017172 14174536160 022130 0ustar00rootroot000000 000000 =================== NBEP 3: JIT Classes =================== :Author: Siu Kwan Lam :Date: Dec 2015 :Status: Draft Introduction ============ Numba does not yet support user-defined classes. Classes provide useful abstraction and promote modularity when used right. 
In the simplest sense, a class specifies the set of data and operations as
attributes and methods, respectively. A class instance is an instantiation
of that class. This proposal will focus on supporting this simple use case
of classes, with just attributes and methods. Other features, such as class
methods, static methods, and inheritance, are deferred to another proposal,
but we believe these features can be easily implemented given the foundation
described here.

Proposal: jit-classes
=====================

A JIT-class is more restricted than a Python class. We will focus on the
following operations on a class and its instance:

* Instantiation: create an instance of a class using the class object as
  the constructor: ``cls(*args, **kwargs)``
* Destruction: remove resources allocated during instantiation and release
  all references to other objects.
* Attribute access: loading and storing attributes using ``instance.attr``
  syntax.
* Method access: loading methods using ``instance.method`` syntax.

With these operations, a class object (not the instance) does not need to be
materialized. Using the class object as a constructor is fully resolved (a
runtime implementation is picked) during the typing phase in the compiler.
This means **a class object will not be first class**. On the other hand,
implementing a first-class class object will require an "interface" type, or
the type of class.

The instantiation of a class will allocate resources for storing the data
attributes. This is described in the "Storage model" section. Methods are
never stored in the instance. They are information attached to the class.
Since a class object only exists in the type domain, the methods will also
be fully resolved at the typing phase. Again, numba does not have
first-class function values and each function type maps uniquely to each
function implementation (this needs to be changed to support function values
as arguments).

A class instance can contain other NRT reference-counted objects as
attributes. To properly clean up an instance, a destructor is called when
the reference count of the instance drops to zero. This is described in the
"Reference count and destructor" section.

Storage model
~~~~~~~~~~~~~

For compatibility with C, attributes are stored in a simple plain-old-data
structure. Each attribute is stored in a user-defined order in a padded
(for proper alignment), contiguous memory region. An instance that contains
three fields of int32, float32, complex64 will be compatible with the
following C structure::

    struct {
        int32     field0;
        float32   field1;
        complex64 field2;
    };

This will also be compatible with an aligned numpy structured dtype.

Methods
~~~~~~~

Methods are regular functions that can be bound to an instance. They can be
compiled as regular functions by numba. The operation
``getattr(instance, name)`` (getting an attribute ``name`` from
``instance``) binds the instance to the requested method at runtime.

The special ``__init__`` method is also handled like regular functions.

``__del__`` is not supported at this time.

Reference count and destructor
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

An instance of a jit-class is reference-counted by NRT. Since it may contain
other NRT-tracked objects, it must call a destructor when its reference
count drops to zero. The destructor will decrement the reference count of
all attributes by one.

At this time, there is no support for a user-defined ``__del__`` method.

Proper cleanup for cyclic references is not handled at this time. Cycles
will cause memory leaks.
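Returning to the storage model above, the three-field layout can be
expressed as an aligned NumPy record dtype; this is only an illustration of
the intended memory layout, not a Numba API:

.. code-block:: python

    import numpy as np

    # Same field order and types as the C struct shown earlier; align=True
    # requests C-compatible padding and alignment.
    instance_layout = np.dtype(
        [('field0', np.int32),
         ('field1', np.float32),
         ('field2', np.complex64)],
        align=True,
    )

    print(instance_layout.itemsize)  # includes any alignment padding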
Type inference
~~~~~~~~~~~~~~

So far we have not described the type of the attributes or the methods.
Type information is necessary to materialize the instance (e.g. allocate
the storage).

The simplest way is to let the user provide the type of each attribute as
well as the ordering; for instance::

    dct = OrderedDict()
    dct['x'] = int32
    dct['y'] = float32

Allowing the user to supply an ordered dictionary will provide the name,
ordering and types of the attributes. However, this statically typed
semantic is not as flexible as the Python semantic, which behaves like a
generic class.

Inferring the type of attributes is difficult. In a previous attempt to
implement JIT classes, the ``__init__`` method is specialized to capture the
types stored into the attributes. Since the method can contain arbitrary
logic, the problem can become a dependent typing problem if types are
assigned conditionally depending on the value. (Very few languages implement
dependent typing and those that do are mostly theorem provers.)

Example: typing function using an OrderedDict
---------------------------------------------

.. code-block:: python

    spec = OrderedDict()
    spec['x'] = numba.int32
    spec['y'] = numba.float32

    @jitclass(spec)
    class Vec(object):
        def __init__(self, x, y):
            self.x = x
            self.y = y

        def add(self, dx, dy):
            self.x += dx
            self.y += dy

Example: typing function using a list of 2-tuples
-------------------------------------------------

.. code-block:: python

    spec = [('x', numba.int32),
            ('y', numba.float32)]

    @jitclass(spec)
    class Vec(object):
        ...

Creating multiple jitclasses from a single class object
-------------------------------------------------------

The `jitclass(spec)` decorator creates a new jitclass type even when applied
to the same class object and the same type specification.

.. code-block:: python

    class Vec(object):
        ...

    Vec1 = jitclass(spec)(Vec)
    Vec2 = jitclass(spec)(Vec)
    # Vec1 and Vec2 are two different jitclass types

Usage from the Interpreter
~~~~~~~~~~~~~~~~~~~~~~~~~~

When constructing a new instance of a jitclass, a "box" is created that
wraps the underlying jitclass instance from numba. Attributes and methods
are accessible from the interpreter. The actual implementation will be in
numba compiled code. Any Python object is converted to its native
representation for consumption in numba. Similarly, the returned value is
converted to its Python representation. As a result, there may be overhead
in manipulating jitclass instances in the interpreter. This overhead is
minimal and should be easily amortized by more efficient computation in the
compiled methods.

Support for property, staticmethod and classmethod
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The use of ``property`` is accepted for getters and setters only. Deleters
are not supported.

The use of ``staticmethod`` is not supported.

The use of ``classmethod`` is not supported.

Inheritance
~~~~~~~~~~~

Class inheritance is not considered in this proposal. The only accepted base
class for a jitclass is `object`.

Supported targets
~~~~~~~~~~~~~~~~~

Only the CPU target (including the parallel target) is supported. GPU (e.g.
CUDA and HSA) targets are supported via an immutable version of the jitclass
instance, which will be described in a separate NBEP.

Other properties
~~~~~~~~~~~~~~~~

Given:

.. code-block:: python

    spec = [('x', numba.int32),
            ('y', numba.float32)]

    @jitclass(spec)
    class Vec(object):
        ...

* ``isinstance(Vec(1, 2), Vec)`` is True.
* ``type(Vec(1, 2))`` may not be ``Vec``.
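As a usage sketch tying the pieces together (using the ``Vec`` jitclass from
the examples above; the import location of ``jitclass`` reflects the API at
the time of this proposal, and has since moved to ``numba.experimental``):

.. code-block:: python

    import numba
    from numba import jitclass  # numba.experimental.jitclass in later versions

    spec = [('x', numba.int32),
            ('y', numba.float32)]

    @jitclass(spec)
    class Vec(object):
        def __init__(self, x, y):
            self.x = x
            self.y = y

        def add(self, dx, dy):
            self.x += dx
            self.y += dy

    v = Vec(1, 2.5)   # boxed instance usable from the interpreter
    v.add(3, 0.5)     # runs the compiled method
    print(v.x, v.y)   # attribute access unboxes/boxes the values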
Future enhancements
~~~~~~~~~~~~~~~~~~~

This proposal has only described the basic semantics and functionality of a
jitclass. Additional features will be described in future enhancement
proposals.

numba-0.55.1/docs/source/proposals/np-where-override.py000664 000000 000000 00000003241 14174536160 023061 0ustar00rootroot000000 000000 
import numpy as np

from numba.core import types
from numba.extending import overload


@overload(np.where)
def where(cond, x, y):
    """
    Implement np.where().
    """
    # Choose implementation based on argument types.
    if isinstance(cond, types.Array):
        # Array where() => return an array of the same shape
        if all(ty.layout == 'C' for ty in (cond, x, y)):
            def where_impl(cond, x, y):
                """
                Fast implementation for C-contiguous arrays
                """
                shape = cond.shape
                if x.shape != shape or y.shape != shape:
                    raise ValueError("all inputs should have the same shape")
                res = np.empty_like(x)
                cf = cond.flat
                xf = x.flat
                yf = y.flat
                rf = res.flat
                for i in range(cond.size):
                    rf[i] = xf[i] if cf[i] else yf[i]
                return res
        else:
            def where_impl(cond, x, y):
                """
                Generic implementation for other arrays
                """
                shape = cond.shape
                if x.shape != shape or y.shape != shape:
                    raise ValueError("all inputs should have the same shape")
                res = np.empty_like(x)
                for idx, c in np.ndenumerate(cond):
                    res[idx] = x[idx] if c else y[idx]
                return res
    else:
        def where_impl(cond, x, y):
            """
            Scalar where() => return a 0-dim array
            """
            scal = x if cond else y
            return np.full_like(scal, scal)

    return where_impl

numba-0.55.1/docs/source/proposals/recursion_callstack.svg000664 000000 000000 00000132167 14174536160 023727 0ustar00rootroot000000 000000 numba-0.55.1/docs/source/proposals/type-inference.rst000664 000000 000000 00000014770 14174536160 022615 0ustar00rootroot000000 000000 
======================
NBEP 5: Type Inference
======================

:Author: Siu Kwan Lam
:Date: Sept 2016
:Status: Draft

This document describes the current type inference implementation in numba.

Introduction
============

Numba uses type information to ensure that every variable in the user code
can be correctly lowered (translated into a low-level representation). The
type of a variable describes the set of valid operations and available
attributes. Resolving this information during compilation avoids the
overhead of type checking and dispatching at runtime. However, Python is
dynamically typed and the user does not declare variable types. Since type
information is absent, we use type inference to reconstruct the missing
information.

Numba Type Semantics
====================

Type inference operates on :term:`Numba IR`, a mostly
static-single-assignment (SSA) encoding of the Python bytecode.
Conceptually, all intermediate values in the Python code are explicitly
assigned to a variable in the IR. Numba enforces that each IR variable has
only one type. A user variable (from the Python source code) can be mapped
to multiple variables in the IR. They are *versions* of a variable. Each
time a user variable is assigned to, a new version is created. From that
point, all subsequent references will use the new version. The user
variable *evolves* as the function logic updates its type. Merge points
(e.g. the subsequent block to an if-else, the loop body, etc.) in the
control flow need extra care. At each merge point, a new version is
implicitly created to merge the different variable versions from the
incoming paths. The merging of the variable versions may translate into an
implicit cast.

Numba uses function overloading to emulate Python duck-typing.
The type of a function can contain multiple call signatures that accept different argument types and yield different return types. The process to decide the best signature for an overloaded function is called *overload resolution*. Numba partially implements the C++ overload resolution scheme (`ISOCPP`_ 13.3 Overload Resolution). The scheme uses a "best fit" algorithm by ranking each argument symmetrically. The five possible rankings in increasing order of penalty are: * *Exact*: the expected type is the same as the actual type. * *Promotion*: the actual type can be upcast to the expected type by extending the precision without changing the behavior. * *Safe conversion*: the actual type can be cast to the expected type by changing the type without losing information. * *Unsafe conversion*: the actual type can be cast to the expected type by changing the type or downcasting the type even if it is imprecise. * *No match*: no valid operation can convert the actual type to the expected type. It is possible to have an ambiguous resolution. For example, a function with signatures ``(int16, int32)`` and ``(int32, int16)`` can become ambiguous if presented with the argument types ``(int32, int32)``, because demoting either argument to ``int16`` is equally "fit". Fortunately, numba can usually resolve such ambiguity by compiling a new version with the exact signature ``(int32, int32)``. When compilation is disabled and there are multiple signatures with equal fit, an exception is raised. Type Inference ============== The type inference in numba has three important components---type variable, constraint network, and typing context. * The *typing context* provides all the type information and typing related operations, including the logic for type unification, and the logic for typing of global and constant values. It defines the semantic of the language that can be compiled by numba. * A *type variable* holds the type of each variable (in the Numba IR). Conceptually, it is initialized to the universal type and, as it is re-assigned, it stores a common type by unifying the new type with the existing type. The common type must be able to represent values of the new type and the existing type. Type conversion is applied as necessary and precision loss is accepted for usability reason. * The *constraint network* is a dependency graph built from the IR. Each node represents an operation in the Numba IR and updates at least one type variable. There may be cycles due to loops in user code. The type inference process starts by seeding the argument types. These initial types are propagated in the constraint network, which eventually fills all the type variables. Due to cycles in the network, the process repeats until all type variables converge or it fails with undecidable types. Type unification always returns a more "general" (quoted because unsafe conversion is allowed) type. Types will converge to the least "general" type that can represent all possible values that the variable can hold. Since unification will never move down the type hierarchy and there is a single top type, the universal type---``object``, the type inference is guaranteed to converge. A failure in type inference can be caused by two reasons. The first reason is user error due to incorrect use of a type. This type of error will also trigger an exception in regular python execution. The second reason is due to the use of an unsupported feature, but the code is otherwise valid in regular python execution. 
Upon an error, the type inference will set all types to the object type. As
a result, numba will fall back to *object-mode*.

Since functions can be overloaded, the type inference needs to decide the
type signature used at each call site. The overload resolution is applied
to all known overload versions of the callee function described in
*call-templates*. A call-template can either be concrete or abstract. A
concrete call-template defines a fixed list of all possible signatures. An
abstract call-template defines the logic to compute the accepted signature
and it is used to implement generic functions.

Numba-compiled functions are generic functions due to their ability to
compile new versions. When it sees a new set of argument types, it triggers
type inference to validate and determine the return type. When there are
nested calls for numba-compiled functions, each call-site triggers type
inference. This poses a problem to recursive functions because the type
inference will also be triggered recursively. Currently, simple single
recursion is supported if the signature is annotated by the user, which
avoids unbound recursion in type inference that will never terminate.

.. _ISOCPP: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4296.pdf

numba-0.55.1/docs/source/proposals/typing_recursion.rst000664 000000 000000 00000011652 14174536160 023307 0ustar00rootroot000000 000000 
========================
NBEP 6: Typing Recursion
========================

:Author: Siu Kwan Lam
:Date: Sept 2016
:Status: Draft

Introduction
============

This document proposes an enhancement to the type inference algorithm to
support recursion without explicitly annotating the function signature. As a
result, the proposal enables numba to type-infer both self-recursive and
mutual-recursive functions under some limitations. In practice, these
limitations can be easily overcome by specifying a compilation order.

The Current State
=================

Recursion support in numba is currently limited to self-recursion with
explicit type annotation for the function. This limitation comes from the
inability to determine the return type of a recursive call. This is because
the callee is either the current function (for self-recursion) or a parent
function (mutual-recursion) and its type inference process has been
suspended while waiting for the function-type of its callee. This results in
the formation of a cyclic dependency. For example, given a function
``foo()`` that calls ``bar()``, which in turn calls ``foo()``::

    def foo(x):
        if x > 0:
            return bar(x)
        else:
            return 1

    def bar(x):
        return foo(x - 1)

The type inference process of ``foo()`` depends on that of ``bar()``, which
depends on ``foo()``. Therefore ``foo()`` depends on itself and the type
inference algorithm cannot terminate.

The Solution
============

The proposed solution has two components:

1. The introduction of a compile-time *callstack* that tracks the compiling
   functions.
2. The allowance of a partial type inference on functions by leveraging the
   return type on non-recursive control-flow paths.

The compile-time callstack stores typing information of the functions being
compiled. Like an ordinary callstack, it pushes a new record every time a
function is "called". Since this occurs at compile-time, a "call" triggers a
compilation of the callee.

To detect recursion, the compile-time callstack is searched bottom-up (stack
grows downward) for a record that matches the callee.
As the record contains a reference to the type inference state, the type inference process can be resumed to determine the return type. Recall that the type inference process cannot be resumed normally because of the cyclic dependency of the return type. In practice, we can assume that a useful program must have a terminating condition, a path that does not recurse. So, the type inference process can make an initial guess for the return-type at the recursive call by using the return-type determined by the non-recursive paths. This allows type information to propagate on the recursive paths to generate the final return type, which is used to refine the type information in the subsequent iteration of the type inference process.

The following figure illustrates the compile-time callstack when the compiler reaches the recursive call to ``foo()`` from ``bar()``:

.. image:: recursion_callstack.svg
    :width: 400px

At this time, the type inference process of ``foo()`` is suspended and that of ``bar()`` is active. The compiler can see that the callee is already compiling by searching the callstack. Knowing that it is a recursive call, the compiler can resume the type-inference on ``foo()`` by ignoring the paths that contain recursive calls. This means only the ``else`` branch is considered and we can easily tell that ``foo()`` returns an ``int`` in this case. The compiler will then set the initial return type of ``foo()`` and ``bar()`` to ``int``. The subsequent type propagation can use this information to complete the type inference of both functions, unifying the return-type of all returning paths.

Limitations
===========

For the proposed type inference algorithm to terminate, it assumes that at least one of the control paths leads to a return-statement without undertaking a recursive call. Should this not be the case, the algorithm will raise an exception indicating a potential runaway recursion. For example::

    @jit
    def first(x):
        # The recursing call must have a path that is non-recursing.
        if x > 0:
            return second(x)
        else:
            return 1

    @jit
    def second(x):
        return third(x)

    @jit
    def third(x):
        return first(x - 1)

The ``first()`` function must be compiled first for the type inference algorithm to complete successfully. Compiling any other function first will lead to a failure in type inference. The type inference algorithm will treat it as a runaway recursion due to the lack of a non-recursive exit in the recursive callee. For example, compiling ``second()`` first will move the recursive call to ``first()``. When the compiler tries to resume the type inference process of ``second()``, it will fail to find a non-recursive path. This is a small limitation and can be overcome easily by code restructuring or precompiling in a specific order.

.. _aot-compilation:

Ahead-of-Time compilation
=========================

.. currentmodule:: numba.pycc

.. class:: CC(extension_name, source_module=None)

   An object used to generate compiled extensions from Numba-compiled Python functions. *extension_name* is the name of the extension to be generated. *source_module* is the Python module containing the functions; if ``None``, it is inferred by examining the call stack.

   :class:`CC` instances have the following attributes and methods:
   .. attribute:: name

      (read-only attribute) The name of the extension module to be generated.

   .. attribute:: output_dir

      (read-write attribute) The directory the extension module will be written into. By default it is the directory the *source_module* is located in.

   .. attribute:: output_file

      (read-write attribute) The name of the file the extension module will be written to. By default this follows the Python naming convention for the current platform.

   .. attribute:: target_cpu

      (read-write attribute) The name of the CPU model to generate code for. This will select the appropriate instruction set extensions. By default, a generic CPU is selected in order to produce portable code. Recognized names for this attribute depend on the current architecture and LLVM version. If you have LLVM installed, ``llc -mcpu=help`` will give you a list. Examples on x86-64 are ``"ivybridge"``, ``"haswell"``, ``"skylake"`` or ``"broadwell"``. You can also give the value ``"host"`` which will select the current host CPU.

   .. attribute:: verbose

      (read-write attribute) If true, print out information while compiling the extension. False by default.

   .. decorator:: export(exported_name, sig)

      Mark the decorated function for compilation with the signature *sig*. The compiled function will be exposed as *exported_name* in the generated extension module. All exported names within a given :class:`CC` instance must be distinct, otherwise an exception is raised.

   .. method:: compile()

      Compile all exported functions and generate the extension module as specified by :attr:`output_dir` and :attr:`output_file`.

   .. method:: distutils_extension(**kwargs)

      Return a :py:class:`distutils.core.Extension` instance allowing you to integrate generation of the extension module into a conventional ``setup.py``-driven build process. The optional *kwargs* let you pass optional parameters to the :py:class:`~distutils.core.Extension` constructor. In this mode of operation, it is not necessary to call :meth:`compile` yourself. Also, :attr:`output_dir` and :attr:`output_file` will be ignored.

.. _deprecation:

===================
Deprecation Notices
===================

This section contains information about deprecation of behaviours, features and APIs that have become undesirable/obsolete. Any information about the schedule for their deprecation and reasoning behind the changes, along with examples, is provided. However, first is a small section on how to suppress deprecation warnings that may be raised from Numba so as to prevent warnings propagating into code that is consuming Numba.

Suppressing Deprecation warnings
================================

All Numba deprecations are issued via ``NumbaDeprecationWarning`` or ``NumbaPendingDeprecationWarning``\ s. To suppress the reporting of these, the following code snippet can be used::

    from numba.core.errors import NumbaDeprecationWarning, NumbaPendingDeprecationWarning
    import warnings

    warnings.simplefilter('ignore', category=NumbaDeprecationWarning)
    warnings.simplefilter('ignore', category=NumbaPendingDeprecationWarning)

The ``action`` used above is ``'ignore'``; other actions are available, see `The Warnings Filter `_ documentation for more information.
.. note:: It is **strongly recommended** that applications and libraries which choose to suppress these warnings should pin their Numba dependency to a suitable version because their users will no longer be aware of the coming incompatibility.

Deprecation of reflection for List and Set types
================================================

Reflection (:term:`reflection`) is the jargon used in Numba to describe the process of ensuring that changes made by compiled code to arguments that are mutable Python container data types are visible in the Python interpreter when the compiled function returns. Numba has for some time supported reflection of ``list`` and ``set`` data types and it is support for this reflection that is scheduled for deprecation, with a view to replacing it with a better implementation.

Reason for deprecation
----------------------

First recall that for Numba to be able to compile a function in ``nopython`` mode all the variables must have a concrete type ascertained through type inference. In simple cases, it is clear how to reflect changes to containers inside ``nopython`` mode back to the original Python containers. However, reflecting changes to complex data structures with nested container types (for example, lists of lists of integers) quickly becomes impossible to do efficiently and consistently. After a number of years of experience with this problem, it is clear that providing this behaviour is both fraught with difficulty and often leads to code which does not have good performance (all reflected data has to go through special APIs to convert the data to native formats at call time and then back to CPython formats at return time). As a result of this, the sheer number of reported problems in the issue tracker, and how well the new approach taken with ``typed.Dict`` (typed dictionaries) has gone, the core developers have decided to deprecate the noted ``reflection`` behaviour.

Example(s) of the impact
------------------------

At present only a warning of the upcoming change is issued. In future, code such as::

    from numba import njit

    @njit
    def foo(x):
        x.append(10)

    a = [1, 2, 3]
    foo(a)

will require adjustment to use a ``typed.List`` instance; this typed container is analogous to the :ref:`feature-typed-dict`. An example of translating the above is::

    from numba import njit
    from numba.typed import List

    @njit
    def foo(x):
        x.append(10)

    a = [1, 2, 3]
    typed_a = List()
    for x in a:
        typed_a.append(x)
    foo(typed_a)

For more information about ``typed.List`` see :ref:`feature-typed-list`. Further usability enhancements for this feature were made in the 0.47.0 release cycle.

Schedule
--------

This feature will be removed with respect to this schedule:

* Pending-deprecation warnings will be issued in version 0.44.0
* Prominent notice will be given for a minimum of two releases prior to full removal.

Recommendations
---------------

Projects that need/rely on the deprecated behaviour should pin their dependency on Numba to a version prior to removal of this behaviour, or consider following replacement instructions that will be issued outlining how to adjust to the change.

Expected Replacement
--------------------

As noted above, ``typed.List`` will be used to permit similar functionality to reflection in the case of ``list``\ s; a ``typed.Set`` will provide the equivalent for ``set`` (not implemented yet!). The advantages to this approach are:

* That the containers are typed means type inference has to work less hard.
* Nested containers (containers of containers of ...) are more easily supported.
* Performance penalties currently incurred translating data to/from native formats are largely avoided.
* Numba's ``typed.Dict`` will be able to use these containers as values.

Deprecation of :term:`object mode` `fall-back` behaviour when using ``@jit``
============================================================================

The ``numba.jit`` decorator has for a long time followed the behaviour of first attempting to compile the decorated function in :term:`nopython mode` and, should this compilation fail, it will `fall-back` and try again to compile but this time in :term:`object mode`. It is this `fall-back` behaviour which is being deprecated, the result of which will be that ``numba.jit`` will by default compile in :term:`nopython mode` and :term:`object mode` compilation will become `opt-in` only.

Reason for deprecation
----------------------

The `fall-back` has repeatedly caused confusion for users as seemingly innocuous changes in user code can lead to drastic performance changes as code which may have once compiled in :term:`nopython mode` may silently switch to compiling in :term:`object mode`, e.g.::

    from numba import jit

    @jit
    def foo():
        l = []
        for x in range(10):
            l.append(x)
        return l

    foo()

    assert foo.nopython_signatures  # this was compiled in nopython mode

    @jit
    def bar():
        l = []
        for x in range(10):
            l.append(x)
        # innocuous change, but no reversed support in nopython mode
        return reversed(l)

    bar()

    assert not bar.nopython_signatures  # this was not compiled in nopython mode

Another reason to remove the `fall-back` is that it is confusing for the compiler engineers developing Numba as it causes internal state problems that are really hard to debug and it makes manipulating the compiler pipelines incredibly challenging.

Further, it has long been considered best practice that the :term:`nopython mode` keyword argument in the ``numba.jit`` decorator is set to ``True`` and that any user effort spent should go into making code work in this mode as there's very little gain if it does not. The result is that, as Numba has evolved, the amount of use :term:`object mode` gets in practice and its general utility has decreased. It can be noted that there are some minor improvements available through the notion of :term:`loop-lifting`; the cases of this being used in practice are, however, rare and often a legacy from use of less-recent Numba whereby such behaviour was better accommodated and the use of ``@jit`` with `fall-back` was recommended.

Example(s) of the impact
------------------------

At present a warning of the upcoming change is issued if ``@jit`` decorated code uses the `fall-back` compilation path. In future, code such as::

    @jit
    def bar():
        l = []
        for x in range(10):
            l.append(x)
        return reversed(l)

    bar()

will simply not compile; a ``TypingError`` will be raised.

Schedule
--------

This feature will be removed with respect to this schedule:

* Deprecation warnings will be issued in version 0.44.0
* Prominent notice will be given for a minimum of two releases prior to full removal.

Recommendations
---------------

Projects that need/rely on the deprecated behaviour should pin their dependency on Numba to a version prior to removal of this behaviour. Alternatively, to accommodate the scheduled deprecations, users with code compiled at present with ``@jit`` can supply the ``nopython=True`` keyword argument; if the code continues to compile then the code is already ready for this change.
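For example, a minimal sketch of this check (``foo`` is a stand-in for user code)::

    from numba import jit

    @jit(nopython=True)  # opt in to nopython mode; no object-mode fall-back
    def foo(x):
        return x + 1

    foo(1)  # if this compiles and runs, the code is ready for the change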
If the code does not compile, continue using the ``@jit`` decorator without ``nopython=True`` and profile the performance of the function. Then remove the decorator and again check the performance of the function. If there is no benefit to having the ``@jit`` decorator present consider removing it! If there is benefit to having the ``@jit`` decorator present, then to be future proof supply the keyword argument ``forceobj=True`` to ensure the function is always compiled in :term:`object mode`. .. _deprecation-strict-strides: Deprecation of the ``inspect_ptx()`` method =========================================== The undocumented ``inspect_ptx()`` method of functions decorated with ``@cuda.jit(device=True)`` is sometimes used to compile a Python function to PTX for use outside of Numba. An interface for this specific purpose is provided in the :func:`compile_ptx() ` function. ``inspect_ptx()`` has one or two longstanding issues and presents a maintenance burden for upcoming changes in the CUDA target, so it is deprecated and will be removed in favor of the use of :func:`compile_ptx() `. Recommendations --------------- Replace any code that compiles device functions to PTX using the following pattern: .. code-block:: python @cuda.jit(signature, device=True) def func(args): ... ptx_code = func.inspect_ptx(nvvm_options=nvvm_options).decode() with: .. code-block:: python def func(args): ... ptx_code, return_type = compile_ptx(func, signature, device=True, nvvm_options=nvvm_options) Schedule -------- - In Numba 0.54: ``inspect_ptx()`` was deprecated. - In Numba 0.55: ``inspect_ptx()`` was removed. Deprecation of eager compilation of CUDA device functions ========================================================= In future versions of Numba, the ``device`` kwarg to the ``@cuda.jit`` decorator will be obviated, and whether a device function or global kernel is compiled will be inferred from the context. With respect to kernel / device functions and lazy / eager compilation, four cases were handled: 1. ``device=True``, eager compilation with a signature provided 2. ``device=False``, eager compilation with a signature provided 3. ``device=True``, lazy compilation with no signature 4. ``device=False``, lazy compilation with no signature The latter two cases can be differentiated without the ``device`` kwarg, because it can be inferred from the calling context - if the call is from the host, then a global kernel should be compiled, and if the call is from a kernel or another device function, then a device function should be compiled. The first two cases cannot be differentiated in the absence of the ``device`` kwarg - without it, it will not be clear from a signature alone whether a device function or global kernel should be compiled. In order to resolve this, device functions will no longer be eagerly compiled. When a signature is provided to a device function, it will only be used to enforce the types of arguments that the function accepts. .. note:: In previous releases this notice stated that support for providing signatures to device functions would be removed completely - however, this precludes the common use case of enforcing the types that can be passed to a device function (and the automatic insertion of casts that it implies) so this notice has been updated to retain support for passing signatures. Schedule -------- - In Numba 0.54: Eager compilation of device functions will be deprecated. 
- In Numba 0.55: Eager compilation of device functions will be unsupported and the provision of signatures for device functions will only enforce casting.

Deprecation of ``numba.core.base.BaseContext.add_user_function()``
==================================================================

``add_user_function()`` offers the same functionality as ``insert_user_function()``, only with a check that the function has already been inserted at least once. It is now deprecated as it is no longer used internally and it is expected that it is not used externally.

Recommendations
---------------

Replace any uses of ``add_user_function()`` with ``insert_user_function()``.

Schedule
--------

- In Numba 0.55: ``add_user_function()`` will be deprecated.
- In Numba 0.56: ``add_user_function()`` will be removed.

Deprecation of CUDA Toolkits < 10.2 and devices with CC < 5.3
=============================================================

Support for:

- Devices with Compute Capability < 5.3, and
- CUDA toolkits less than 10.2

is deprecated and will be removed in future.

Recommendations
---------------

- For devices of Compute Capability 3.0 - 5.2, Numba 0.55.1 or earlier will be required.
- CUDA toolkit 10.2 or later (ideally 11.2 or later) should be installed.

Schedule
--------

- In Numba 0.55.1: support for CC < 5.3 and CUDA toolkits < 10.2 are deprecated.
- In Numba 0.56: support for CC < 5.3 and CUDA toolkits < 10.2 will be removed.

.. _numba-envvars:

Environment variables
=====================

.. note:: This section relates to environment variables that impact Numba's runtime; for compile-time environment variables see :ref:`numba-source-install-env_vars`.

Numba allows its behaviour to be changed through the use of environment variables. Unless otherwise mentioned, those variables have integer values and default to zero.

For convenience, Numba also supports the use of a configuration file to persist configuration settings. Note: To use this feature ``pyyaml`` must be installed.

The configuration file must be named ``.numba_config.yaml`` and be present in the directory from which the Python interpreter is invoked. The configuration file, if present, is read for configuration settings before the environment variables are searched. This means that the environment variable settings will override the settings obtained from a configuration file (the configuration file is for setting permanent preferences whereas the environment variables are for ephemeral preferences).

The format of the configuration file is a dictionary in ``YAML`` format that maps the environment variables below (without the ``NUMBA_`` prefix) to a desired value. For example, to permanently switch on developer mode (``NUMBA_DEVELOPER_MODE`` environment variable) and control flow graph printing (``NUMBA_DUMP_CFG`` environment variable), create a configuration file with the contents::

    developer_mode: 1
    dump_cfg: 1

This can be especially useful in the case of wanting to use a set color scheme based on terminal background color. For example, if the terminal background color is black, the ``dark_bg`` color scheme would be well suited and can be set for permanent use by adding::

    color_scheme: dark_bg

Jit flags
---------

These variables globally override flags to the :func:`~numba.jit` decorator.

.. envvar:: NUMBA_BOUNDSCHECK

   If set to 0 or 1, globally disable or enable bounds checking, respectively.
   The default if the variable is not set or set to an empty string is to use the ``boundscheck`` flag passed to the :func:`~numba.jit` decorator for a given function. See the documentation of :ref:`@jit ` for more information.

   Note, due to limitations in numba, the bounds checking currently produces exception messages that do not match those from NumPy. If you set ``NUMBA_FULL_TRACEBACKS=1``, the full exception message with the axis, index, and shape information will be printed to the terminal.

Debugging
---------

These variables influence what is printed out during compilation of :term:`JIT functions `.

.. envvar:: NUMBA_DEVELOPER_MODE

   If set to non-zero, developer mode produces full tracebacks and disables help instructions. Default is zero.

.. envvar:: NUMBA_FULL_TRACEBACKS

   If set to non-zero, enable full tracebacks when an exception occurs. Defaults to the value set by `NUMBA_DEVELOPER_MODE`.

.. envvar:: NUMBA_SHOW_HELP

   If set to non-zero, show resources for getting help. Default is zero.

.. envvar:: NUMBA_CAPTURED_ERRORS

   Alters the way in which Numba captures and handles exceptions that do not inherit from ``numba.core.errors.NumbaError`` during compilation (e.g. standard Python exceptions). This does not impact runtime exception handling. Valid values are:

   - ``"old_style"`` (default): this is the exception handling behaviour that is present in Numba versions <= 0.54.x. Numba will capture and wrap all errors occurring in compilation and, depending on the compilation phase, they will likely materialize as part of the message in a ``TypingError`` or a ``LoweringError``.
   - ``"new_style"``: this will treat any exception that does not inherit from ``numba.core.errors.NumbaError`` **and** is raised during compilation as a "hard error", i.e. the exception will propagate and compilation will halt. The purpose of this new style is to differentiate between intentionally raised exceptions and those which occur due to mistakes. For example, if an ``AttributeError`` occurs in the typing of an ``@overload`` function, under this new behaviour it is assumed that this is a mistake in the implementation and compilation will halt due to this exception. This behaviour will eventually become the default.

.. envvar:: NUMBA_DISABLE_ERROR_MESSAGE_HIGHLIGHTING

   If set to non-zero, error message highlighting is disabled. This is useful for running the test suite on CI systems.

.. envvar:: NUMBA_COLOR_SCHEME

   Alters the color scheme used in error reporting (requires the ``colorama`` package to be installed to work). Valid values are:

   - ``no_color`` No color added, just bold font weighting.
   - ``dark_bg`` Suitable for terminals with a dark background.
   - ``light_bg`` Suitable for terminals with a light background.
   - ``blue_bg`` Suitable for terminals with a blue background.
   - ``jupyter_nb`` Suitable for use in Jupyter Notebooks.

   *Default value:* ``no_color``. The type of the value is ``string``.

.. envvar:: NUMBA_HIGHLIGHT_DUMPS

   If set to non-zero and ``pygments`` is installed, syntax highlighting is applied to Numba IR, LLVM IR and assembly dumps. Default is zero.

.. envvar:: NUMBA_DISABLE_PERFORMANCE_WARNINGS

   If set to non-zero, the issuing of performance warnings is disabled. Default is zero.

.. envvar:: NUMBA_DEBUG

   If set to non-zero, print out all possible debugging information during function compilation. Finer-grained control can be obtained using other variables below.
.. envvar:: NUMBA_DEBUG_FRONTEND

   If set to non-zero, print out debugging information during operation of the compiler frontend, up to and including generation of the Numba Intermediate Representation.

.. envvar:: NUMBA_DEBUGINFO

   If set to non-zero, enable debug for the full application by setting the default value of the ``debug`` option in ``jit``. Beware that enabling debug info significantly increases the memory consumption for each compiled function. Default value equals the value of `NUMBA_ENABLE_PROFILING`.

.. envvar:: NUMBA_EXTEND_VARIABLE_LIFETIMES

   If set to non-zero, extend the lifetime of variables to the end of the block in which their lifetime ends. This is particularly useful in conjunction with :envvar:`NUMBA_DEBUGINFO` as it helps with introspection of values. Default is zero.

.. envvar:: NUMBA_GDB_BINARY

   Set the ``gdb`` binary for use in Numba's ``gdb`` support. This takes the form of a path and full name of the binary, for example: ``/path/from/root/to/binary/name_of_gdb_binary``. This is to permit the use of a ``gdb`` from a non-default location with a non-default name. If not set, ``gdb`` is assumed to reside at ``/usr/bin/gdb``.

.. envvar:: NUMBA_DEBUG_TYPEINFER

   If set to non-zero, print out debugging information about type inference.

.. envvar:: NUMBA_ENABLE_PROFILING

   Enables JIT events of LLVM in order to support profiling of jitted functions. This option is automatically enabled under certain profilers.

.. envvar:: NUMBA_TRACE

   If set to non-zero, trace certain function calls (function entry and exit events, including arguments and return values).

.. envvar:: NUMBA_DUMP_BYTECODE

   If set to non-zero, print out the Python :py:term:`bytecode` of compiled functions.

.. envvar:: NUMBA_DUMP_CFG

   If set to non-zero, print out information about the Control Flow Graph of compiled functions.

.. envvar:: NUMBA_DUMP_IR

   If set to non-zero, print out the Numba Intermediate Representation of compiled functions.

.. envvar:: NUMBA_DUMP_SSA

   If set to non-zero, print out the Numba Intermediate Representation of compiled functions after conversion to Static Single Assignment (SSA) form.

.. envvar:: NUMBA_DEBUG_PRINT_AFTER

   Dump the Numba IR after declared pass(es). This is useful for debugging IR changes made by given passes. Accepted values are:

   * Any pass name (as given by the ``.name()`` method on the class)
   * Multiple pass names as a comma separated list, i.e. ``"foo_pass,bar_pass"``
   * The token ``"all"``, which will print after all passes.

   The default value is ``"none"`` so as to prevent output.

.. envvar:: NUMBA_DUMP_ANNOTATION

   If set to non-zero, print out type annotations for compiled functions.

.. envvar:: NUMBA_DUMP_LLVM

   Dump the unoptimized LLVM assembly source of compiled functions. Unoptimized code is usually very verbose; therefore, :envvar:`NUMBA_DUMP_OPTIMIZED` is recommended instead.

.. envvar:: NUMBA_DUMP_FUNC_OPT

   Dump the LLVM assembly source after the LLVM "function optimization" pass, but before the "module optimization" pass. This is useful mostly when developing Numba itself, otherwise use :envvar:`NUMBA_DUMP_OPTIMIZED`.

.. envvar:: NUMBA_DUMP_OPTIMIZED

   Dump the LLVM assembly source of compiled functions after all optimization passes. The output includes the raw function as well as its CPython-compatible wrapper (whose name begins with ``wrapper.``). Note that the function is often inlined inside the wrapper, as well.

.. envvar:: NUMBA_DEBUG_ARRAY_OPT

   Dump debugging information related to the processing associated with the ``parallel=True`` jit decorator option.
.. envvar:: NUMBA_DEBUG_ARRAY_OPT_RUNTIME

   Dump debugging information related to the runtime scheduler associated with the ``parallel=True`` jit decorator option.

.. envvar:: NUMBA_DEBUG_ARRAY_OPT_STATS

   Dump statistics about how many operators/calls are converted to parallel for-loops and how many are fused together, which are associated with the ``parallel=True`` jit decorator option.

.. envvar:: NUMBA_PARALLEL_DIAGNOSTICS

   If set to an integer value between 1 and 4 (inclusive), diagnostic information about parallel transforms undertaken by Numba will be written to STDOUT. The higher the value, the more detailed the information produced.

.. envvar:: NUMBA_DUMP_ASSEMBLY

   Dump the native assembly code of compiled functions.

.. envvar:: NUMBA_LLVM_PASS_TIMINGS

   Set to ``1`` to enable recording of pass timings in LLVM; e.g. ``NUMBA_LLVM_PASS_TIMINGS=1``. See :ref:`developer-llvm-timings`.

   *Default value*: ``0`` (Off)

.. seealso:: :ref:`numba-troubleshooting` and :ref:`architecture`.

Compilation options
-------------------

.. envvar:: NUMBA_OPT

   The optimization level; this option is passed straight to LLVM.

   *Default value:* 3

.. envvar:: NUMBA_LOOP_VECTORIZE

   If set to non-zero, enable LLVM loop vectorization.

   *Default value:* 1 (except on 32-bit Windows)

.. envvar:: NUMBA_SLP_VECTORIZE

   If set to non-zero, enable LLVM superword-level parallelism vectorization.

   *Default value:* 1

.. envvar:: NUMBA_ENABLE_AVX

   If set to non-zero, enable AVX optimizations in LLVM. This is disabled by default on Sandy Bridge and Ivy Bridge architectures as it can sometimes result in slower code on those platforms.

.. envvar:: NUMBA_DISABLE_INTEL_SVML

   If set to non-zero and Intel SVML is available, the use of SVML will be disabled.

.. envvar:: NUMBA_DISABLE_JIT

   Disable JIT compilation entirely. The :func:`~numba.jit` decorator acts as if it performs no operation, and the invocation of decorated functions calls the original Python function instead of a compiled version. This can be useful if you want to run the Python debugger over your code.

.. envvar:: NUMBA_CPU_NAME
.. envvar:: NUMBA_CPU_FEATURES

   Override CPU and CPU features detection. By setting ``NUMBA_CPU_NAME=generic``, a generic CPU model is picked for the CPU architecture and the feature list (``NUMBA_CPU_FEATURES``) defaults to empty. CPU features must be listed with the format ``+feature1,-feature2`` where ``+`` indicates enable and ``-`` indicates disable. For example, ``+sse,+sse2,-avx,-avx2`` enables SSE and SSE2, and disables AVX and AVX2. These settings are passed to LLVM for configuring the compilation target. To get a list of available options, use the ``llc`` commandline tool from LLVM, for example::

       llc -march=x86 -mattr=help

.. tip:: To force all caching functions (``@jit(cache=True)``) to emit portable code (portable within the same architecture and OS), simply set ``NUMBA_CPU_NAME=generic``.

.. envvar:: NUMBA_FUNCTION_CACHE_SIZE

   Override the size of the function cache for retaining recently deserialized functions in memory. In systems like `Dask `_, it is common for functions to be deserialized multiple times. Numba will cache functions as long as there is a reference somewhere in the interpreter. This cache size variable controls how many functions that are no longer referenced will also be retained, just in case they show up in the future. The implementation of this is not a true LRU, but the large size of the cache should be sufficient for most situations. Note: this is unrelated to the compilation cache.

   *Default value:* 128
.. envvar:: NUMBA_LLVM_REFPRUNE_PASS

   Turns on the LLVM pass level reference-count pruning pass and disables the regex based implementation in Numba.

   *Default value:* 1 (On)

.. envvar:: NUMBA_LLVM_REFPRUNE_FLAGS

   When ``NUMBA_LLVM_REFPRUNE_PASS`` is on, this allows configuration of subpasses in the reference-count pruning LLVM pass. Valid values are any combinations of the below separated by `,` (case-insensitive):

   - ``all``: enable all subpasses.
   - ``per_bb``: enable per-basic-block level pruning, which is the same as the old regex based implementation.
   - ``diamond``: enable inter-basic-block pruning that is a diamond shape pattern, i.e. a single-entry single-exit CFG subgraph that has an incref in the entry and a corresponding decref in the exit.
   - ``fanout``: enable inter-basic-block pruning that has a fanout pattern, i.e. a single-entry multiple-exit CFG subgraph where the entry has an incref and every exit has a corresponding decref.
   - ``fanout_raise``: same as ``fanout`` but allows subgraph exit nodes to raise an exception without a corresponding decref.

   For example, ``all`` is the same as ``per_bb, diamond, fanout, fanout_raise``

   *Default value:* "all"

.. _numba-envvars-caching:

Caching options
---------------

Options for the compilation cache.

.. envvar:: NUMBA_DEBUG_CACHE

   If set to non-zero, print out information about operation of the :ref:`JIT compilation cache `.

.. envvar:: NUMBA_CACHE_DIR

   Override the location of the cache directory. If defined, this should be a valid directory path. If not defined, Numba picks the cache directory in the following order:

   1. In-tree cache. Put the cache next to the corresponding source file under a ``__pycache__`` directory following how ``.pyc`` files are stored.
   2. User-wide cache. Put the cache in the user's application directory using ``appdirs.user_cache_dir`` from the `Appdirs package `_.
   3. IPython cache. Put the cache in an IPython specific application directory. Stores are made under ``numba_cache`` in the directory returned by ``IPython.paths.get_ipython_cache_dir()``.

   Also see :ref:`docs on cache sharing ` and :ref:`docs on cache clearing `

.. _numba-envvars-gpu-support:

GPU support
-----------

.. envvar:: NUMBA_DISABLE_CUDA

   If set to non-zero, disable CUDA support.

.. envvar:: NUMBA_FORCE_CUDA_CC

   If set, force the CUDA compute capability to the given version (a string of the type ``major.minor``), regardless of attached devices.

.. envvar:: NUMBA_CUDA_DEFAULT_PTX_CC

   The default compute capability (a string of the type ``major.minor``) to target when compiling to PTX using ``cuda.compile_ptx``. The default is 5.2, which is the lowest non-deprecated compute capability in the most recent version of the CUDA toolkit supported (10.2 at present).

.. envvar:: NUMBA_ENABLE_CUDASIM

   If set, don't compile and execute code for the GPU, but use the CUDA Simulator instead. For debugging purposes.

.. envvar:: NUMBA_CUDA_ARRAY_INTERFACE_SYNC

   Whether to synchronize on streams provided by objects imported using the CUDA Array Interface. This defaults to 1. If set to 0, then no synchronization takes place, and the user of Numba (and other CUDA libraries) is responsible for ensuring correctness with respect to synchronization on streams.

.. envvar:: NUMBA_CUDA_LOG_LEVEL

   For debugging purposes. If no other logging is configured, the value of this variable is the logging level for CUDA API calls. The default value is ``CRITICAL`` - to trace all API calls on standard error, set this to ``DEBUG``.

..
envvar:: NUMBA_CUDA_LOG_API_ARGS By default the CUDA API call logs only give the names of functions called. Setting this variable to 1 also includes the values of arguments to Driver API calls in the logs. .. envvar:: NUMBA_CUDA_DRIVER Path of the directory in which the CUDA driver libraries are to be found. Normally this should not need to be set as Numba can locate the driver in standard locations. However, this variable can be used if the driver is in a non-standard location. .. envvar:: NUMBA_CUDA_LOG_SIZE Buffer size for logs produced by CUDA driver API operations. This defaults to 1024 and should not normally need to be modified - however, if an error in an API call produces a large amount of output that appears to be truncated (perhaps due to multiple long function names, for example) then this variable can be used to increase the buffer size and view the full error message. .. envvar:: NUMBA_CUDA_VERBOSE_JIT_LOG Whether the CUDA driver should produce verbose log messages. Defaults to 1, indicating that verbose messaging is enabled. This should not need to be modified under normal circumstances. .. envvar:: NUMBA_CUDA_PER_THREAD_DEFAULT_STREAM When set to 1, the default stream is the per-thread default stream. When set to 0, the default stream is the legacy default stream. This defaults to 0, for the legacy default stream. See `Stream Synchronization Behavior `_ for an explanation of the legacy and per-thread default streams. This variable only takes effect when using Numba's internal CUDA bindings; when using the NVIDIA bindings, use the environment variable ``CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM`` instead. .. seealso:: The `Default Stream section `_ in the NVIDIA Bindings documentation. .. envvar:: NUMBA_CUDA_LOW_OCCUPANCY_WARNINGS Enable warnings if the grid size is too small relative to the number of streaming multiprocessors (SM). This option is on by default (default value is 1). The heuristic checked is whether ``gridsize < 2 * (number of SMs)``. NOTE: The absence of a warning does not imply a good gridsize relative to the number of SMs. Disabling this warning will reduce the number of CUDA API calls (during JIT compilation), as the heuristic needs to check the number of SMs available on the device in the current context. .. envvar:: NUMBA_CUDA_WARN_ON_IMPLICIT_COPY Enable warnings if a kernel is launched with host memory which forces a copy to and from the device. This option is on by default (default value is 1). .. envvar:: NUMBA_CUDA_USE_NVIDIA_BINDING When set to 1, Numba will attempt to use the `NVIDIA CUDA Python binding `_ to make calls to the driver API instead of using its own ctypes binding. This defaults to 0 (off), as the NVIDIA binding is currently missing support for Per-Thread Default Streams and the profiler APIs. Threading Control ----------------- .. envvar:: NUMBA_NUM_THREADS If set, the number of threads in the thread pool for the parallel CPU target will take this value. Must be greater than zero. This value is independent of ``OMP_NUM_THREADS`` and ``MKL_NUM_THREADS``. *Default value:* The number of CPU cores on the system as determined at run time. This can be accessed via :obj:`numba.config.NUMBA_DEFAULT_NUM_THREADS`. See also the section on :ref:`setting_the_number_of_threads` for information on how to set the number of threads at runtime. .. 
envvar:: NUMBA_THREADING_LAYER

   This environment variable controls the library used for concurrent execution for the CPU parallel targets (``@vectorize(target='parallel')``, ``@guvectorize(target='parallel')`` and ``@njit(parallel=True)``). The variable type is string and by default is ``default`` which will select a threading layer based on what is available in the runtime. The valid values are (for more information about these see :ref:`the threading layer documentation `):

   * ``default`` - select a threading layer based on what is available in the current runtime.
   * ``safe`` - select a threading layer that is both fork and thread safe (requires the TBB package).
   * ``forksafe`` - select a threading layer that is fork safe.
   * ``threadsafe`` - select a threading layer that is thread safe.
   * ``tbb`` - A threading layer backed by Intel TBB.
   * ``omp`` - A threading layer backed by OpenMP.
   * ``workqueue`` - A simple built-in work-sharing task scheduler.

.. envvar:: NUMBA_THREADING_LAYER_PRIORITY

   This environment variable controls the order in which the libraries used for concurrent execution, for the CPU parallel targets (``@vectorize(target='parallel')``, ``@guvectorize(target='parallel')`` and ``@njit(parallel=True)``), are prioritized for use. The variable type is string and by default is ``tbb omp workqueue``, with the priority taken based on position from the left of the string, leftmost being the highest. Valid values are any permutation of the three choices (for more information about these see :ref:`the threading layer documentation `.)

Floating-point pitfalls
=======================

Precision and accuracy
----------------------

For some operations, Numba may use a different algorithm than Python or Numpy. The results may not be bit-by-bit compatible. The difference should generally be small and within reasonable expectations. However, small accumulated differences might produce large differences at the end, especially if a divergent function is involved.

Math library implementations
''''''''''''''''''''''''''''

Numba supports a variety of platforms and operating systems, each of which has its own math library implementation (referred to as ``libm`` from here in). The majority of math functions included in ``libm`` have specific requirements as set out by the IEEE 754 standard (like ``sin()``, ``exp()`` etc.), but each implementation may have bugs. Thus, on some platforms Numba has to exercise special care in order to work around known ``libm`` issues.

Another typical problem is when an operating system's ``libm`` function set is incomplete and needs to be supplemented by additional functions. These are provided with reference to the IEEE 754 and C99 standards and are often implemented in Numba in a manner similar to equivalent CPython functions.

Linear algebra
''''''''''''''

Numpy forces some linear algebra operations to run in double-precision mode even when a ``float32`` input is given. Numba will always observe the input's precision, and invoke single-precision linear algebra routines when all inputs are ``float32`` or ``complex64``.

The implementations of the ``numpy.linalg`` routines in Numba only support the floating point types that are used in the LAPACK functions that provide the underlying core functionality. As a result only ``float32``, ``float64``, ``complex64`` and ``complex128`` types are supported. If a user has e.g.
an ``int32`` type, an appropriate type conversion must be performed to a floating point type prior to its use in these routines. The reason for this decision is essentially to avoid having to replicate type conversion choices made in Numpy and to also encourage the user to choose the optimal floating point type for the operation they are undertaking.

Mixed-types operations
''''''''''''''''''''''

Numpy will most often return a ``float64`` as a result of a computation with mixed integer and floating-point operands (a typical example is the power operator ``**``). Numba by contrast will select the highest precision amongst the floating-point operands, so for example ``float32 ** int32`` will return a ``float32``, regardless of the input values. This makes performance characteristics easier to predict, but you should explicitly cast the input to ``float64`` if you need the extra precision.

.. _ufunc-fpu-errors:

Warnings and errors
-------------------

When calling a :term:`ufunc` created with :func:`~numba.vectorize`, Numpy will determine whether an error occurred by examining the FPU error word. It may then print out a warning or raise an exception (such as ``RuntimeWarning: divide by zero encountered``), depending on the current error handling settings.

Depending on how LLVM optimized the ufunc's code, however, some spurious warnings or errors may appear. If you get caught by this issue, we recommend you call :func:`numpy.seterr` to change Numpy's error handling settings, or use the :class:`numpy.errstate` context manager to switch them temporarily::

    with np.errstate(all='ignore'):
        x = my_ufunc(y)

Reference Manual
================

.. toctree::

   types.rst
   jit-compilation.rst
   aot-compilation.rst
   utils.rst
   envvars.rst
   pysupported.rst
   numpysupported.rst
   pysemantics.rst
   fpsemantics.rst
   deprecation.rst

Just-in-Time compilation
========================

JIT functions
-------------

.. _jit-decorator:

.. decorator:: numba.jit(signature=None, nopython=False, nogil=False, cache=False, forceobj=False, parallel=False, error_model='python', fastmath=False, locals={}, boundscheck=False)

   Compile the decorated function on-the-fly to produce efficient machine code. All parameters are optional.

   If present, the *signature* is either a single signature or a list of signatures representing the expected :ref:`numba-types` of function arguments and return values. Each signature can be given in several forms:

   * A tuple of :ref:`numba-types` arguments (for example ``(numba.int32, numba.double)``) representing the types of the function's arguments; Numba will then infer an appropriate return type from the arguments.
   * A call signature using :ref:`numba-types`, specifying both return type and argument types. This can be given in intuitive form (for example ``numba.void(numba.int32, numba.double)``).
   * A string representation of one of the above, for example ``"void(int32, double)"``. All type names used in the string are assumed to be defined in the ``numba.types`` module.

   *nopython* and *nogil* are boolean flags. *locals* is a mapping of local variable names to :ref:`numba-types`.

   This decorator has several modes of operation:

   * If one or more signatures are given in *signature*, a specialization is compiled for each of them.
     Calling the decorated function will then try to choose the best matching signature, and raise a :class:`TypeError` if no appropriate conversion is available for the function arguments. If converting succeeds, the compiled machine code is executed with the converted arguments and the return value is converted back according to the signature.

   * If no *signature* is given, the decorated function implements lazy compilation. Each call to the decorated function will try to re-use an existing specialization if it exists (for example, a call with two integer arguments may re-use a specialization for argument types ``(numba.int64, numba.int64)``). If no suitable specialization exists, a new specialization is compiled on-the-fly, stored for later use, and executed with the converted arguments.

   If true, *nopython* forces the function to be compiled in :term:`nopython mode`. If not possible, compilation will raise an error.

   If true, *forceobj* forces the function to be compiled in :term:`object mode`. Since object mode is slower than nopython mode, this is mostly useful for testing purposes.

   If true, *nogil* tries to release the :py:term:`global interpreter lock` inside the compiled function. The GIL will only be released if Numba can compile the function in :term:`nopython mode`, otherwise a compilation warning will be printed.

   .. _jit-decorator-cache:

   If true, *cache* enables a file-based cache to shorten compilation times when the function was already compiled in a previous invocation. The cache is maintained in the ``__pycache__`` subdirectory of the directory containing the source file; if the current user is not allowed to write to it, though, it falls back to a platform-specific user-wide cache directory (such as ``$HOME/.cache/numba`` on Unix platforms).

   .. _jit-decorator-parallel:

   If true, *parallel* enables the automatic parallelization of a number of common Numpy constructs as well as the fusion of adjacent parallel operations to maximize cache locality.

   The *error_model* option controls the divide-by-zero behavior. Setting it to 'python' causes divide-by-zero to raise an exception, like CPython. Setting it to 'numpy' causes divide-by-zero to set the result to *+/-inf* or *nan*.

   Not all functions can be cached, since some functionality cannot always be persisted to disk. When a function cannot be cached, a warning is emitted.

   .. _jit-decorator-fastmath:

   If true, *fastmath* enables the use of otherwise unsafe floating point transforms as described in the `LLVM documentation `_. Further, if :ref:`Intel SVML ` is installed faster but less accurate versions of some math intrinsics are used (answers to within ``4 ULP``).

   .. _jit-decorator-boundscheck:

   If True, ``boundscheck`` enables bounds checking for array indices. Out of bounds accesses will raise IndexError. The default is to not do bounds checking. If bounds checking is disabled, out of bounds accesses can produce garbage results or segfaults. However, enabling bounds checking will slow down typical functions, so it is recommended to only use this flag for debugging. You can also set the `NUMBA_BOUNDSCHECK` environment variable to 0 or 1 to globally override this flag.

   The *locals* dictionary may be used to force the :ref:`numba-types` of particular local variables, for example if you want to force the use of single precision floats at some point. In general, we recommend you let Numba's compiler infer the types of local variables by itself.
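   For instance, a minimal sketch of forcing a single-precision accumulator (the variable name ``accum`` and the function are invented for this example)::

       from numba import jit, float32

       @jit(locals={'accum': float32})
       def partial_sum(arr):
           accum = 0.0          # forced to single precision via *locals*
           for x in arr:
               accum += x
           return accum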
   Here is an example with two signatures::

       @jit(["int32(int32)", "float32(float32)"], nopython=True)
       def f(x): ...

   Not putting any parentheses after the decorator is equivalent to calling the decorator without any arguments, i.e.::

       @jit
       def f(x): ...

   is equivalent to::

       @jit()
       def f(x): ...

   The decorator returns a :class:`Dispatcher` object.

   .. note:: If no *signature* is given, compilation errors will be raised when the actual compilation occurs, i.e. when the function is first called with some given argument types.

   .. note:: Compilation can be influenced by some dedicated :ref:`numba-envvars`.

Generated JIT functions
-----------------------

.. decorator:: numba.generated_jit(nopython=False, nogil=False, cache=False, forceobj=False, locals={})

   Like the :func:`~numba.jit` decorator, but calls the decorated function at compile-time, passing the *types* of the function's arguments. The decorated function must return a callable which will be compiled as the function's implementation for those types, allowing flexible kinds of specialization.

   The :func:`~numba.generated_jit` decorator returns a :class:`Dispatcher` object.

Dispatcher objects
------------------

.. class:: Dispatcher

   The class of objects created by calling :func:`~numba.jit` or :func:`~numba.generated_jit`. You shouldn't try to create such an object in any other way. Calling a Dispatcher object calls the compiled specialization for the arguments with which it is called, letting it act as an accelerated replacement for the Python function which was compiled.

   In addition, Dispatcher objects have the following methods and attributes:

   .. attribute:: py_func

      The pure Python function which was compiled.

   .. method:: inspect_types(file=None, pretty=False)

      Print out a listing of the function source code annotated line-by-line with the corresponding Numba IR, and the inferred types of the various variables. If *file* is specified, printing is done to that file object, otherwise to sys.stdout. If *pretty* is set to True then colored ANSI output will be produced in a terminal and HTML in a notebook.

      .. seealso:: :ref:`architecture`

   .. method:: inspect_llvm(signature=None)

      Return a dictionary keying compiled function signatures to the human readable LLVM IR generated for the function. If the signature keyword is specified a string corresponding to that individual signature is returned.

   .. method:: inspect_asm(signature=None)

      Return a dictionary keying compiled function signatures to the human-readable native assembly code for the function. If the signature keyword is specified a string corresponding to that individual signature is returned.

   .. method:: inspect_cfg(signature=None, show_wrapped)

      Return a dictionary keying compiled function signatures to the control-flow graph objects for the function. If the signature keyword is specified, the control-flow graph object corresponding to that individual signature is returned.

      The control-flow graph objects can be stringified (``str`` or ``repr``) to get the textual representation of the graph in DOT format. Or, use its ``.display(filename=None, view=False)`` method to plot the graph. The *filename* option can be set to a specific path for the rendered output to write to. If the *view* option is True, the plot is opened by the system default application for the image format (PDF). In an IPython notebook, the returned object can be plotted inline.

      Usage::

          @jit
          def foo():
              ...

          # opens the CFG in system default application
          foo.inspect_cfg(foo.signatures[0]).display(view=True)

   ..
method:: inspect_disasm_cfg(signature=None) Return a dictionary keying compiled function signatures to the control-flow graph of the disassembly of the underlying compiled ``ELF`` object. If the signature keyword is specified a control-flow graph corresponding to that individual signature is returned. This function is execution environment aware and will produce SVG output in Jupyter notebooks and ASCII in terminals. Example:: @njit def foo(x): if x < 3: return x + 1 return x + 2 foo(10) print(foo.inspect_disasm_cfg(signature=foo.signatures[0])) Gives:: [0x08000040]> # method.__main__.foo_241_long_long (int64_t arg1, int64_t arg3); ─────────────────────────────────────────────────────────────────────┐ │ 0x8000040 │ │ ; arg3 ; [02] -r-x section size 279 named .text │ │ ;-- section..text: │ │ ;-- .text: │ │ ;-- __main__::foo$241(long long): │ │ ;-- rip: │ │ 25: method.__main__.foo_241_long_long (int64_t arg1, int64_t arg3); │ │ ; arg int64_t arg1 @ rdi │ │ ; arg int64_t arg3 @ rdx │ │ ; 2 │ │ cmp rdx, 2 │ │ jg 0x800004f │ └─────────────────────────────────────────────────────────────────────┘ f t │ │ │ └──────────────────────────────┐ └──┐ │ │ │ ┌─────────────────────────┐ ┌─────────────────────────┐ │ 0x8000046 │ │ 0x800004f │ │ ; arg3 │ │ ; arg3 │ │ inc rdx │ │ add rdx, 2 │ │ ; arg3 │ │ ; arg3 │ │ mov qword [rdi], rdx │ │ mov qword [rdi], rdx │ │ xor eax, eax │ │ xor eax, eax │ │ ret │ │ ret │ └─────────────────────────┘ └─────────────────────────┘ .. method:: recompile() Recompile all existing signatures. This can be useful for example if a global or closure variable was frozen by your function and its value in Python has changed. Since compiling isn't cheap, this is mainly for testing and interactive use. .. method:: parallel_diagnostics(signature=None, level=1) Print parallel diagnostic information for the given signature. If no signature is present it is printed for all known signatures. ``level`` is used to adjust the verbosity, ``level=1`` (default) is minimum verbosity, levels 2, 3, and 4 provide increasing levels of verbosity. .. method:: get_metadata(signature=None) Obtain the compilation metadata for a given signature. This is useful for developers of Numba and Numba extensions. Vectorized functions (ufuncs and DUFuncs) ----------------------------------------- .. decorator:: numba.vectorize(*, signatures=[], identity=None, nopython=True, target='cpu', forceobj=False, cache=False, locals={}) Compile the decorated function and wrap it either as a `Numpy ufunc`_ or a Numba :class:`~numba.DUFunc`. The optional *nopython*, *forceobj* and *locals* arguments have the same meaning as in :func:`numba.jit`. *signatures* is an optional list of signatures expressed in the same form as in the :func:`numba.jit` *signature* argument. If *signatures* is non-empty, then the decorator will compile the user Python function into a Numpy ufunc. If no *signatures* are given, then the decorator will wrap the user Python function in a :class:`~numba.DUFunc` instance, which will compile the user function at call time whenever Numpy can not find a matching loop for the input arguments. *signatures* is required if *target* is ``"parallel"``. *identity* is the identity (or unit) value of the function being implemented. Possible values are 0, 1, None, and the string ``"reorderable"``. The default is None. Both None and ``"reorderable"`` mean the function has no identity value; ``"reorderable"`` additionally specifies that reductions along multiple axes can be reordered. 
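For instance, a minimal sketch of declaring an identity so the resulting ufunc can be used as a reduction (the function itself is illustrative)::

    import numpy as np
    from numba import vectorize

    @vectorize(["int64(int64, int64)"], identity=0)
    def add(x, y):
        return x + y

    add.reduce(np.array([], dtype=np.int64))  # returns the identity, 0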
If there are several *signatures*, they must be ordered from the more specific to the least specific. Otherwise, Numpy's type-based dispatching may not work as expected. For example, the following is wrong::

    @vectorize(["float64(float64)", "float32(float32)"])
    def f(x): ...

as running it over a single-precision array will choose the ``float64`` version of the compiled function, leading to much less efficient execution. The correct invocation is::

    @vectorize(["float32(float32)", "float64(float64)"])
    def f(x): ...

*target* is a string for the backend target; available values are "cpu", "parallel", and "cuda". To use a multithreaded version, change the target to "parallel" (which requires signatures to be specified)::

    @vectorize(["float64(float64)", "float32(float32)"], target='parallel')
    def f(x): ...

For the CUDA target, use "cuda"::

    @vectorize(["float64(float64)", "float32(float32)"], target='cuda')
    def f(x): ...

The compiled function can be cached to reduce future compilation time. It is enabled by setting *cache* to True. Only the "cpu" and "parallel" targets support caching.

.. decorator:: numba.guvectorize(signatures, layout, *, identity=None, nopython=True, target='cpu', forceobj=False, cache=False, locals={})

   Generalized version of :func:`numba.vectorize`. While :func:`numba.vectorize` will produce a simple ufunc whose core functionality (the function you are decorating) operates on scalar operands and returns a scalar value, :func:`numba.guvectorize` allows you to create a `Numpy ufunc`_ whose core function takes array arguments of various dimensions.

   The additional argument *layout* is a string specifying, in symbolic form, the dimensionality and size relationship of the argument types and return types. For example, a matrix multiplication will have a layout string of ``"(m,n),(n,p)->(m,p)"``. Its definition might be (function body omitted)::

       @guvectorize(["void(float64[:,:], float64[:,:], float64[:,:])"],
                    "(m,n),(n,p)->(m,p)")
       def f(a, b, result):
           """Fill-in *result* matrix such as result := a * b"""
           ...

   If one of the arguments should be a scalar, the corresponding layout specification is ``()`` and the argument will really be given to you as a zero-dimension array (you have to dereference it to get the scalar value). For example, a :ref:`one-dimension moving average ` with a parameterizable window width may have a layout string of ``"(n),()->(n)"``.

   Note that any output will be given to you preallocated as an additional function argument: your code has to fill it with the appropriate values for the function you are implementing.

   If your function doesn't take an output array, you should omit the "arrow" in the layout string (e.g. ``"(n),(n)"``). When doing this, it is important to be aware that changes to the input arrays cannot always be relied on to be visible outside the execution of the ufunc, as NumPy may pass in temporary arrays as inputs (for example, if a cast is required).

   .. seealso:: Specification of the `layout string `_ as supported by Numpy. Note that Numpy uses the term "signature", which we unfortunately use for something else.

   The compiled function can be cached to reduce future compilation time. It is enabled by setting *cache* to True. Only the "cpu" and "parallel" targets support caching.

.. _Numpy ufunc: http://docs.scipy.org/doc/numpy/reference/ufuncs.html

.. class:: numba.DUFunc

   The class of objects created by calling :func:`numba.vectorize` with no signatures.
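   For example, a minimal sketch (the function is invented; loops are compiled lazily as new input types appear)::

       import numpy as np
       from numba import vectorize

       @vectorize                 # no signatures: returns a DUFunc
       def rel_diff(x, y):
           return 2 * (x - y) / (x + y)

       rel_diff(3.0, 4.0)                          # compiles a float64 loop
       rel_diff(np.arange(1, 5), np.arange(2, 6))  # adds an integer loop on demand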
   DUFunc instances should behave similarly to Numpy :class:`~numpy.ufunc`
   objects with one important difference: call-time loop generation. When
   calling a ufunc, Numpy looks at the existing loops registered for that
   ufunc, and will raise a :class:`TypeError` if it cannot find a loop that
   it can safely cast the inputs to suit. When calling a DUFunc, Numba
   delegates the call to Numpy. If the Numpy ufunc call fails, then Numba
   attempts to build a new loop for the given input types, and calls the
   ufunc again. If this second call attempt fails or a compilation error
   occurs, then DUFunc passes along the exception to the caller.

   .. seealso::

      The ":ref:`dynamic-universal-functions`" section in the user's guide
      demonstrates the call-time behavior of :class:`~numba.DUFunc`, and
      discusses the impact of call order on how Numba generates the
      underlying :class:`~numpy.ufunc`.

   .. attribute:: ufunc

      The actual Numpy :class:`~numpy.ufunc` object being built by the
      :class:`~numba.DUFunc` instance. Note that the :class:`~numba.DUFunc`
      object maintains several important data structures required for
      proper ufunc functionality (specifically the dynamically compiled
      loops). Users should not pass the :class:`~numpy.ufunc` value around
      without ensuring the underlying :class:`~numba.DUFunc` will not be
      garbage collected.

   .. attribute:: nin

      The number of DUFunc (ufunc) inputs. See `ufunc.nin`_.

   .. attribute:: nout

      The number of DUFunc outputs. See `ufunc.nout`_.

   .. attribute:: nargs

      The total number of possible DUFunc arguments (should be
      :attr:`~numba.DUFunc.nin` + :attr:`~numba.DUFunc.nout`). See
      `ufunc.nargs`_.

   .. attribute:: ntypes

      The number of input types supported by the DUFunc. See
      `ufunc.ntypes`_.

   .. attribute:: types

      A list of the supported types given as strings. See `ufunc.types`_.

   .. attribute:: identity

      The identity value when using the ufunc as a reduction. See
      `ufunc.identity`_.

   .. method:: reduce(A, *, axis, dtype, out, keepdims)

      Reduces *A*\'s dimension by one by applying the DUFunc along one
      axis. See `ufunc.reduce`_.

   .. method:: accumulate(A, *, axis, dtype, out)

      Accumulate the result of applying the operator to all elements. See
      `ufunc.accumulate`_.

   .. method:: reduceat(A, indices, *, axis, dtype, out)

      Performs a (local) reduce with specified slices over a single axis.
      See `ufunc.reduceat`_.

   .. method:: outer(A, B)

      Apply the ufunc to all pairs (*a*, *b*) with *a* in *A*, and *b* in
      *B*. See `ufunc.outer`_.

   .. method:: at(A, indices, *, B)

      Performs an unbuffered in-place operation on operand *A* for elements
      specified by *indices*. If you are using Numpy 1.7 or earlier, this
      method will not be present. See `ufunc.at`_.

.. note:: Vectorized functions can, in rare circumstances, show
   :ref:`unexpected warnings or errors `.

.. _`ufunc.nin`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.nin.html#numpy.ufunc.nin
.. _`ufunc.nout`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.nout.html#numpy.ufunc.nout
.. _`ufunc.nargs`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.nargs.html#numpy.ufunc.nargs
.. _`ufunc.ntypes`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.ntypes.html#numpy.ufunc.ntypes
.. _`ufunc.types`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.types.html#numpy.ufunc.types
.. _`ufunc.identity`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.identity.html#numpy.ufunc.identity
.. _`ufunc.reduce`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.reduce.html#numpy.ufunc.reduce
.. _`ufunc.accumulate`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.accumulate.html#numpy.ufunc.accumulate
.. _`ufunc.reduceat`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.reduceat.html#numpy.ufunc.reduceat
.. _`ufunc.outer`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.outer.html#numpy.ufunc.outer
.. _`ufunc.at`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.at.html#numpy.ufunc.at

C callbacks
-----------

.. decorator:: numba.cfunc(signature, nopython=False, cache=False, locals={})

   Compile the decorated function on the fly to produce efficient machine
   code. The compiled code is wrapped in a thin C callback that makes it
   callable using the natural C ABI.

   The *signature* is a single signature representing the signature of the
   C callback. It must have the same form as in :func:`~numba.jit`. The
   decorator does not check that the types in the signature have a
   well-defined representation in C.

   *nopython* and *cache* are boolean flags. *locals* is a mapping of
   local variable names to :ref:`numba-types`. They all have the same
   meaning as in :func:`~numba.jit`.

   The decorator returns a :class:`CFunc` object.

   .. note:: C callbacks currently do not support :term:`object mode`.

.. class:: CFunc

   The class of objects created by :func:`~numba.cfunc`. :class:`CFunc`
   objects expose the following attributes and methods:

   .. attribute:: address

      The address of the compiled C callback, as an integer.

   .. attribute:: cffi

      A `cffi`_ function pointer instance, to be passed as an argument to
      `cffi`_-wrapped functions. The pointer's type is ``void *``, so only
      minimal type checking will happen when passing it to `cffi`_.

   .. attribute:: ctypes

      A :mod:`ctypes` callback instance, as if it were created using
      :func:`ctypes.CFUNCTYPE`.

   .. attribute:: native_name

      The name of the compiled C callback.

   .. method:: inspect_llvm()

      Return the human-readable LLVM IR generated for the C callback.
      :attr:`native_name` is the name under which this callback is defined
      in the IR.

.. _cffi: https://cffi.readthedocs.org/
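As a short sketch of how a C callback might be created and exercised from
Python (the signature here is illustrative; the ``ctypes`` wrapper is used
purely to test the callback)::

   from numba import cfunc

   @cfunc("float64(float64, float64)")
   def add(x, y):
       return x + y

   # The integer address can be handed to C libraries expecting a
   # function pointer; the ctypes wrapper lets us call it from Python.
   print(add.ctypes(4.0, 5.0))   # 9.0
   print(hex(add.address))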
numba-0.55.1/docs/source/reference/numpysupported.rst000664 000000 000000 00000072763 14174536160 022740 0ustar00rootroot000000 000000 .. _numpy-support:

========================
Supported NumPy features
========================

One objective of Numba is having a seamless integration with `NumPy`_.
NumPy arrays provide an efficient storage method for homogeneous sets of
data. NumPy dtypes provide type information useful when compiling, and the
regular, structured storage of potentially large amounts of data in memory
provides an ideal memory layout for code generation. Numba excels at
generating code that executes on top of NumPy arrays.

NumPy support in Numba comes in many forms:

* Numba understands calls to NumPy `ufuncs`_ and is able to generate
  equivalent native code for many of them.
* NumPy arrays are directly supported in Numba. Access to Numpy arrays is
  very efficient, as indexing is lowered to direct memory accesses when
  possible.
* Numba is able to generate `ufuncs`_ and `gufuncs`_. This means that it
  is possible to implement ufuncs and gufuncs within Python, getting
  speeds comparable to those of ufuncs/gufuncs implemented in C extension
  modules using the NumPy C API.

.. _NumPy: http://www.numpy.org/
.. _ufuncs: http://docs.scipy.org/doc/numpy/reference/ufuncs.html
.. _gufuncs: http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html

The following sections focus on the Numpy features supported in
:term:`nopython mode`, unless otherwise stated.

Scalar types
============

Numba supports the following Numpy scalar types:

* **Integers**: all integers of either signedness, and any width up to 64
  bits
* **Booleans**
* **Real numbers:** single-precision (32-bit) and double-precision
  (64-bit) reals
* **Complex numbers:** single-precision (2x32-bit) and double-precision
  (2x64-bit) complex numbers
* **Datetimes and timestamps:** of any unit
* **Character sequences** (but no operations are available on them)
* **Structured scalars:** structured scalars made of any of the types
  above and arrays of the types above

The following scalar types and features are not supported:

* **Arbitrary Python objects**
* **Half-precision and extended-precision** real and complex numbers
* **Nested structured scalars**: the fields of structured scalars may not
  contain other structured scalars

The operations supported on NumPy scalars are almost the same as on the
equivalent built-in types such as ``int`` or ``float``. You can use a
type's constructor to convert from a different type or width. In addition
you can use the ``view(np.<dtype>)`` method to bitcast all ``int`` and
``float`` types within the same width. However, you must define the scalar
using a NumPy constructor within a jitted function. For example, the
following will work:

.. code:: pycon

    >>> import numpy as np
    >>> from numba import njit
    >>> @njit
    ... def bitcast():
    ...     i = np.int64(-1)
    ...     print(i.view(np.uint64))
    ...
    >>> bitcast()
    18446744073709551615

Whereas the following will not work:

.. code:: pycon

    >>> import numpy as np
    >>> from numba import njit
    >>> @njit
    ... def bitcast(i):
    ...     print(i.view(np.uint64))
    ...
    >>> bitcast(np.int64(-1))
    ---------------------------------------------------------------------------
    TypingError                               Traceback (most recent call last)
    ...
    TypingError: Failed in nopython mode pipeline (step: ensure IR is legal prior to lowering)
    'view' can only be called on NumPy dtypes, try wrapping the variable with 'np.<dtype>()'

    File "", line 3:
    def bitcast(i):
        print(i.view(np.uint64))

Structured scalars support attribute getting and setting, as well as
member lookup using constant strings. Strings stored in a local or global
tuple are considered constant strings and can be used for member lookup.

.. literalinclude:: ../../../numba/tests/doc_examples/test_rec_array.py
   :language: python
   :start-after: magictoken.ex_rec_arr_const_index.begin
   :end-before: magictoken.ex_rec_arr_const_index.end
   :dedent: 8

It is also possible to use local or global tuples together with
``literal_unroll``:

.. literalinclude:: ../../../numba/tests/doc_examples/test_rec_array.py
   :language: python
   :start-after: magictoken.ex_rec_arr_lit_unroll_index.begin
   :end-before: magictoken.ex_rec_arr_lit_unroll_index.end
   :dedent: 8

Record subtyping
----------------

.. warning:: This is an experimental feature.

Numba allows `width subtyping `_ of structured scalars. For example,
``dtype([('a', 'f8'), ('b', 'i8')])`` will be considered a subtype of
``dtype([('a', 'f8')])``, because the second is a strict subset of the
first, i.e. field ``a`` is of the same type and is in the same position in
both types. The subtyping relationship will matter in cases where
compilation for a certain input is not allowed, but the input is a subtype
of another, allowed type.
.. code-block:: python

   import numpy as np
   from numba import njit, typeof
   from numba.core import types

   record1 = np.array([1], dtype=[('a', 'f8')])[0]
   record2 = np.array([(2, 3)], dtype=[('a', 'f8'), ('b', 'f8')])[0]

   @njit(types.float64(typeof(record1)))
   def foo(rec):
       return rec['a']

   foo(record1)
   foo(record2)

Without subtyping the last line would fail. With subtyping, no new
compilation will be triggered, but the compiled function for ``record1``
will be used for ``record2``.

.. seealso:: `Numpy scalars `_ reference.

Array types
===========

`Numpy arrays `_ of any of the scalar types above are supported,
regardless of the shape or layout.

Array access
------------

Arrays support normal iteration. Full basic indexing and slicing is
supported. A subset of advanced indexing is also supported: only one
advanced index is allowed, and it has to be a one-dimensional array (it
can be combined with an arbitrary number of basic indices as well).

.. seealso:: `Numpy indexing `_ reference.

.. _structured-array-access:

Structured array access
-----------------------

Numba presently supports accessing fields of individual elements in
structured arrays by attribute as well as by getting and setting. This
goes slightly beyond the NumPy API, which only allows accessing fields by
getting and setting. For example:

.. code:: python

   from numba import njit
   import numpy as np

   record_type = np.dtype([("ival", np.int32), ("fval", np.float64)], align=True)

   def f(rec):
       value = 2.5
       rec[0].ival = int(value)
       rec[0].fval = value
       return rec

   arr = np.ones(1, dtype=record_type)

   cfunc = njit(f)

   # Works
   print(cfunc(arr))

   # Does not work
   print(f(arr))

The above code results in the output:

.. code:: none

   [(2, 2.5)]
   Traceback (most recent call last):
     File "repro.py", line 22, in <module>
       print(f(arr))
     File "repro.py", line 9, in f
       rec[0].ival = int(value)
   AttributeError: 'numpy.void' object has no attribute 'ival'

The Numba-compiled version of the function executes, but the pure Python
version raises an error because of the unsupported use of attribute
access.

.. note:: This behavior will eventually be deprecated and removed.

Attributes
----------

The following attributes of Numpy arrays are supported:

* :attr:`~numpy.ndarray.dtype`
* :attr:`~numpy.ndarray.flags`
* :attr:`~numpy.ndarray.flat`
* :attr:`~numpy.ndarray.itemsize`
* :attr:`~numpy.ndarray.ndim`
* :attr:`~numpy.ndarray.shape`
* :attr:`~numpy.ndarray.size`
* :attr:`~numpy.ndarray.strides`
* :attr:`~numpy.ndarray.T`
* :attr:`~numpy.ndarray.real`
* :attr:`~numpy.ndarray.imag`

The ``flags`` object
''''''''''''''''''''

The object returned by the :attr:`~numpy.ndarray.flags` attribute supports
the ``contiguous``, ``c_contiguous`` and ``f_contiguous`` attributes.

The ``flat`` object
'''''''''''''''''''

The object returned by the :attr:`~numpy.ndarray.flat` attribute supports
iteration and indexing, but be careful: indexing is very slow on
non-C-contiguous arrays.

The ``real`` and ``imag`` attributes
''''''''''''''''''''''''''''''''''''

Numpy supports these attributes regardless of the dtype but Numba chooses
to limit their support to avoid potential user error. For numeric dtypes,
Numba follows Numpy's behavior. The :attr:`~numpy.ndarray.real` attribute
returns a view of the real part of the complex array and it behaves as an
identity function for other numeric dtypes. The
:attr:`~numpy.ndarray.imag` attribute returns a view of the imaginary part
of the complex array and it returns a zero array with the same shape and
dtype for other numeric dtypes.
For non-numeric dtypes, including all structured/record dtypes, using
these attributes will result in a compile-time (``TypingError``) error.
This behavior differs from Numpy's but it is chosen to avoid the potential
confusion with field names that overlap these attributes.

Calculation
-----------

The following methods of Numpy arrays are supported in their basic form
(without any optional arguments):

* :meth:`~numpy.ndarray.all`
* :meth:`~numpy.ndarray.any`
* :meth:`~numpy.ndarray.clip`
* :meth:`~numpy.ndarray.conj`
* :meth:`~numpy.ndarray.conjugate`
* :meth:`~numpy.ndarray.cumprod`
* :meth:`~numpy.ndarray.cumsum`
* :meth:`~numpy.ndarray.max`
* :meth:`~numpy.ndarray.mean`
* :meth:`~numpy.ndarray.min`
* :meth:`~numpy.ndarray.nonzero`
* :meth:`~numpy.ndarray.prod`
* :meth:`~numpy.ndarray.std`
* :meth:`~numpy.ndarray.take`
* :meth:`~numpy.ndarray.var`

The corresponding top-level Numpy functions (such as :func:`numpy.prod`)
are similarly supported.

Other methods
-------------

The following methods of Numpy arrays are supported:

* :meth:`~numpy.ndarray.argmax` (``axis`` keyword argument supported).
* :meth:`~numpy.ndarray.argmin` (``axis`` keyword argument supported).
* :meth:`~numpy.ndarray.argsort` (``kind`` keyword argument supported for
  values ``'quicksort'`` and ``'mergesort'``)
* :meth:`~numpy.ndarray.astype` (only the 1-argument form)
* :meth:`~numpy.ndarray.copy` (without arguments)
* :meth:`~numpy.ndarray.dot` (only the 1-argument form)
* :meth:`~numpy.ndarray.flatten` (no order argument; 'C' order only)
* :meth:`~numpy.ndarray.item` (without arguments)
* :meth:`~numpy.ndarray.itemset` (only the 1-argument form)
* :meth:`~numpy.ndarray.ptp` (without arguments)
* :meth:`~numpy.ndarray.ravel` (no order argument; 'C' order only)
* :meth:`~numpy.ndarray.repeat` (no axis argument)
* :meth:`~numpy.ndarray.reshape` (only the 1-argument form)
* :meth:`~numpy.ndarray.sort` (without arguments)
* :meth:`~numpy.ndarray.sum` (with or without the ``axis`` and/or
  ``dtype`` arguments; see the sketch at the end of this section)

  * ``axis`` only supports ``integer`` values.
  * If the ``axis`` argument is a compile-time constant, all valid values
    are supported. An out-of-range value will result in a
    ``LoweringError`` at compile-time.
  * If the ``axis`` argument is not a compile-time constant, only values
    from 0 to 3 are supported. An out-of-range value will result in a
    runtime exception.
  * All numeric ``dtypes`` are supported in the ``dtype`` parameter.
    ``timedelta`` arrays can be used as input arrays but ``timedelta`` is
    not supported as ``dtype`` parameter.
  * When a ``dtype`` is given, it determines the type of the internal
    accumulator. When it is not, the selection is made automatically based
    on the input array's ``dtype``, mostly following the same rules as
    NumPy. However, on 64-bit Windows, Numba uses a 64-bit accumulator for
    integer inputs (``int64`` for ``int32`` inputs and ``uint64`` for
    ``uint32`` inputs), while NumPy would use a 32-bit accumulator in
    those cases.

* :meth:`~numpy.ndarray.transpose`
* :meth:`~numpy.ndarray.view` (only the 1-argument form)
* :meth:`~numpy.ndarray.__contains__`

Where applicable, the corresponding top-level NumPy functions (such as
:func:`numpy.argmax`) are similarly supported.

.. warning:: Sorting may be slightly slower than Numpy's implementation.
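To illustrate the ``axis`` and ``dtype`` support on ``sum`` described
above, a minimal sketch (the function name is illustrative)::

   import numpy as np
   from numba import njit

   @njit
   def col_sums(a):
       # axis as a compile-time constant; dtype selects the accumulator
       return a.sum(axis=0, dtype=np.float64)

   print(col_sums(np.ones((3, 4), dtype=np.float32)))  # [3. 3. 3. 3.]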
Functions
=========

Linear algebra
--------------

Basic linear algebra is supported on 1-D and 2-D contiguous arrays of
floating-point and complex numbers:

* :func:`numpy.dot`
* :func:`numpy.kron` ('C' and 'F' order only)
* :func:`numpy.outer`
* :func:`numpy.trace` (only the first argument).
* :func:`numpy.vdot`
* On Python 3.5 and above, the matrix multiplication operator from
  :pep:`465` (i.e. ``a @ b`` where ``a`` and ``b`` are 1-D or 2-D arrays).
* :func:`numpy.linalg.cholesky`
* :func:`numpy.linalg.cond` (only non-string values in ``p``).
* :func:`numpy.linalg.det`
* :func:`numpy.linalg.eig` (only running with data that does not cause a
  domain change is supported e.g. real input -> real output, complex
  input -> complex output).
* :func:`numpy.linalg.eigh` (only the first argument).
* :func:`numpy.linalg.eigvals` (only running with data that does not cause
  a domain change is supported e.g. real input -> real output, complex
  input -> complex output).
* :func:`numpy.linalg.eigvalsh` (only the first argument).
* :func:`numpy.linalg.inv`
* :func:`numpy.linalg.lstsq`
* :func:`numpy.linalg.matrix_power`
* :func:`numpy.linalg.matrix_rank`
* :func:`numpy.linalg.norm` (only the 2 first arguments and only
  non-string values in ``ord``).
* :func:`numpy.linalg.pinv`
* :func:`numpy.linalg.qr` (only the first argument).
* :func:`numpy.linalg.slogdet`
* :func:`numpy.linalg.solve`
* :func:`numpy.linalg.svd` (only the 2 first arguments).

.. note:: The implementation of these functions needs SciPy to be
   installed.

Reductions
----------

The following reduction functions are supported:

* :func:`numpy.diff` (only the 2 first arguments)
* :func:`numpy.median` (only the first argument)
* :func:`numpy.nancumprod` (only the first argument)
* :func:`numpy.nancumsum` (only the first argument)
* :func:`numpy.nanmax` (only the first argument)
* :func:`numpy.nanmean` (only the first argument)
* :func:`numpy.nanmedian` (only the first argument)
* :func:`numpy.nanmin` (only the first argument)
* :func:`numpy.nanpercentile` (only the 2 first arguments, complex dtypes
  unsupported)
* :func:`numpy.nanquantile` (only the 2 first arguments, complex dtypes
  unsupported)
* :func:`numpy.nanprod` (only the first argument)
* :func:`numpy.nanstd` (only the first argument)
* :func:`numpy.nansum` (only the first argument)
* :func:`numpy.nanvar` (only the first argument)
* :func:`numpy.percentile` (only the 2 first arguments, complex dtypes
  unsupported)
* :func:`numpy.quantile` (only the 2 first arguments, complex dtypes
  unsupported)
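For instance, a sketch of one of the NaN-aware reductions listed above
(the function name is illustrative)::

   import numpy as np
   from numba import njit

   @njit
   def robust_mean(a):
       return np.nanmean(a)  # NaN entries are ignored

   print(robust_mean(np.array([1.0, np.nan, 3.0])))  # 2.0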
Other functions
---------------

The following top-level functions are supported:

* :func:`numpy.append`
* :func:`numpy.arange`
* :func:`numpy.argsort` (``kind`` keyword argument supported for values
  ``'quicksort'`` and ``'mergesort'``)
* :func:`numpy.argwhere`
* :func:`numpy.array` (only the 2 first arguments)
* :func:`numpy.array_equal`
* :func:`numpy.array_split`
* :func:`numpy.asarray` (only the 2 first arguments)
* :func:`numpy.asarray_chkfinite` (only the 2 first arguments)
* :func:`numpy.asfarray`
* :func:`numpy.asfortranarray` (only the first argument)
* :func:`numpy.atleast_1d`
* :func:`numpy.atleast_2d`
* :func:`numpy.atleast_3d`
* :func:`numpy.bartlett`
* :func:`numpy.bincount`
* :func:`numpy.blackman`
* :func:`numpy.broadcast_to` (only the 2 first arguments)
* :func:`numpy.column_stack`
* :func:`numpy.concatenate`
* :func:`numpy.convolve` (only the 2 first arguments)
* :func:`numpy.copy` (only the first argument)
* :func:`numpy.corrcoef` (only the 3 first arguments, requires SciPy)
* :func:`numpy.correlate` (only the 2 first arguments)
* :func:`numpy.count_nonzero` (axis only supports scalar values)
* :func:`numpy.cov` (only the 5 first arguments)
* :func:`numpy.cross` (only the 2 first arguments; at least one of the
  input arrays should have ``shape[-1] == 3``)

  * If ``shape[-1] == 2`` for both inputs, please replace your
    :func:`numpy.cross` call with :func:`numba.np.extensions.cross2d`.

* :func:`numpy.delete` (only the 2 first arguments)
* :func:`numpy.diag`
* :func:`numpy.digitize`
* :func:`numpy.dstack`
* :func:`numpy.dtype` (only the first argument)
* :func:`numpy.ediff1d`
* :func:`numpy.empty` (only the 2 first arguments)
* :func:`numpy.empty_like` (only the 2 first arguments)
* :func:`numpy.expand_dims`
* :func:`numpy.extract`
* :func:`numpy.eye`
* :func:`numpy.fill_diagonal`
* :func:`numpy.flatten` (no order argument; 'C' order only)
* :func:`numpy.flatnonzero`
* :func:`numpy.flip` (no axis argument)
* :func:`numpy.fliplr`
* :func:`numpy.flipud`
* :func:`numpy.frombuffer` (only the 2 first arguments)
* :func:`numpy.full` (only the 3 first arguments)
* :func:`numpy.full_like` (only the 3 first arguments)
* :func:`numpy.hamming`
* :func:`numpy.hanning`
* :func:`numpy.histogram` (only the 3 first arguments)
* :func:`numpy.hstack`
* :func:`numpy.identity`
* :func:`numpy.kaiser`
* :func:`numpy.iscomplex`
* :func:`numpy.iscomplexobj`
* :func:`numpy.isneginf`
* :func:`numpy.isposinf`
* :func:`numpy.isreal`
* :func:`numpy.isrealobj`
* :func:`numpy.isscalar`
* :func:`numpy.interp` (only the 3 first arguments)
* :func:`numpy.intersect1d` (only the 2 first arguments, ``ar1`` and
  ``ar2``)
* :func:`numpy.linspace` (only the 3-argument form)
* :func:`numpy.logspace` (only the 3 first arguments)
* :class:`numpy.ndenumerate`
* :class:`numpy.ndindex`
* :class:`numpy.nditer` (only the first argument)
* :func:`numpy.ones` (only the 2 first arguments)
* :func:`numpy.ones_like` (only the 2 first arguments)
* :func:`numpy.partition` (only the 2 first arguments)
* :func:`numpy.ptp` (only the first argument)
* :func:`numpy.ravel` (no order argument; 'C' order only)
* :func:`numpy.repeat` (no axis argument)
* :func:`numpy.reshape` (no order argument; 'C' order only)
* :func:`numpy.roll` (only the 2 first arguments; second argument
  ``shift`` must be an integer)
* :func:`numpy.roots`
* :func:`numpy.rot90` (only the 2 first arguments)
* :func:`numpy.round_`
* :func:`numpy.searchsorted` (only the 3 first arguments)
* :func:`numpy.select` (only using homogeneous lists or tuples for the
  first two arguments, condlist and choicelist). Additionally, these two
  arguments can only contain arrays (unlike Numpy that also accepts
  tuples).
* :func:`numpy.shape` * :func:`numpy.sinc` * :func:`numpy.sort` (no optional arguments) * :func:`numpy.split` * :func:`numpy.stack` * :func:`numpy.swapaxes` * :func:`numpy.take` (only the 2 first arguments) * :func:`numpy.take_along_axis` (the axis argument must be a literal value) * :func:`numpy.transpose` * :func:`numpy.trapz` (only the 3 first arguments) * :func:`numpy.tri` (only the 3 first arguments; third argument ``k`` must be an integer) * :func:`numpy.tril` (second argument ``k`` must be an integer) * :func:`numpy.tril_indices` (all arguments must be integer) * :func:`numpy.tril_indices_from` (second argument ``k`` must be an integer) * :func:`numpy.triu` (second argument ``k`` must be an integer) * :func:`numpy.triu_indices` (all arguments must be integer) * :func:`numpy.triu_indices_from` (second argument ``k`` must be an integer) * :func:`numpy.unique` (only the first argument) * :func:`numpy.vander` * :func:`numpy.vstack` * :func:`numpy.where` * :func:`numpy.zeros` (only the 2 first arguments) * :func:`numpy.zeros_like` (only the 2 first arguments) The following constructors are supported, both with a numeric input (to construct a scalar) or a sequence (to construct an array): * :class:`numpy.bool_` * :class:`numpy.complex64` * :class:`numpy.complex128` * :class:`numpy.float32` * :class:`numpy.float64` * :class:`numpy.int8` * :class:`numpy.int16` * :class:`numpy.int32` * :class:`numpy.int64` * :class:`numpy.intc` * :class:`numpy.intp` * :class:`numpy.uint8` * :class:`numpy.uint16` * :class:`numpy.uint32` * :class:`numpy.uint64` * :class:`numpy.uintc` * :class:`numpy.uintp` The following machine parameter classes are supported, with all purely numerical attributes: * :class:`numpy.iinfo` * :class:`numpy.finfo` (``machar`` attribute not supported) * :class:`numpy.MachAr` (with no arguments to the constructor) Literal arrays -------------- .. XXX should this part of the user's guide? Neither Python nor Numba has actual array literals, but you can construct arbitrary arrays by calling :func:`numpy.array` on a nested tuple:: a = numpy.array(((a, b, c), (d, e, f))) (nested lists are not yet supported by Numba) Modules ======= .. _numpy-random: ``random`` ---------- Numba supports top-level functions from the `numpy.random `_ module, but does not allow you to create individual RandomState instances. The same algorithms are used as for :ref:`the standard random module ` (and therefore the same notes apply), but with an independent internal state: seeding or drawing numbers from one generator won't affect the other. The following functions are supported. Initialization '''''''''''''' * :func:`numpy.random.seed`: with an integer argument only .. warning:: Calling :func:`numpy.random.seed` from interpreted code (including from :term:`object mode` code) will seed the NumPy random generator, not the Numba random generator. To seed the Numba random generator, see the example below. .. 
code-block:: python from numba import njit import numpy as np @njit def seed(a): np.random.seed(a) @njit def rand(): return np.random.rand() # Incorrect seeding np.random.seed(1234) print(rand()) np.random.seed(1234) print(rand()) # Correct seeding seed(1234) print(rand()) seed(1234) print(rand()) Simple random data '''''''''''''''''' * :func:`numpy.random.rand` * :func:`numpy.random.randint` (only the first two arguments) * :func:`numpy.random.randn` * :func:`numpy.random.random` * :func:`numpy.random.random_sample` * :func:`numpy.random.ranf` * :func:`numpy.random.sample` Permutations '''''''''''' * :func:`numpy.random.choice`: the optional *p* argument (probabilities array) is not supported * :func:`numpy.random.permutation` * :func:`numpy.random.shuffle`: the sequence argument must be a one-dimension Numpy array or buffer-providing object (such as a :class:`bytearray` or :class:`array.array`) Distributions ''''''''''''' .. warning:: The `size` argument is not supported in the following functions. * :func:`numpy.random.beta` * :func:`numpy.random.binomial` * :func:`numpy.random.chisquare` * :func:`numpy.random.dirichlet` * :func:`numpy.random.exponential` * :func:`numpy.random.f` * :func:`numpy.random.gamma` * :func:`numpy.random.geometric` * :func:`numpy.random.gumbel` * :func:`numpy.random.hypergeometric` * :func:`numpy.random.laplace` * :func:`numpy.random.logistic` * :func:`numpy.random.lognormal` * :func:`numpy.random.logseries` * :func:`numpy.random.multinomial` * :func:`numpy.random.negative_binomial` * :func:`numpy.random.normal` * :func:`numpy.random.pareto` * :func:`numpy.random.poisson` * :func:`numpy.random.power` * :func:`numpy.random.rayleigh` * :func:`numpy.random.standard_cauchy` * :func:`numpy.random.standard_exponential` * :func:`numpy.random.standard_gamma` * :func:`numpy.random.standard_normal` * :func:`numpy.random.standard_t` * :func:`numpy.random.triangular` * :func:`numpy.random.uniform` * :func:`numpy.random.vonmises` * :func:`numpy.random.wald` * :func:`numpy.random.weibull` * :func:`numpy.random.zipf` .. note:: Calling :func:`numpy.random.seed` from non-Numba code (or from :term:`object mode` code) will seed the Numpy random generator, not the Numba random generator. .. note:: Since version 0.28.0, the generator is thread-safe and fork-safe. Each thread and each process will produce independent streams of random numbers. ``stride_tricks`` ----------------- The following function from the :mod:`numpy.lib.stride_tricks` module is supported: * :func:`~numpy.lib.stride_tricks.as_strided` (the *strides* argument is mandatory, the *subok* argument is not supported) .. _supported_ufuncs: Standard ufuncs =============== One objective of Numba is having all the `standard ufuncs in NumPy `_ understood by Numba. When a supported ufunc is found when compiling a function, Numba maps the ufunc to equivalent native code. This allows the use of those ufuncs in Numba code that gets compiled in :term:`nopython mode`. Limitations ----------- Right now, only a selection of the standard ufuncs work in :term:`nopython mode`. Following is a list of the different standard ufuncs that Numba is aware of, sorted in the same way as in the NumPy documentation. 
Math operations
---------------

============== ============= ===============
    UFUNC                 MODE
-------------- -----------------------------
name           object mode   nopython mode
============== ============= ===============
add            Yes           Yes
subtract       Yes           Yes
multiply       Yes           Yes
divide         Yes           Yes
logaddexp      Yes           Yes
logaddexp2     Yes           Yes
true_divide    Yes           Yes
floor_divide   Yes           Yes
negative       Yes           Yes
power          Yes           Yes
float_power    Yes           Yes
remainder      Yes           Yes
mod            Yes           Yes
fmod           Yes           Yes
divmod (*)     Yes           Yes
abs            Yes           Yes
absolute       Yes           Yes
fabs           Yes           Yes
rint           Yes           Yes
sign           Yes           Yes
conj           Yes           Yes
exp            Yes           Yes
exp2           Yes           Yes
log            Yes           Yes
log2           Yes           Yes
log10          Yes           Yes
expm1          Yes           Yes
log1p          Yes           Yes
sqrt           Yes           Yes
square         Yes           Yes
cbrt           Yes           Yes
reciprocal     Yes           Yes
conjugate      Yes           Yes
gcd            Yes           Yes
lcm            Yes           Yes
============== ============= ===============

(\*) not supported on timedelta types

Trigonometric functions
-----------------------

============== ============= ===============
    UFUNC                 MODE
-------------- -----------------------------
name           object mode   nopython mode
============== ============= ===============
sin            Yes           Yes
cos            Yes           Yes
tan            Yes           Yes
arcsin         Yes           Yes
arccos         Yes           Yes
arctan         Yes           Yes
arctan2        Yes           Yes
hypot          Yes           Yes
sinh           Yes           Yes
cosh           Yes           Yes
tanh           Yes           Yes
arcsinh        Yes           Yes
arccosh        Yes           Yes
arctanh        Yes           Yes
deg2rad        Yes           Yes
rad2deg        Yes           Yes
degrees        Yes           Yes
radians        Yes           Yes
============== ============= ===============

Bit-twiddling functions
-----------------------

============== ============= ===============
    UFUNC                 MODE
-------------- -----------------------------
name           object mode   nopython mode
============== ============= ===============
bitwise_and    Yes           Yes
bitwise_or     Yes           Yes
bitwise_xor    Yes           Yes
bitwise_not    Yes           Yes
invert         Yes           Yes
left_shift     Yes           Yes
right_shift    Yes           Yes
============== ============= ===============

Comparison functions
--------------------

============== ============= ===============
    UFUNC                 MODE
-------------- -----------------------------
name           object mode   nopython mode
============== ============= ===============
greater        Yes           Yes
greater_equal  Yes           Yes
less           Yes           Yes
less_equal     Yes           Yes
not_equal      Yes           Yes
equal          Yes           Yes
logical_and    Yes           Yes
logical_or     Yes           Yes
logical_xor    Yes           Yes
logical_not    Yes           Yes
maximum        Yes           Yes
minimum        Yes           Yes
fmax           Yes           Yes
fmin           Yes           Yes
============== ============= ===============

Floating functions
------------------

============== ============= ===============
    UFUNC                 MODE
-------------- -----------------------------
name           object mode   nopython mode
============== ============= ===============
isfinite       Yes           Yes
isinf          Yes           Yes
isnan          Yes           Yes
signbit        Yes           Yes
copysign       Yes           Yes
nextafter      Yes           Yes
modf           Yes           No
ldexp          Yes (*)       Yes
frexp          Yes           No
floor          Yes           Yes
ceil           Yes           Yes
trunc          Yes           Yes
spacing        Yes           Yes
============== ============= ===============

(\*) not supported on windows 32 bit

Datetime functions
------------------

============== ============= ===============
    UFUNC                 MODE
-------------- -----------------------------
name           object mode   nopython mode
============== ============= ===============
isnat          Yes           Yes
============== ============= ===============

numba-0.55.1/docs/source/reference/pysemantics.rst000664 000000 000000 00000005516 14174536160 022161 0ustar00rootroot000000 000000 .. _pysemantics:

Deviations from Python Semantics
================================

Bounds Checking
---------------

By default, instead of causing an :class:`IndexError`, accessing an
out-of-bound index of an array in a Numba-compiled function will return
invalid values or lead to an access violation error (it's reading from
invalid memory locations).
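For example, a minimal sketch of the hazard (the printed value is whatever
happens to be in memory)::

   import numpy as np
   from numba import njit

   @njit
   def oob(a):
       return a[4]           # out of bounds for a 3-element array

   print(oob(np.arange(3)))  # may print an arbitrary value or crash,
                             # rather than raising IndexError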
Bounds checking can be enabled on a specific function via the
:ref:`boundscheck ` option of the jit decorator. Additionally, the
:envvar:`NUMBA_BOUNDSCHECK` environment variable can be set to 0 or 1 to
globally override this flag.

.. note:: Bounds checking will slow down typical functions so it is
   recommended to only use this flag for debugging purposes.

Exceptions and Memory Allocation
--------------------------------

Due to limitations in the current compiler when handling exceptions,
memory allocated (almost always NumPy arrays) within a function that
raises an exception will **leak**. This is a known issue that will be
fixed, but in the meantime, it is best to do memory allocation outside of
functions that can also raise exceptions.

Integer width
-------------

While Python has arbitrary-sized integers, integers in Numba-compiled
functions get a fixed size through :term:`type inference` (usually, the
size of a machine integer). This means that arithmetic operations can wrap
around, overflow, or produce undefined results. Type inference can be
overridden by an explicit type specification, if fine-grained control of
integer width is desired.

.. seealso:: :ref:`Enhancement proposal 1: Changes in integer typing `

Boolean inversion
-----------------

Calling the bitwise complement operator (the ``~`` operator) on a Python
boolean returns an integer, while the same operator on a Numpy boolean
returns another boolean::

   >>> ~True
   -2
   >>> ~np.bool_(True)
   False

Numba follows the Numpy semantics.

Global and closure variables
----------------------------

In :term:`nopython mode`, global and closure variables are *frozen* by
Numba: a Numba-compiled function sees the value of those variables at the
time the function was compiled. Also, it is not possible to change their
values from the function.

Numba **may or may not** copy global variables referenced inside a
compiled function. Small global arrays are copied under the assumption of
immutability to enable compiler optimizations. However, large global
arrays are not copied to conserve memory. The definition of "small" and
"large" may change.

Zero initialization of variables
--------------------------------

Numba does not track variable liveness at runtime. For simplicity of
implementation, all variables are zero-initialized. Example::

   from numba import njit

   @njit
   def foo():
       for i in range(0):
           pass
       print(i)  # will print 0 and not raise UnboundLocalError

   foo()

numba-0.55.1/docs/source/reference/pysupported.rst000664 000000 000000 00000115570 14174536160 022220 0ustar00rootroot000000 000000 .. _pysupported:

=========================
Supported Python features
=========================

Apart from the :ref:`pysupported-language` part below, which applies to
both :term:`object mode` and :term:`nopython mode`, this page only lists
the features supported in :term:`nopython mode`.

.. warning:: Numba behavior differs from Python semantics in some
   situations. We strongly advise reviewing :ref:`pysemantics` to become
   familiar with these differences.

.. _pysupported-language:

Language
========

Constructs
----------

Numba strives to support as much of the Python language as possible, but
some language features are not available inside Numba-compiled functions.
Below is a quick reference for the support level of Python constructs.

**Supported** constructs:

- conditional branch: ``if .. elif .. else``
- loops: ``while``, ``for .. in``, ``break``, ``continue``
- basic generator: ``yield``
- assertion: ``assert``

**Partially supported** constructs:

- exceptions: ``try .. except``, ``raise``, ``else`` and ``finally`` (See
  details in this :ref:`section `)
- context manager: ``with`` (only supporting :ref:`numba.objmode() `)
- list comprehension (see details in this :ref:`section `)

**Unsupported** constructs:

- async features: ``async with``, ``async for`` and ``async def``
- class definition: ``class`` (except for :ref:`@jitclass `)
- set, dict and generator comprehensions
- generator delegation: ``yield from``
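For instance, a minimal sketch exercising several of the supported
constructs above (a loop, a conditional branch and a basic generator)::

   from numba import njit

   @njit
   def evens_up_to(n):
       for i in range(n):
           if i % 2 == 0:
               yield i

   print(list(evens_up_to(7)))  # [0, 2, 4, 6]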
Functions
---------

Function calls
''''''''''''''

Numba supports function calls using positional and named arguments, as
well as arguments with default values and ``*args`` (note the argument for
``*args`` can only be a tuple, not a list). Explicit ``**kwargs`` are not
supported.

Function calls to locally defined inner functions are supported as long as
they can be fully inlined.

Functions as arguments
''''''''''''''''''''''

Functions can be passed as arguments into another function, but they
cannot be returned. For example:

.. code-block:: python

   from numba import jit

   @jit
   def add1(x):
       return x + 1

   @jit
   def bar(fn, x):
       return fn(x)

   @jit
   def foo(x):
       return bar(add1, x)

   # Passing add1 within numba compiled code.
   print(foo(1))
   # Passing add1 into bar from interpreted code
   print(bar(add1, 1))

.. note:: Numba does not handle function objects as real objects. Once a
   function is assigned to a variable, the variable cannot be re-assigned
   to a different function.

Inner function and closure
'''''''''''''''''''''''''''

Numba now supports inner functions as long as they are non-recursive and
only called locally, but not passed as argument or returned as result. The
use of closure variables (variables defined in outer scopes) within an
inner function is also supported.

Recursive calls
'''''''''''''''

Most recursive call patterns are supported. The only restriction is that
the recursive callee must have a control-flow path that returns without
recursing. Numba is able to type-infer recursive functions without
specifying the function type signature (which is required in numba 0.28
and earlier). Recursive calls can even call into a different overload of
the function.

.. XXX add reference to NBEP

Generators
----------

Numba supports generator functions and is able to compile them in
:term:`object mode` and :term:`nopython mode`. The returned generator can
be used both from Numba-compiled code and from regular Python code.

Coroutine features of generators are not supported (i.e. the
:meth:`generator.send`, :meth:`generator.throw`, :meth:`generator.close`
methods).

.. _pysupported-exception-handling:

Exception handling
------------------

``raise`` statement
'''''''''''''''''''

The ``raise`` statement is only supported in the following forms:

* ``raise SomeException``
* ``raise SomeException()``: in :term:`nopython mode`, constructor
  arguments must be :term:`compile-time constants `

It is currently unsupported to re-raise an exception created in compiled
code.

``try .. except``
'''''''''''''''''

The ``try .. except`` construct is partially supported. The following
forms are supported:

* the *bare* except that captures all exceptions:

  .. code-block:: python

     try:
         ...
     except:
         ...

* using exactly the ``Exception`` class in the ``except`` clause:

  .. code-block:: python

     try:
         ...
     except Exception:
         ...

  This will match any exception that is a subclass of ``Exception`` as
  expected.

Currently, instances of ``Exception`` and its subclasses are the only kind
of exception that can be raised in compiled code.
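For instance, a minimal sketch of raising and catching under these rules
(names are illustrative)::

   from numba import njit

   @njit
   def parse_positive(x):
       try:
           if x <= 0:
               raise ValueError("must be positive")  # constant arguments only
           return x
       except Exception:
           return -1  # handle the error case locally

   print(parse_positive(5))   # 5
   print(parse_positive(-3))  # -1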
.. warning:: Numba currently masks signals like ``KeyboardInterrupt`` and
   ``SystemExit``. These signaling exceptions are ignored during the
   execution of Numba compiled code. The Python interpreter will handle
   them as soon as the control is returned to it.

Currently, exception objects are not materialized inside compiled
functions. As a result, it is not possible to store an exception object
into a user variable or to re-raise an exception. With this limitation,
the only realistic use-case would look like:

.. code-block:: python

   try:
       do_work()
   except Exception:
       handle_error_case()
       return error_code

``try .. except .. else .. finally``
''''''''''''''''''''''''''''''''''''

The ``else`` block and the ``finally`` block of a ``try .. except`` are
supported::

   >>> @jit(nopython=True)
   ... def foo():
   ...     try:
   ...         print('main block')
   ...     except Exception:
   ...         print('handler block')
   ...     else:
   ...         print('else block')
   ...     finally:
   ...         print('final block')
   ...
   >>> foo()
   main block
   else block
   final block

The ``try .. finally`` construct without the ``except`` clause is also
supported.

.. _pysupported-builtin-types:

Built-in types
==============

int, bool
---------

Arithmetic operations as well as truth values are supported.

The following attributes and methods are supported:

* ``.conjugate()``
* ``.real``
* ``.imag``

float, complex
--------------

Arithmetic operations as well as truth values are supported.

The following attributes and methods are supported:

* ``.conjugate()``
* ``.real``
* ``.imag``

str
---

Numba supports (Unicode) strings in Python 3. Strings can be passed into
:term:`nopython mode` as arguments, as well as constructed and returned
from :term:`nopython mode`. As in Python, slices (even of length 1) return
a new, reference counted string. Optimized code paths for efficiently
accessing single characters may be introduced in the future.

The in-memory representation is the same as was introduced in Python 3.4,
with each string having a tag to indicate whether the string is using a 1,
2, or 4 byte character width in memory. When strings of different
encodings are combined (as in concatenation), the resulting string
automatically uses the larger character width of the two input strings.
String slices also use the same character width as the original string,
even if the slice could be represented with a narrower character width.
(These details are invisible to the user, of course.)

The following constructors, functions, attributes and methods are
currently supported:

* ``str(int)``
* ``len()``
* ``+`` (concatenation of strings)
* ``*`` (repetition of strings)
* ``in``, ``.contains()``
* ``==``, ``<``, ``<=``, ``>``, ``>=`` (comparison)
* ``.capitalize()``
* ``.casefold()``
* ``.center()``
* ``.count()``
* ``.endswith()``
* ``.expandtabs()``
* ``.find()``
* ``.index()``
* ``.isalnum()``
* ``.isalpha()``
* ``.isdecimal()``
* ``.isdigit()``
* ``.isidentifier()``
* ``.islower()``
* ``.isnumeric()``
* ``.isprintable()``
* ``.isspace()``
* ``.istitle()``
* ``.isupper()``
* ``.join()``
* ``.ljust()``
* ``.lower()``
* ``.lstrip()``
* ``.partition()``
* ``.replace()``
* ``.rfind()``
* ``.rindex()``
* ``.rjust()``
* ``.rpartition()``
* ``.rsplit()``
* ``.rstrip()``
* ``.split()``
* ``.splitlines()``
* ``.startswith()``
* ``.strip()``
* ``.swapcase()``
* ``.title()``
* ``.upper()``
* ``.zfill()``

Regular string literals (e.g. ``"ABC"``) as well as f-strings without
format specs (e.g.
``"ABC_{a+1}"``) that only use string and integer variables (types with ``str()`` overload) are supported in :term:`nopython mode`. Additional operations as well as support for Python 2 strings / Python 3 bytes will be added in a future version of Numba. Python 2 Unicode objects will likely never be supported. .. warning:: The performance of some operations is known to be slower than the CPython implementation. These include substring search (``in``, ``.contains()`` and ``find()``) and string creation (like ``.split()``). Improving the string performance is an ongoing task, but the speed of CPython is unlikely to be surpassed for basic string operation in isolation. Numba is most successfully used for larger algorithms that happen to involve strings, where basic string operations are not the bottleneck. tuple ----- Tuple support is categorised into two categories based on the contents of a tuple. The first category is homogeneous tuples, these are tuples where the type of all the values in the tuple are the same, the second is heterogeneous tuples, these are tuples where the types of the values are different. .. note:: The ``tuple()`` constructor itself is NOT supported. homogeneous tuples ------------------ An example of a homogeneous tuple: .. code-block:: python homogeneous_tuple = (1, 2, 3, 4) The following operations are supported on homogeneous tuples: * Tuple construction. * Tuple unpacking. * Comparison between tuples. * Iteration and indexing. * Addition (concatenation) between tuples. * Slicing tuples with a constant slice. * The index method on tuples. heterogeneous tuples -------------------- An example of a heterogeneous tuple: .. code-block:: python heterogeneous_tuple = (1, 2j, 3.0, "a") The following operations are supported on heterogeneous tuples: * Comparison between tuples. * Indexing using an index value that is a compile time constant e.g. ``mytuple[7]``, where ``7`` is evidently a constant. * Iteration over a tuple (requires experimental :func:`literal_unroll` feature, see below). .. warning:: The following feature (:func:`literal_unroll`) is experimental and was added in version 0.47. To permit iteration over a heterogeneous tuple the special function :func:`numba.literal_unroll` must be used. This function has no effect other than to act as a token to permit the use of this feature. Example use: .. code-block:: python from numba import njit, literal_unroll @njit def foo(): heterogeneous_tuple = (1, 2j, 3.0, "a") for i in literal_unroll(heterogeneous_tuple): print(i) .. warning:: The following restrictions apply to the use of :func:`literal_unroll`: * :func:`literal_unroll` can only be used on tuples and constant lists of compile time constants, e.g. ``[1, 2j, 3, "a"]`` and the list not being mutated. * The only supported use pattern for :func:`literal_unroll` is loop iteration. * Only one :func:`literal_unroll` call is permitted per loop nest (i.e. nested heterogeneous tuple iteration loops are forbidden). * The usual type inference/stability rules still apply. A more involved use of :func:`literal_unroll` might be type specific dispatch, recall that string and integer literal values are considered their own type, for example: .. 
.. code-block:: python

   from numba import njit, types, literal_unroll
   from numba.extending import overload

   def dt(x):
       # dummy function to overload
       pass

   @overload(dt, inline='always')
   def ol_dt(li):
       if isinstance(li, types.StringLiteral):
           value = li.literal_value
           if value == "apple":
               def impl(li):
                   return 1
           elif value == "orange":
               def impl(li):
                   return 2
           elif value == "banana":
               def impl(li):
                   return 3
           return impl
       elif isinstance(li, types.IntegerLiteral):
           value = li.literal_value
           if value == 0xca11ab1e:
               def impl(li):
                   # capture the dispatcher literal value
                   return 0x5ca1ab1e + value
               return impl

   @njit
   def foo():
       acc = 0
       for t in literal_unroll(('apple', 'orange', 'banana', 3390155550)):
           acc += dt(t)
       return acc

   print(foo())

list
----

.. warning:: As of version 0.45.x the internal implementation for the list
   datatype in Numba is changing. Until recently, only a single
   implementation of the list datatype was available, the so-called
   *reflected-list* (see below). However, it was scheduled for deprecation
   from version 0.44.0 onwards due to its limitations. As of version
   0.45.0 a new implementation, the so-called *typed-list* (see below), is
   available as an experimental feature. For more information, please see:
   :ref:`deprecation`.

Creating and returning lists from JIT-compiled functions is supported, as
well as all methods and operations. Lists must be strictly homogeneous:
Numba will reject any list containing objects of different types, even if
the types are compatible (for example, ``[1, 2.5]`` is rejected as it
contains a :class:`int` and a :class:`float`). For example, to create a
list of arrays::

   In [1]: from numba import njit

   In [2]: import numpy as np

   In [3]: @njit
      ...: def foo(x):
      ...:     lst = []
      ...:     for i in range(x):
      ...:         lst.append(np.arange(i))
      ...:     return lst
      ...:

   In [4]: foo(4)
   Out[4]: [array([], dtype=int64), array([0]), array([0, 1]), array([0, 1, 2])]

.. _feature-reflected-list:

List Reflection
'''''''''''''''

In nopython mode, Numba does not operate on Python objects. ``list``\ s
are compiled into an internal representation. Any ``list`` arguments must
be converted into this representation on the way into nopython mode and
their contained elements must be restored in the original Python objects
via a process called :term:`reflection`. Reflection is required to
maintain the same semantics as found in regular Python code. However, the
reflection process can be expensive for large lists and it is not
supported for lists that contain reflected data types. Users cannot use
list-of-list as an argument because of this limitation.

.. note:: When passing a list into a JIT-compiled function, any
   modifications made to the list will not be visible to the Python
   interpreter until the function returns. (A limitation of the reflection
   process.)

.. warning:: List sorting currently uses a quicksort algorithm, which has
   different performance characteristics than the algorithm used by
   Python.

.. _feature-list-initial-value:

Initial Values
''''''''''''''

.. warning:: This is an experimental feature!

Lists that:

* Are constructed using the square braces syntax
* Have values of a literal type

will have their initial value stored in the ``.initial_value`` property on
the type so as to permit inspection of these values at compile time. If
required, to force value based dispatch the :ref:`literally ` function
will accept such a list. Example:
.. literalinclude:: ../../../numba/tests/doc_examples/test_literal_container_usage.py
   :language: python
   :caption: from ``test_ex_initial_value_list_compile_time_consts`` of ``numba/tests/doc_examples/test_literal_container_usage.py``
   :start-after: magictoken.test_ex_initial_value_list_compile_time_consts.begin
   :end-before: magictoken.test_ex_initial_value_list_compile_time_consts.end
   :dedent: 12
   :linenos:

.. _feature-typed-list:

Typed List
''''''''''

.. note:: ``numba.typed.List`` is an experimental feature; if you
   encounter any bugs in functionality or suffer from unexpectedly bad
   performance, please report this, ideally by opening an issue on the
   Numba issue tracker.

As of version 0.45.0 a new implementation of the list data type is
available, the so-called *typed-list*. This is a compiled, library-backed,
type-homogeneous list data type that is an improvement over the
*reflected-list* mentioned above. Additionally, lists can now be
arbitrarily nested. Since the implementation is considered experimental,
you will need to import it explicitly from the `numba.typed` module::

   In [1]: from numba.typed import List

   In [2]: from numba import njit

   In [3]: @njit
      ...: def foo(l):
      ...:     l.append(23)
      ...:     return l
      ...:

   In [4]: mylist = List()

   In [5]: mylist.append(1)

   In [6]: foo(mylist)
   Out[6]: ListType[int64]([1, 23])

.. note:: As the typed-list stabilizes it will fully replace the
   reflected-list and the constructors `[]` and `list()` will create a
   typed-list instead of a reflected one.

Here's an example using ``List()`` to create ``numba.typed.List`` inside a
jit-compiled function and letting the compiler infer the item type:

.. literalinclude:: ../../../numba/tests/doc_examples/test_typed_list_usage.py
   :language: python
   :caption: from ``ex_inferred_list_jit`` of ``numba/tests/doc_examples/test_typed_list_usage.py``
   :start-after: magictoken.ex_inferred_list_jit.begin
   :end-before: magictoken.ex_inferred_list_jit.end
   :dedent: 12
   :linenos:

Here's an example of using ``List()`` to create a ``numba.typed.List``
outside of a jit-compiled function and then using it as an argument to a
jit-compiled function:

.. literalinclude:: ../../../numba/tests/doc_examples/test_typed_list_usage.py
   :language: python
   :caption: from ``ex_inferred_list`` of ``numba/tests/doc_examples/test_typed_list_usage.py``
   :start-after: magictoken.ex_inferred_list.begin
   :end-before: magictoken.ex_inferred_list.end
   :dedent: 12
   :linenos:

Finally, here's an example of using a nested `List()`:

.. literalinclude:: ../../../numba/tests/doc_examples/test_typed_list_usage.py
   :language: python
   :caption: from ``ex_nested_list`` of ``numba/tests/doc_examples/test_typed_list_usage.py``
   :start-after: magictoken.ex_nested_list.begin
   :end-before: magictoken.ex_nested_list.end
   :dedent: 12
   :linenos:

.. _feature-literal-list:

Literal List
''''''''''''

.. warning:: This is an experimental feature!

Numba supports the use of literal lists containing any values, for
example::

   l = ['a', 1, 2j, np.zeros(5,)]

the predominant use of these lists is for use as a configuration object.
The lists appear as a ``LiteralList`` type which inherits from
``Literal``, as a result the literal values of the list items are
available at compile time. For example:
.. literalinclude:: ../../../numba/tests/doc_examples/test_literal_container_usage.py
   :language: python
   :caption: from ``test_ex_literal_list`` of ``numba/tests/doc_examples/test_literal_container_usage.py``
   :start-after: magictoken.test_ex_literal_list.begin
   :end-before: magictoken.test_ex_literal_list.end
   :dedent: 12
   :linenos:

Important things to note about these kinds of lists:

#. They are immutable; use of mutating methods, e.g. ``.pop()``, will
   result in compilation failure. Read-only static access and read-only
   methods are supported, e.g. ``len()``.
#. Dynamic access of items is not possible, e.g. ``some_list[x]``, for a
   value ``x`` which is not a compile time constant. This is because it's
   impossible to statically determine the type of the item being accessed.
#. Inside the compiler, these lists are actually just tuples with some
   extra things added to make them look like they are lists.
#. They cannot be returned to the interpreter from a compiled function.

.. _pysupported-comprehension:

List comprehension
''''''''''''''''''

Numba supports list comprehension. For example::

   In [1]: from numba import njit

   In [2]: @njit
      ...: def foo(x):
      ...:     return [[i for i in range(n)] for n in range(x)]
      ...:

   In [3]: foo(3)
   Out[3]: [[], [0], [0, 1]]

.. note:: Prior to version 0.39.0, Numba did not support the creation of
   nested lists.

Numba also supports "array comprehension" that is a list comprehension
followed immediately by a call to :func:`numpy.array`. The following is an
example that produces a 2D Numpy array::

   from numba import jit
   import numpy as np

   @jit(nopython=True)
   def f(n):
       return np.array([ [ x * y for x in range(n) ] for y in range(n) ])

In this case, Numba is able to optimize the program to allocate and
initialize the result array directly without allocating intermediate list
objects. Therefore, the nesting of list comprehension here is not a
problem since a multi-dimensional array is being created here instead of a
nested list.

Additionally, Numba supports parallel array comprehension when combined
with the :ref:`parallel_jit_option` option on CPUs.

set
---

All methods and operations on sets are supported in JIT-compiled
functions.

Sets must be strictly homogeneous: Numba will reject any set containing
objects of different types, even if the types are compatible (for example,
``{1, 2.5}`` is rejected as it contains a :class:`int` and a
:class:`float`). The use of reference counted types, e.g. strings, in sets
is unsupported.

.. note:: When passing a set into a JIT-compiled function, any
   modifications made to the set will not be visible to the Python
   interpreter until the function returns.

.. _feature-typed-dict:

Typed Dict
----------

.. warning:: ``numba.typed.Dict`` is an experimental feature. The API may
   change in the future releases.

.. note:: ``dict()`` was not supported in versions prior to 0.44.
   Currently, calling ``dict()`` translates to calling
   ``numba.typed.Dict()``.

Numba only supports the use of ``dict()`` without any arguments. Such use
is semantically equivalent to ``{}`` and ``numba.typed.Dict()``. It will
create an instance of ``numba.typed.Dict`` where the key-value types will
be later inferred by usage.

Numba does not fully support the Python ``dict`` because it is an untyped
container that can have any Python types as members. To generate efficient
machine code, Numba needs the keys and the values of the dictionary to
have fixed types, declared in advance.
To achieve this, Numba has a typed dictionary, ``numba.typed.Dict``, for
which the type-inference mechanism must be able to infer the key-value
types by use, or the user must explicitly declare the key-value type using
the ``Dict.empty()`` constructor method. This typed dictionary has the
same API as the Python ``dict``; it implements the
``collections.MutableMapping`` interface and is usable in both interpreted
Python code and JIT-compiled Numba functions.

Because the typed dictionary stores keys and values in Numba's native,
unboxed data layout, passing a Numba dictionary into nopython mode has
very low overhead. However, this means that using a typed dictionary from
the Python interpreter is slower than a regular dictionary because Numba
has to box and unbox key and value objects when getting or setting items.

An important difference of the typed dictionary in comparison to Python's
``dict`` is that **implicit casting** occurs when a key or value is
stored. As a result the *setitem* operation may fail should the
type-casting fail.

It should be noted that the Numba typed dictionary is implemented using
the same algorithm as the CPython 3.7 dictionary. As a consequence, the
typed dictionary is ordered and has the same collision resolution as the
CPython implementation.

Further to the above, in relation to type specification, there are
limitations placed on the types that can be used as keys and/or values in
the typed dictionary, most notably the Numba ``Set`` and ``List`` types
are currently unsupported. Acceptable key/value types include but are not
limited to: unicode strings, arrays (value only), scalars, tuples. It is
expected that these limitations will be relaxed as Numba continues to
improve.

Here's an example of using ``dict()`` and ``{}`` to create
``numba.typed.Dict`` instances and letting the compiler infer the
key-value types:

.. literalinclude:: ../../../numba/tests/doc_examples/test_typed_dict_usage.py
   :language: python
   :caption: from ``test_ex_inferred_dict_njit`` of ``numba/tests/doc_examples/test_typed_dict_usage.py``
   :start-after: magictoken.ex_inferred_dict_njit.begin
   :end-before: magictoken.ex_inferred_dict_njit.end
   :dedent: 12
   :linenos:

Here's an example of creating a ``numba.typed.Dict`` instance from
interpreted code and using the dictionary in jit code:

.. literalinclude:: ../../../numba/tests/doc_examples/test_typed_dict_usage.py
   :language: python
   :caption: from ``test_ex_typed_dict_from_cpython`` of ``numba/tests/doc_examples/test_typed_dict_usage.py``
   :start-after: magictoken.ex_typed_dict_from_cpython.begin
   :end-before: magictoken.ex_typed_dict_from_cpython.end
   :dedent: 12
   :linenos:

Here's an example of creating a ``numba.typed.Dict`` instance from jit
code and using the dictionary in interpreted code:

.. literalinclude:: ../../../numba/tests/doc_examples/test_typed_dict_usage.py
   :language: python
   :caption: from ``test_ex_typed_dict_njit`` of ``numba/tests/doc_examples/test_typed_dict_usage.py``
   :start-after: magictoken.ex_typed_dict_njit.begin
   :end-before: magictoken.ex_typed_dict_njit.end
   :dedent: 12
   :linenos:

It should be noted that ``numba.typed.Dict`` is not thread-safe.
Specifically, functions which modify a dictionary from multiple threads
will potentially corrupt memory, causing a range of possible failures.
However, the dictionary can be safely read from multiple threads as long
as the contents of the dictionary do not change during the parallel
access.
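For instance, a minimal sketch of declaring the key-value types explicitly
with ``Dict.empty()`` (the key name and function are illustrative)::

   import numpy as np
   from numba import njit, types
   from numba.typed import Dict

   # Declare key and value types up front.
   d = Dict.empty(key_type=types.unicode_type, value_type=types.float64[:])

   @njit
   def fill(d):
       d["posx"] = np.arange(3, dtype=np.float64)
       return d

   print(fill(d))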
Dictionary comprehension '''''''''''''''''''''''' Numba supports dictionary comprehension under the assumption that a ``numba.typed.Dict`` instance can be created from the comprehension. For example:: In [1]: from numba import njit In [2]: @njit ...: def foo(n): ...: return {i: i**2 for i in range(n)} ...: In [3]: foo(3) Out[3]: DictType[int64,int64]({0: 0, 1: 1, 2: 4}) .. _feature-dict-initial-value: Initial Values '''''''''''''' .. warning:: This is an experimental feature! Typed dictionaries that: * Are constructed using the curly braces syntax * Have literal string keys * Have values of a literal type will have their initial value stored in the ``.initial_value`` property on the type so as to permit inspection of these values at compile time. If required, to force value-based dispatch, the :ref:`literally ` function will accept a typed dictionary. Example: .. literalinclude:: ../../../numba/tests/doc_examples/test_literal_container_usage.py :language: python :caption: from ``test_ex_initial_value_dict_compile_time_consts`` of ``numba/tests/doc_examples/test_literal_container_usage.py`` :start-after: magictoken.test_ex_initial_value_dict_compile_time_consts.begin :end-before: magictoken.test_ex_initial_value_dict_compile_time_consts.end :dedent: 12 :linenos: .. _feature-literal-str-key-dict: Heterogeneous Literal String Key Dictionary ------------------------------------------- .. warning:: This is an experimental feature! Numba supports the use of statically declared dictionaries with string keys mapping to values of any type, for example:: d = {'a': 1, 'b': 'data', 'c': 2j} The predominant use of these dictionaries is to orchestrate advanced compilation dispatch or as a container for use as a configuration object. The dictionaries appear as a ``LiteralStrKeyDict`` type, which inherits from ``Literal``; as a result, the literal values of the keys and the types of the items are available at compile time. For example: .. literalinclude:: ../../../numba/tests/doc_examples/test_literal_container_usage.py :language: python :caption: from ``test_ex_literal_dict_compile_time_consts`` of ``numba/tests/doc_examples/test_literal_container_usage.py`` :start-after: magictoken.test_ex_literal_dict_compile_time_consts.begin :end-before: magictoken.test_ex_literal_dict_compile_time_consts.end :dedent: 12 :linenos: Important things to note about these kinds of dictionaries: #. They are immutable; use of mutating methods, e.g. ``.pop()``, will result in a compilation failure. Read-only static access and read-only methods are supported, e.g. ``len()``. #. Dynamic access of items is not possible, e.g. ``some_dictionary[x]``, for a value ``x`` which is not a compile-time constant. This is because it's impossible to statically determine the type of the item being accessed. #. Inside the compiler, these dictionaries are actually just named tuples with some extra things added to make them look like they are dictionaries. #. They cannot be returned to the interpreter from a compiled function. #. The ``.keys()``, ``.values()`` and ``.items()`` methods all functionally operate, but return tuples as opposed to iterables. None ---- The None value is supported for identity testing (when using an :class:`~numba.optional` type). bytes, bytearray, memoryview ---------------------------- The :class:`bytearray` and :class:`bytes` types support indexing, iteration and retrieving the len().
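For example, a minimal sketch (an illustration of the above, not taken from the reference itself)::

    from numba import njit

    @njit
    def first_and_len(b):
        # Indexing yields an integer; len() is supported too.
        return b[0], len(b)

    print(first_and_len(b'abc'))              # (97, 3)
    print(first_and_len(bytearray(b'abc')))   # (97, 3)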
The :class:`memoryview` type supports indexing, slicing, iteration, retrieving the len(), and also the following attributes: * :attr:`~memoryview.contiguous` * :attr:`~memoryview.c_contiguous` * :attr:`~memoryview.f_contiguous` * :attr:`~memoryview.itemsize` * :attr:`~memoryview.nbytes` * :attr:`~memoryview.ndim` * :attr:`~memoryview.readonly` * :attr:`~memoryview.shape` * :attr:`~memoryview.strides` Built-in functions ================== The following built-in functions are supported: .. warning:: Support for ``isinstance`` is an experimental feature. This feature is automatically enabled by simply using ``isinstance`` in JIT compiled code. * :func:`abs` * :class:`bool` * :func:`chr` * :class:`complex` * :func:`divmod` * :func:`enumerate` * :func:`filter` * :class:`float` * :func:`hash` (see :ref:`pysupported-hashing` below) * :class:`int`: only the one-argument form * :func:`iter`: only the one-argument form * :func:`isinstance` (experimental support only) * :func:`len` * :func:`min` * :func:`map` * :func:`max` * :func:`next`: only the one-argument form * :func:`ord` * :func:`print`: only numbers and strings; no ``file`` or ``sep`` argument * :class:`range`: The only permitted use of range is as a callable function (cannot pass range as an argument to a jitted function or return a range from a jitted function). * :func:`round` * :func:`sorted`: the ``key`` argument is not supported * :func:`sum` * :func:`type`: only the one-argument form, and only on some types (e.g. numbers and named tuples) * :func:`zip` .. _pysupported-hashing: Hashing ------- The :func:`hash` built-in is supported and produces hash values for all supported hashable types with the following Python version specific behavior: Under Python 3, hash values computed by Numba will exactly match those computed in CPython under the condition that the :attr:`sys.hash_info.algorithm` is ``siphash24`` (default). The ``PYTHONHASHSEED`` environment variable influences the hashing behavior in precisely the manner described in the CPython documentation. Standard library modules ======================== ``array`` --------- Limited support for the :class:`array.array` type is provided through the buffer protocol. Indexing, iteration and taking the len() is supported. All type codes are supported except for ``"u"``. ``cmath`` --------- The following functions from the :mod:`cmath` module are supported: * :func:`cmath.acos` * :func:`cmath.acosh` * :func:`cmath.asin` * :func:`cmath.asinh` * :func:`cmath.atan` * :func:`cmath.atanh` * :func:`cmath.cos` * :func:`cmath.cosh` * :func:`cmath.exp` * :func:`cmath.isfinite` * :func:`cmath.isinf` * :func:`cmath.isnan` * :func:`cmath.log` * :func:`cmath.log10` * :func:`cmath.phase` * :func:`cmath.polar` * :func:`cmath.rect` * :func:`cmath.sin` * :func:`cmath.sinh` * :func:`cmath.sqrt` * :func:`cmath.tan` * :func:`cmath.tanh` ``collections`` --------------- Named tuple classes, as returned by :func:`collections.namedtuple`, are supported in the same way regular tuples are supported. Attribute access and named parameters in the constructor are also supported. Creating a named tuple class inside Numba code is *not* supported; the class must be created at the global level. .. 
_ctypes-support: ``ctypes`` ---------- Numba is able to call ctypes-declared functions with the following argument and return types: * :class:`ctypes.c_int8` * :class:`ctypes.c_int16` * :class:`ctypes.c_int32` * :class:`ctypes.c_int64` * :class:`ctypes.c_uint8` * :class:`ctypes.c_uint16` * :class:`ctypes.c_uint32` * :class:`ctypes.c_uint64` * :class:`ctypes.c_float` * :class:`ctypes.c_double` * :class:`ctypes.c_void_p` ``enum`` -------- Both :class:`enum.Enum` and :class:`enum.IntEnum` subclasses are supported. ``math`` -------- The following functions from the :mod:`math` module are supported: * :func:`math.acos` * :func:`math.acosh` * :func:`math.asin` * :func:`math.asinh` * :func:`math.atan` * :func:`math.atan2` * :func:`math.atanh` * :func:`math.ceil` * :func:`math.copysign` * :func:`math.cos` * :func:`math.cosh` * :func:`math.degrees` * :func:`math.erf` * :func:`math.erfc` * :func:`math.exp` * :func:`math.expm1` * :func:`math.fabs` * :func:`math.floor` * :func:`math.frexp` * :func:`math.gamma` * :func:`math.gcd` * :func:`math.hypot` * :func:`math.isfinite` * :func:`math.isinf` * :func:`math.isnan` * :func:`math.ldexp` * :func:`math.lgamma` * :func:`math.log` * :func:`math.log10` * :func:`math.log1p` * :func:`math.pow` * :func:`math.radians` * :func:`math.sin` * :func:`math.sinh` * :func:`math.sqrt` * :func:`math.tan` * :func:`math.tanh` * :func:`math.trunc` ``operator`` ------------ The following functions from the :mod:`operator` module are supported: * :func:`operator.add` * :func:`operator.and_` * :func:`operator.eq` * :func:`operator.floordiv` * :func:`operator.ge` * :func:`operator.gt` * :func:`operator.iadd` * :func:`operator.iand` * :func:`operator.ifloordiv` * :func:`operator.ilshift` * :func:`operator.imatmul` (Python 3.5 and above) * :func:`operator.imod` * :func:`operator.imul` * :func:`operator.invert` * :func:`operator.ior` * :func:`operator.ipow` * :func:`operator.irshift` * :func:`operator.isub` * :func:`operator.itruediv` * :func:`operator.ixor` * :func:`operator.le` * :func:`operator.lshift` * :func:`operator.lt` * :func:`operator.matmul` (Python 3.5 and above) * :func:`operator.mod` * :func:`operator.mul` * :func:`operator.ne` * :func:`operator.neg` * :func:`operator.not_` * :func:`operator.or_` * :func:`operator.pos` * :func:`operator.pow` * :func:`operator.rshift` * :func:`operator.sub` * :func:`operator.truediv` * :func:`operator.xor` ``functools`` ------------- The :func:`functools.reduce` function is supported but the `initializer` argument is required. .. _pysupported-random: ``random`` ---------- Numba supports top-level functions from the :mod:`random` module, but does not allow you to create individual Random instances. A Mersenne-Twister generator is used, with a dedicated internal state. It is initialized at startup with entropy drawn from the operating system. 
* :func:`random.betavariate` * :func:`random.expovariate` * :func:`random.gammavariate` * :func:`random.gauss` * :func:`random.getrandbits`: number of bits must not be greater than 64 * :func:`random.lognormvariate` * :func:`random.normalvariate` * :func:`random.paretovariate` * :func:`random.randint` * :func:`random.random` * :func:`random.randrange` * :func:`random.seed`: with an integer argument only * :func:`random.shuffle`: the sequence argument must be a one-dimensional Numpy array or buffer-providing object (such as a :class:`bytearray` or :class:`array.array`); the second (optional) argument is not supported * :func:`random.uniform` * :func:`random.triangular` * :func:`random.vonmisesvariate` * :func:`random.weibullvariate` .. warning:: Calling :func:`random.seed` from non-Numba code (or from :term:`object mode` code) will seed the Python random generator, not the Numba random generator. To seed the Numba random generator, see the example below. .. code-block:: python from numba import njit import random @njit def seed(a): random.seed(a) @njit def rand(): return random.random() # Incorrect seeding random.seed(1234) print(rand()) random.seed(1234) print(rand()) # Correct seeding seed(1234) print(rand()) seed(1234) print(rand()) .. note:: Since version 0.28.0, the generator is thread-safe and fork-safe. Each thread and each process will produce independent streams of random numbers. .. seealso:: Numba also supports most additional distributions from the :ref:`Numpy random module `. ``heapq`` --------- The following functions from the :mod:`heapq` module are supported: * :func:`heapq.heapify` * :func:`heapq.heappop` * :func:`heapq.heappush` * :func:`heapq.heappushpop` * :func:`heapq.heapreplace` * :func:`heapq.nlargest` : first two arguments only * :func:`heapq.nsmallest` : first two arguments only Note: the heap must be seeded with at least one value to allow its type to be inferred; heap items are assumed to be homogeneous in type. Third-party modules =================== .. I put this here as there's only one module (apart from Numpy), otherwise it should be a separate page. .. _cffi-support: ``cffi`` -------- Similarly to ctypes, Numba is able to call into `cffi`_-declared external functions, using the following C types and any derived pointer types: * :c:type:`char` * :c:type:`short` * :c:type:`int` * :c:type:`long` * :c:type:`long long` * :c:type:`unsigned char` * :c:type:`unsigned short` * :c:type:`unsigned int` * :c:type:`unsigned long` * :c:type:`unsigned long long` * :c:type:`int8_t` * :c:type:`uint8_t` * :c:type:`int16_t` * :c:type:`uint16_t` * :c:type:`int32_t` * :c:type:`uint32_t` * :c:type:`int64_t` * :c:type:`uint64_t` * :c:type:`float` * :c:type:`double` * :c:type:`ssize_t` * :c:type:`size_t` * :c:type:`void` The ``from_buffer()`` method of ``cffi.FFI`` and ``CompiledFFI`` objects is supported for passing Numpy arrays and other buffer-like objects. Only *contiguous* arguments are accepted. The argument to ``from_buffer()`` is converted to a raw pointer of the appropriate C type (for example a ``double *`` for a ``float64`` array). Additional type mappings for the conversion from a buffer to the appropriate C type may be registered with Numba. This may include struct types, though it is only permitted to call functions that accept pointers to structs; passing a struct by value is unsupported. For registering a mapping, use: ..
function:: numba.core.typing.cffi_utils.register_type(cffi_type, numba_type) Out-of-line cffi modules must be registered with Numba prior to the use of any of their functions from within Numba-compiled functions: .. function:: numba.core.typing.cffi_utils.register_module(mod) Register the cffi out-of-line module ``mod`` with Numba. Inline cffi modules require no registration. .. _cffi: https://cffi.readthedocs.org/ numba-0.55.1/docs/source/reference/types.rst000664 000000 000000 00000031732 14174536160 020765 0ustar00rootroot000000 000000 .. _numba-types: ==================== Types and signatures ==================== Rationale ========= As an optimizing compiler, Numba needs to decide on the type of each variable to generate efficient machine code. Python's standard types are not precise enough for that, so we had to develop our own fine-grained type system. You will encounter Numba types mainly when trying to inspect the results of Numba's type inference, for :ref:`debugging ` or :ref:`educational ` purposes. However, you need to use types explicitly if compiling code :ref:`ahead-of-time `. Signatures ========== A signature specifies the type of a function. Exactly which kind of signature is allowed depends on the context (:term:`AOT` or :term:`JIT` compilation), but signatures always involve some representation of Numba types to specify the concrete types for the function's arguments and, if required, the function's return type. An example function signature would be the string ``"f8(i4, i4)"`` (or the equivalent ``"float64(int32, int32)"``) which specifies a function taking two 32-bit integers and returning a double-precision float. Basic types =========== The most basic types can be expressed through simple expressions. The symbols below refer to attributes of the main ``numba`` module (so if you read "boolean", it means that symbol can be accessed as ``numba.boolean``). Many types are available both as a canonical name and a shorthand alias, following Numpy's conventions. Numbers ------- The following table contains the elementary numeric types currently defined by Numba and their aliases. =================== ========= =================================== Type name(s) Shorthand Comments =================== ========= =================================== boolean b1 represented as a byte uint8, byte u1 8-bit unsigned byte uint16 u2 16-bit unsigned integer uint32 u4 32-bit unsigned integer uint64 u8 64-bit unsigned integer int8, char i1 8-bit signed byte int16 i2 16-bit signed integer int32 i4 32-bit signed integer int64 i8 64-bit signed integer intc -- C int-sized integer uintc -- C int-sized unsigned integer intp -- pointer-sized integer uintp -- pointer-sized unsigned integer ssize_t -- C ssize_t size_t -- C size_t float32 f4 single-precision floating-point number float64, double f8 double-precision floating-point number complex64 c8 single-precision complex number complex128 c16 double-precision complex number =================== ========= =================================== Arrays ------ The easy way to declare :class:`~numba.types.Array` types is to subscript an elementary type according to the number of dimensions. 
For example, a 1-dimensional single-precision array:: >>> numba.float32[:] array(float32, 1d, A) or a 3-dimensional array of the same underlying type:: >>> numba.float32[:, :, :] array(float32, 3d, A) This syntax defines array types with no particular layout (producing code that accepts both non-contiguous and contiguous arrays), but you can specify a particular contiguity by using the ``::1`` index either at the beginning or the end of the index specification:: >>> numba.float32[::1] array(float32, 1d, C) >>> numba.float32[:, :, ::1] array(float32, 3d, C) >>> numba.float32[::1, :, :] array(float32, 3d, F) Functions --------- .. warning:: The feature of considering functions as first-class type objects is under development. Functions are often considered as transformations of input arguments to output values. Within Numba :term:`JIT` compiled functions, the functions can also be considered as objects, that is, functions can be passed around as arguments or return values, or used as items in sequences, in addition to being callable. First-class function support is enabled for all Numba :term:`JIT` compiled functions and Numba ``cfunc`` compiled functions except when: - using a non-CPU compiler, - the compiled function is a Python generator, - the compiled function has ``Omitted`` arguments, - or the compiled function returns an ``Optional`` value. To disable first-class function support, use the ``no_cfunc_wrapper=True`` decorator option. For instance, consider an example where the Numba :term:`JIT` compiled function applies user-specified functions as a composition to an input argument:: >>> @numba.njit ... def composition(funcs, x): ... r = x ... for f in funcs[::-1]: ... r = f(r) ... return r ... >>> @numba.cfunc("double(double)") ... def a(x): ... return x + 1.0 ... >>> @numba.njit ... def b(x): ... return x * x ... >>> composition((a, b), 0.5), 0.5 ** 2 + 1 (1.25, 1.25) >>> composition((b, a, b, b, a), 0.5), b(a(b(b(a(0.5))))) (36.75390625, 36.75390625) Here, ``cfunc`` compiled functions ``a`` and ``b`` are considered as first-class function objects because these are passed in to the Numba :term:`JIT` compiled function ``composition`` as arguments, that is, the ``composition`` is :term:`JIT` compiled independently from its argument function objects (that are collected in the input argument ``funcs``). Currently, first-class function objects can be Numba ``cfunc`` compiled functions, :term:`JIT` compiled functions, and objects that implement the Wrapper Address Protocol (WAP, see below) with the following restrictions: ======================== ============ ============== =========== Context JIT compiled cfunc compiled WAP objects ======================== ============ ============== =========== Can be used as arguments yes yes yes Can be called yes yes yes Can be used as items yes\* yes yes Can be returned yes yes yes Namespace scoping yes yes yes Automatic overload yes no no ======================== ============ ============== =========== \* at least one of the items in a sequence of first-class function objects must have a precise type. Wrapper Address Protocol - WAP ++++++++++++++++++++++++++++++ Wrapper Address Protocol provides an API for making any Python object a first-class function for Numba :term:`JIT` compiled functions. This assumes that the Python object represents a compiled function that can be called via its memory address (function pointer value) from Numba :term:`JIT` compiled functions. The so-called WAP objects must define the following two methods: ..
method:: __wrapper_address__(self) -> int Return the memory address of a first-class function. This method is used when a Numba :term:`JIT` compiled function tries to call the given WAP instance. .. method:: signature(self) -> numba.typing.Signature Return the signature of the given first-class function. This method is used when passing in the given WAP instance to a Numba :term:`JIT` compiled function. In addition, the WAP object may implement the ``__call__`` method. This is necessary when calling WAP objects from Numba :term:`JIT` compiled functions in :term:`object mode`. As an example, let us call the standard math library function ``cos`` within a Numba :term:`JIT` compiled function. The memory address of ``cos`` can be established after loading the math library and using the ``ctypes`` package:: >>> import numba, ctypes, ctypes.util, math >>> libm = ctypes.cdll.LoadLibrary(ctypes.util.find_library('m')) >>> class LibMCos(numba.types.WrapperAddressProtocol): ... def __wrapper_address__(self): ... return ctypes.cast(libm.cos, ctypes.c_voidp).value ... def signature(self): ... return numba.float64(numba.float64) ... >>> @numba.njit ... def foo(f, x): ... return f(x) ... >>> foo(LibMCos(), 0.0) 1.0 >>> foo(LibMCos(), 0.5), math.cos(0.5) (0.8775825618903728, 0.8775825618903728) Miscellaneous Types ------------------- There are some non-numerical types that do not fit into the other categories. =================== ================================================= Type name(s) Comments =================== ================================================= pyobject generic Python object voidptr raw pointer, no operations can be performed on it =================== ================================================= Advanced types ============== For more advanced declarations, you have to explicitly call helper functions or classes provided by Numba. .. warning:: The APIs documented here are not guaranteed to be stable. Unless necessary, it is recommended to let Numba infer argument types by using the :ref:`signature-less variant of @jit `. .. A word of note: I only documented those types that can be genuinely useful to users, i.e. types that can be passed as parameters to a JIT function. Other types such as tuple are only usable in type inference. Inference --------- .. function:: numba.typeof(value) Create a Numba type accurately describing the given Python *value*. ``ValueError`` is raised if the value isn't supported in :term:`nopython mode`. :: >>> numba.typeof(np.empty(3)) array(float64, 1d, C) >>> numba.typeof((1, 2.0)) (int64, float64) >>> numba.typeof([0]) reflected list(int64) Numpy scalars ------------- Instead of using :func:`~numba.typeof`, non-trivial scalars such as structured types can also be constructed programmatically. .. function:: numba.from_dtype(dtype) Create a Numba type corresponding to the given Numpy *dtype*:: >>> struct_dtype = np.dtype([('row', np.float64), ('col', np.float64)]) >>> ty = numba.from_dtype(struct_dtype) >>> ty Record([('row', '<f8'), ('col', '<f8')]) >>> ty[:, :] unaligned array(Record([('row', '<f8'), ('col', '<f8')]), 2d, A) Arrays ------ .. class:: numba.types.Array(dtype, ndim, layout) Create an array type. *dtype* should be a Numba type. *ndim* is the number of dimensions of the array (a positive integer). *layout* is a string giving the layout of the array: ``A`` means any layout, ``C`` means C-contiguous and ``F`` means Fortran-contiguous. Optional types -------------- .. class:: numba.optional(typ) Create an optional type based on the underlying Numba type *typ*.
The optional type will allow any value of either *typ* or :const:`None`. :: >>> @jit((optional(intp),)) ... def f(x): ... return x is not None ... >>> f(0) True >>> f(None) False Type annotations ----------------- .. function:: numba.extending.as_numba_type(py_type) Create a Numba type corresponding to the given Python *type annotation*. ``TypingError`` is raised if the type annotation can't be mapped to a Numba type. This function is meant to be used statically, at compile time, to evaluate Python type annotations. For runtime checking of Python objects, see ``typeof`` above. For any numba type, ``as_numba_type(nb_type) == nb_type``. >>> numba.extending.as_numba_type(int) int64 >>> import typing # the Python library, not the Numba one >>> numba.extending.as_numba_type(typing.List[float]) ListType[float64] >>> numba.extending.as_numba_type(numba.int32) int32 ``as_numba_type`` is automatically updated to include any ``@jitclass``. >>> @jitclass ... class Counter: ... x: int ... ... def __init__(self): ... self.x = 0 ... ... def inc(self): ... old_val = self.x ... self.x += 1 ... return old_val ... >>> numba.extending.as_numba_type(Counter) instance.jitclass.Counter#11bad4278 Currently, ``as_numba_type`` is only used to infer fields for ``@jitclass``. numba-0.55.1/docs/source/reference/utils.rst000664 000000 000000 00000002027 14174536160 020754 0ustar00rootroot000000 000000 ========= Utilities ========= Dealing with pointers ===================== These functions can be called from pure Python as well as in :term:`nopython mode`. .. function:: numba.carray(ptr, shape, dtype=None) Return a Numpy array view over the data pointed to by *ptr* with the given *shape*, in C order. If *dtype* is given, it is used as the array's dtype, otherwise the array's dtype is inferred from *ptr*'s type. As the returned array is a view, not a copy, writing to it will modify the original data. *ptr* should be a ctypes pointer object (either a typed pointer as created using :func:`~ctypes.POINTER`, or a :class:`~ctypes.c_void_p`). *shape* should be an integer or a tuple of integers. *dtype* should be a Numpy dtype or scalar class (i.e. both ``np.dtype('int8')`` and ``np.int8`` are accepted). .. function:: numba.farray(ptr, shape, dtype=None) Same as :func:`~numba.carray`, but the data is assumed to be laid out in Fortran order, and the array view is constructed accordingly. numba-0.55.1/docs/source/release-notes.rst000664 000000 000000 00000000133 14174536160 020420 0ustar00rootroot000000 000000 ====================== Release Notes ====================== .. include:: ../../CHANGE_LOG numba-0.55.1/docs/source/user/000775 000000 000000 00000000000 14174536160 016101 5ustar00rootroot000000 000000 numba-0.55.1/docs/source/user/5minguide.rst000664 000000 000000 00000021772 14174536160 020526 0ustar00rootroot000000 000000 .. _numba-5_mins: A ~5 minute guide to Numba ========================== Numba is a just-in-time compiler for Python that works best on code that uses NumPy arrays and functions, and loops. The most common way to use Numba is through its collection of decorators that can be applied to your functions to instruct Numba to compile them. When a call is made to a Numba-decorated function it is compiled to machine code "just-in-time" for execution and all or part of your code can subsequently run at native machine code speed! Out of the box Numba works with the following: * OS: Windows (32 and 64 bit), OSX, Linux (32 and 64 bit). Unofficial support on \*BSD.
* Architecture: x86, x86_64, ppc64le, armv7l, armv8l (aarch64). Unofficial support on M1/Arm64. * GPUs: Nvidia CUDA. * CPython * NumPy 1.18 - latest How do I get it? ---------------- Numba is available as a `conda `_ package for the `Anaconda Python distribution `_:: $ conda install numba Numba also has wheels available:: $ pip install numba Numba can also be :ref:`compiled from source `, although we do not recommend it for first-time Numba users. Numba is often used as a core package so its dependencies are kept to an absolute minimum; however, extra packages can be installed as follows to provide additional functionality: * ``scipy`` - enables support for compiling ``numpy.linalg`` functions. * ``colorama`` - enables support for color highlighting in backtraces/error messages. * ``pyyaml`` - enables configuration of Numba via a YAML config file. * ``icc_rt`` - allows the use of the Intel SVML (high performance short vector math library, x86_64 only). Installation instructions are in the :ref:`performance tips `. Will Numba work for my code? ---------------------------- This depends on what your code looks like. If your code is numerically oriented (does a lot of math), uses NumPy a lot and/or has a lot of loops, then Numba is often a good choice. In these examples we'll apply the most fundamental of Numba's JIT decorators, ``@jit``, to try to speed up some functions to demonstrate what works well and what does not. Numba works well on code that looks like this:: from numba import jit import numpy as np x = np.arange(100).reshape(10, 10) @jit(nopython=True) # Set "nopython" mode for best performance, equivalent to @njit def go_fast(a): # Function is compiled to machine code when called the first time trace = 0.0 for i in range(a.shape[0]): # Numba likes loops trace += np.tanh(a[i, i]) # Numba likes NumPy functions return a + trace # Numba likes NumPy broadcasting print(go_fast(x)) It won't work very well, if at all, on code that looks like this:: from numba import jit import pandas as pd x = {'a': [1, 2, 3], 'b': [20, 30, 40]} @jit def use_pandas(a): # Function will not benefit from Numba jit df = pd.DataFrame.from_dict(a) # Numba doesn't know about pd.DataFrame df += 1 # Numba doesn't understand what this is return df.cov() # or this! print(use_pandas(x)) Note that Pandas is not understood by Numba; as a result, Numba would simply run this code via the interpreter, but with the added cost of Numba's internal overheads! What is ``nopython`` mode? -------------------------- The Numba ``@jit`` decorator fundamentally operates in two compilation modes, ``nopython`` mode and ``object`` mode. In the ``go_fast`` example above, ``nopython=True`` is set in the ``@jit`` decorator; this instructs Numba to operate in ``nopython`` mode. The behaviour of the ``nopython`` compilation mode is to essentially compile the decorated function so that it will run entirely without the involvement of the Python interpreter. This is the recommended and best-practice way to use the Numba ``jit`` decorator as it leads to the best performance. Should the compilation in ``nopython`` mode fail, Numba can compile using ``object mode``. This is a fallback mode for the ``@jit`` decorator if ``nopython=True`` is not set (as seen in the ``use_pandas`` example above). In this mode Numba will identify loops that it can compile and compile those into functions that run in machine code, and it will run the rest of the code in the interpreter. For best performance, avoid using this mode!
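To make the two modes concrete, here is a minimal sketch (not part of the original guide; the exact exception type and message vary by Numba version)::

    from numba import jit, njit

    @njit                      # equivalent to @jit(nopython=True)
    def summed(n):
        s = 0
        for i in range(n):
            s += i
        return s

    @jit(nopython=True)
    def wont_compile(x):
        return object()        # creating a raw Python object is unsupported

    print(summed(10))          # compiles and runs in nopython mode: 45

    try:
        wont_compile(1)
    except Exception as exc:   # Numba raises a typing error at compile time
        print(type(exc).__name__)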
How to measure the performance of Numba? ---------------------------------------- First, recall that Numba has to compile your function for the argument types given before it executes the machine code version of your function. This takes time. However, once the compilation has taken place Numba caches the machine code version of your function for the particular types of arguments presented. If it is called again with the same types, it can reuse the cached version instead of having to compile again. A really common mistake when measuring performance is to not account for the above behaviour and to time code once with a simple timer that includes the time taken to compile your function in the execution time. For example:: from numba import jit import numpy as np import time x = np.arange(100).reshape(10, 10) @jit(nopython=True) def go_fast(a): # Function is compiled and runs in machine code trace = 0.0 for i in range(a.shape[0]): trace += np.tanh(a[i, i]) return a + trace # DO NOT REPORT THIS... COMPILATION TIME IS INCLUDED IN THE EXECUTION TIME! start = time.time() go_fast(x) end = time.time() print("Elapsed (with compilation) = %s" % (end - start)) # NOW THE FUNCTION IS COMPILED, RE-TIME IT EXECUTING FROM CACHE start = time.time() go_fast(x) end = time.time() print("Elapsed (after compilation) = %s" % (end - start)) This, for example prints:: Elapsed (with compilation) = 0.33030009269714355 Elapsed (after compilation) = 6.67572021484375e-06 A good way to measure the impact Numba JIT has on your code is to time execution using the `timeit `_ module functions; these measure multiple iterations of execution and, as a result, can be made to accommodate for the compilation time in the first execution. As a side note, if compilation time is an issue, Numba JIT supports :ref:`on-disk caching ` of compiled functions and also has an :ref:`Ahead-Of-Time ` compilation mode. How fast is it? --------------- Assuming Numba can operate in ``nopython`` mode, or at least compile some loops, it will target compilation to your specific CPU. Speed up varies depending on application but can be one to two orders of magnitude. Numba has a :ref:`performance guide ` that covers common options for gaining extra performance. How does Numba work? -------------------- Numba reads the Python bytecode for a decorated function and combines this with information about the types of the input arguments to the function. It analyzes and optimizes your code, and finally uses the LLVM compiler library to generate a machine code version of your function, tailored to your CPU capabilities. This compiled version is then used every time your function is called. Other things of interest: ------------------------- Numba has quite a few decorators, we've seen ``@jit``, but there's also: * ``@njit`` - this is an alias for ``@jit(nopython=True)`` as it is so commonly used! * ``@vectorize`` - produces NumPy ``ufunc`` s (with all the ``ufunc`` methods supported). :ref:`Docs are here `. * ``@guvectorize`` - produces NumPy generalized ``ufunc`` s. :ref:`Docs are here `. * ``@stencil`` - declare a function as a kernel for a stencil like operation. :ref:`Docs are here `. * ``@jitclass`` - for jit aware classes. :ref:`Docs are here `. * ``@cfunc`` - declare a function for use as a native call back (to be called from C/C++ etc). :ref:`Docs are here `. * ``@overload`` - register your own implementation of a function for use in nopython mode, e.g. ``@overload(scipy.special.j0)``. :ref:`Docs are here `. 
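For instance, the ``@vectorize`` decorator from the list above can be used as follows (a minimal sketch; the ``rel_diff`` function is an illustrative example, not from the guide)::

    from numba import vectorize
    import numpy as np

    @vectorize(["float64(float64, float64)"])
    def rel_diff(x, y):
        return 2.0 * (x - y) / (x + y)

    a = np.arange(1.0, 5.0)
    print(rel_diff(a, a + 1.0))   # applies elementwise, like a NumPy ufunc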
Extra options available in some decorators: * ``parallel = True`` - :ref:`enable ` the :ref:`automatic parallelization ` of the function. * ``fastmath = True`` - enable :ref:`fast-math ` behaviour for the function. ctypes/cffi/cython interoperability: * ``cffi`` - The calling of :ref:`CFFI ` functions is supported in ``nopython`` mode. * ``ctypes`` - The calling of :ref:`ctypes ` wrapped functions is supported in ``nopython`` mode. * Cython exported functions :ref:`are callable `. GPU targets: ~~~~~~~~~~~~ Numba can target `Nvidia CUDA `_ GPUs. You can write a kernel in pure Python and have Numba handle the computation and data movement (or do this explicitly). Click for Numba documentation on :ref:`CUDA `. numba-0.55.1/docs/source/user/cfunc.rst000664 000000 000000 00000015244 14174536160 017737 0ustar00rootroot000000 000000 .. _cfunc: ==================================== Creating C callbacks with ``@cfunc`` ==================================== Interfacing with some native libraries (for example written in C or C++) can necessitate writing native callbacks to provide business logic to the library. The :func:`numba.cfunc` decorator creates a compiled function callable from foreign C code, using the signature of your choice. Basic usage =========== The ``@cfunc`` decorator has a similar usage to ``@jit``, but with an important difference: passing a single signature is mandatory. It determines the visible signature of the C callback:: from numba import cfunc @cfunc("float64(float64, float64)") def add(x, y): return x + y The C function object exposes the address of the compiled C callback as the :attr:`~CFunc.address` attribute, so that you can pass it to any foreign C or C++ library. It also exposes a :mod:`ctypes` callback object pointing to that callback; that object is also callable from Python, making it easy to check the compiled code:: @cfunc("float64(float64, float64)") def add(x, y): return x + y print(add.ctypes(4.0, 5.0)) # prints "9.0" Example ======= In this example, we are going to be using the ``scipy.integrate.quad`` function. That function accepts either a regular Python callback or a C callback wrapped in a :mod:`ctypes` callback object. Let's define a pure Python integrand and compile it as a C callback:: >>> import numpy as np >>> from numba import cfunc >>> def integrand(t): return np.exp(-t) / t**2 ...: >>> nb_integrand = cfunc("float64(float64)")(integrand) We can pass the ``nb_integrand`` object's :mod:`ctypes` callback to ``scipy.integrate.quad`` and check that the results are the same as with the pure Python function:: >>> import scipy.integrate as si >>> def do_integrate(func): """ Integrate the given function from 1.0 to +inf. """ return si.quad(func, 1, np.inf) ...: >>> do_integrate(integrand) (0.14849550677592208, 3.8736750296130505e-10) >>> do_integrate(nb_integrand.ctypes) (0.14849550677592208, 3.8736750296130505e-10) Using the compiled callback, the integration function does not invoke the Python interpreter each time it evaluates the integrand. In our case, the integration is made 18 times faster:: >>> %timeit do_integrate(integrand) 1000 loops, best of 3: 242 µs per loop >>> %timeit do_integrate(nb_integrand.ctypes) 100000 loops, best of 3: 13.5 µs per loop Dealing with pointers and array memory ====================================== A less trivial use case of C callbacks involves doing operation on some array of data passed by the caller. 
As C doesn't have a high-level abstraction similar to Numpy arrays, the C callback's signature will pass low-level pointer and size arguments. Nevertheless, the Python code for the callback will expect to exploit the power and expressiveness of Numpy arrays. In the following example, the C callback is expected to operate on 2-d arrays, with the signature ``void(double *input, double *output, int m, int n)``. You can implement such a callback as follows:: from numba import cfunc, types, carray c_sig = types.void(types.CPointer(types.double), types.CPointer(types.double), types.intc, types.intc) @cfunc(c_sig) def my_callback(in_, out, m, n): in_array = carray(in_, (m, n)) out_array = carray(out, (m, n)) for i in range(m): for j in range(n): out_array[i, j] = 2 * in_array[i, j] The :func:`numba.carray` function takes as input a data pointer and a shape and returns an array view of the given shape over that data. The data is assumed to be laid out in C order. If the data is laid out in Fortran order, :func:`numba.farray` should be used instead. Handling C structures ===================== With CFFI --------- For applications that have a lot of state, it is useful to pass data in C structures. To simplify the interoperability with C code, Numba can convert a ``cffi`` type into a Numba ``Record`` type using ``numba.core.typing.cffi_utils.map_type``:: from numba.core.typing import cffi_utils nbtype = cffi_utils.map_type(cffi_type, use_record_dtype=True) .. note:: **use_record_dtype=True** is needed, otherwise pointers to C structures are returned as void pointers. .. note:: From v0.49 the ``numba.cffi_support`` module has been phased out in favour of ``numba.core.typing.cffi_utils``. For example:: from cffi import FFI src = """ /* Define the C struct */ typedef struct my_struct { int i1; float f2; double d3; float af4[7]; // arrays are supported } my_struct; /* Define a callback function */ typedef double (*my_func)(my_struct*, size_t); """ ffi = FFI() ffi.cdef(src) # Get the function signature from *my_func* sig = cffi_utils.map_type(ffi.typeof('my_func'), use_record_dtype=True) # Make the cfunc from numba import cfunc, carray @cfunc(sig) def foo(ptr, n): base = carray(ptr, n) # view pointer as an array of my_struct tmp = 0 for i in range(n): tmp += base[i].i1 * base[i].f2 / base[i].d3 tmp += base[i].af4.sum() # nested arrays are like a normal NumPy array return tmp With ``numba.types.Record.make_c_struct`` ----------------------------------------- The ``numba.types.Record`` type can be created manually to follow a C-structure's layout. To do that, use ``Record.make_c_struct``, for example:: my_struct = types.Record.make_c_struct([ # Provides a sequence of 2-tuples i.e. (name:str, type:Type) ('i1', types.int32), ('f2', types.float32), ('d3', types.float64), ('af4', types.NestedArray(dtype=types.float32, shape=(7,))), ]) Due to ABI limitations, structures should be passed as pointers using ``types.CPointer(my_struct)`` as the argument type. Inside the ``cfunc`` body, the ``my_struct*`` can be accessed with ``carray``. Full example ------------ See the full example in ``examples/notebooks/Accessing C Struct Data.ipynb``. Signature specification ======================= The explicit ``@cfunc`` signature can use any :ref:`Numba types `, but only a subset of them makes sense for a C callback. You should generally limit yourself to scalar types (such as ``int8`` or ``float64``), pointers to them (for example ``types.CPointer(types.int8)``), or pointers to a ``Record`` type.
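As a compact illustration of such a signature, here is a minimal sketch following the guidance above (the ``mean`` function and its test values are hypothetical, not part of the original documentation)::

    import ctypes

    import numpy as np
    from numba import cfunc, types, carray

    # A signature built only from scalars and pointers.
    sig = types.float64(types.CPointer(types.float64), types.intc)

    @cfunc(sig)
    def mean(ptr, n):
        arr = carray(ptr, (n,))  # dtype is inferred from the pointer type
        return arr.sum() / n

    a = np.arange(4.0)
    # The ctypes wrapper makes the callback easy to exercise from Python.
    print(mean.ctypes(a.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), 4))  # 1.5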
Compilation options =================== A number of keyword-only arguments can be passed to the ``@cfunc`` decorator: ``nopython`` and ``cache``. Their meaning is similar to those in the ``@jit`` decorator. numba-0.55.1/docs/source/user/cli.rst000664 000000 000000 00000013705 14174536160 017410 0ustar00rootroot000000 000000 .. _cli: Command line interface ====================== Numba is a Python package, usually you ``import numba`` from Python and use the Python application programming interface (API). However, Numba also ships with a command line interface (CLI), i.e. a tool ``numba`` that is installed when you install Numba. Currently, the only purpose of the CLI is to allow you to quickly show some information about your system and installation, or to quickly get some debugging information for a Python script using Numba. .. _cli_usage: Usage ----- To use the Numba CLI from the terminal, use ``numba`` followed by the options and arguments like ``--help`` or ``-s``, as explained below. Sometimes it can happen that you get a "command not found" error when you type ``numba``, because your ``PATH`` isn't configured properly. In that case you can use the equivalent command ``python -m numba``. If that still gives "command not found", try to ``import numba`` as suggested here: :ref:`numba-source-install-check`. The two versions ``numba`` and ``python -m numba`` are the same. The first is shorter to type, but if you get a "command not found" error because your ``PATH`` doesn't contain the location where ``numba`` is installed, having the ``python -m numba`` variant is useful. To use the Numba CLI from IPython or Jupyter, use ``!numba``, i.e. prefix the command with an exclamation mark. This is a general IPython/Jupyter feature to execute shell commands, it is not available in the regular ``python`` terminal. .. _cli_help: Help ---- To see all available options, use ``numba --help``:: $ numba --help usage: numba [-h] [--annotate] [--dump-llvm] [--dump-optimized] [--dump-assembly] [--dump-cfg] [--dump-ast] [--annotate-html ANNOTATE_HTML] [-s] [filename] positional arguments: filename Python source filename optional arguments: -h, --help show this help message and exit --annotate Annotate source --dump-llvm Print generated llvm assembly --dump-optimized Dump the optimized llvm assembly --dump-assembly Dump the LLVM generated assembly --dump-cfg [Deprecated] Dump the control flow graph --dump-ast [Deprecated] Dump the AST --annotate-html ANNOTATE_HTML Output source annotation as html -s, --sysinfo Output system information for bug reporting .. _cli_sysinfo: System information ------------------ The ``numba -s`` (or the equivalent ``numba --sysinfo``) command prints a lot of information about your system and your Numba installation and relevant dependencies. Remember: you can use ``!numba -s`` with an exclamation mark to see this information from IPython or Jupyter. 
Example output:: $ numba -s System info: -------------------------------------------------------------------------------- __Time Stamp__ 2019-05-07 14:15:39.733994 __Hardware Information__ Machine : x86_64 CPU Name : haswell CPU count : 8 CPU Features : aes avx avx2 bmi bmi2 cmov cx16 f16c fma fsgsbase invpcid lzcnt mmx movbe pclmul popcnt rdrnd sahf sse sse2 sse3 sse4.1 sse4.2 ssse3 xsave xsaveopt __OS Information__ Platform : Darwin-18.5.0-x86_64-i386-64bit Release : 18.5.0 System Name : Darwin Version : Darwin Kernel Version 18.5.0: Mon Mar 11 20:40:32 PDT 2019; root:xnu-4903.251.3~3/RELEASE_X86_64 OS specific info : 10.14.4 x86_64 __Python Information__ Python Compiler : Clang 4.0.1 (tags/RELEASE_401/final) Python Implementation : CPython Python Version : 3.7.3 Python Locale : en_US UTF-8 __LLVM information__ LLVM version : 7.0.0 __CUDA Information__ CUDA driver library cannot be found or no CUDA enabled devices are present. Error class: __SVML Information__ SVML state, config.USING_SVML : False SVML library found and loaded : False llvmlite using SVML patched LLVM : True SVML operational : False __Threading Layer Information__ TBB Threading layer available : False +--> Disabled due to : Unknown import problem. OpenMP Threading layer available : False +--> Disabled due to : Unknown import problem. Workqueue Threading layer available : True __Numba Environment Variable Information__ None set. __Conda Information__ conda_build_version : 3.17.8 conda_env_version : 4.6.14 platform : osx-64 python_version : 3.7.3.final.0 root_writable : True __Current Conda Env__ (output truncated due to length) .. _cli_debug: Debugging --------- As shown in the help output above, the ``numba`` command includes options that can help you to debug Numba compiled code. To try it out, create an example script called ``myscript.py``:: import numba @numba.jit def f(x): return 2 * x f(42) and then execute one of the following commands:: $ numba myscript.py --annotate $ numba myscript.py --annotate-html myscript.html $ numba myscript.py --dump-llvm $ numba myscript.py --dump-optimized $ numba myscript.py --dump-assembly numba-0.55.1/docs/source/user/examples.rst000664 000000 000000 00000003000 14174536160 020442 0ustar00rootroot000000 000000 ======== Examples ======== Mandelbrot ---------- .. literalinclude:: ../../../numba/tests/doc_examples/test_examples.py :language: python :caption: from ``test_mandelbrot`` of ``numba/tests/doc_examples/test_examples.py`` :start-after: magictoken.ex_mandelbrot.begin :end-before: magictoken.ex_mandelbrot.end :dedent: 12 :linenos: .. _example-movemean: Moving average -------------- .. literalinclude:: ../../../numba/tests/doc_examples/test_examples.py :language: python :caption: from ``test_moving_average`` of ``numba/tests/doc_examples/test_examples.py`` :start-after: magictoken.ex_moving_average.begin :end-before: magictoken.ex_moving_average.end :dedent: 12 :linenos: Multi-threading --------------- The code below showcases the potential performance improvement when using the :ref:`nogil ` feature. For example, on a 4-core machine, the following results were printed:: numpy (1 thread) 145 ms numba (1 thread) 128 ms numba (4 threads) 35 ms .. note:: If preferred it's possible to use the standard `concurrent.futures `_ module rather than spawn threads and dispatch tasks by hand. .. 
literalinclude:: ../../../numba/tests/doc_examples/test_examples.py :language: python :caption: from ``test_no_gil`` of ``numba/tests/doc_examples/test_examples.py`` :start-after: magictoken.ex_no_gil.begin :end-before: magictoken.ex_no_gil.end :dedent: 12 :linenos: numba-0.55.1/docs/source/user/faq.rst000664 000000 000000 00000037644 14174536160 017410 0ustar00rootroot000000 000000 ========================== Frequently Asked Questions ========================== Installation ============ Numba could not be imported --------------------------- If you are seeing an exception on importing Numba with an error message that starts with:: ImportError: Numba could not be imported. here are some common issues and things to try to fix it. #. Your installation has more than one version of Numba in a given environment. Common ways this occurs include: * Installing Numba with conda and then installing again with pip. * Installing Numba with pip and then updating to a new version with pip (pip re-installations don't seem to always clean up very well). To fix this, the best approach is to create an entirely new environment and install a single version of Numba in that environment using a package manager of your choice. #. Your installation has Numba for Python version X but you are running with Python version Y. This occurs due to a variety of Python environment mix-up/mismatch problems. The most common mismatch comes from installing Numba into the site-packages/environment of one version of Python by using a base or system installation of Python that is a different version; this typically happens through the use of the "wrong" ``pip`` binary. This will obviously cause problems as the C-Extensions on which Numba relies are bound to specific Python versions. A way to check if this is likely the problem is to see if the path to the ``python`` binary at:: python -c 'import sys; print(sys.executable)' matches the path to your installation tool and/or matches the reported installation location, and if the Python versions match up across all of these. Note that Python version ``X.Y.A`` is compatible with ``X.Y.B``. To fix this, the best approach is to create an entirely new environment and ensure that the installation tool used to install Numba is the one from that environment/the Python versions at install and run time match. #. Your core system libraries are too old. This is a somewhat rare occurrence, but there are occasions when a very old (typically out-of-support) version of Linux is in use that doesn't have a ``glibc`` library with sufficiently new versioned symbols for Numba's shared libraries to resolve against. The fix for this is to update your OS system libraries/update your OS. #. You are using an IDE, e.g. Spyder. There are some unknown issues in relation to installing Numba via IDEs, but it would appear that these are likely variations of 1. or 2. with the same suggested fixes. Also, try installation from outside of the IDE with the command line. If you have an installation problem which is not one of the above problems, please do ask on `numba.discourse.group `_ and if possible include the path where Numba is installed and also the output of:: python -c 'import sys; print(sys.executable)' Programming =========== Can I pass a function as an argument to a jitted function?
---------------------------------------------------------- As of Numba 0.39, you can, so long as the function argument has also been JIT-compiled:: @jit(nopython=True) def f(g, x): return g(x) + g(-x) result = f(jitted_g_function, 1) However, dispatching with arguments that are functions has extra overhead. If this matters for your application, you can also use a factory function to capture the function argument in a closure:: def make_f(g): # Note: a new f() is created each time make_f() is called! @jit(nopython=True) def f(x): return g(x) + g(-x) return f f = make_f(jitted_g_function) result = f(1) Improving the dispatch performance of functions in Numba is an ongoing task. Numba doesn't seem to care when I modify a global variable ---------------------------------------------------------- Numba considers global variables as compile-time constants. If you want your jitted function to update itself when you have modified a global variable's value, one solution is to recompile it using the :meth:`~Dispatcher.recompile` method. This is a relatively slow operation, though, so you may instead decide to rearchitect your code and turn the global variable into a function argument. Can I debug a jitted function? ------------------------------ Calling into :mod:`pdb` or other such high-level facilities is currently not supported from Numba-compiled code. However, you can temporarily disable compilation by setting the :envvar:`NUMBA_DISABLE_JIT` environment variable. How can I create a Fortran-ordered array? ----------------------------------------- Numba currently doesn't support the ``order`` argument to most Numpy functions such as :func:`numpy.empty` (because of limitations in the :term:`type inference` algorithm). You can work around this issue by creating a C-ordered array and then transposing it. For example:: a = np.empty((3, 5), order='F') b = np.zeros(some_shape, order='F') can be rewritten as:: a = np.empty((5, 3)).T b = np.zeros(some_shape[::-1]).T How can I increase integer width? --------------------------------- By default, Numba will generally use machine integer width for integer variables. On a 32-bit machine, you may sometimes need the magnitude of 64-bit integers instead. You can simply initialize relevant variables as ``np.int64`` (for example ``np.int64(0)`` instead of ``0``). It will propagate to all computations involving those variables. .. _parallel_faqs: How can I tell if ``parallel=True`` worked? ------------------------------------------- If the ``parallel=True`` transformations failed for a function decorated as such, a warning will be displayed. See also :ref:`numba-parallel-diagnostics` for information about parallel diagnostics. Performance =========== Does Numba inline functions? ---------------------------- Numba gives enough information to LLVM so that functions short enough can be inlined. This only works in :term:`nopython mode`. Does Numba vectorize array computations (SIMD)? ----------------------------------------------- Numba doesn't implement such optimizations by itself, but it lets LLVM apply them. Why has my loop not vectorized? ------------------------------- Numba enables the loop-vectorize optimization in LLVM by default. While it is a powerful optimization, not all loops are applicable. Sometimes, loop-vectorization may fail due to subtle details like memory access pattern. To see additional diagnostic information from LLVM, add the following lines: .. 
code-block:: python import llvmlite.binding as llvm llvm.set_option('', '--debug-only=loop-vectorize') This tells LLVM to print debug information from the **loop-vectorize** pass to stderr. Each function entry looks like: .. code-block:: text LV: Checking a loop in "" from LV: Loop hints: force=? width=0 unroll=0 ... LV: Vectorization is possible but not beneficial. LV: Interleaving is not beneficial. Each function entry is separated by an empty line. The reason for rejecting the vectorization is usually at the end of the entry. In the example above, LLVM rejected the vectorization because doing so will not speed up the loop. In this case, it can be due to the memory access pattern. For instance, the array being looped over may not be in a contiguous layout. When the memory access pattern is non-trivial such that LLVM cannot determine the accessed memory region, it may reject with the following message: .. code-block:: text LV: Can't vectorize due to memory conflicts Another common reason is: .. code-block:: text LV: Not vectorizing: loop did not meet vectorization requirements. In this case, vectorization is rejected because the vectorized code may behave differently. Here, try turning on ``fastmath=True`` to allow fastmath instructions. Why are the ``typed`` containers slower when used from the interpreter? ----------------------------------------------------------------------- The Numba ``typed`` containers found in ``numba.typed`` e.g. ``numba.typed.List`` store their data in an efficient form for access from JIT compiled code. When these containers are used from the CPython interpreter, the data involved has to be converted from/to the container format. This process is relatively costly and, as a result, impacts performance. In JIT compiled code no such penalty exists and so operations on the containers are much quicker and often faster than the pure Python equivalent. Does Numba automatically parallelize code? ------------------------------------------ It can, in some cases: * Ufuncs and gufuncs with the ``target="parallel"`` option will run on multiple threads. * The ``parallel=True`` option to ``@jit`` will attempt to optimize array operations and run them in parallel. It also adds support for ``prange()`` to explicitly parallelize a loop. You can also manually run computations on multiple threads yourself and use the ``nogil=True`` option (see :ref:`releasing the GIL `). Numba can also target parallel execution on GPU architectures using its CUDA and HSA backends. Can Numba speed up short-running functions? ------------------------------------------- Not significantly. New users sometimes expect to JIT-compile such functions:: def f(x, y): return x + y and get a significant speedup over the Python interpreter. But there isn't much Numba can improve here: most of the time is probably spent in CPython's function call mechanism, rather than the function itself. As a rule of thumb, if a function takes less than 10 µs to execute: leave it. The exception is that you *should* JIT-compile that function if it is called from another jitted function. There is a delay when JIT-compiling a complicated function, how can I improve it? --------------------------------------------------------------------------------- Try to pass ``cache=True`` to the ``@jit`` decorator. It will keep the compiled version on disk for later use. A more radical alternative is :ref:`ahead-of-time compilation `. GPU Programming =============== How do I work around the ``CUDA initialized before forking`` error?
Does Numba automatically parallelize code?
------------------------------------------

It can, in some cases:

* Ufuncs and gufuncs with the ``target="parallel"`` option will run on
  multiple threads.
* The ``parallel=True`` option to ``@jit`` will attempt to optimize array
  operations and run them in parallel.  It also adds support for
  ``prange()`` to explicitly parallelize a loop.

You can also manually run computations on multiple threads yourself and use
the ``nogil=True`` option (see :ref:`releasing the GIL `).  Numba can also
target parallel execution on GPU architectures using its CUDA and HSA
backends.

Can Numba speed up short-running functions?
-------------------------------------------

Not significantly.  New users sometimes expect to JIT-compile such
functions::

    def f(x, y):
        return x + y

and get a significant speedup over the Python interpreter.  But there isn't
much Numba can improve here: most of the time is probably spent in CPython's
function call mechanism, rather than the function itself.  As a rule of
thumb, if a function takes less than 10 µs to execute: leave it.

The exception is that you *should* JIT-compile that function if it is called
from another jitted function.

There is a delay when JIT-compiling a complicated function, how can I improve it?
---------------------------------------------------------------------------------

Try to pass ``cache=True`` to the ``@jit`` decorator.  It will keep the
compiled version on disk for later use.

A more radical alternative is :ref:`ahead-of-time compilation `.

GPU Programming
===============

How do I work around the ``CUDA initialized before forking`` error?
-------------------------------------------------------------------

On Linux, the ``multiprocessing`` module in the Python standard library
defaults to using the ``fork`` method for creating new processes.  Because
of the way process forking duplicates state between the parent and child
processes, CUDA will not work correctly in the child process if the CUDA
runtime was initialized *prior* to the fork.  Numba detects this and raises
a ``CudaDriverError`` with the message ``CUDA initialized before forking``.

One approach to avoid this error is to make all calls to ``numba.cuda``
functions inside the child processes or after the process pool is created.
However, this is not always possible, as you might want to query the number
of available GPUs before starting the process pool.  In Python 3, you can
change the process start method, as described in the `multiprocessing
documentation `_.  Switching from ``fork`` to ``spawn`` or ``forkserver``
will avoid the CUDA initialization issue, although the child processes will
not inherit any global variables from their parent.
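A minimal sketch of the ``spawn`` approach (the worker function here is
illustrative only)::

    import multiprocessing as mp

    def count_gpus(_):
        # Import and initialize CUDA in the child process only.
        from numba import cuda
        return len(cuda.gpus)

    if __name__ == '__main__':
        # 'spawn' starts each child in a fresh interpreter, so no CUDA
        # state is inherited from a forked parent.
        mp.set_start_method('spawn')
        with mp.Pool(2) as pool:
            print(pool.map(count_gpus, range(2)))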
Integration with other utilities
================================

Can I "freeze" an application which uses Numba?
-----------------------------------------------

If you're using PyInstaller or a similar utility to freeze an application,
you may encounter issues with llvmlite.  llvmlite needs a non-Python DLL in
order to work, but it won't be automatically detected by freezing
utilities.  You have to inform the freezing utility of the DLL's location:
it will usually be named ``llvmlite/binding/libllvmlite.so`` or
``llvmlite/binding/llvmlite.dll``, depending on your system.

I get errors when running a script twice under Spyder
-----------------------------------------------------

When you run a script in a console under Spyder, Spyder first tries to
reload existing modules.  This doesn't work well with Numba, and can produce
errors like ``TypeError: No matching definition for argument type(s)``.

There is a fix in the Spyder preferences.  Open the "Preferences" window,
select "Console", then "Advanced Settings", click the "Set UMR excluded
modules" button, and add ``numba`` inside the text box that pops up.

To see the setting take effect, be sure to restart the IPython console or
kernel.

.. _llvm-locale-bug:

Why does Numba complain about the current locale?
-------------------------------------------------

If you get an error message such as the following::

    RuntimeError: Failed at nopython (nopython mode backend)
    LLVM will produce incorrect floating-point code in the current locale

it means you have hit an LLVM bug which causes incorrect handling of
floating-point constants.  This is known to happen with certain third-party
libraries such as the Qt backend to matplotlib.

To work around the bug, you need to force back the locale to its default
value, for example::

    import locale
    locale.setlocale(locale.LC_NUMERIC, 'C')

How do I get Numba development builds?
--------------------------------------

Pre-release versions of Numba can be installed with conda::

    $ conda install -c numba/label/dev numba

Miscellaneous
=============

Where does the project name "Numba" come from?
----------------------------------------------

"Numba" is a combination of "NumPy" and "Mamba".  Mambas are some of the
fastest snakes in the world, and Numba makes your Python code fast.

How do I reference/cite/acknowledge Numba in other work?
--------------------------------------------------------

For academic use, the best option is to cite our ACM Proceedings:
`Numba: a LLVM-based Python JIT compiler. `_.  You can also find `the
sources on github `_, including `a pre-print pdf `_, in case you don't have
access to the ACM site but would like to read the paper.

Other related papers
~~~~~~~~~~~~~~~~~~~~

A paper describing ParallelAccelerator technology, which is activated when
the ``parallel=True`` jit option is used, can be found `here `_.

How do I write a minimal working reproducer for a problem with Numba?
---------------------------------------------------------------------

A minimal working reproducer for Numba should include:

1. The source code of the function(s) that reproduce the problem.

2. Some example data and a demonstration of calling the reproducing code
   with that data.  As Numba compiles based on type information, unless your
   problem is numerical, it's fine to just provide dummy data of the right
   type, e.g. use ``numpy.ones`` of the correct ``dtype``/size/shape for
   arrays.

3. Ideally, put 1. and 2. into a script with all the correct imports.  Make
   sure your script actually executes and reproduces the problem before
   submitting it!  The goal is to make it so that the script can just be
   copied directly from the `issue tracker `_ and run by someone else, such
   that they can see the same problem as you are having.

Having made a reproducer, now remove every part of the code that does not
contribute directly to reproducing the problem, to create a "minimal"
reproducer.  This means removing imports that aren't used, removing
variables that aren't used or have no effect, removing lines of code which
have no effect, reducing the complexity of expressions, and shrinking input
data to the minimal amount required to trigger the problem.

Doing the above really helps out the Numba issue triage process and will
enable a faster response to your problem!

`Suggested further reading `_ on writing minimal working reproducers.

numba-0.55.1/docs/source/user/generated-jit.rst000664 000000 000000 00000004546 14174536160 021366 0ustar00rootroot000000 000000 .. _generated-jit:

================================================
Flexible specializations with ``@generated_jit``
================================================

While the :func:`~numba.jit` decorator is useful for many situations,
sometimes you want to write a function that has different implementations
depending on its input types.  The :func:`~numba.generated_jit` decorator
allows the user to control the selection of a specialization at
compile-time, while fully retaining runtime execution speed of a JIT
function.

Example
=======

Suppose you want to write a function which returns whether a given value is
a "missing" value according to certain conventions.  For the sake of the
example, let's adopt the following definition:

- for floating-point arguments, a missing value is a ``NaN``
- for Numpy datetime64 and timedelta64 arguments, a missing value is a
  ``NaT``
- other types don't have the concept of a missing value.

That compile-time logic is easily implemented using the
:func:`~numba.generated_jit` decorator::

    import numpy as np

    from numba import generated_jit, types

    @generated_jit(nopython=True)
    def is_missing(x):
        """
        Return True if the value is missing, False otherwise.
""" if isinstance(x, types.Float): return lambda x: np.isnan(x) elif isinstance(x, (types.NPDatetime, types.NPTimedelta)): # The corresponding Not-a-Time value missing = x('NaT') return lambda x: x == missing else: return lambda x: False There are several things to note here: * The decorated function is called with the :ref:`Numba types ` of the arguments, not their values. * The decorated function doesn't actually compute a result, it returns a callable implementing the actual definition of the function for the given types. * It is possible to pre-compute some data at compile-time (the ``missing`` variable above) to have them reused inside the compiled implementation. * The function definitions use the same names for arguments as in the decorated function, this is required to ensure passing arguments by name works as expected. Compilation options =================== The :func:`~numba.generated_jit` decorator supports the same keyword-only arguments as the :func:`~numba.jit` decorator, for example the ``nopython`` and ``cache`` options. numba-0.55.1/docs/source/user/index.rst000664 000000 000000 00000000557 14174536160 017751 0ustar00rootroot000000 000000 User Manual =========== .. toctree:: 5minguide.rst overview.rst installing.rst jit.rst generated-jit.rst vectorize.rst jitclass.rst cfunc.rst pycc.rst parallel.rst stencil.rst withobjmode.rst jit-module.rst performance-tips.rst threading-layer.rst cli.rst troubleshoot.rst faq.rst examples.rst talks.rst numba-0.55.1/docs/source/user/installing.rst000664 000000 000000 00000031314 14174536160 021001 0ustar00rootroot000000 000000 Installation ============ Compatibility ------------- Numba is compatible with Python 3.7--3.10, and Numpy versions 1.18 up to 1.21. Our supported platforms are: * Linux x86 (32-bit and 64-bit) * Linux ppcle64 (POWER8, POWER9) * Windows 7 and later (32-bit and 64-bit) * OS X 10.9 and later (64-bit and unofficial support on M1/Arm64) * \*BSD (unofficial support only) * NVIDIA GPUs of compute capability 5.3 and later * Compute capabilities 3.0 - 5.2 are supported, but deprecated. * ARMv7 (32-bit little-endian, such as Raspberry Pi 2 and 3) * ARMv8 (64-bit little-endian, such as the NVIDIA Jetson) :ref:`numba-parallel` is only available on 64-bit platforms. Installing using conda on x86/x86_64/POWER Platforms ---------------------------------------------------- The easiest way to install Numba and get updates is by using ``conda``, a cross-platform package manager and software distribution maintained by Anaconda, Inc. You can either use `Anaconda `_ to get the full stack in one download, or `Miniconda `_ which will install the minimum packages required for a conda environment. Once you have conda installed, just type:: $ conda install numba or:: $ conda update numba Note that Numba, like Anaconda, only supports PPC in 64-bit little-endian mode. To enable CUDA GPU support for Numba, install the latest `graphics drivers from NVIDIA `_ for your platform. (Note that the open source Nouveau drivers shipped by default with many Linux distributions do not support CUDA.) Then install the ``cudatoolkit`` package:: $ conda install cudatoolkit You do not need to install the CUDA SDK from NVIDIA. Installing using pip on x86/x86_64 Platforms -------------------------------------------- Binary wheels for Windows, Mac, and Linux are also available from `PyPI `_. You can install Numba using ``pip``:: $ pip install numba This will download all of the needed dependencies as well. 
Compilation options
===================

The :func:`~numba.generated_jit` decorator supports the same keyword-only
arguments as the :func:`~numba.jit` decorator, for example the ``nopython``
and ``cache`` options.

numba-0.55.1/docs/source/user/index.rst000664 000000 000000 00000000557 14174536160 017751 0ustar00rootroot000000 000000 User Manual
===========

.. toctree::

   5minguide.rst
   overview.rst
   installing.rst
   jit.rst
   generated-jit.rst
   vectorize.rst
   jitclass.rst
   cfunc.rst
   pycc.rst
   parallel.rst
   stencil.rst
   withobjmode.rst
   jit-module.rst
   performance-tips.rst
   threading-layer.rst
   cli.rst
   troubleshoot.rst
   faq.rst
   examples.rst
   talks.rst

numba-0.55.1/docs/source/user/installing.rst000664 000000 000000 00000031314 14174536160 021001 0ustar00rootroot000000 000000 Installation
============

Compatibility
-------------

Numba is compatible with Python 3.7--3.10, and Numpy versions 1.18 up to
1.21.

Our supported platforms are:

* Linux x86 (32-bit and 64-bit)
* Linux ppcle64 (POWER8, POWER9)
* Windows 7 and later (32-bit and 64-bit)
* OS X 10.9 and later (64-bit and unofficial support on M1/Arm64)
* \*BSD (unofficial support only)
* NVIDIA GPUs of compute capability 5.3 and later

  * Compute capabilities 3.0 - 5.2 are supported, but deprecated.
* ARMv7 (32-bit little-endian, such as Raspberry Pi 2 and 3)
* ARMv8 (64-bit little-endian, such as the NVIDIA Jetson)

:ref:`numba-parallel` is only available on 64-bit platforms.

Installing using conda on x86/x86_64/POWER Platforms
----------------------------------------------------

The easiest way to install Numba and get updates is by using ``conda``, a
cross-platform package manager and software distribution maintained by
Anaconda, Inc.  You can either use `Anaconda `_ to get the full stack in one
download, or `Miniconda `_ which will install the minimum packages required
for a conda environment.

Once you have conda installed, just type::

    $ conda install numba

or::

    $ conda update numba

Note that Numba, like Anaconda, only supports PPC in 64-bit little-endian
mode.

To enable CUDA GPU support for Numba, install the latest `graphics drivers
from NVIDIA `_ for your platform.  (Note that the open source Nouveau
drivers shipped by default with many Linux distributions do not support
CUDA.)  Then install the ``cudatoolkit`` package::

    $ conda install cudatoolkit

You do not need to install the CUDA SDK from NVIDIA.

Installing using pip on x86/x86_64 Platforms
--------------------------------------------

Binary wheels for Windows, Mac, and Linux are also available from `PyPI `_.
You can install Numba using ``pip``::

    $ pip install numba

This will download all of the needed dependencies as well.  You do not need
to have LLVM installed to use Numba (in fact, Numba will ignore all LLVM
versions installed on the system) as the required components are bundled
into the llvmlite wheel.

To use CUDA with Numba installed by `pip`, you need to install the `CUDA
SDK `_ from NVIDIA.  Please refer to :ref:`cudatoolkit-lookup` for details.
Numba can also detect CUDA libraries installed system-wide on Linux.

.. _numba-install-armv7:

Installing on Linux ARMv7 Platforms
-----------------------------------

`Berryconda `_ is a conda-based Python distribution for the Raspberry Pi.
We are now uploading packages to the ``numba`` channel on Anaconda Cloud for
32-bit little-endian, ARMv7-based boards, which currently includes the
Raspberry Pi 2 and 3, but not the Pi 1 or Zero.  These can be installed
using conda from the ``numba`` channel::

    $ conda install -c numba numba

Berryconda and Numba may work on other Linux-based ARMv7 systems, but this
has not been tested.

Installing on Linux ARMv8 (AArch64) Platforms
---------------------------------------------

We build and test conda packages on the `NVIDIA Jetson TX2 `_, but they are
likely to work for other AArch64 platforms.  (Note that while the Raspberry
Pi CPU is 64-bit, Raspbian runs it in 32-bit mode, so look at
:ref:`numba-install-armv7` instead.)

Conda-forge support for AArch64 is still quite experimental and packages are
limited, but it works well enough for Numba to build and pass tests.  To set
up the environment:

* Install `miniforge `_.  This will create a minimal conda environment.

* Then you can install Numba from the ``numba`` channel::

    $ conda install -c numba numba

On CUDA-enabled systems, like the Jetson, the CUDA toolkit should be
automatically detected in the environment.

.. _numba-source-install-instructions:

Installing from source
----------------------

Installing Numba from source is fairly straightforward (similar to other
Python packages), but installing `llvmlite `_ can be quite challenging due
to the need for a special LLVM build.  If you are building from source for
the purposes of Numba development, see :ref:`buildenv` for details on how
to create a Numba development environment with conda.

If you are building Numba from source for other reasons, first follow the
`llvmlite installation guide `_.  Once that is completed, you can download
the latest Numba source code from `Github `_::

    $ git clone git://github.com/numba/numba.git

Source archives of the latest release can also be found on `PyPI `_.  In
addition to ``llvmlite``, you will also need:

* A C compiler compatible with your Python installation.  If you are using
  Anaconda, you can use the following conda packages:

  * Linux ``x86``: ``gcc_linux-32`` and ``gxx_linux-32``
  * Linux ``x86_64``: ``gcc_linux-64`` and ``gxx_linux-64``
  * Linux ``POWER``: ``gcc_linux-ppc64le`` and ``gxx_linux-ppc64le``
  * Linux ``ARM``: no conda packages, use the system compiler
  * Mac OSX: ``clang_osx-64`` and ``clangxx_osx-64`` or the system compiler
    at ``/usr/bin/clang`` (Mojave onwards)
  * Windows: a version of Visual Studio appropriate for the Python version
    in use

* `NumPy `_

Then you can build and install Numba from the top level of the source
tree::

    $ python setup.py install

.. _numba-source-install-env_vars:

Build time environment variables and configuration of optional components
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Below are the environment variables that alter how Numba builds by default,
along with information on the configuration of optional components.
.. envvar:: NUMBA_DISABLE_OPENMP (default: not set)

   To disable compilation of the OpenMP threading backend set this
   environment variable to a non-empty string when building.

   If not set (default):

   * For Linux and Windows it is necessary to provide OpenMP C headers and
     runtime libraries compatible with the compiler tool chain mentioned
     above, and for these to be accessible to the compiler via standard
     flags.

   * For OSX the conda packages ``llvm-openmp`` and ``intel-openmp``
     provide suitable C headers and libraries.

   If the compilation requirements are not met, the OpenMP threading
   backend will not be compiled.

.. envvar:: NUMBA_DISABLE_TBB (default: not set)

   To disable the compilation of the TBB threading backend set this
   environment variable to a non-empty string when building.

   If not set (default), the TBB C headers and libraries must be available
   at compile time.  If building with ``conda build`` this requirement can
   be met by installing the ``tbb-devel`` package.  If not building with
   ``conda build`` the requirement can be met via a system installation of
   TBB or through the use of the ``TBBROOT`` environment variable to
   provide the location of the TBB installation.  For more information
   about setting ``TBBROOT`` see the `Intel documentation `_.

.. _numba-source-install-check:

Dependency List
---------------

Numba has numerous required and optional dependencies which additionally
may vary with target operating system and hardware.  The following lists
them all (as of July 2020).

* Required build time:

  * ``setuptools``
  * ``numpy``
  * ``llvmlite``
  * Compiler toolchain mentioned above

* Required run time:

  * ``setuptools``
  * ``numpy``
  * ``llvmlite``

* Optional build time:

  See :ref:`numba-source-install-env_vars` for more details about
  additional options for the configuration and specification of these
  optional components.

  * ``llvm-openmp`` (OSX) - provides headers for compiling OpenMP support
    into Numba's threading backend
  * ``intel-openmp`` (OSX) - provides OpenMP library support for Numba's
    threading backend.
  * ``tbb-devel`` - provides TBB headers/libraries for compiling TBB
    support into Numba's threading backend (version >= 2021 required).

* Optional run time:

  * ``scipy`` - provides cython bindings used in Numba's ``np.linalg.*``
    support
  * ``tbb`` - provides the TBB runtime libraries used by Numba's TBB
    threading backend (version >= 2021 required).
  * ``jinja2`` - for "pretty" type annotation output (HTML) via the
    ``numba`` CLI
  * ``cffi`` - permits use of CFFI bindings in Numba compiled functions
  * ``intel-openmp`` - (OSX) provides OpenMP library support for Numba's
    OpenMP threading backend
  * ``ipython`` - if in use, caching will use IPython's cache directories
    (caching still works)
  * ``pyyaml`` - permits the use of a ``.numba_config.yaml`` file for
    storing per project configuration options
  * ``colorama`` - makes error message highlighting work
  * ``icc_rt`` - (numba channel) allows Numba to use Intel SVML for extra
    performance
  * ``pygments`` - for "pretty" type annotation
  * ``gdb`` as an executable on the ``$PATH`` - if you would like to use
    the gdb support
  * Compiler toolchain mentioned above, if you would like to use ``pycc``
    for Ahead-of-Time (AOT) compilation
  * ``r2pipe`` - required for assembly CFG inspection.
  * ``radare2`` as an executable on the ``$PATH`` - required for assembly
    CFG inspection.  `See here `_ for information on obtaining and
    installing.
  * ``graphviz`` - for some CFG inspection functionality.
* ``pickle5`` - provides Python 3.8 pickling features for faster pickling in Python 3.7. * ``typeguard`` - used by ``runtests.py`` for :ref:`runtime type-checking `. * ``cuda-python`` - The NVIDIA CUDA Python bindings. See :ref:`cuda-bindings`. Numba requires Version 11.6 or greater. * To build the documentation: * ``sphinx`` * ``pygments`` * ``sphinx_rtd_theme`` * ``numpydoc`` * ``make`` as an executable on the ``$PATH`` Checking your installation -------------------------- You should be able to import Numba from the Python prompt:: $ python Python 3.10.2 | packaged by conda-forge | (main, Jan 14 2022, 08:02:09) [GCC 9.4.0] on linux Type "help", "copyright", "credits" or "license" for more information. >>> import numba >>> numba.__version__ '0.55.1' You can also try executing the ``numba --sysinfo`` (or ``numba -s`` for short) command to report information about your system capabilities. See :ref:`cli` for further information. :: $ numba -s System info: -------------------------------------------------------------------------------- __Time Stamp__ Report started (local time) : 2022-01-18 10:35:08.981319 __Hardware Information__ Machine : x86_64 CPU Name : skylake-avx512 CPU Count : 12 CPU Features : 64bit adx aes avx avx2 avx512bw avx512cd avx512dq avx512f avx512vl bmi bmi2 clflushopt clwb cmov cx16 cx8 f16c fma fsgsbase fxsr invpcid lzcnt mmx movbe pclmul pku popcnt prfchw rdrnd rdseed rtm sahf sse sse2 sse3 sse4.1 sse4.2 ssse3 xsave xsavec xsaveopt xsaves __OS Information__ Platform Name : Linux-5.4.0-94-generic-x86_64-with-glibc2.31 Platform Release : 5.4.0-94-generic OS Name : Linux OS Version : #106-Ubuntu SMP Thu Jan 6 23:58:14 UTC 2022 __Python Information__ Python Compiler : GCC 9.4.0 Python Implementation : CPython Python Version : 3.10.2 Python Locale : en_GB.UTF-8 __LLVM information__ LLVM Version : 11.1.0 __CUDA Information__ Found 1 CUDA devices id 0 b'Quadro RTX 8000' [SUPPORTED] Compute Capability: 7.5 PCI Device ID: 0 PCI Bus ID: 21 UUID: GPU-e6489c45-5b68-3b03-bab7-0e7c8e809643 Watchdog: Enabled FP32/FP64 Performance Ratio: 32 (output truncated due to length) numba-0.55.1/docs/source/user/jit-module.rst000664 000000 000000 00000006155 14174536160 020713 0ustar00rootroot000000 000000 .. _jit-module: ============================================ Automatic module jitting with ``jit_module`` ============================================ A common usage pattern is to have an entire module containing user-defined functions that all need to be jitted. One option to accomplish this is to manually apply the ``@jit`` decorator to each function definition. This approach works and is great in many cases. However, for large modules with many functions, manually ``jit``-wrapping each function definition can be tedious. For these situations, Numba provides another option, the ``jit_module`` function, to automatically replace functions declared in a module with their ``jit``-wrapped equivalents. It's important to note the conditions under which ``jit_module`` will *not* impact a function: 1. Functions which have already been wrapped with a Numba decorator (e.g. ``jit``, ``vectorize``, ``cfunc``, etc.) are not impacted by ``jit_module``. 2. Functions which are declared outside the module from which ``jit_module`` is called are not automatically ``jit``-wrapped. 3. Function declarations which occur logically after calling ``jit_module`` are not impacted. All other functions in a module will have the ``@jit`` decorator automatically applied to them. 
See the following section for an example use case. .. note:: This feature is for use by module authors. ``jit_module`` should not be called outside the context of a module containing functions to be jitted. Example usage ============= Let's assume we have a Python module we've created, ``mymodule.py`` (shown below), which contains several functions. Some of these functions are defined in ``mymodule.py`` while others are imported from other modules. We wish to have all the functions which are defined in ``mymodule.py`` jitted using ``jit_module``. .. _jit-module-usage: .. code-block:: python # mymodule.py from numba import jit, jit_module def inc(x): return x + 1 def add(x, y): return x + y import numpy as np # Use NumPy's mean function mean = np.mean @jit(nogil=True) def mul(a, b): return a * b jit_module(nopython=True, error_model="numpy") def div(a, b): return a / b There are several things to note in the above example: - Both the ``inc`` and ``add`` functions will be replaced with their ``jit``-wrapped equivalents with :ref:`compilation options ` ``nopython=True`` and ``error_model="numpy"``. - The ``mean`` function, because it's defined *outside* of ``mymodule.py`` in NumPy, will not be modified. - ``mul`` will not be modified because it has been manually decorated with ``jit``. - ``div`` will not be automatically ``jit``-wrapped because it is declared after ``jit_module`` is called. When the above module is imported, we have: .. code-block:: python >>> import mymodule >>> mymodule.inc CPUDispatcher() >>> mymodule.mean API === .. warning:: This feature is experimental. The supported features may change with or without notice. .. autofunction:: numba.jit_module numba-0.55.1/docs/source/user/jit.rst000664 000000 000000 00000013422 14174536160 017423 0ustar00rootroot000000 000000 .. _jit: =================================== Compiling Python code with ``@jit`` =================================== Numba provides several utilities for code generation, but its central feature is the :func:`numba.jit` decorator. Using this decorator, you can mark a function for optimization by Numba's JIT compiler. Various invocation modes trigger differing compilation options and behaviours. Basic usage =========== .. _jit-lazy: Lazy compilation ---------------- The recommended way to use the ``@jit`` decorator is to let Numba decide when and how to optimize:: from numba import jit @jit def f(x, y): # A somewhat trivial example return x + y In this mode, compilation will be deferred until the first function execution. Numba will infer the argument types at call time, and generate optimized code based on this information. Numba will also be able to compile separate specializations depending on the input types. For example, calling the ``f()`` function above with integer or complex numbers will generate different code paths:: >>> f(1, 2) 3 >>> f(1j, 2) (2+1j) Eager compilation ----------------- You can also tell Numba the function signature you are expecting. The function ``f()`` would now look like:: from numba import jit, int32 @jit(int32(int32, int32)) def f(x, y): # A somewhat trivial example return x + y ``int32(int32, int32)`` is the function's signature. In this case, the corresponding specialization will be compiled by the ``@jit`` decorator, and no other specialization will be allowed. This is useful if you want fine-grained control over types chosen by the compiler (for example, to use single-precision floats). If you omit the return type, e.g. 
by writing ``(int32, int32)`` instead of ``int32(int32, int32)``, Numba will
try to infer it for you.  Function signatures can also be strings, and you
can pass several of them as a list; see the :func:`numba.jit` documentation
for more details.

Of course, the compiled function gives the expected results::

    >>> f(1, 2)
    3

and if we specified ``int32`` as the return type, the higher-order bits get
discarded::

    >>> f(2**31, 2**31 + 1)
    1

Calling and inlining other functions
====================================

Numba-compiled functions can call other compiled functions.  The function
calls may even be inlined in the native code, depending on optimizer
heuristics.  For example::

    @jit
    def square(x):
        return x ** 2

    @jit
    def hypot(x, y):
        return math.sqrt(square(x) + square(y))

The ``@jit`` decorator *must* be added to any such library function,
otherwise Numba may generate much slower code.

Signature specifications
========================

Explicit ``@jit`` signatures can use a number of types.  Here are some
common ones:

* ``void`` is the return type of functions returning nothing (which
  actually return :const:`None` when called from Python)
* ``intp`` and ``uintp`` are pointer-sized integers (signed and unsigned,
  respectively)
* ``intc`` and ``uintc`` are equivalent to C ``int`` and ``unsigned int``
  integer types
* ``int8``, ``uint8``, ``int16``, ``uint16``, ``int32``, ``uint32``,
  ``int64``, ``uint64`` are fixed-width integers of the corresponding bit
  width (signed and unsigned)
* ``float32`` and ``float64`` are single- and double-precision
  floating-point numbers, respectively
* ``complex64`` and ``complex128`` are single- and double-precision complex
  numbers, respectively
* array types can be specified by indexing any numeric type, e.g.
  ``float32[:]`` for a one-dimensional single-precision array or
  ``int8[:,:]`` for a two-dimensional array of 8-bit integers.
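For example, a hypothetical eager signature combining an array type with
scalar types might look like the following sketch (the function and
argument names are illustrative only)::

    from numba import jit, float64, int32

    @jit(float64(float64[:], int32))
    def scaled_sum(arr, factor):
        total = 0.0
        for v in arr:        # iterate a one-dimensional float64 array
            total += v
        return total * factor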
.. _jit-options:

Compilation options
===================

A number of keyword-only arguments can be passed to the ``@jit`` decorator.

.. _jit-nopython:

``nopython``
------------

Numba has two compilation modes: :term:`nopython mode` and
:term:`object mode`.  The former produces much faster code, but has
limitations that can force Numba to fall back to the latter.  To prevent
Numba from falling back, and instead raise an error, pass
``nopython=True``. ::

    @jit(nopython=True)
    def f(x, y):
        return x + y

.. seealso:: :ref:`numba-troubleshooting`

.. _jit-nogil:

``nogil``
---------

Whenever Numba optimizes Python code to native code that only works on
native types and variables (rather than Python objects), it is no longer
necessary to hold Python's :py:term:`global interpreter lock` (GIL).  Numba
will release the GIL when entering such a compiled function if you passed
``nogil=True``. ::

    @jit(nogil=True)
    def f(x, y):
        return x + y

Code running with the GIL released runs concurrently with other threads
executing Python or Numba code (either the same compiled function, or
another one), allowing you to take advantage of multi-core systems.  This
will not be possible if the function is compiled in :term:`object mode`.

When using ``nogil=True``, you'll have to be wary of the usual pitfalls of
multi-threaded programming (consistency, synchronization, race conditions,
etc.).

.. _jit-cache:

``cache``
---------

To avoid paying the compilation time each time you invoke a Python program,
you can instruct Numba to write the result of function compilation into a
file-based cache.  This is done by passing ``cache=True``::

    @jit(cache=True)
    def f(x, y):
        return x + y

.. _parallel_jit_option:

``parallel``
------------

Enables automatic parallelization (and related optimizations) for those
operations in the function known to have parallel semantics.  For a list of
supported operations, see :ref:`numba-parallel`.  This feature is enabled
by passing ``parallel=True`` and must be used in conjunction with
``nopython=True``::

    @jit(nopython=True, parallel=True)
    def f(x, y):
        return x + y

.. seealso:: :ref:`numba-parallel`

numba-0.55.1/docs/source/user/jitclass.rst000664 000000 000000 00000017130 14174536160 020451 0ustar00rootroot000000 000000 .. _jitclass:

===========================================
Compiling Python classes with ``@jitclass``
===========================================

.. note::

    This is an early version of jitclass support.  Not all compilation
    features are exposed or implemented yet.

Numba supports code generation for classes via the
:func:`numba.experimental.jitclass` decorator.  A class can be marked for
optimization using this decorator along with a specification of the types
of each field.  We call the resulting class object a *jitclass*.  All
methods of a jitclass are compiled into nopython functions.  The data of a
jitclass instance is allocated on the heap as a C-compatible structure so
that any compiled functions can have direct access to the underlying data,
bypassing the interpreter.

Basic usage
===========

Here's an example of a jitclass:

.. literalinclude:: ../../../numba/tests/doc_examples/test_jitclass.py
   :language: python
   :start-after: magictoken.ex_jitclass.begin
   :end-before: magictoken.ex_jitclass.end
   :dedent: 8

In the above example, a ``spec`` is provided as a list of 2-tuples.  The
tuples contain the name of the field and the Numba type of the field.
Alternatively, a dictionary can be used (preferably an ``OrderedDict``, for
stable field ordering), which maps field names to types.

The definition of the class requires at least an ``__init__`` method for
initializing each defined field.  Uninitialized fields contain garbage
data.  Methods and properties (getters and setters only) can be defined.
They will be automatically compiled.

Inferred class member types from type annotations with ``as_numba_type``
========================================================================

Fields of a ``jitclass`` can also be inferred from Python type annotations.

.. literalinclude:: ../../../numba/tests/doc_examples/test_jitclass.py
   :language: python
   :start-after: magictoken.ex_jitclass_type_hints.begin
   :end-before: magictoken.ex_jitclass_type_hints.end
   :dedent: 8

Any type annotations on the class will be used to extend the spec if that
field is not already present.  The Numba type corresponding to the given
Python type is inferred using ``as_numba_type``.  For example, if we have
the class

.. code-block:: python

    @jitclass([("w", int32), ("y", float64[:])])
    class Foo:
        w: int
        x: float
        y: np.ndarray
        z: SomeOtherType

        def __init__(self, w: int, x: float, y: np.ndarray, z: SomeOtherType):
            ...

then the full spec used for ``Foo`` will be:

* ``"w": int32`` (specified in the ``spec``)
* ``"x": float64`` (added from type annotation)
* ``"y": array(float64, 1d, A)`` (specified in the ``spec``)
* ``"z": numba.as_numba_type(SomeOtherType)`` (added from type annotation)

Here ``SomeOtherType`` could be any supported Python type (e.g. ``bool``,
``typing.Dict[int, typing.Tuple[float, float]]``, or another ``jitclass``).

Note that only type annotations on the class will be used to infer spec
elements.  Method type annotations (e.g. those of ``__init__`` above) are
ignored.
Numba requires knowing the dtype and rank of numpy arrays, which cannot
currently be expressed with type annotations.  Because of this, numpy
arrays need to be included in the ``spec`` explicitly.

Specifying ``numba.typed`` containers as class members explicitly
=================================================================

The following patterns demonstrate how to specify a ``numba.typed.Dict`` or
``numba.typed.List`` explicitly as part of the ``spec`` passed to
``jitclass``.

First, using explicit Numba types and explicit construction.

.. code-block:: python

    from numba import types, typed
    from numba.experimental import jitclass

    # key and value types
    kv_ty = (types.int64, types.unicode_type)

    # A container class with:
    # * member 'd' holding a typed dictionary of int64 -> unicode string (kv_ty)
    # * member 'l' holding a typed list of float64
    @jitclass([('d', types.DictType(*kv_ty)),
               ('l', types.ListType(types.float64))])
    class ContainerHolder(object):
        def __init__(self):
            # initialize the containers
            self.d = typed.Dict.empty(*kv_ty)
            self.l = typed.List.empty_list(types.float64)

    container = ContainerHolder()
    container.d[1] = "apple"
    container.d[2] = "orange"
    container.l.append(123.)
    container.l.append(456.)
    print(container.d) # {1: apple, 2: orange}
    print(container.l) # [123.0, 456.0]

Another useful pattern is to use the ``numba.typed`` container attribute
``_numba_type_`` to find the type of a container; this can be accessed
directly from an instance of the container in the Python interpreter.  The
same information can be obtained by calling :func:`numba.typeof` on the
instance.  For example:

.. code-block:: python

    from numba import typed, typeof
    from numba.experimental import jitclass

    d = typed.Dict()
    d[1] = "apple"
    d[2] = "orange"
    l = typed.List()
    l.append(123.)
    l.append(456.)

    @jitclass([('d', typeof(d)), ('l', typeof(l))])
    class ContainerInstHolder(object):
        def __init__(self, dict_inst, list_inst):
            self.d = dict_inst
            self.l = list_inst

    container = ContainerInstHolder(d, l)
    print(container.d) # {1: apple, 2: orange}
    print(container.l) # [123.0, 456.0]

It is worth noting that the instance of the container in a ``jitclass``
must be initialized before use.  For example, the following will cause an
invalid memory access, as ``self.d`` is written to without ``d`` having
been initialized as a ``typed.Dict`` instance of the specified type.

.. code-block:: python

    from numba import types
    from numba.experimental import jitclass

    dict_ty = types.DictType(types.int64, types.unicode_type)

    @jitclass([('d', dict_ty)])
    class NotInitialisingContainer(object):
        def __init__(self):
            self.d[10] = "apple" # this is invalid, `d` is not initialized

    NotInitialisingContainer() # segmentation fault/memory access violation

Supported operations
====================

The following operations of jitclasses work in both the interpreter and
Numba compiled functions:

* calling the jitclass class object to construct a new instance
  (e.g. ``mybag = Bag(123)``);
* read/write access to attributes and properties (e.g. ``mybag.value``);
* calling methods (e.g. ``mybag.increment(3)``);
* calling static methods as instance attributes (e.g. ``mybag.add(1, 1)``);
* calling static methods as class attributes (e.g. ``Bag.add(1, 2)``).

Using jitclasses in Numba compiled functions is more efficient.  Short
methods can be inlined (at the discretion of the LLVM inliner).  Attribute
accesses are simply reads from a C structure.  Using jitclasses from the
interpreter has the same overhead as calling any Numba compiled function
from the interpreter.  Arguments and return values must be unboxed or boxed
between Python objects and native representation.
Values encapsulated by a jitclass are not boxed into Python objects when
the jitclass instance is handed to the interpreter.  It is during attribute
access to the field values that they are boxed.  Calling static methods as
class attributes is only supported outside of the class definition (i.e.
code cannot call ``Bag.add()`` from within another method of ``Bag``).

Limitations
===========

* A jitclass class object is treated as a function (the constructor) inside
  a Numba compiled function.
* ``isinstance()`` only works in the interpreter.
* Manipulating jitclass instances in the interpreter is not optimized, yet.
* Support for jitclasses is available on the CPU only.  (Note: Support for
  GPU devices is planned for a future release.)

The decorator: ``@jitclass``
============================

.. autofunction:: numba.experimental.jitclass

numba-0.55.1/docs/source/user/overview.rst000664 000000 000000 00000002117 14174536160 020502 0ustar00rootroot000000 000000 Overview
========

Numba is a compiler for Python array and numerical functions that gives you
the power to speed up your applications with high performance functions
written directly in Python.

Numba generates optimized machine code from pure Python code using the
`LLVM compiler infrastructure `_.  With a few simple annotations,
array-oriented and math-heavy Python code can be just-in-time optimized to
performance similar to that of C, C++ and Fortran, without having to switch
languages or Python interpreters.

Numba's main features are:

* :ref:`on-the-fly code generation ` (at import time or runtime, at the
  user's preference)
* native code generation for the CPU (default) and :doc:`GPU hardware
  <../cuda/index>`
* integration with the Python scientific software stack (thanks to Numpy)

Here is what a Numba-optimized function, taking a Numpy array as argument,
might look like::

    @numba.jit
    def sum2d(arr):
        M, N = arr.shape
        result = 0.0
        for i in range(M):
            for j in range(N):
                result += arr[i,j]
        return result

numba-0.55.1/docs/source/user/parallel.rst000664 000000 000000 00000060141 14174536160 020431 0ustar00rootroot000000 000000 .. Copyright (c) 2017 Intel Corporation
   SPDX-License-Identifier: BSD-2-Clause

.. _numba-parallel:

=======================================
Automatic parallelization with ``@jit``
=======================================

Setting the :ref:`parallel_jit_option` option for :func:`~numba.jit` enables
a Numba transformation pass that attempts to automatically parallelize and
perform other optimizations on (part of) a function.  At the moment, this
feature only works on CPUs.

Some operations inside a user defined function, e.g. adding a scalar value
to an array, are known to have parallel semantics.  A user program may
contain many such operations and while each operation could be parallelized
individually, such an approach often has lackluster performance due to poor
cache behavior.  Instead, with auto-parallelization, Numba attempts to
identify such operations in a user program, and fuse adjacent ones
together, to form one or more kernels that are automatically run in
parallel.  The process is fully automated without modifications to the user
program, which is in contrast to Numba's :func:`~numba.vectorize` or
:func:`~numba.guvectorize` mechanism, where manual effort is required to
create parallel kernels.

.. _numba-parallel-supported:

Supported Operations
====================

In this section, we give a list of all the array operations that have
parallel semantics and which we attempt to parallelize.
#. All numba array operations that are supported by
   :ref:`case-study-array-expressions`, which include common arithmetic
   functions between Numpy arrays, and between arrays and scalars, as well
   as Numpy ufuncs.  They are often called `element-wise` or `point-wise`
   array operations:

   * unary operators: ``+`` ``-`` ``~``
   * binary operators: ``+`` ``-`` ``*`` ``/`` ``/?`` ``%`` ``|`` ``>>``
     ``^`` ``<<`` ``&`` ``**`` ``//``
   * comparison operators: ``==`` ``!=`` ``<`` ``<=`` ``>`` ``>=``
   * :ref:`Numpy ufuncs ` that are supported in :term:`nopython mode`.
   * User defined :class:`~numba.DUFunc` through :func:`~numba.vectorize`.

#. Numpy reduction functions ``sum``, ``prod``, ``min``, ``max``,
   ``argmin``, and ``argmax``.  Also, array math functions ``mean``,
   ``var``, and ``std``.

#. Numpy array creation functions ``zeros``, ``ones``, ``arange``,
   ``linspace``, and several random functions (rand, randn, ranf,
   random_sample, sample, random, standard_normal, chisquare, weibull,
   power, geometric, exponential, poisson, rayleigh, normal, uniform, beta,
   binomial, f, gamma, lognormal, laplace, randint, triangular).

#. Numpy ``dot`` function between a matrix and a vector, or two vectors.
   In all other cases, Numba's default implementation is used.

#. Multi-dimensional arrays are also supported for the above operations
   when operands have matching dimension and size.  The full semantics of
   Numpy broadcasting between arrays with mixed dimensionality or size is
   not supported, nor is the reduction across a selected dimension.

#. Array assignment in which the target is an array selection using a slice
   or a boolean array, and the value being assigned is either a scalar or
   another selection where the slice range or bitarray are inferred to be
   compatible.

#. The ``reduce`` operator of ``functools`` is supported for specifying
   parallel reductions on 1D Numpy arrays, but the initial value argument
   is mandatory (see the sketch after this list).
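A minimal sketch of the ``functools.reduce`` case, assuming a 1D float64
array (the function name is illustrative only)::

    from functools import reduce

    import numpy as np
    from numba import njit

    @njit(parallel=True)
    def sum_reduce(a):
        # The explicit initial value (0.0) is mandatory for the
        # parallel reduction to be accepted.
        return reduce(lambda x, y: x + y, a, 0.0)

    print(sum_reduce(np.arange(1000.)))  # 499500.0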
.. _numba-prange:

Explicit Parallel Loops
========================

Another feature of the code transformation pass (when ``parallel=True``) is
support for explicit parallel loops.  One can use Numba's ``prange`` instead
of ``range`` to specify that a loop can be parallelized.  The user is
required to make sure that the loop does not have cross iteration
dependencies except for supported reductions.

A reduction is inferred automatically if a variable is updated by a binary
function/operator using its previous value in the loop body.  The initial
value of the reduction is inferred automatically for the ``+=``, ``-=``,
``*=``, and ``/=`` operators.  For other functions/operators, the reduction
variable should hold the identity value right before entering the
``prange`` loop.  Reductions in this manner are supported for scalars and
for arrays of arbitrary dimensions.

The example below demonstrates a parallel loop with a reduction (``A`` is a
one-dimensional Numpy array)::

    from numba import njit, prange

    @njit(parallel=True)
    def prange_test(A):
        s = 0
        # Without "parallel=True" in the jit-decorator
        # the prange statement is equivalent to range
        for i in prange(A.shape[0]):
            s += A[i]
        return s

The following example demonstrates a product reduction on a two-dimensional
array::

    from numba import njit, prange
    import numpy as np

    @njit(parallel=True)
    def two_d_array_reduction_prod(n):
        shp = (13, 17)
        result1 = 2 * np.ones(shp, np.int_)
        tmp = 2 * np.ones_like(result1)

        for i in prange(n):
            result1 *= tmp

        return result1

.. note:: When using Python's ``range`` to induce a loop, Numba types the
          induction variable as a signed integer.  This is also the case
          for Numba's ``prange`` when ``parallel=False``.  However, for
          ``parallel=True``, if the range is identifiable as strictly
          positive, the type of the induction variable will be ``uint64``.
          The impact of a ``uint64`` induction variable is often most
          noticeable when undertaking operations involving it and a signed
          integer.  Under Numba's type coercion rules, such a case will
          commonly result in the operation producing a floating point
          result type.

Care should be taken, however, when reducing into slices or elements of an
array if the elements specified by the slice or index are written to
simultaneously by multiple parallel threads.  The compiler may not detect
such cases, and a race condition will then occur.

The following example demonstrates such a case where a race condition in
the execution of the parallel for-loop results in an incorrect return
value::

    from numba import njit, prange
    import numpy as np

    @njit(parallel=True)
    def prange_wrong_result(x):
        n = x.shape[0]
        y = np.zeros(4)
        for i in prange(n):
            # accumulating into the same element of `y` from different
            # parallel iterations of the loop results in a race condition
            y[:] += x[i]

        return y

as does the following example where the accumulating element is explicitly
specified::

    from numba import njit, prange
    import numpy as np

    @njit(parallel=True)
    def prange_wrong_result(x):
        n = x.shape[0]
        y = np.zeros(4)
        for i in prange(n):
            # accumulating into the same element of `y` from different
            # parallel iterations of the loop results in a race condition
            y[i % 4] += x[i]

        return y

whereas performing a whole array reduction is fine::

    from numba import njit, prange
    import numpy as np

    @njit(parallel=True)
    def prange_ok_result_whole_arr(x):
        n = x.shape[0]
        y = np.zeros(4)
        for i in prange(n):
            y += x[i]
        return y

as is creating a slice reference outside of the parallel reduction loop::

    from numba import njit, prange
    import numpy as np

    @njit(parallel=True)
    def prange_ok_result_outer_slice(x):
        n = x.shape[0]
        y = np.zeros(4)
        z = y[:]
        for i in prange(n):
            z += x[i]
        return y

Examples
========

In this section, we give an example of how this feature helps parallelize
Logistic Regression::

    @numba.jit(nopython=True, parallel=True)
    def logistic_regression(Y, X, w, iterations):
        for i in range(iterations):
            w -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X, w))) - 1.0) * Y), X)
        return w

We will not discuss details of the algorithm, but instead focus on how this
program behaves with auto-parallelization:

1. Input ``Y`` is a vector of size ``N``, ``X`` is an ``N x D`` matrix, and
   ``w`` is a vector of size ``D``.

2. The function body is an iterative loop that updates variable ``w``.  The
   loop body consists of a sequence of vector and matrix operations.

3. The inner ``dot`` operation produces a vector of size ``N``, followed by
   a sequence of arithmetic operations either between a scalar and a vector
   of size ``N``, or two vectors both of size ``N``.

4. The outer ``dot`` produces a vector of size ``D``, followed by an
   inplace array subtraction on variable ``w``.

5. With auto-parallelization, all operations that produce an array of size
   ``N`` are fused together to become a single parallel kernel.  This
   includes the inner ``dot`` operation and all point-wise array operations
   following it.

6. The outer ``dot`` operation produces a result array of a different
   dimension, and is not fused with the above kernel.
Here, the only thing required to take advantage of parallel hardware is to
set the :ref:`parallel_jit_option` option for :func:`~numba.jit`, with no
modifications to the ``logistic_regression`` function itself.  If we were
to give an equivalent parallel implementation using
:func:`~numba.guvectorize`, it would require a pervasive change that
rewrites the code to extract kernel computation that can be parallelized,
which is both tedious and challenging.

Unsupported Operations
======================

This section contains a non-exhaustive list of commonly encountered but
currently unsupported features:

#. **Mutating a list is not threadsafe**

   Concurrent write operations on container types (i.e. lists, sets and
   dictionaries) in a ``prange`` parallel region are not threadsafe e.g.::

       @njit(parallel=True)
       def invalid():
           z = []
           for i in prange(10000):
               z.append(i)
           return z

   It is highly likely that the above will result in corruption or an
   access violation as containers require thread-safety under mutation but
   this feature is not implemented.

#. **Induction variables are not associated with thread ID**

   The use of the induction variable induced by a ``prange`` based loop in
   conjunction with ``get_num_threads`` as a method of ensuring safe writes
   into a pre-sized container is not valid e.g.::

       @njit(parallel=True)
       def invalid():
           n = get_num_threads()
           z = [0 for _ in range(n)]
           for i in prange(100):
               z[i % n] += i
           return z

   The above can on occasion appear to work, but it does so by luck.
   There's no guarantee about which indexes are assigned to which executing
   threads or the order in which the loop iterations execute.

.. _numba-parallel-diagnostics:

Diagnostics
===========

.. note:: At present not all parallel transforms and functions can be
          tracked through the code generation process.  Occasionally
          diagnostics about some loops or transforms may be missing.

The :ref:`parallel_jit_option` option for :func:`~numba.jit` can produce
diagnostic information about the transforms undertaken in automatically
parallelizing the decorated code.  This information can be accessed in two
ways: the first is by setting the environment variable
:envvar:`NUMBA_PARALLEL_DIAGNOSTICS`; the second is by calling
:meth:`~Dispatcher.parallel_diagnostics`.  Both methods give the same
information and print to ``STDOUT``.  The level of verbosity in the
diagnostic information is controlled by an integer argument of value
between 1 and 4 inclusive, 1 being the least verbose and 4 the most.  For
example::

    @njit(parallel=True)
    def test(x):
        n = x.shape[0]
        a = np.sin(x)
        b = np.cos(a * a)
        acc = 0
        for i in prange(n - 2):
            for j in prange(n - 1):
                acc += b[i] + b[j + 1]
        return acc

    test(np.arange(10))

    test.parallel_diagnostics(level=4)

produces::

    ================================================================================
    ======= Parallel Accelerator Optimizing:  Function test, example.py (4)  =======
    ================================================================================

    Parallel loop listing for  Function test, example.py (4)
    --------------------------------------|loop #ID
    @njit(parallel=True)                  |
    def test(x):                          |
        n = x.shape[0]                    |
        a = np.sin(x)---------------------| #0
        b = np.cos(a * a)-----------------| #1
        acc = 0                           |
        for i in prange(n - 2):-----------| #3
            for j in prange(n - 1):-------| #2
                acc += b[i] + b[j + 1]    |
        return acc                        |
    --------------------------------- Fusing loops ---------------------------------
    Attempting fusion of parallel loops (combines loops with similar properties)...
    Trying to fuse loops #0 and #1:
        - fusion succeeded: parallel for-loop #1 is fused into for-loop #0.
    Trying to fuse loops #0 and #3:
        - fusion failed: loop dimension mismatched in axis 0. slice(0, x_size0.1, 1)
     != slice(0, $40.4, 1)
    ----------------------------- Before Optimization ------------------------------
    Parallel region 0:
    +--0 (parallel)
    +--1 (parallel)

    Parallel region 1:
    +--3 (parallel)
       +--2 (parallel)

    --------------------------------------------------------------------------------
    ------------------------------ After Optimization ------------------------------
    Parallel region 0:
    +--0 (parallel, fused with loop(s): 1)

    Parallel region 1:
    +--3 (parallel)
       +--2 (serial)

    Parallel region 0 (loop #0) had 1 loop(s) fused.

    Parallel region 1 (loop #3) had 0 loop(s) fused and 1 loop(s) serialized as part
     of the larger parallel loop (#3).
    --------------------------------------------------------------------------------
    --------------------------------------------------------------------------------

    ---------------------------Loop invariant code motion---------------------------

    Instruction hoisting:
    loop #0:
    Failed to hoist the following:
        dependency: $arg_out_var.10 = getitem(value=x, index=$parfor__index_5.99)
        dependency: $0.6.11 = getattr(value=$0.5, attr=sin)
        dependency: $expr_out_var.9 = call $0.6.11($arg_out_var.10, func=$0.6.11, args=[Var($arg_out_var.10, example.py (7))], kws=(), vararg=None)
        dependency: $arg_out_var.17 = $expr_out_var.9 * $expr_out_var.9
        dependency: $0.10.20 = getattr(value=$0.9, attr=cos)
        dependency: $expr_out_var.16 = call $0.10.20($arg_out_var.17, func=$0.10.20, args=[Var($arg_out_var.17, example.py (8))], kws=(), vararg=None)
    loop #3:
    Has the following hoisted:
        $const58.3 = const(int, 1)
        $58.4 = _n_23 - $const58.3
    --------------------------------------------------------------------------------

To aid users unfamiliar with the transforms undertaken when the
:ref:`parallel_jit_option` option is used, and to assist in the
understanding of the subsequent sections, the following definitions are
provided:

* Loop fusion

  `Loop fusion `_ is a technique whereby loops with equivalent bounds may
  be combined under certain conditions to produce a loop with a larger body
  (aiming to improve data locality).

* Loop serialization

  Loop serialization occurs when any number of ``prange`` driven loops are
  present inside another ``prange`` driven loop.  In this case the
  outermost of all the ``prange`` loops executes in parallel and any inner
  ``prange`` loops (nested or otherwise) are treated as standard ``range``
  based loops.  Essentially, nested parallelism does not occur.

* Loop invariant code motion

  `Loop invariant code motion `_ is an optimization technique that analyses
  a loop to look for statements that can be moved outside the loop body
  without changing the result of executing the loop; these statements are
  then "hoisted" out of the loop to save repeated computation.

* Allocation hoisting

  Allocation hoisting is a specialized case of loop invariant code motion
  that is possible due to the design of some common NumPy allocation
  methods.  Explanation of this technique is best driven by an example:

  .. code-block:: python

      @njit(parallel=True)
      def test(n):
          for i in prange(n):
              temp = np.zeros((50, 50)) # <--- Allocate a temporary array with np.zeros()
              for j in range(50):
                  temp[j, j] = i
          # ...do something with temp

  internally, this is transformed to approximately the following:
  .. code-block:: python

      @njit(parallel=True)
      def test(n):
          for i in prange(n):
              temp = np.empty((50, 50)) # <--- np.zeros() is rewritten as np.empty()
              temp[:] = 0               # <--- and then a zero initialisation
              for j in range(50):
                  temp[j, j] = i
          # ...do something with temp

  then after hoisting:

  .. code-block:: python

      @njit(parallel=True)
      def test(n):
          temp = np.empty((50, 50)) # <--- allocation is hoisted as a loop invariant as `np.empty` is considered pure
          for i in prange(n):
              temp[:] = 0           # <--- this remains as assignment is a side effect
              for j in range(50):
                  temp[j, j] = i
          # ...do something with temp

  it can be seen that the ``np.zeros`` allocation is split into an
  allocation and an assignment, and then the allocation is hoisted out of
  the loop in ``i``, thus producing more efficient code as the allocation
  only occurs once.

The parallel diagnostics report sections
----------------------------------------

The report is split into the following sections:

#. Code annotation

   This is the first section and contains the source code of the decorated
   function with loops that have parallel semantics identified and
   enumerated.  The ``loop #ID`` column on the right of the source code
   lines up with identified parallel loops.  From the example, ``#0`` is
   ``np.sin``, ``#1`` is ``np.cos`` and ``#2`` and ``#3`` are ``prange()``:

   .. code-block:: python

       Parallel loop listing for  Function test, example.py (4)
       --------------------------------------|loop #ID
       @njit(parallel=True)                  |
       def test(x):                          |
           n = x.shape[0]                    |
           a = np.sin(x)---------------------| #0
           b = np.cos(a * a)-----------------| #1
           acc = 0                           |
           for i in prange(n - 2):-----------| #3
               for j in prange(n - 1):-------| #2
                   acc += b[i] + b[j + 1]    |
           return acc                        |

   It is worth noting that the loop IDs are enumerated in the order they
   are discovered, which is not necessarily the same order as present in
   the source.  Further, it should also be noted that the parallel
   transforms use a static counter for loop ID indexing.  As a consequence
   it is possible for the loop ID index to not start at 0 due to use of the
   same counter for internal optimizations/transforms taking place that are
   invisible to the user.

#. Fusing loops

   This section describes the attempts made at fusing discovered loops,
   noting which succeeded and which failed.  In the case of failure to
   fuse, a reason is given (e.g. dependency on other data).  From the
   example:

   .. code-block:: text

       --------------------------------- Fusing loops ---------------------------------
       Attempting fusion of parallel loops (combines loops with similar properties)...
       Trying to fuse loops #0 and #1:
           - fusion succeeded: parallel for-loop #1 is fused into for-loop #0.
       Trying to fuse loops #0 and #3:
           - fusion failed: loop dimension mismatched in axis 0. slice(0, x_size0.1, 1)
        != slice(0, $40.4, 1)

   It can be seen that fusion of loops ``#0`` and ``#1`` was attempted and
   this succeeded (both are based on the same dimensions of ``x``).
   Following the successful fusion of ``#0`` and ``#1``, fusion was
   attempted between ``#0`` (now including the fused ``#1`` loop) and
   ``#3``.  This fusion failed because there is a loop dimension mismatch:
   ``#0`` is size ``x.shape`` whereas ``#3`` is size ``x.shape[0] - 2``.

#. Before Optimization

   This section shows the structure of the parallel regions in the code
   before any optimization has taken place, but with loops associated with
   their final parallel region (this is to make before/after optimization
   output directly comparable).
   Multiple parallel regions may exist if there are loops which cannot be
   fused; in this case code within each region will execute in parallel,
   but each parallel region will run sequentially.  From the example:

   .. code-block:: text

       Parallel region 0:
       +--0 (parallel)
       +--1 (parallel)

       Parallel region 1:
       +--3 (parallel)
          +--2 (parallel)

   As alluded to by the `Fusing loops` section, there are necessarily two
   parallel regions in the code.  The first contains loops ``#0`` and
   ``#1``, the second contains ``#3`` and ``#2``; all loops are marked
   ``parallel`` as no optimization has taken place yet.

#. After Optimization

   This section shows the structure of the parallel regions in the code
   after optimization has taken place.  Again, parallel regions are
   enumerated with their corresponding loops but this time loops which are
   fused or serialized are noted and a summary is presented.  From the
   example:

   .. code-block:: text

       Parallel region 0:
       +--0 (parallel, fused with loop(s): 1)

       Parallel region 1:
       +--3 (parallel)
          +--2 (serial)

       Parallel region 0 (loop #0) had 1 loop(s) fused.

       Parallel region 1 (loop #3) had 0 loop(s) fused and 1 loop(s) serialized as part
        of the larger parallel loop (#3).

   It can be noted that parallel region 0 contains loop ``#0`` and, as seen
   in the `fusing loops` section, loop ``#1`` is fused into loop ``#0``.
   It can also be noted that parallel region 1 contains loop ``#3`` and
   that loop ``#2`` (the inner ``prange()``) has been serialized for
   execution in the body of loop ``#3``.

#. Loop invariant code motion

   This section shows for each loop, after optimization has occurred:

   * the instructions that failed to be hoisted and the reason for failure
     (dependency/impure).
   * the instructions that were hoisted.
   * any allocation hoisting that may have occurred.

   From the example:

   .. code-block:: text

       Instruction hoisting:
       loop #0:
       Failed to hoist the following:
           dependency: $arg_out_var.10 = getitem(value=x, index=$parfor__index_5.99)
           dependency: $0.6.11 = getattr(value=$0.5, attr=sin)
           dependency: $expr_out_var.9 = call $0.6.11($arg_out_var.10, func=$0.6.11, args=[Var($arg_out_var.10, example.py (7))], kws=(), vararg=None)
           dependency: $arg_out_var.17 = $expr_out_var.9 * $expr_out_var.9
           dependency: $0.10.20 = getattr(value=$0.9, attr=cos)
           dependency: $expr_out_var.16 = call $0.10.20($arg_out_var.17, func=$0.10.20, args=[Var($arg_out_var.17, example.py (8))], kws=(), vararg=None)
       loop #3:
       Has the following hoisted:
           $const58.3 = const(int, 1)
           $58.4 = _n_23 - $const58.3

   The first thing to note is that this information is for advanced users
   as it refers to the :term:`Numba IR` of the function being transformed.
   As an example, the expression ``a * a`` in the example source partly
   translates to the expression
   ``$arg_out_var.17 = $expr_out_var.9 * $expr_out_var.9`` in the IR; this
   clearly cannot be hoisted out of ``loop #0`` because it is not loop
   invariant!  Whereas in ``loop #3``, the expression
   ``$const58.3 = const(int, 1)`` comes from the source ``b[j + 1]``; the
   number ``1`` is clearly a constant and so can be hoisted out of the
   loop.

.. seealso:: :ref:`parallel_jit_option`, :ref:`Parallel FAQs `

numba-0.55.1/docs/source/user/performance-tips.rst000664 000000 000000 00000024266 14174536160 022113 0ustar00rootroot000000 000000 .. _performance-tips:

Performance Tips
================

This is a short guide to features present in Numba that can help with
obtaining the best performance from code.  Two examples are used; both are
entirely contrived and exist purely for pedagogical reasons to motivate
discussion.
The first is the computation of the trigonometric identity
``cos(x)^2 + sin(x)^2``; the second is a simple element-wise square root of a
vector with reduction over summation. All performance numbers are indicative
only and, unless otherwise stated, were taken from running on an Intel
``i7-4790`` CPU (4 hardware threads) with an input of ``np.arange(1.e7)``.

.. note::
   A reasonably effective approach to achieving high performance code is to
   profile the code running with real data and use that to guide performance
   tuning. The information presented here is to demonstrate features, not to
   act as canonical guidance!

No Python mode vs Object mode
-----------------------------

A common pattern is to decorate functions with ``@jit`` as this is the most
flexible decorator offered by Numba. ``@jit`` essentially encompasses two
modes of compilation: first it will try to compile the decorated function in
no Python mode; if this fails it will try again to compile the function using
object mode. Whilst the use of looplifting in object mode can enable some
performance increase, getting functions to compile under no Python mode is
really the key to good performance. To ensure that only no Python mode is
used, and that an exception is raised if compilation fails, the decorators
``@njit`` and ``@jit(nopython=True)`` can be used (the first is an alias of
the second for convenience).

Loops
-----

Whilst NumPy has developed a strong idiom around the use of vector operations,
Numba is perfectly happy with loops too. For users familiar with C or Fortran,
writing Python in this style will work fine in Numba (after all, LLVM gets a
lot of use in compiling C lineage languages). For example::

    @njit
    def ident_np(x):
        return np.cos(x) ** 2 + np.sin(x) ** 2

    @njit
    def ident_loops(x):
        r = np.empty_like(x)
        n = len(x)
        for i in range(n):
            r[i] = np.cos(x[i]) ** 2 + np.sin(x[i]) ** 2
        return r

The above run at almost identical speeds when decorated with ``@njit``;
without the decorator the vectorized function is a couple of orders of
magnitude faster.

+-----------------+-------+----------------+
| Function Name   | @njit | Execution time |
+=================+=======+================+
| ``ident_np``    | No    | 0.581s         |
+-----------------+-------+----------------+
| ``ident_np``    | Yes   | 0.659s         |
+-----------------+-------+----------------+
| ``ident_loops`` | No    | 25.2s          |
+-----------------+-------+----------------+
| ``ident_loops`` | Yes   | 0.670s         |
+-----------------+-------+----------------+

.. _fast-math:

Fastmath
--------

In certain classes of applications strict IEEE 754 compliance is less
important. As a result it is possible to relax some numerical rigour with a
view to gaining additional performance. The way to achieve this behaviour in
Numba is through the use of the ``fastmath`` keyword argument::

    @njit(fastmath=False)
    def do_sum(A):
        acc = 0.
        # without fastmath, this loop must accumulate in strict order
        for x in A:
            acc += np.sqrt(x)
        return acc

    @njit(fastmath=True)
    def do_sum_fast(A):
        acc = 0.
        # with fastmath, the reduction can be vectorized as floating point
        # reassociation is permitted.
        for x in A:
            acc += np.sqrt(x)
        return acc

+-----------------+-----------------+
| Function Name   | Execution time  |
+=================+=================+
| ``do_sum``      | 35.2 ms         |
+-----------------+-----------------+
| ``do_sum_fast`` | 17.8 ms         |
+-----------------+-----------------+

In some cases you may wish to opt-in to only a subset of possible fast-math
optimizations.
This can be done by supplying a set of
`LLVM fast-math flags `_ to ``fastmath``::

    def add_assoc(x, y):
        return (x - y) + y

    print(njit(fastmath=False)(add_assoc)(0, np.inf)) # nan
    print(njit(fastmath=True) (add_assoc)(0, np.inf)) # 0.0
    print(njit(fastmath={'reassoc', 'nsz'})(add_assoc)(0, np.inf)) # 0.0
    print(njit(fastmath={'reassoc'}) (add_assoc)(0, np.inf)) # nan
    print(njit(fastmath={'nsz'}) (add_assoc)(0, np.inf)) # nan

Parallel=True
-------------

If code contains operations that are parallelisable (:ref:`and supported `)
Numba can compile a version that will run in parallel on multiple native
threads (no GIL!). This parallelisation is performed automatically and is
enabled by simply adding the ``parallel`` keyword argument::

    @njit(parallel=True)
    def ident_parallel(x):
        return np.cos(x) ** 2 + np.sin(x) ** 2

Execution times are as follows:

+--------------------+-----------------+
| Function Name      | Execution time  |
+====================+=================+
| ``ident_parallel`` | 112 ms          |
+--------------------+-----------------+

The execution speed of this function with ``parallel=True`` present is
approximately 5x that of the NumPy equivalent and 6x that of standard
``@njit``.

Numba parallel execution also has support for explicit parallel loop
declaration similar to that in OpenMP. To indicate that a loop should be
executed in parallel the ``numba.prange`` function should be used; this
function behaves like Python ``range`` and, if ``parallel=True`` is not set,
it acts simply as an alias of ``range``.

Loops induced with ``prange`` can be used for embarrassingly parallel
computation and also reductions.

Revisiting the reduce over sum example, assuming it is safe for the sum to be
accumulated out of order, the loop in ``n`` can be parallelised through the
use of ``prange``. Further, the ``fastmath=True`` keyword argument can be
added without concern in this case as the assumption that out of order
execution is valid has already been made through the use of ``parallel=True``
(as each thread computes a partial sum). ::

    @njit(parallel=True)
    def do_sum_parallel(A):
        # each thread can accumulate its own partial sum, and then a cross
        # thread reduction is performed to obtain the result to return
        n = len(A)
        acc = 0.
        for i in prange(n):
            acc += np.sqrt(A[i])
        return acc

    @njit(parallel=True, fastmath=True)
    def do_sum_parallel_fast(A):
        n = len(A)
        acc = 0.
        for i in prange(n):
            acc += np.sqrt(A[i])
        return acc

Execution times are as follows; ``fastmath`` again improves performance.

+--------------------------+-----------------+
| Function Name            | Execution time  |
+==========================+=================+
| ``do_sum_parallel``      | 9.81 ms         |
+--------------------------+-----------------+
| ``do_sum_parallel_fast`` | 5.37 ms         |
+--------------------------+-----------------+

.. _intel-svml:

Intel SVML
----------

Intel provides a short vector math library (SVML) that contains a large number
of optimised transcendental functions available for use as compiler
intrinsics. If the ``icc_rt`` package is present in the environment (or the
SVML libraries are simply locatable!) then Numba automatically configures the
LLVM back end to use the SVML intrinsic functions wherever possible. SVML
provides both high and low accuracy versions of each intrinsic and the version
that is used is determined through the use of the ``fastmath`` keyword.
The default is to use high accuracy which is accurate to within ``1 ULP``, however if ``fastmath`` is set to ``True`` then the lower accuracy versions of the intrinsics are used (answers to within ``4 ULP``). First obtain SVML, using conda for example:: conda install -c numba icc_rt Rerunning the identity function example ``ident_np`` from above with various combinations of options to ``@njit`` and with/without SVML yields the following performance results (input size ``np.arange(1.e8)``). For reference, with just NumPy the function executed in ``5.84s``: +-----------------------------------+--------+-------------------+ | ``@njit`` kwargs | SVML | Execution time | +===================================+========+===================+ | ``None`` | No | 5.95s | +-----------------------------------+--------+-------------------+ | ``None`` | Yes | 2.26s | +-----------------------------------+--------+-------------------+ | ``fastmath=True`` | No | 5.97s | +-----------------------------------+--------+-------------------+ | ``fastmath=True`` | Yes | 1.8s | +-----------------------------------+--------+-------------------+ | ``parallel=True`` | No | 1.36s | +-----------------------------------+--------+-------------------+ | ``parallel=True`` | Yes | 0.624s | +-----------------------------------+--------+-------------------+ | ``parallel=True, fastmath=True`` | No | 1.32s | +-----------------------------------+--------+-------------------+ | ``parallel=True, fastmath=True`` | Yes | 0.576s | +-----------------------------------+--------+-------------------+ It is evident that SVML significantly increases the performance of this function. The impact of ``fastmath`` in the case of SVML not being present is zero, this is expected as there is nothing in the original function that would benefit from relaxing numerical strictness. Linear algebra -------------- Numba supports most of ``numpy.linalg`` in no Python mode. The internal implementation relies on a LAPACK and BLAS library to do the numerical work and it obtains the bindings for the necessary functions from SciPy. Therefore, to achieve good performance in ``numpy.linalg`` functions with Numba it is necessary to use a SciPy built against a well optimised LAPACK/BLAS library. In the case of the Anaconda distribution SciPy is built against Intel's MKL which is highly optimised and as a result Numba makes use of this performance. numba-0.55.1/docs/source/user/pycc.rst000664 000000 000000 00000007241 14174536160 017575 0ustar00rootroot000000 000000 ============================ Compiling code ahead of time ============================ .. _pycc: While Numba's main use case is :term:`Just-in-Time compilation`, it also provides a facility for :term:`Ahead-of-Time compilation` (AOT). Overview ======== Benefits -------- #. AOT compilation produces a compiled extension module which does not depend on Numba: you can distribute the module on machines which do not have Numba installed (but Numpy is required). #. There is no compilation overhead at runtime (but see the ``@jit`` :ref:`cache ` option), nor any overhead of importing Numba. .. seealso:: Compiled extension modules are discussed in the `Python packaging user guide `_. Limitations ----------- #. AOT compilation only allows for regular functions, not :term:`ufuncs `. #. You have to specify function signatures explicitly. #. Each exported function can have only one signature (but you can export several different signatures under different names). #. 
AOT compilation produces generic code for your CPU's architectural family (for example "x86-64"), while JIT compilation produces code optimized for your particular CPU model. Usage ===== Standalone example ------------------ :: from numba.pycc import CC cc = CC('my_module') # Uncomment the following line to print out the compilation steps #cc.verbose = True @cc.export('multf', 'f8(f8, f8)') @cc.export('multi', 'i4(i4, i4)') def mult(a, b): return a * b @cc.export('square', 'f8(f8)') def square(a): return a ** 2 if __name__ == "__main__": cc.compile() If you run this Python script, it will generate an extension module named ``my_module``. Depending on your platform, the actual filename may be ``my_module.so``, ``my_module.pyd``, ``my_module.cpython-34m.so``, etc. The generated module has three functions: ``multf``, ``multi`` and ``square``. ``multi`` operates on 32-bit integers (``i4``), while ``multf`` and ``square`` operate on double-precision floats (``f8``):: >>> import my_module >>> my_module.multi(3, 4) 12 >>> my_module.square(1.414) 1.9993959999999997 Distutils integration --------------------- You can also integrate the compilation step for your extension modules in your ``setup.py`` script, using distutils or setuptools:: from distutils.core import setup from source_module import cc setup(..., ext_modules=[cc.distutils_extension()]) The ``source_module`` above is the module defining the ``cc`` object. Extensions compiled like this will be automatically included in the build files for your Python project, so you can distribute them inside binary packages such as wheels or Conda packages. Note that in the case of using conda, the compilers used for AOT need to be those that are available in the Anaconda distribution. Signature syntax ---------------- The syntax for exported signatures is the same as in the ``@jit`` decorator. You can read more about it in the :ref:`types ` reference. Here is an example of exporting an implementation of the second-order centered difference on a 1d array:: @cc.export('centdiff_1d', 'f8[:](f8[:], f8)') def centdiff_1d(u, dx): D = np.empty_like(u) D[0] = 0 D[-1] = 0 for i in range(1, len(D) - 1): D[i] = (u[i+1] - 2 * u[i] + u[i-1]) / dx**2 return D .. (example from http://nbviewer.ipython.org/gist/ketch/ae87a94f4ef0793d5d52) You can also omit the return type, which will then be inferred by Numba:: @cc.export('centdiff_1d', '(f8[:], f8)') def centdiff_1d(u, dx): # Same code as above ... numba-0.55.1/docs/source/user/stencil.rst000664 000000 000000 00000024760 14174536160 020305 0ustar00rootroot000000 000000 .. Copyright (c) 2017 Intel Corporation SPDX-License-Identifier: BSD-2-Clause .. _numba-stencil: ================================ Using the ``@stencil`` decorator ================================ Stencils are a common computational pattern in which array elements are updated according to some fixed pattern called the stencil kernel. Numba provides the ``@stencil`` decorator so that users may easily specify a stencil kernel and Numba then generates the looping code necessary to apply that kernel to some input array. Thus, the stencil decorator allows clearer, more concise code and in conjunction with :ref:`the parallel jit option ` enables higher performance through parallelization of the stencil execution. 
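To make the transformation concrete, the following is a minimal, pure-Python
sketch (the function name ``apply_kernel`` is illustrative only and is not
part of the Numba API) of the kind of looping code the decorator generates
for a two-dimensional ``3 x 3`` cross-shaped kernel, such as the one shown in
the next section, with the default zero fill of the border:

.. code-block:: python

    import numpy as np

    def apply_kernel(a):
        # the output has the same shape as the input; border elements are
        # left as zero (the default border handling)
        out = np.zeros(a.shape)
        # conceptually, the kernel is evaluated once per interior element of
        # the output, with kernel indices taken relative to that element
        for x in range(1, a.shape[0] - 1):
            for y in range(1, a.shape[1] - 1):
                out[x, y] = 0.25 * (a[x, y + 1] + a[x + 1, y]
                                    + a[x, y - 1] + a[x - 1, y])
        return out

The actual generated code is compiled and, in conjunction with the parallel
jit option, may be parallelized; the sketch above is purely illustrative of
the semantics.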
Basic usage =========== An example use of the ``@stencil`` decorator:: from numba import stencil @stencil def kernel1(a): return 0.25 * (a[0, 1] + a[1, 0] + a[0, -1] + a[-1, 0]) The stencil kernel is specified by what looks like a standard Python function definition but there are different semantics with respect to array indexing. Stencils produce an output array of the same size and shape as the input array although depending on the kernel definition may have a different type. Conceptually, the stencil kernel is run once for each element in the output array. The return value from the stencil kernel is the value written into the output array for that particular element. The parameter ``a`` represents the input array over which the kernel is applied. Indexing into this array takes place with respect to the current element of the output array being processed. For example, if element ``(x, y)`` is being processed then ``a[0, 0]`` in the stencil kernel corresponds to ``a[x + 0, y + 0]`` in the input array. Similarly, ``a[-1, 1]`` in the stencil kernel corresponds to ``a[x - 1, y + 1]`` in the input array. Depending on the specified kernel, the kernel may not be applicable to the borders of the output array as this may cause the input array to be accessed out-of-bounds. The way in which the stencil decorator handles this situation is dependent upon which :ref:`stencil-mode` is selected. The default mode is for the stencil decorator to set the border elements of the output array to zero. To invoke a stencil on an input array, call the stencil as if it were a regular function and pass the input array as the argument. For example, using the kernel defined above:: >>> import numpy as np >>> input_arr = np.arange(100).reshape((10, 10)) array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29], [30, 31, 32, 33, 34, 35, 36, 37, 38, 39], [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], [50, 51, 52, 53, 54, 55, 56, 57, 58, 59], [60, 61, 62, 63, 64, 65, 66, 67, 68, 69], [70, 71, 72, 73, 74, 75, 76, 77, 78, 79], [80, 81, 82, 83, 84, 85, 86, 87, 88, 89], [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]]) >>> output_arr = kernel1(input_arr) array([[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [ 0., 11., 12., 13., 14., 15., 16., 17., 18., 0.], [ 0., 21., 22., 23., 24., 25., 26., 27., 28., 0.], [ 0., 31., 32., 33., 34., 35., 36., 37., 38., 0.], [ 0., 41., 42., 43., 44., 45., 46., 47., 48., 0.], [ 0., 51., 52., 53., 54., 55., 56., 57., 58., 0.], [ 0., 61., 62., 63., 64., 65., 66., 67., 68., 0.], [ 0., 71., 72., 73., 74., 75., 76., 77., 78., 0.], [ 0., 81., 82., 83., 84., 85., 86., 87., 88., 0.], [ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]) >>> input_arr.dtype dtype('int64') >>> output_arr.dtype dtype('float64') Note that the stencil decorator has determined that the output type of the specified stencil kernel is ``float64`` and has thus created the output array as ``float64`` while the input array is of type ``int64``. Stencil Parameters ================== Stencil kernel definitions may take any number of arguments with the following provisions. The first argument must be an array. The size and shape of the output array will be the same as that of the first argument. Additional arguments may either be scalars or arrays. For array arguments, those arrays must be at least as large as the first argument (array) in each dimension. Array indexing is relative for all such input array arguments. .. 
_stencil-kernel-shape-inference: Kernel shape inference and border handling ========================================== In the above example and in most cases, the array indexing in the stencil kernel will exclusively use ``Integer`` literals. In such cases, the stencil decorator is able to analyze the stencil kernel to determine its size. In the above example, the stencil decorator determines that the kernel is ``3 x 3`` in shape since indices ``-1`` to ``1`` are used for both the first and second dimensions. Note that the stencil decorator also correctly handles non-symmetric and non-square stencil kernels. Based on the size of the stencil kernel, the stencil decorator is able to compute the size of the border in the output array. If applying the kernel to some element of input array would cause an index to be out-of-bounds then that element belongs to the border of the output array. In the above example, points ``-1`` and ``+1`` are accessed in each dimension and thus the output array has a border of size one in all dimensions. The parallel mode is able to infer kernel indices as constants from simple expressions if possible. For example:: @njit(parallel=True) def stencil_test(A): c = 2 B = stencil( lambda a, c: 0.3 * (a[-c+1] + a[0] + a[c-1]))(A, c) return B Stencil decorator options ========================= .. note:: The stencil decorator may be augmented in the future to provide additional mechanisms for border handling. At present, only one behaviour is implemented, ``"constant"`` (see ``func_or_mode`` below for details). .. _stencil-neighborhood: ``neighborhood`` ---------------- Sometimes it may be inconvenient to write the stencil kernel exclusively with ``Integer`` literals. For example, let us say we would like to compute the trailing 30-day moving average of a time series of data. One could write ``(a[-29] + a[-28] + ... + a[-1] + a[0]) / 30`` but the stencil decorator offers a more concise form using the ``neighborhood`` option:: @stencil(neighborhood = ((-29, 0),)) def kernel2(a): cumul = 0 for i in range(-29, 1): cumul += a[i] return cumul / 30 The neighborhood option is a tuple of tuples. The outer tuple's length is equal to the number of dimensions of the input array. The inner tuple's lengths are always two because each element of the inner tuple corresponds to minimum and maximum index offsets used in the corresponding dimension. If a user specifies a neighborhood but the kernel accesses elements outside the specified neighborhood, **the behavior is undefined.** .. _stencil-mode: ``func_or_mode`` ---------------- The optional ``func_or_mode`` parameter controls how the border of the output array is handled. Currently, there is only one supported value, ``"constant"``. In ``constant`` mode, the stencil kernel is not applied in cases where the kernel would access elements outside the valid range of the input array. In such cases, those elements in the output array are assigned to a constant value, as specified by the ``cval`` parameter. ``cval`` -------- The optional cval parameter defaults to zero but can be set to any desired value, which is then used for the border of the output array if the ``func_or_mode`` parameter is set to ``constant``. The cval parameter is ignored in all other modes. The type of the cval parameter must match the return type of the stencil kernel. If the user wishes the output array to be constructed from a particular type then they should ensure that the stencil kernel returns that type. 
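As an illustration (a minimal sketch; the kernel name ``kernel_cval`` is
hypothetical), the following fills the border of the output with ``-1.0``
instead of the default zero::

    from numba import stencil

    @stencil(cval=-1.0)  # border elements of the output are set to -1.0
    def kernel_cval(a):
        # note the kernel returns a float, matching the type of cval
        return 0.25 * (a[0, 1] + a[1, 0] + a[0, -1] + a[-1, 0])

Calling ``kernel_cval`` on a 2D array produces an output whose interior is
computed by the kernel and whose border elements are all ``-1.0``.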
``standard_indexing``
---------------------

By default, all array accesses in a stencil kernel are processed as relative
indices as described above. However, sometimes it may be advantageous to pass
an auxiliary array (e.g. an array of weights) to a stencil kernel and have
that array use standard Python indexing rather than relative indexing. For
this purpose, there is the stencil decorator option ``standard_indexing``,
whose value is a collection of strings naming those parameters to the stencil
function that are to be accessed with standard Python indexing rather than
relative indexing::

    @stencil(standard_indexing=("b",))
    def kernel3(a, b):
        return a[-1] * b[0] + a[0] + b[1]

``StencilFunc``
===============

The stencil decorator returns a callable object of type ``StencilFunc``.
``StencilFunc`` objects contain a number of attributes but the only one of
potential interest to users is the ``neighborhood`` attribute. If the
``neighborhood`` option was passed to the stencil decorator then the provided
neighborhood is stored in this attribute. Else, upon first execution or
compilation, the system calculates the neighborhood as described above and
then stores the computed neighborhood into this attribute. A user may then
inspect the attribute if they wish to verify that the calculated neighborhood
is correct.

Stencil invocation options
==========================

Internally, the stencil decorator transforms the specified stencil kernel into
a regular Python function. This function will have the same parameters as
specified in the stencil kernel definition but will also include the following
optional parameter.

.. _stencil-function-out:

``out``
-------

The optional ``out`` parameter is added to every stencil function generated by
Numba. If specified, the ``out`` parameter tells Numba that the user is
providing their own pre-allocated array to be used for the output of the
stencil. In this case, the stencil function will not allocate its own output
array. Users should ensure that the return type of the stencil kernel can be
safely cast to the element-type of the user-specified output array following
the `Numpy ufunc casting rules`_.

.. _`Numpy ufunc casting rules`: http://docs.scipy.org/doc/numpy/reference/ufuncs.html#casting-rules

An example usage is shown below::

    >>> import numpy as np
    >>> input_arr = np.arange(100).reshape((10, 10))
    >>> output_arr = np.full(input_arr.shape, 0.0)
    >>> kernel1(input_arr, out=output_arr)

numba-0.55.1/docs/source/user/talks.rst000664 000000 000000 00000006420 14174536160 017753 0ustar00rootroot000000 000000
Talks and Tutorials
===================

.. note::
   This is a selection of talks and tutorials that have been given by members
   of the Numba team as well as Numba users. If you know of a Numba-related
   talk that should be included on this list, please `open an issue `_.
Talks on Numba -------------- * AnacondaCON 2018 - Accelerating Scientific Workloads with Numba - Siu Kwan Lam (`Video `__) * `DIANA-HEP Meeting, 23 April 2018 `__ - Overview of Numba - Stan Seibert Talks on Applications of Numba ------------------------------ * GPU Technology Conference 2016 - Accelerating a Spectral Algorithm for Plasma Physics with Python/Numba on GPU - Manuel Kirchen & Rémi Lehe (`Slides `__) * `DIANA-HEP Meeting, 23 April 2018 `_ - Use of Numba in XENONnT - Chris Tunnell * `DIANA-HEP Meeting, 23 April 2018 `_ - Extending Numba for HEP data types - Jim Pivarski * STAC Summit, Nov 1 2017 - Scaling High-Performance Python with Minimal Effort - Ehsan Totoni (`Video `__, `Slides `__) * SciPy 2018 - UMAP: Uniform Manifold Approximation and Projection for Dimensional Reduction - Leland McInnes (`Video `__, `Github `__) * PyData Berlin 2018 - Extending Pandas using Apache Arrow and Numba - Uwe L. Korn (`Video `__, `Blog `__) * FOSDEM 2019 - Extending Numba - Joris Geessels (`Video, Slides & Examples `__) * PyCon India 2019 - Real World Numba: Taking the Path of Least Resistance - Ankit Mahato (`Video `__) * SciPy 2019 - How to Accelerate an Existing Codebase with Numba - Siu Kwan Lam & Stanley Seibert (`Video `__) * SciPy 2019 - Real World Numba: Creating a Skeleton Analysis Library - Juan Nunez-Iglesias (`Video `__) * SciPy 2019 - Fast Gradient Boosting Decision Trees with PyGBM and Numba - Nicholas Hug (`Video `__) * PyCon Sweden 2020 - Accelerating Scientific Computing using Numba - Ankit Mahato (`Video `__) Tutorials --------- * SciPy 2017 - Numba: Tell those C++ Bullies to Get Lost - Gil Forsyth & Lorena Barba (`Video `__, `Notebooks `__) * GPU Technology Conference 2018 - GPU Computing in Python with Numba - Stan Seibert (`Notebooks `__) * PyData Amsterdam 2019 - Create CUDA kernels from Python using Numba and CuPy - Valentin Haenel (`Video `__) numba-0.55.1/docs/source/user/threading-layer.rst000664 000000 000000 00000031244 14174536160 021716 0ustar00rootroot000000 000000 .. _numba-threading-layer: The Threading Layers ==================== This section is about the Numba threading layer, this is the library that is used internally to perform the parallel execution that occurs through the use of the ``parallel`` targets for CPUs, namely: * The use of the ``parallel=True`` kwarg in ``@jit`` and ``@njit``. * The use of the ``target='parallel'`` kwarg in ``@vectorize`` and ``@guvectorize``. .. note:: If a code base does not use the ``threading`` or ``multiprocessing`` modules (or any other sort of parallelism) the defaults for the threading layer that ship with Numba will work well, no further action is required! Which threading layers are available? ------------------------------------- There are three threading layers available and they are named as follows: * ``tbb`` - A threading layer backed by Intel TBB. * ``omp`` - A threading layer backed by OpenMP. * ``workqueue`` -A simple built-in work-sharing task scheduler. In practice, the only threading layer guaranteed to be present is ``workqueue``. The ``omp`` layer requires the presence of a suitable OpenMP runtime library. The ``tbb`` layer requires the presence of Intel's TBB libraries, these can be obtained via the conda command:: $ conda install tbb If you installed Numba with ``pip``, TBB can be enabled by running:: $ pip install tbb Due to compatibility issues with manylinux1 and other portability concerns, the OpenMP threading layer is disabled in the Numba binary wheels on PyPI. .. 
note::
   The default manner in which Numba searches for and loads a threading layer
   is tolerant of missing libraries, incompatible runtimes, etc.

.. _numba-threading-layer-setting-mech:

Setting the threading layer
---------------------------

The threading layer is set via the environment variable
``NUMBA_THREADING_LAYER`` or through assignment to
``numba.config.THREADING_LAYER``. If the programmatic approach to setting the
threading layer is used it must occur logically before any Numba based
compilation for a parallel target has occurred. There are two approaches to
choosing a threading layer: the first is by selecting a threading layer that
is safe under various forms of parallel execution; the second is through
explicit selection via the threading layer name (e.g. ``tbb``).

Setting the threading layer selection priority
----------------------------------------------

By default the threading layers are searched in the order of ``'tbb'``,
``'omp'``, then ``'workqueue'``. To change this search order whilst
maintaining the selection of a threading layer based on availability, the
environment variable :envvar:`NUMBA_THREADING_LAYER_PRIORITY` can be used.
Note that it can also be set via
:py:data:`numba.config.THREADING_LAYER_PRIORITY`. Similar to
:py:data:`numba.config.THREADING_LAYER`, it must occur logically before any
Numba based compilation for a parallel target has occurred.

For example, to instruct Numba to choose ``omp`` first if available, then
``tbb`` and so on, set the environment variable as
``NUMBA_THREADING_LAYER_PRIORITY="omp tbb workqueue"``. Or programmatically,
``numba.config.THREADING_LAYER_PRIORITY = ["omp", "tbb", "workqueue"]``.

Selecting a threading layer for safe parallel execution
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Parallel execution is fundamentally derived from core Python libraries in four
forms (the first three also apply to code using parallel execution via other
means!):

* ``threads`` from the ``threading`` module.
* ``spawn`` ing processes from the ``multiprocessing`` module via ``spawn``
  (default on Windows, only available in Python 3.4+ on Unix)
* ``fork`` ing processes from the ``multiprocessing`` module via ``fork``
  (default on Unix).
* ``fork`` ing processes from the ``multiprocessing`` module through the use
  of a ``forkserver`` (only available in Python 3 on Unix). Essentially a new
  process is spawned and then forks are made from this new process on request.

Any library in use with these forms of parallelism must exhibit safe behaviour
under the given paradigm. As a result, the threading layer selection methods
are designed to provide a way to choose a threading layer library that is safe
for a given paradigm in an easy, cross platform and environment tolerant
manner. The options that can be supplied to the
:ref:`setting mechanisms ` are as follows:

* ``default`` provides no specific safety guarantee and is the default.
* ``safe`` is both fork and thread safe; this requires the ``tbb`` package
  (Intel TBB libraries) to be installed.
* ``forksafe`` provides a fork safe library.
* ``threadsafe`` provides a thread safe library.

To discover the threading layer that was selected, the function
``numba.threading_layer()`` may be called after parallel execution.
For example, on a Linux machine with no TBB installed:: from numba import config, njit, threading_layer import numpy as np # set the threading layer before any parallel target compilation config.THREADING_LAYER = 'threadsafe' @njit(parallel=True) def foo(a, b): return a + b x = np.arange(10.) y = x.copy() # this will force the compilation of the function, select a threading layer # and then execute in parallel foo(x, y) # demonstrate the threading layer chosen print("Threading layer chosen: %s" % threading_layer()) which produces:: Threading layer chosen: omp and this makes sense as GNU OpenMP, as present on Linux, is thread safe. Selecting a named threading layer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Advanced users may wish to select a specific threading layer for their use case, this is done by directly supplying the threading layer name to the :ref:`setting mechanisms `. The options and requirements are as follows: +----------------------+-----------+-------------------------------------------+ | Threading Layer Name | Platform | Requirements | +======================+===========+===========================================+ | ``tbb`` | All | The ``tbb`` package (``$ conda install | | | | tbb``) | +----------------------+-----------+-------------------------------------------+ | ``omp`` | Linux | GNU OpenMP libraries (very likely this | | | | will already exist) | | | | | | | Windows | MS OpenMP libraries (very likely this will| | | | already exist) | | | | | | | OSX | The ``intel-openmp`` package (``$ conda | | | | install intel-openmp``) | +----------------------+-----------+-------------------------------------------+ | ``workqueue`` | All | None | +----------------------+-----------+-------------------------------------------+ Should the threading layer not load correctly Numba will detect this and provide a hint about how to resolve the problem. It should also be noted that the Numba diagnostic command ``numba -s`` has a section ``__Threading Layer Information__`` that reports on the availability of threading layers in the current environment. Extra notes ----------- The threading layers have fairly complex interactions with CPython internals and system level libraries, some additional things to note: * The installation of Intel's TBB libraries vastly widens the options available in the threading layer selection process. * On Linux, the ``omp`` threading layer is not fork safe due to the GNU OpenMP runtime library (``libgomp``) not being fork safe. If a fork occurs in a program that is using the ``omp`` threading layer, a detection mechanism is present that will try and gracefully terminate the forked child and print an error message to ``STDERR``. * On systems with the ``fork(2)`` system call available, if the TBB backed threading layer is in use and a ``fork`` call is made from a thread other than the thread that launched TBB (typically the main thread) then this results in undefined behaviour and a warning will be displayed on ``STDERR``. As ``spawn`` is essentially ``fork`` followed by ``exec`` it is safe to ``spawn`` from a non-main thread, but as this cannot be differentiated from just a ``fork`` call the warning message will still be displayed. * On OSX, the ``intel-openmp`` package is required to enable the OpenMP based threading layer. .. 
_setting_the_number_of_threads:

Setting the Number of Threads
-----------------------------

The number of threads used by numba is based on the number of CPU cores
available (see :obj:`numba.config.NUMBA_DEFAULT_NUM_THREADS`), but it can be
overridden with the :envvar:`NUMBA_NUM_THREADS` environment variable.

The total number of threads that numba launches is in the variable
:obj:`numba.config.NUMBA_NUM_THREADS`.

For some use cases, it may be desirable to set the number of threads to a
lower value, so that numba can be used with higher level parallelism.

The number of threads can be set dynamically at runtime using
:func:`numba.set_num_threads`. Note that :func:`~.set_num_threads` only allows
setting the number of threads to a smaller value than
:obj:`~.NUMBA_NUM_THREADS`. Numba always launches
:obj:`numba.config.NUMBA_NUM_THREADS` threads, but :func:`~.set_num_threads`
causes it to mask out unused threads so they aren't used in computations.

The current number of threads used by numba can be accessed with
:func:`numba.get_num_threads`. Both functions work inside of a jitted
function.

.. _numba-threading-layer-thread-masking:

Example of Limiting the Number of Threads
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In this example, suppose the machine we are running on has 8 cores (so
:obj:`numba.config.NUMBA_NUM_THREADS` would be ``8``). Suppose we want to run
some code with ``@njit(parallel=True)``, but we also want to run our code
concurrently in 4 different processes. With the default number of threads,
each Python process would run 8 threads, for a total of 4*8 = 32 threads,
which is oversubscription for our 8 cores. We should rather limit each
process to 2 threads, so that the total will be 4*2 = 8, which matches our
number of physical cores.

There are two ways to do this. One is to set the :envvar:`NUMBA_NUM_THREADS`
environment variable to ``2``.

.. code:: bash

   $ NUMBA_NUM_THREADS=2 python ourcode.py

However, there are two downsides to this approach:

1. :envvar:`NUMBA_NUM_THREADS` must be set before Numba is imported, and
   ideally before Python is launched. As soon as Numba is imported the
   environment variable is read and that number of threads is locked in as
   the number of threads Numba launches.

2. If we want to later increase the number of threads used by the process, we
   cannot. :envvar:`NUMBA_NUM_THREADS` sets the *maximum* number of threads
   that are launched for a process. Calling :func:`~.set_num_threads()` with
   a value greater than :obj:`numba.config.NUMBA_NUM_THREADS` results in an
   error.

The advantage of this approach is that we can do it from outside of the
process without changing the code.

Another approach is to use the :func:`numba.set_num_threads` function in our
code:

.. code:: python

   from numba import njit, set_num_threads

   @njit(parallel=True)
   def func():
       ...

   set_num_threads(2)
   func()

If we call ``set_num_threads(2)`` before executing our parallel code, it has
the same effect as calling the process with ``NUMBA_NUM_THREADS=2``, in that
the parallel code will only execute on 2 threads. However, we can later call
``set_num_threads(8)`` to increase the number of threads back to the default
size. And we do not have to worry about setting it before Numba gets
imported. It only needs to be called before the parallel function is run.

API Reference
~~~~~~~~~~~~~

.. py:data:: numba.config.NUMBA_NUM_THREADS

   The total (maximum) number of threads launched by numba.
   Defaults to :obj:`numba.config.NUMBA_DEFAULT_NUM_THREADS`, but can be
   overridden with the :envvar:`NUMBA_NUM_THREADS` environment variable.

.. py:data:: numba.config.NUMBA_DEFAULT_NUM_THREADS

   The number of usable CPU cores on the system (as determined by
   ``len(os.sched_getaffinity(0))``, if supported by the OS, or
   ``multiprocessing.cpu_count()`` if not). This is the default value for
   :obj:`numba.config.NUMBA_NUM_THREADS` unless the
   :envvar:`NUMBA_NUM_THREADS` environment variable is set.

.. autofunction:: numba.set_num_threads

.. autofunction:: numba.get_num_threads

numba-0.55.1/docs/source/user/troubleshoot.rst000664 000000 000000 00000120272 14174536160 021370 0ustar00rootroot000000 000000
.. _numba-troubleshooting:

========================
Troubleshooting and tips
========================

.. _what-to-compile:

What to compile
===============

The general recommendation is that you should only try to compile the
critical paths in your code. If you have a piece of performance-critical
computational code amongst some higher-level code, you may factor out the
performance-critical code in a separate function and compile the separate
function with Numba.

Letting Numba focus on that small piece of performance-critical code has
several advantages:

* it reduces the risk of hitting unsupported features;
* it reduces the compilation times;
* it allows you to evolve the higher-level code which is outside of the
  compiled function much more easily.

.. _code-doesnt-compile:

My code doesn't compile
=======================

There can be various reasons why Numba cannot compile your code, and raises
an error instead. One common reason is that your code relies on an
unsupported Python feature, especially in :term:`nopython mode`. Please see
the list of :ref:`pysupported`. If you find something that is listed there
and still fails to compile, please :ref:`report a bug `.

When Numba tries to compile your code it first tries to work out the types of
all the variables in use; this is so it can generate a type specific
implementation of your code that can be compiled down to machine code. A
common reason for Numba failing to compile (especially in :term:`nopython
mode`) is a type inference failure; essentially Numba cannot work out what
the type of all the variables in your code should be.
For example, let's consider this trivial function:: @jit(nopython=True) def f(x, y): return x + y If you call it with two numbers, Numba is able to infer the types properly:: >>> f(1, 2) 3 If however you call it with a tuple and a number, Numba is unable to say what the result of adding a tuple and number is, and therefore compilation errors out:: >>> f(1, (2,)) Traceback (most recent call last): File "", line 1, in File "/numba/numba/dispatcher.py", line 339, in _compile_for_args reraise(type(e), e, None) File "/numba/numba/six.py", line 658, in reraise raise value.with_traceback(tb) numba.errors.TypingError: Failed at nopython (nopython frontend) Invalid use of + with parameters (int64, tuple(int64 x 1)) Known signatures: * (int64, int64) -> int64 * (int64, uint64) -> int64 * (uint64, int64) -> int64 * (uint64, uint64) -> uint64 * (float32, float32) -> float32 * (float64, float64) -> float64 * (complex64, complex64) -> complex64 * (complex128, complex128) -> complex128 * (uint16,) -> uint64 * (uint8,) -> uint64 * (uint64,) -> uint64 * (uint32,) -> uint64 * (int16,) -> int64 * (int64,) -> int64 * (int8,) -> int64 * (int32,) -> int64 * (float32,) -> float32 * (float64,) -> float64 * (complex64,) -> complex64 * (complex128,) -> complex128 * parameterized [1] During: typing of intrinsic-call at (3) File "", line 3: The error message helps you find out what went wrong: "Invalid use of + with parameters (int64, tuple(int64 x 1))" is to be interpreted as "Numba encountered an addition of variables typed as integer and 1-tuple of integer, respectively, and doesn't know about any such operation". Note that if you allow object mode:: @jit def g(x, y): return x + y compilation will succeed and the compiled function will raise at runtime as Python would do:: >>> g(1, (2,)) Traceback (most recent call last): File "", line 1, in TypeError: unsupported operand type(s) for +: 'int' and 'tuple' My code has a type unification problem ====================================== Another common reason for Numba not being able to compile your code is that it cannot statically determine the return type of a function. The most likely cause of this is the return type depending on a value that is available only at runtime. Again, this is most often problematic when using :term:`nopython mode`. The concept of type unification is simply trying to find a type in which two variables could safely be represented. For example a 64 bit float and a 64 bit complex number could both be represented in a 128 bit complex number. As an example of type unification failure, this function has a return type that is determined at runtime based on the value of `x`:: In [1]: from numba import jit In [2]: @jit(nopython=True) ...: def f(x): ...: if x > 10: ...: return (1,) ...: else: ...: return 1 ...: In [3]: f(10) Trying to execute this function, errors out as follows:: TypingError: Failed at nopython (nopython frontend) Can't unify return type from the following types: tuple(int64 x 1), int64 Return of: IR name '$8.2', type '(int64 x 1)', location: File "", line 4: def f(x): if x > 10: return (1,) ^ Return of: IR name '$12.2', type 'int64', location: File "", line 6: def f(x): else: return 1 The error message "Can't unify return type from the following types: tuple(int64 x 1), int64" should be read as "Numba cannot find a type that can safely represent a 1-tuple of integer and an integer". .. 
_code-has-untyped-list: My code has an untyped list problem =================================== As :ref:`noted previously ` the first part of Numba compiling your code involves working out what the types of all the variables are. In the case of lists, a list must contain items that are of the same type or can be empty if the type can be inferred from some later operation. What is not possible is to have a list which is defined as empty and has no inferable type (i.e. an untyped list). For example, this is using a list of a known type:: from numba import jit @jit(nopython=True) def f(): return [1, 2, 3] # this list is defined on construction with `int` type This is using an empty list, but the type can be inferred:: from numba import jit @jit(nopython=True) def f(x): tmp = [] # defined empty for i in range(x): tmp.append(i) # list type can be inferred from the type of `i` return tmp This is using an empty list and the type cannot be inferred:: from numba import jit @jit(nopython=True) def f(x): tmp = [] # defined empty return (tmp, x) # ERROR: the type of `tmp` is unknown Whilst slightly contrived, if you need an empty list and the type cannot be inferred but you know what type you want the list to be, this "trick" can be used to instruct the typing mechanism:: from numba import jit import numpy as np @jit(nopython=True) def f(x): # define empty list, but instruct that the type is np.complex64 tmp = [np.complex64(x) for x in range(0)] return (tmp, x) # the type of `tmp` is known, but it is still empty The compiled code is too slow ============================= The most common reason for slowness of a compiled JIT function is that compiling in :term:`nopython mode` has failed and the Numba compiler has fallen back to :term:`object mode`. :term:`object mode` currently provides little to no speedup compared to regular Python interpretation, and its main point is to allow an internal optimization known as :term:`loop-lifting`: this optimization will allow to compile inner loops in :term:`nopython mode` regardless of what code surrounds those inner loops. To find out if type inference succeeded on your function, you can use the :meth:`~Dispatcher.inspect_types` method on the compiled function. For example, let's take the following function:: @jit def f(a, b): s = a + float(b) return s When called with numbers, this function should be fast as Numba is able to convert number types to floating-point numbers. Let's see:: >>> f(1, 2) 3.0 >>> f.inspect_types() f (int64, int64) -------------------------------------------------------------------------------- # --- LINE 7 --- @jit # --- LINE 8 --- def f(a, b): # --- LINE 9 --- # label 0 # a.1 = a :: int64 # del a # b.1 = b :: int64 # del b # $0.2 = global(float: ) :: Function() # $0.4 = call $0.2(b.1, ) :: (int64,) -> float64 # del b.1 # del $0.2 # $0.5 = a.1 + $0.4 :: float64 # del a.1 # del $0.4 # s = $0.5 :: float64 # del $0.5 s = a + float(b) # --- LINE 10 --- # $0.7 = cast(value=s) :: float64 # del s # return $0.7 return s Without trying to understand too much of the Numba intermediate representation, it is still visible that all variables and temporary values have had their types inferred properly: for example *a* has the type ``int64``, *$0.5* has the type ``float64``, etc. However, if *b* is passed as a string, compilation will fall back on object mode as the float() constructor with a string is currently not supported by Numba:: >>> f(1, "2") 3.0 >>> f.inspect_types() [... snip annotations for other signatures, see above ...] 
================================================================================ f (int64, str) -------------------------------------------------------------------------------- # --- LINE 7 --- @jit # --- LINE 8 --- def f(a, b): # --- LINE 9 --- # label 0 # a.1 = a :: pyobject # del a # b.1 = b :: pyobject # del b # $0.2 = global(float: ) :: pyobject # $0.4 = call $0.2(b.1, ) :: pyobject # del b.1 # del $0.2 # $0.5 = a.1 + $0.4 :: pyobject # del a.1 # del $0.4 # s = $0.5 :: pyobject # del $0.5 s = a + float(b) # --- LINE 10 --- # $0.7 = cast(value=s) :: pyobject # del s # return $0.7 return s Here we see that all variables end up typed as ``pyobject``. This means that the function was compiled in object mode and values are passed around as generic Python objects, without Numba trying to look into them to reason about their raw values. This is a situation you want to avoid when caring about the speed of your code. If a function fails to compile in ``nopython`` mode warnings will be emitted with explanation as to why compilation failed. For example with the ``f()`` function above (slightly edited for documentation purposes):: >>> f(1, 2) 3.0 >>> f(1, "2") example.py:7: NumbaWarning: Compilation is falling back to object mode WITH looplifting enabled because Function "f" failed type inference due to: Invalid use of Function() with argument(s) of type(s): (unicode_type) * parameterized In definition 0: TypeError: float() only support for numbers raised from /numba/typing/builtins.py:880 In definition 1: TypeError: float() only support for numbers raised from /numba/typing/builtins.py:880 This error is usually caused by passing an argument of a type that is unsupported by the named function. [1] During: resolving callee type: Function() [2] During: typing of call at example.py (9) File "example.py", line 9: def f(a, b): s = a + float(b) ^ /numba/compiler.py:722: NumbaWarning: Function "f" was compiled in object mode without forceobj=True. File "example.py", line 8: @jit def f(a, b): ^ 3.0 Disabling JIT compilation ========================= In order to debug code, it is possible to disable JIT compilation, which makes the ``jit`` decorator (and the ``njit`` decorator) act as if they perform no operation, and the invocation of decorated functions calls the original Python function instead of a compiled version. This can be toggled by setting the :envvar:`NUMBA_DISABLE_JIT` enviroment variable to ``1``. When this mode is enabled, the ``vectorize`` and ``guvectorize`` decorators will still result in compilation of a ufunc, as there is no straightforward pure Python implementation of these functions. .. _debugging-jit-compiled-code: Debugging JIT compiled code with GDB ==================================== Setting the ``debug`` keyword argument in the ``jit`` decorator (e.g. ``@jit(debug=True)``) enables the emission of debug info in the jitted code. To debug, GDB version 7.0 or above is required. Currently, the following debug info is available: * Function name will be shown in the backtrace along with type information and values (if available). * Source location (filename and line number) is available. For example, users can set a break point by the absolute filename and line number; e.g. ``break /path/to/myfile.py:6``. * Arguments to the current function can be show with ``info args`` * Local variables in the current function can be shown with ``info locals``. * The type of variables can be shown with ``whatis myvar``. 
* The value of variables can be shown with ``print myvar`` or
  ``display myvar``.

  * Simple numeric types, i.e. int, float and double, are shown in their
    native representation.
  * Other types are shown as a structure based on Numba's memory model
    representation of the type.

Known issues:

* Stepping depends heavily on optimization level. At full optimization
  (equivalent to O3), most of the variables are optimized out. It is often
  beneficial to use the environment variable :envvar:`NUMBA_OPT` to adjust
  the optimization level and :envvar:`NUMBA_EXTEND_VARIABLE_LIFETIMES` to
  extend the lifetime of variables to the end of their scope so as to get a
  debugging experience closer to the semantics of Python execution.
* Memory consumption increases significantly with debug info enabled. The
  compiler emits extra information (`DWARF `_) along with the
  instructions. The emitted object code can be 2x bigger with debug info.

Internal details:

* Since Python semantics allow variables to bind to values of different
  types, Numba internally creates multiple versions of the variable for each
  type. So for code like::

      x = 1         # type int
      x = 2.3       # type float
      x = (1, 2, 3) # type 3-tuple of int

  Each assignment will store to a different variable name. In the debugger,
  the variables will be ``x``, ``x$1`` and ``x$2``. (In the Numba IR, they
  are ``x``, ``x.1`` and ``x.2``.)
* When debug is enabled, inlining of functions at LLVM IR level is disabled.

Example debug usage
-------------------

The Python source:

.. code-block:: python
   :linenos:

   from numba import njit

   @njit(debug=True)
   def foo(a):
       b = a + 1
       c = a * 2.34
       d = (a, b, c)
       print(a, b, c, d)

   r = foo(123)
   print(r)

In the terminal:

.. code-block:: none
   :emphasize-lines: 1, 3, 7, 12, 14, 16, 20, 22, 26, 28, 30, 32, 34, 36

   $ NUMBA_OPT=0 NUMBA_EXTEND_VARIABLE_LIFETIMES=1 gdb -q python
   Reading symbols from python...
   (gdb) break test1.py:5
   No source file named test1.py.
   Make breakpoint pending on future shared library load? (y or [n]) y
   Breakpoint 1 (test1.py:5) pending.
   (gdb) run test1.py
   Starting program: /bin/python test1.py
   ...
   Breakpoint 1, __main__::foo_241[abi:c8tJTC_2fWgEeGLSgydRTQUgiqKEZ6gEoDvQJmaQIA](long long) (a=123) at test1.py:5
   5           b = a + 1
   (gdb) info args
   a = 123
   (gdb) n
   6           c = a * 2.34
   (gdb) info locals
   b = 124
   c = 0
   d = {f0 = 0, f1 = 0, f2 = 0}
   (gdb) n
   7           d = (a, b, c)
   (gdb) info locals
   b = 124
   c = 287.81999999999999
   d = {f0 = 0, f1 = 0, f2 = 0}
   (gdb) whatis b
   type = int64
   (gdb) whatis d
   type = Tuple(int64, int64, float64) ({i64, i64, double})
   (gdb) n
   8           print(a, b, c, d)
   (gdb) print b
   $1 = 124
   (gdb) print d
   $2 = {f0 = 123, f1 = 124, f2 = 287.81999999999999}
   (gdb) bt
   #0  __main__::foo_241[abi:c8tJTC_2fWgEeGLSgydRTQUgiqKEZ6gEoDvQJmaQIA](long long) (a=123) at test1.py:8
   #1  0x00007ffff06439fa in cpython::__main__::foo_241[abi:c8tJTC_2fWgEeGLSgydRTQUgiqKEZ6gEoDvQJmaQIA](long long) ()

Globally override debug setting
-------------------------------

It is possible to enable debug for the full application by setting
environment variable ``NUMBA_DEBUGINFO=1``. This sets the default value of
the ``debug`` option in ``jit``. Debug can be turned off on individual
functions by setting ``debug=False``.

Beware that enabling debug info significantly increases the memory
consumption for each compiled function. For large applications, this may
cause an out-of-memory error.
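For example (a minimal sketch; the function names are illustrative), running
a script with ``NUMBA_DEBUGINFO=1`` set in the environment turns debug info
on by default, while individual functions can still opt out:

.. code-block:: python

   # run as: NUMBA_DEBUGINFO=1 python myscript.py
   from numba import njit

   @njit  # picks up the global default, so compiled with debug info
   def traced(x):
       return x + 1

   @njit(debug=False)  # explicitly opts this function out of debug info
   def untraced(x):
       return x + 1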
Using Numba's direct ``gdb`` bindings in ``nopython`` mode
===========================================================

Numba (version 0.42.0 and later) has some additional functions relating to
``gdb`` support for CPUs that make it easier to debug programs. All the
``gdb`` related functions described in the following work in the same manner
irrespective of whether they are called from the standard CPython interpreter
or code compiled in either :term:`nopython mode` or :term:`object mode`.

.. note:: This feature is experimental!

.. warning:: This feature does unexpected things if used from Jupyter or
             alongside the ``pdb`` module. Its behaviour is harmless, just
             hard to predict!

Set up
------

Numba's ``gdb`` related functions make use of a ``gdb`` binary; the location
and name of this binary can be configured via the :envvar:`NUMBA_GDB_BINARY`
environment variable if desired.

.. note:: Numba's ``gdb`` support requires the ability for ``gdb`` to attach
          to another process. On some systems (notably Ubuntu Linux) default
          security restrictions placed on ``ptrace`` prevent this from being
          possible. This restriction is enforced at the system level by the
          Linux security module `Yama`. Documentation for this module and the
          security implications of making changes to its behaviour can be
          found in the `Linux Kernel documentation `_. The `Ubuntu Linux
          security documentation `_ discusses how to adjust the behaviour
          of `Yama` with regard to ``ptrace_scope`` so as to permit the
          required behaviour.

Basic ``gdb`` support
---------------------

.. warning:: Calling :func:`numba.gdb` and/or :func:`numba.gdb_init` more
             than once in the same program is not advisable; unexpected
             things may happen. If multiple breakpoints are desired within a
             program, launch ``gdb`` once via :func:`numba.gdb` or
             :func:`numba.gdb_init` and then use :func:`numba.gdb_breakpoint`
             to register additional breakpoint locations.

The simplest function for adding ``gdb`` support is :func:`numba.gdb`, which,
at the call location, will:

* launch ``gdb`` and attach it to the running process.
* create a breakpoint at the site of the :func:`numba.gdb()` function call;
  the attached ``gdb`` will pause execution here awaiting user input.

Use of this functionality is best motivated by example, continuing with the
example used above:

.. code-block:: python
   :linenos:

   from numba import njit, gdb

   @njit(debug=True)
   def foo(a):
       b = a + 1
       gdb() # instruct Numba to attach gdb at this location and pause execution
       c = a * 2.34
       d = (a, b, c)
       print(a, b, c, d)

   r = foo(123)
   print(r)

In the terminal (``...`` on a line by itself indicates output that is not
presented for brevity):

.. code-block:: none
   :emphasize-lines: 1, 4, 8, 13, 24, 26, 28, 30, 32, 37

   $ NUMBA_OPT=0 NUMBA_EXTEND_VARIABLE_LIFETIMES=1 python demo_gdb.py
   ...
   Breakpoint 1, 0x00007fb75238d830 in numba_gdb_breakpoint () from numba/_helperlib.cpython-39-x86_64-linux-gnu.so
   (gdb) s
   Single stepping until exit from function numba_gdb_breakpoint,
   which has no line number information.
   0x00007fb75233e1cf in numba::misc::gdb_hook::hook_gdb::_3clocals_3e::impl_242[abi:c8tJTIeFCjyCbUFRqqOAK_2f6h0phxApMogijRBAA_3d](StarArgTuple) ()
   (gdb) s
   Single stepping until exit from function _ZN5numba4misc8gdb_hook8hook_gdb12_3clocals_3e8impl_242B44c8tJTIeFCjyCbUFRqqOAK_2f6h0phxApMogijRBAA_3dE12StarArgTuple,
   which has no line number information.
__main__::foo_241[abi:c8tJTC_2fWgEeGLSgydRTQUgiqKEZ6gEoDvQJmaQIA](long long) (a=123) at demo_gdb.py:7 7 c = a * 2.34 (gdb) l 2 3 @njit(debug=True) 4 def foo(a): 5 b = a + 1 6 gdb() # instruct Numba to attach gdb at this location and pause execution 7 c = a * 2.34 8 d = (a, b, c) 9 print(a, b, c, d) 10 11 r= foo(123) (gdb) p a $1 = 123 (gdb) p b $2 = 124 (gdb) p c $3 = 0 (gdb) b 9 Breakpoint 2 at 0x7fb73d1f7287: file demo_gdb.py, line 9. (gdb) c Continuing. Breakpoint 2, __main__::foo_241[abi:c8tJTC_2fWgEeGLSgydRTQUgiqKEZ6gEoDvQJmaQIA](long long) (a=123) at demo_gdb.py:9 9 print(a, b, c, d) (gdb) info locals b = 124 c = 287.81999999999999 d = {f0 = 123, f1 = 124, f2 = 287.81999999999999} It can be seen in the above example that execution of the code is paused at the location of the ``gdb()`` function call at end of the ``numba_gdb_breakpoint`` function (this is the Numba internal symbol registered as breakpoint with ``gdb``). Issuing a ``step`` twice at this point moves to the stack frame of the compiled Python source. From there, it can be seen that the variables ``a`` and ``b`` have been evaluated but ``c`` has not, as demonstrated by printing their values, this is precisely as expected given the location of the ``gdb()`` call. Issuing a ``break`` on line 9 and then continuing execution leads to the evaluation of line ``7``. The variable ``c`` is assigned a value as a result of the execution and this can be seen in output of ``info locals`` when the breakpoint is hit. Running with ``gdb`` enabled ---------------------------- The functionality provided by :func:`numba.gdb` (launch and attach ``gdb`` to the executing process and pause on a breakpoint) is also available as two separate functions: * :func:`numba.gdb_init` this function injects code at the call site to launch and attach ``gdb`` to the executing process but does not pause execution. * :func:`numba.gdb_breakpoint` this function injects code at the call site that will call the special ``numba_gdb_breakpoint`` function that is registered as a breakpoint in Numba's ``gdb`` support. This is demonstrated in the next section. This functionality enables more complex debugging capabilities. Again, motivated by example, debugging a 'segfault' (memory access violation signalling ``SIGSEGV``): .. code-block:: python :linenos: from numba import njit, gdb_init import numpy as np # NOTE debug=True switches bounds-checking on, but for the purposes of this # example it is explicitly turned off so that the out of bounds index is # not caught! @njit(debug=True, boundscheck=False) def foo(a, index): gdb_init() # instruct Numba to attach gdb at this location, but not to pause execution b = a + 1 c = a * 2.34 d = c[index] # access an address that is a) invalid b) out of the page print(a, b, c, d) bad_index = int(1e9) # this index is invalid z = np.arange(10) r = foo(z, bad_index) print(r) In the terminal (``...`` on a line by itself indicates output that is not presented for brevity): .. code-block:: none :emphasize-lines: 1, 6, 8, 10, 12 $ NUMBA_OPT=0 python demo_gdb_segfault.py ... Program received signal SIGSEGV, Segmentation fault. 
   0x00007f5a4ca655eb in __main__::foo_241[abi:c8tJTC_2fWgEeGLSgydRTQUgiqKEZ6gEoDvQJmaQIA](Array, long long) (a=..., index=1000000000) at demo_gdb_segfault.py:12
   12          d = c[index] # access an address that is a) invalid b) out of the page
   (gdb) p index
   $1 = 1000000000
   (gdb) p c
   $2 = {meminfo = 0x5586cfb95830 "\001", parent = 0x0, nitems = 10, itemsize = 8, data = 0x5586cfb95860, shape = {10}, strides = {8}}
   (gdb) whatis c
   type = array(float64, 1d, C) ({i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]})
   (gdb) p c.nitems
   $3 = 10

In the ``gdb`` output it can be noted that a ``SIGSEGV`` signal was caught, and
the line in which the access violation occurred is printed.

Continuing the example as a debugging session demonstration, first ``index``
can be printed, and it is evidently 1e9. Printing ``c`` shows that it is a
structure, so the type needs looking up, and it can be seen that it is an
``array(float64, 1d, C)`` type. Given the segfault came from an invalid access
it would be informative to check the number of items in the array and compare
that to the index requested. Inspecting the ``nitems`` member of the structure
``c`` shows 10 items. It's therefore clear that the segfault comes from an
invalid access of index ``1000000000`` in an array containing ``10`` items.

Adding breakpoints to code
--------------------------

The next example demonstrates using multiple breakpoints that are defined
through the invocation of the :func:`numba.gdb_breakpoint` function:

.. code-block:: python
   :linenos:

   from numba import njit, gdb_init, gdb_breakpoint

   @njit(debug=True)
   def foo(a):
       gdb_init() # instruct Numba to attach gdb at this location
       b = a + 1
       gdb_breakpoint() # instruct gdb to break at this location
       c = a * 2.34
       d = (a, b, c)
       gdb_breakpoint() # and to break again at this location
       print(a, b, c, d)

   r = foo(123)
   print(r)

In the terminal (``...`` on a line by itself indicates output that is not
presented for brevity):

.. code-block:: none
   :emphasize-lines: 1, 4, 9, 20, 22, 24, 29, 31

   $ NUMBA_OPT=0 python demo_gdb_breakpoints.py
   ...
   Breakpoint 1, 0x00007fb65bb4c830 in numba_gdb_breakpoint () from numba/_helperlib.cpython-39-x86_64-linux-gnu.so
   (gdb) step
   Single stepping until exit from function numba_gdb_breakpoint,
   which has no line number information.
   __main__::foo_241[abi:c8tJTC_2fWgEeGLSgydRTQUgiqKEZ6gEoDvQJmaQIA](long long) (a=123) at demo_gdb_breakpoints.py:8
   8           c = a * 2.34
   (gdb) l
   3       @njit(debug=True)
   4       def foo(a):
   5           gdb_init() # instruct Numba to attach gdb at this location
   6           b = a + 1
   7           gdb_breakpoint() # instruct gdb to break at this location
   8           c = a * 2.34
   9           d = (a, b, c)
   10          gdb_breakpoint() # and to break again at this location
   11          print(a, b, c, d)
   12
   (gdb) p b
   $1 = 124
   (gdb) p c
   $2 = 0
   (gdb) c
   Continuing.

   Breakpoint 1, 0x00007fb65bb4c830 in numba_gdb_breakpoint () from numba/_helperlib.cpython-39-x86_64-linux-gnu.so
   (gdb) step
   11          print(a, b, c, d)
   (gdb) p c
   $3 = 287.81999999999999

From the ``gdb`` output it can be seen that execution paused at line 8 as a
breakpoint was hit, and after a ``continue`` was issued, it broke again at
line 11 where the next breakpoint was hit.

Debugging in parallel regions
-----------------------------

The following example is quite involved: it executes with ``gdb``
instrumentation from the outset as per the example above, but it also uses
threads and makes use of the breakpoint functionality.
Further, the last iteration of the parallel section calls the function ``work``, which is actually just a binding to ``glibc``'s ``free(3)`` in this case, but could equally be some involved function that is presenting a segfault for unknown reasons. .. code-block:: python :linenos: from numba import njit, prange, gdb_init, gdb_breakpoint import ctypes def get_free(): lib = ctypes.cdll.LoadLibrary('libc.so.6') free_binding = lib.free free_binding.argtypes = [ctypes.c_void_p,] free_binding.restype = None return free_binding work = get_free() @njit(debug=True, parallel=True) def foo(): gdb_init() # instruct Numba to attach gdb at this location, but not to pause execution counter = 0 n = 9 for i in prange(n): if i > 3 and i < 8: # iterations 4, 5, 6, 7 will break here gdb_breakpoint() if i == 8: # last iteration segfaults work(0xBADADD) counter += 1 return counter r = foo() print(r) In the terminal (``...`` on a line by itself indicates output that is not presented for brevity), note the setting of ``NUMBA_NUM_THREADS`` to 4 to ensure that there are 4 threads running in the parallel section: .. code-block:: none :emphasize-lines: 1, 19, 29, 44, 50, 56, 62, 69 $ NUMBA_NUM_THREADS=4 NUMBA_OPT=0 python demo_gdb_threads.py Attaching to PID: 21462 ... Attaching to process 21462 [New LWP 21467] [New LWP 21468] [New LWP 21469] [New LWP 21470] [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib64/libthread_db.so.1". 0x00007f59ec31756d in nanosleep () at ../sysdeps/unix/syscall-template.S:81 81 T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS) Breakpoint 1 at 0x7f59d631e8f0: file numba/_helperlib.c, line 1090. Continuing. [Switching to Thread 0x7f59d1fd1700 (LWP 21470)] Thread 5 "python" hit Breakpoint 1, numba_gdb_breakpoint () at numba/_helperlib.c:1090 1090 } (gdb) info threads Id Target Id Frame 1 Thread 0x7f59eca2f740 (LWP 21462) "python" pthread_cond_wait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185 2 Thread 0x7f59d37d4700 (LWP 21467) "python" pthread_cond_wait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185 3 Thread 0x7f59d2fd3700 (LWP 21468) "python" pthread_cond_wait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185 4 Thread 0x7f59d27d2700 (LWP 21469) "python" numba_gdb_breakpoint () at numba/_helperlib.c:1090 * 5 Thread 0x7f59d1fd1700 (LWP 21470) "python" numba_gdb_breakpoint () at numba/_helperlib.c:1090 (gdb) thread apply 2-5 info locals Thread 2 (Thread 0x7f59d37d4700 (LWP 21467)): No locals. Thread 3 (Thread 0x7f59d2fd3700 (LWP 21468)): No locals. Thread 4 (Thread 0x7f59d27d2700 (LWP 21469)): No locals. Thread 5 (Thread 0x7f59d1fd1700 (LWP 21470)): sched$35 = '\000' counter__arr = '\000' , "\001\000\000\000\000\000\000\000\b\000\000\000\000\000\000\000\370B]\"hU\000\000\001", '\000' counter = 0 (gdb) continue Continuing. [Switching to Thread 0x7f59d27d2700 (LWP 21469)] Thread 4 "python" hit Breakpoint 1, numba_gdb_breakpoint () at numba/_helperlib.c:1090 1090 } (gdb) continue Continuing. [Switching to Thread 0x7f59d1fd1700 (LWP 21470)] Thread 5 "python" hit Breakpoint 1, numba_gdb_breakpoint () at numba/_helperlib.c:1090 1090 } (gdb) continue Continuing. [Switching to Thread 0x7f59d27d2700 (LWP 21469)] Thread 4 "python" hit Breakpoint 1, numba_gdb_breakpoint () at numba/_helperlib.c:1090 1090 } (gdb) continue Continuing. Thread 5 "python" received signal SIGSEGV, Segmentation fault. 
   [Switching to Thread 0x7f59d1fd1700 (LWP 21470)]
   __GI___libc_free (mem=0xbadadd) at malloc.c:2935
   2935    if (chunk_is_mmapped(p))                       /* release mmapped memory. */
   (gdb) bt
   #0  __GI___libc_free (mem=0xbadadd) at malloc.c:2935
   #1  0x00007f59d37ded84 in $3cdynamic$3e::__numba_parfor_gufunc__0x7ffff80a61ae3e31$244(Array, Array) () at <string>:24
   #2  0x00007f59d17ce326 in __gufunc__._ZN13$3cdynamic$3e45__numba_parfor_gufunc__0x7ffff80a61ae3e31$244E5ArrayIyLi1E1C7mutable7alignedE5ArrayIxLi1E1C7mutable7alignedE ()
   #3  0x00007f59d37d7320 in thread_worker () from /numba/numba/npyufunc/workqueue.cpython-37m-x86_64-linux-gnu.so
   #4  0x00007f59ec626e25 in start_thread (arg=0x7f59d1fd1700) at pthread_create.c:308
   #5  0x00007f59ec350bad in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:113

In the output it can be seen that there are 4 threads launched and that they
all break at the breakpoint; further, ``Thread 5`` receives a ``SIGSEGV``
signal, and backtracing shows that it came from ``__GI___libc_free`` with the
invalid address in ``mem``, as expected.

Using the ``gdb`` command language
----------------------------------

Both the :func:`numba.gdb` and :func:`numba.gdb_init` functions accept
unlimited string arguments which will be passed directly to ``gdb`` as command
line arguments when it initializes. This makes it easy to set breakpoints on
other functions and perform repeated debugging tasks without having to
manually type them every time. For example, this code runs with ``gdb``
attached and sets a breakpoint on ``dgesdd_`` (say, for example, the arguments
passed to LAPACK's double precision divide-and-conquer SVD function need
debugging).

.. code-block:: python
   :linenos:

   from numba import njit, gdb
   import numpy as np

   @njit(debug=True)
   def foo(a):
       # instruct Numba to attach gdb at this location and on launch, switch
       # 'breakpoint pending' on, and then set a breakpoint on the function
       # dgesdd_, continue execution, and once the breakpoint is hit, backtrace
       gdb('-ex', 'set breakpoint pending on',
           '-ex', 'b dgesdd_',
           '-ex', 'c',
           '-ex', 'bt')
       b = a + 10
       u, s, vh = np.linalg.svd(b)
       return s # just return singular values

   z = np.arange(70.).reshape(10, 7)
   r = foo(z)
   print(r)

In the terminal (``...`` on a line by itself indicates output that is not
presented for brevity), note that no interaction is required to break and
backtrace:

.. code-block:: none
   :emphasize-lines: 1

   $ NUMBA_OPT=0 python demo_gdb_args.py
   Attaching to PID: 22300
   GNU gdb (GDB) Red Hat Enterprise Linux 8.0.1-36.el7
   ...
   Attaching to process 22300
   Reading symbols from /bin/python3.7...done.
   0x00007f652305a550 in __nanosleep_nocancel () at ../sysdeps/unix/syscall-template.S:81
   81    T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS)
   Breakpoint 1 at 0x7f650d0618f0: file numba/_helperlib.c, line 1090.
   Continuing.

   Breakpoint 1, numba_gdb_breakpoint () at numba/_helperlib.c:1090
   1090    }
   Breakpoint 2 at 0x7f65102322e0 (2 locations)
   Continuing.
   Breakpoint 2, 0x00007f65182be5f0 in mkl_lapack.dgesdd_ () from /lib/python3.7/site-packages/numpy/core/../../../../libmkl_rt.so
   #0  0x00007f65182be5f0 in mkl_lapack.dgesdd_ () from /lib/python3.7/site-packages/numpy/core/../../../../libmkl_rt.so
   #1  0x00007f650d065b71 in numba_raw_rgesdd (kind=kind@entry=100 'd', jobz=<optimized out>, jobz@entry=65 'A', m=m@entry=10, n=n@entry=7, a=a@entry=0x561c6fbb20c0, lda=lda@entry=10, s=0x561c6facf3a0, u=0x561c6fb680e0, ldu=10, vt=0x561c6fd375c0, ldvt=7, work=0x7fff4c926c30, lwork=-1, iwork=0x7fff4c926c40, info=0x7fff4c926c20) at numba/_lapack.c:1277
   #2  0x00007f650d06768f in numba_ez_rgesdd (ldvt=7, vt=0x561c6fd375c0, ldu=10, u=0x561c6fb680e0, s=0x561c6facf3a0, lda=10, a=0x561c6fbb20c0, n=7, m=10, jobz=65 'A', kind=<optimized out>) at numba/_lapack.c:1307
   #3  numba_ez_gesdd (kind=<optimized out>, jobz=<optimized out>, m=10, n=7, a=0x561c6fbb20c0, lda=10, s=0x561c6facf3a0, u=0x561c6fb680e0, ldu=10, vt=0x561c6fd375c0, ldvt=7) at numba/_lapack.c:1477
   #4  0x00007f650a3147a3 in numba::targets::linalg::svd_impl::$3clocals$3e::svd_impl$243(Array, omitted$28default$3d1$29) ()
   #5  0x00007f650a1c0489 in __main__::foo$241(Array) () at demo_gdb_args.py:15
   #6  0x00007f650a1c2110 in cpython::__main__::foo$241(Array) ()
   #7  0x00007f650cd096a4 in call_cfunc () from /numba/numba/_dispatcher.cpython-37m-x86_64-linux-gnu.so
   ...

How does the ``gdb`` binding work?
----------------------------------

For advanced users and debuggers of Numba applications it's important to know
some of the internal implementation details of the outlined ``gdb`` bindings.
The :func:`numba.gdb` and :func:`numba.gdb_init` functions work by injecting
the following into the function's LLVM IR:

* At the call site of the function, first inject a call to ``getpid(3)`` to
  get the PID of the executing process and store this for use later, then
  inject a ``fork(3)`` call:

  * In the parent:

    * Inject a call to ``sleep(3)`` (hence the pause whilst ``gdb`` loads).
    * Inject a call to the ``numba_gdb_breakpoint`` function (only
      :func:`numba.gdb` does this).

  * In the child:

    * Inject a call to ``execl(3)`` with the arguments
      ``numba.config.GDB_BINARY``, the ``attach`` command and the PID recorded
      earlier.

Numba has a special ``gdb`` command file that contains instructions to break
on the symbol ``numba_gdb_breakpoint`` and then ``finish``; this is to make
sure that the program stops on the breakpoint but the frame it stops in is the
compiled Python frame (or one ``step`` away from it, depending on
optimisation). This command file is also added to the arguments, and finally
any user specified arguments are added.

At the call site of a :func:`numba.gdb_breakpoint` a call is injected to the
special ``numba_gdb_breakpoint`` symbol, which is already registered and
instrumented as a place to break and ``finish`` immediately.

As a result of this, a call to e.g. :func:`numba.gdb` will cause a fork in the
program: the parent will sleep whilst the child launches ``gdb``, attaches it
to the parent and tells the parent to continue. The launched ``gdb`` has the
``numba_gdb_breakpoint`` symbol registered as a breakpoint and when the parent
continues and stops sleeping it will immediately call ``numba_gdb_breakpoint``
on which the child will break. Additional :func:`numba.gdb_breakpoint` calls
create calls to the registered breakpoint, hence the program will also break
at these locations.
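To make the mechanism above concrete, the following is a minimal sketch, in
pure Python rather than Numba's actual LLVM IR injection, of the
``getpid``/``fork``/``execl`` pattern just described. The ``gdb`` path, the
``-p`` attach flag and the sleep duration are illustrative assumptions (the
real implementation uses ``numba.config.GDB_BINARY`` and its command file):

.. code-block:: python

   import os
   import time

   def attach_gdb_sketch():
       # POSIX-only sketch of the fork/attach pattern described above.
       pid = os.getpid()      # record the PID of the executing (parent) process
       if os.fork() == 0:
           # Child: replace this process image with gdb attached to the parent.
           # '/usr/bin/gdb' stands in for numba.config.GDB_BINARY here.
           os.execl('/usr/bin/gdb', 'gdb', '-p', str(pid))
       else:
           # Parent: sleep whilst gdb loads and attaches; the real
           # implementation then calls numba_gdb_breakpoint so that the
           # attached gdb stops in the compiled frame.
           time.sleep(10)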
.. _debugging-cuda-python-code:

Debugging CUDA Python code
==========================

Using the simulator
-------------------

CUDA Python code can be run in the Python interpreter using the CUDA
Simulator, allowing it to be debugged with the Python debugger or with print
statements. To enable the CUDA simulator, set the environment variable
:envvar:`NUMBA_ENABLE_CUDASIM` to 1. For more information on the CUDA
Simulator, see :ref:`the CUDA Simulator documentation `.

Debug Info
----------

By setting the ``debug`` argument to ``cuda.jit`` to ``True``
(``@cuda.jit(debug=True)``), Numba will emit source location information in
the compiled CUDA code. Unlike the CPU target, only filename and line
information are available, and no variable type information is emitted. The
information is sufficient to debug memory errors with `cuda-memcheck `_.

For example, given the following CUDA Python code:

.. code-block:: python
   :linenos:

   import numpy as np
   from numba import cuda

   @cuda.jit(debug=True)
   def foo(arr):
       arr[cuda.threadIdx.x] = 1

   arr = np.arange(30)
   foo[1, 32](arr)   # more threads than array elements

We can use ``cuda-memcheck`` to find the memory error:

.. code-block:: none

   $ cuda-memcheck python chk_cuda_debug.py
   ========= CUDA-MEMCHECK
   ========= Invalid __global__ write of size 8
   =========     at 0x00000148 in /home/user/chk_cuda_debug.py:6:cudapy::__main__::foo$241(Array<__int64, int=1, C, mutable, aligned>)
   =========     by thread (31,0,0) in block (0,0,0)
   =========     Address 0x500a600f8 is out of bounds
   ...
   =========
   ========= Invalid __global__ write of size 8
   =========     at 0x00000148 in /home/user/chk_cuda_debug.py:6:cudapy::__main__::foo$241(Array<__int64, int=1, C, mutable, aligned>)
   =========     by thread (30,0,0) in block (0,0,0)
   =========     Address 0x500a600f0 is out of bounds
   ...

numba-0.55.1/docs/source/user/vectorize.rst000664 000000 000000 00000034617 14174536160 020650 0ustar00rootroot000000 000000 ==================================
Creating NumPy universal functions
==================================

There are two types of universal functions:

* Those which operate on scalars, these are "universal functions" or *ufuncs*
  (see ``@vectorize`` below).
* Those which operate on higher dimensional arrays and scalars, these are
  "generalized universal functions" or *gufuncs* (``@guvectorize`` below).

.. _vectorize:

The ``@vectorize`` decorator
============================

Numba's vectorize allows Python functions taking scalar input arguments to be
used as NumPy `ufuncs`_. Creating a traditional NumPy ufunc is not the most
straightforward process and involves writing some C code. Numba makes this
easy. Using the :func:`~numba.vectorize` decorator, Numba can compile a pure
Python function into a ufunc that operates over NumPy arrays as fast as
traditional ufuncs written in C.

.. _ufuncs: http://docs.scipy.org/doc/numpy/reference/ufuncs.html

Using :func:`~numba.vectorize`, you write your function as operating over
input scalars, rather than arrays. Numba will generate the surrounding loop
(or *kernel*) allowing efficient iteration over the actual inputs.

The :func:`~numba.vectorize` decorator has two modes of operation:

* Eager, or decoration-time, compilation: If you pass one or more type
  signatures to the decorator, you will be building a NumPy universal function
  (ufunc). The rest of this subsection describes building ufuncs using
  decoration-time compilation.
* Lazy, or call-time, compilation: When not given any signatures, the
  decorator will give you a Numba dynamic universal function
  (:class:`~numba.DUFunc`) that dynamically compiles a new kernel when called
  with a previously unsupported input type. A later subsection,
  ":ref:`dynamic-universal-functions`", describes this mode in more depth.

As described above, if you pass a list of signatures to the
:func:`~numba.vectorize` decorator, your function will be compiled into a
NumPy ufunc. In the basic case, only one signature will be passed::

   from numba import vectorize, float64

   @vectorize([float64(float64, float64)])
   def f(x, y):
       return x + y

If you pass several signatures, beware that you have to pass most specific
signatures before least specific ones (e.g., single-precision floats before
double-precision floats), otherwise type-based dispatching will not work as
expected::

   @vectorize([int32(int32, int32),
               int64(int64, int64),
               float32(float32, float32),
               float64(float64, float64)])
   def f(x, y):
       return x + y

The function will work as expected over the specified array types::

   >>> a = np.arange(6)
   >>> f(a, a)
   array([ 0,  2,  4,  6,  8, 10])
   >>> a = np.linspace(0, 1, 6)
   >>> f(a, a)
   array([ 0. ,  0.4,  0.8,  1.2,  1.6,  2. ])

but it will fail to work on other types::

   >>> a = np.linspace(0, 1+1j, 6)
   >>> f(a, a)
   Traceback (most recent call last):
     File "<stdin>", line 1, in <module>
   TypeError: ufunc 'ufunc' not supported for the input types, and the inputs could not be safely
   coerced to any supported types according to the casting rule ''safe''

You might ask yourself, "why would I go through this instead of compiling a
simple iteration loop using the :ref:`@jit ` decorator?". The answer is that
NumPy ufuncs automatically get other features such as reduction, accumulation
or broadcasting. Using the example above::

   >>> a = np.arange(12).reshape(3, 4)
   >>> a
   array([[ 0,  1,  2,  3],
          [ 4,  5,  6,  7],
          [ 8,  9, 10, 11]])
   >>> f.reduce(a, axis=0)
   array([12, 15, 18, 21])
   >>> f.reduce(a, axis=1)
   array([ 6, 22, 38])
   >>> f.accumulate(a)
   array([[ 0,  1,  2,  3],
          [ 4,  6,  8, 10],
          [12, 15, 18, 21]])
   >>> f.accumulate(a, axis=1)
   array([[ 0,  1,  3,  6],
          [ 4,  9, 15, 22],
          [ 8, 17, 27, 38]])

.. seealso::
   `Standard features of ufuncs `_ (NumPy documentation).

.. note::
   Only the broadcasting features of ufuncs are supported in compiled code.

The :func:`~numba.vectorize` decorator supports multiple ufunc targets:

================= ===============================================================
Target            Description
================= ===============================================================
cpu               Single-threaded CPU
parallel          Multi-core CPU
cuda              CUDA GPU

                  .. NOTE:: This creates an *ufunc-like* object. See
                            `documentation for CUDA ufunc <../cuda/ufunc.html>`_
                            for detail.
================= ===============================================================

A general guideline is to choose different targets for different data sizes
and algorithms. The "cpu" target works well for small data sizes (approx. less
than 1KB) and low compute intensity algorithms. It has the least amount of
overhead. The "parallel" target works well for medium data sizes (approx. less
than 1MB). Threading adds a small delay. The "cuda" target works well for big
data sizes (approx. greater than 1MB) and high compute intensity algorithms.
Transferring memory to and from the GPU adds significant overhead.
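As a minimal sketch of selecting a target (the function and signature below
are illustrative, not the only reasonable choice), the ``target`` keyword
argument is passed alongside the signature list::

   import numpy as np
   from numba import vectorize, float64

   # target='parallel' asks Numba to build a multi-core CPU ufunc; for small
   # arrays the default 'cpu' target would likely be faster because it has
   # less overhead.
   @vectorize([float64(float64, float64)], target='parallel')
   def rel_diff(x, y):
       return 2 * (x - y) / (x + y)

   a = np.arange(1.0, 1e6)
   b = a + 1.0
   print(rel_diff(a, b)[:3])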
.. _guvectorize:

The ``@guvectorize`` decorator
==============================

While :func:`~numba.vectorize` allows you to write ufuncs that work on one
element at a time, the :func:`~numba.guvectorize` decorator takes the concept
one step further and allows you to write ufuncs that will work on an arbitrary
number of elements of input arrays, and take and return arrays of differing
dimensions. The typical example is a running median or a convolution filter.

Contrary to :func:`~numba.vectorize` functions, :func:`~numba.guvectorize`
functions don't return their result value: they take it as an array argument,
which must be filled in by the function. This is because the array is actually
allocated by NumPy's dispatch mechanism, which calls into the Numba-generated
code.

Similar to the :func:`~numba.vectorize` decorator, :func:`~numba.guvectorize`
also has two modes of operation: eager, or decoration-time, compilation and
lazy, or call-time, compilation.

Here is a very simple example::

   @guvectorize([(int64[:], int64, int64[:])], '(n),()->(n)')
   def g(x, y, res):
       for i in range(x.shape[0]):
           res[i] = x[i] + y

The underlying Python function simply adds a given scalar (``y``) to all
elements of a 1-dimension array. What's more interesting is the declaration.
There are two things there:

* the declaration of input and output *layouts*, in symbolic form:
  ``(n),()->(n)`` tells NumPy that the function takes an *n*-element
  one-dimension array, a scalar (symbolically denoted by the empty tuple
  ``()``) and returns an *n*-element one-dimension array;
* the list of supported concrete *signatures* as per ``@vectorize``; here, as
  in the above example, we demonstrate ``int64`` arrays.

.. note::
   1D array type can also receive scalar arguments (those with shape ``()``).
   In the above example, the second argument could also be declared as
   ``int64[:]``. In that case, the value must be read by ``y[0]``.

We can now check what the compiled ufunc does, over a simple example::

   >>> a = np.arange(5)
   >>> a
   array([0, 1, 2, 3, 4])
   >>> g(a, 2)
   array([2, 3, 4, 5, 6])

The nice thing is that NumPy will automatically dispatch over more complicated
inputs, depending on their shapes::

   >>> a = np.arange(6).reshape(2, 3)
   >>> a
   array([[0, 1, 2],
          [3, 4, 5]])
   >>> g(a, 10)
   array([[10, 11, 12],
          [13, 14, 15]])
   >>> g(a, np.array([10, 20]))
   array([[10, 11, 12],
          [23, 24, 25]])

.. note::
   Both :func:`~numba.vectorize` and :func:`~numba.guvectorize` support
   passing ``nopython=True`` :ref:`as in the @jit decorator `. Use it to
   ensure the generated code does not fallback to :term:`object mode`.

.. _overwriting-input-values:

Overwriting input values
------------------------

In most cases, writing to inputs may also appear to work - however, this
behaviour cannot be relied on. Consider the following example function::

   @guvectorize([(float64[:], float64[:])], '()->()')
   def init_values(invals, outvals):
       invals[0] = 6.5
       outvals[0] = 4.2

Calling the `init_values` function with an array of `float64` type results in
visible changes to the input::

   >>> invals = np.zeros(shape=(3, 3), dtype=np.float64)
   >>> outvals = init_values(invals)
   >>> invals
   array([[6.5, 6.5, 6.5],
          [6.5, 6.5, 6.5],
          [6.5, 6.5, 6.5]])
   >>> outvals
   array([[4.2, 4.2, 4.2],
          [4.2, 4.2, 4.2],
          [4.2, 4.2, 4.2]])

This works because NumPy can pass the input data directly into the
`init_values` function as the data `dtype` matches that of the declared
argument. However, it may also create and pass in a temporary array, in which
case changes to the input are lost.
For example, this can occur when casting is required. To demonstrate, we can
use an array of `float32` with the `init_values` function::

   >>> invals = np.zeros(shape=(3, 3), dtype=np.float32)
   >>> outvals = init_values(invals)
   >>> invals
   array([[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]], dtype=float32)

In this case, there is no change to the `invals` array because the temporary
cast array was mutated instead.

.. _dynamic-universal-functions:

Dynamic universal functions
===========================

As described above, if you do not pass any signatures to the
:func:`~numba.vectorize` decorator, your Python function will be used to build
a dynamic universal function, or :class:`~numba.DUFunc`. For example::

   from numba import vectorize

   @vectorize
   def f(x, y):
       return x * y

The resulting :func:`f` is a :class:`~numba.DUFunc` instance that starts with
no supported input types. As you make calls to :func:`f`, Numba generates new
kernels whenever you pass a previously unsupported input type. Given the
example above, the following set of interpreter interactions illustrate how
dynamic compilation works::

   >>> f
   <numba._DUFunc 'f'>
   >>> f.ufunc
   <ufunc 'f'>
   >>> f.ufunc.types
   []

The example above shows that :class:`~numba.DUFunc` instances are not ufuncs.
Rather than subclassing ufuncs, :class:`~numba.DUFunc` instances work by
keeping a :attr:`~numba.DUFunc.ufunc` member, and then delegating ufunc
property reads and method calls to this member (also known as type
aggregation). When we look at the initial types supported by the ufunc, we can
verify there are none.

Let's try to make a call to :func:`f`::

   >>> f(3,4)
   12
   >>> f.types   # shorthand for f.ufunc.types
   ['ll->l']

If this were a normal NumPy ufunc, we would have seen an exception complaining
that the ufunc couldn't handle the input types. When we call :func:`f` with
integer arguments, not only do we receive an answer, but we can verify that
Numba created a loop supporting C :code:`long` integers.

We can add additional loops by calling :func:`f` with different inputs::

   >>> f(1.,2.)
   2.0
   >>> f.types
   ['ll->l', 'dd->d']

We can now verify that Numba added a second loop for dealing with
floating-point inputs, :code:`"dd->d"`.

If we mix input types to :func:`f`, we can verify that `Numpy ufunc casting
rules`_ are still in effect::

   >>> f(1,2.)
   2.0
   >>> f.types
   ['ll->l', 'dd->d']

.. _`Numpy ufunc casting rules`: http://docs.scipy.org/doc/numpy/reference/ufuncs.html#casting-rules

This example demonstrates that calling :func:`f` with mixed types caused NumPy
to select the floating-point loop, and cast the integer argument to a
floating-point value. Thus, Numba did not create a special :code:`"dl->d"`
kernel.

This :class:`~numba.DUFunc` behavior leads us to a point similar to the
warning given above in "`The @vectorize decorator`_" subsection, but instead
of signature declaration order in the decorator, call order matters. If we had
passed in floating-point arguments first, any calls with integer arguments
would be cast to double-precision floating-point values. For example::

   >>> @vectorize
   ... def g(a, b): return a / b
   ...
   >>> g(2.,3.)
   0.66666666666666663
   >>> g(2,3)
   0.66666666666666663
   >>> g.types
   ['dd->d']

If you require precise support for various type signatures, you should specify
them in the :func:`~numba.vectorize` decorator, and not rely on dynamic
compilation.
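As a sketch of that advice (the signature list below is illustrative, not the
only reasonable choice), declaring the loops up front makes the supported
types independent of call order::

   from numba import vectorize, int64, float64

   # With explicit signatures, both loops exist from the start, so it no
   # longer matters whether the first call uses integers or floats.
   @vectorize([int64(int64, int64), float64(float64, float64)])
   def h(x, y):
       return x * y

   print(h(1., 2.))   # uses the 'dd->d' loop
   print(h(3, 4))     # uses the 'll->l' loop, unaffected by the call above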
Dynamic generalized universal functions
=======================================

Similar to a dynamic universal function, if you do not specify any types to
the :func:`~numba.guvectorize` decorator, your Python function will be used to
build a dynamic generalized universal function, or :class:`~numba.GUFunc`. For
example::

   from numba import guvectorize

   @guvectorize('(n),()->(n)')
   def g(x, y, res):
       for i in range(x.shape[0]):
           res[i] = x[i] + y

We can verify the resulting function :func:`g` is a :class:`~numba.GUFunc`
instance that starts with no supported input types. For instance::

   >>> g
   <numba._GUFunc 'g'>
   >>> g.ufunc
   <ufunc 'g'>
   >>> g.ufunc.types
   []

Similar to a :class:`~numba.DUFunc`, as one makes calls to :func:`g()`, Numba
generates new kernels for previously unsupported input types. The following
set of interpreter interactions will illustrate how dynamic compilation works
for a :class:`~numba.GUFunc`::

   >>> x = np.arange(5, dtype=np.int64)
   >>> y = 10
   >>> res = np.zeros_like(x)
   >>> g(x, y, res)
   >>> res
   array([10, 11, 12, 13, 14])
   >>> g.types
   ['ll->l']

If this were a normal :func:`guvectorize` function, we would have seen an
exception complaining that the ufunc could not handle the given input types.
When we call :func:`g()` with the input arguments, Numba creates a new loop
for the input types.

We can add additional loops by calling :func:`g` with new arguments::

   >>> x = np.arange(5, dtype=np.double)
   >>> y = 2.2
   >>> res = np.zeros_like(x)
   >>> g(x, y, res)

We can now verify that Numba added a second loop for dealing with
floating-point inputs, :code:`"dd->d"`::

   >>> g.types # shorthand for g.ufunc.types
   ['ll->l', 'dd->d']

One can also verify that NumPy ufunc casting rules are working as expected::

   >>> x = np.arange(5, dtype=np.int64)
   >>> y = 2.2
   >>> res = np.zeros_like(x)
   >>> g(x, y, res)
   >>> res
   array([2, 3, 4, 5, 6])

If you need precise support for various type signatures, you should not rely
on dynamic compilation and instead specify the types as the first argument in
the :func:`~numba.guvectorize` decorator.

numba-0.55.1/docs/source/user/withobjmode.rst000664 000000 000000 00000002336 14174536160 021152 0ustar00rootroot000000 000000 ============================================================
Callback into the Python Interpreter from within JIT'ed code
============================================================

There are rare but real cases when a nopython-mode function needs to call back
into the Python interpreter to invoke code that cannot be compiled by Numba.
Such cases include:

- logging progress for long running JIT'ed functions;
- using data structures that are not currently supported by Numba;
- debugging inside JIT'ed code using the Python debugger.

When Numba calls back into the Python interpreter, the following has to
happen:

- acquire the GIL;
- convert values in native representation back into Python objects;
- call back into the Python interpreter;
- convert returned values from the Python code into native representation;
- release the GIL.

These steps can be expensive. Users **should not** rely on the feature
described here on performance-critical paths.

.. _with_objmode:

The ``objmode`` context-manager
===============================

.. warning:: This feature can be easily mis-used. Users should first consider
             alternative approaches to achieve their intended goal before
             using this feature.
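A minimal sketch of the context manager in use (the ``py_median`` helper below
is illustrative; any interpreter-only code could go in its place) might look
like this::

   import numpy as np
   from numba import njit, objmode

   def py_median(a):
       # Plain interpreted Python: free to use any feature Numba lacks.
       return float(np.median(a))

   @njit
   def centred(a):
       with objmode(m='float64'):  # values leaving the block need a declared type
           m = py_median(a)
       return a - m

   print(centred(np.arange(5.0)))

Note that any variable assigned inside the block and used after it (here
``m``) must be given a type annotation, since that is where the conversion
from Python objects back to native representation happens.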
.. autofunction:: numba.objmode

numba-0.55.1/mypy.ini000664 000000 000000 00000003270 14174536160 014374 0ustar00rootroot000000 000000 # Global options:

[mypy]
warn_unused_configs = True
follow_imports = silent
show_error_context = True
files = **/numba/core/types/*.py, **/numba/core/datamodel/*.py, **/numba/core/rewrites/*.py, **/numba/core/unsafe/*.py

# Per-module options:
# To classify a given module as Level 1, 2 or 3 it must be added both in files (variable above) and in the lists below.

# Level 1 - modules checked on the strictest settings.
;[mypy-]
;warn_return_any = True
;disallow_any_expr = True
;disallow_any_explicit = True
;disallow_any_generics = True
;disallow_subclassing_any = True
;disallow_untyped_calls = True
;disallow_untyped_defs = True
;disallow_incomplete_defs = True
;check_untyped_defs = True
;disallow_untyped_decorators = True
;warn_unused_ignores = True
;follow_imports = normal
;warn_unreachable = True
;strict_equality = True

# Level 2 - modules that pass reasonably strict settings.
# No untyped functions allowed. Imports must be typed or explicitly ignored.
;[mypy-]
;warn_return_any = True
;disallow_untyped_defs = True
;disallow_incomplete_defs = True
;follow_imports = normal

# Level 3 - modules that pass mypy default settings (only those in `files` global setting and not in previous levels)
# Function/variables are annotated to avoid mypy errors, but annotations are not complete.
[mypy-numba.core.*]
warn_return_any = True

# Level 4 - modules that do not pass mypy check: they are excluded from "files" setting in global section

# External packages that lack annotations
[mypy-llvmlite.*]
ignore_missing_imports = True

[mypy-numpy.*]
ignore_missing_imports = True

[mypy-winreg.*]
# this can be removed after Mypy 0.78 is out with the latest typeshed
ignore_missing_imports = True

numba-0.55.1/numba/000775 000000 000000 00000000000 14174536160 013775 5ustar00rootroot000000 000000 numba-0.55.1/numba/__init__.py000664 000000 000000 00000016131 14174536160 016110 0ustar00rootroot000000 000000 """
Expose top-level symbols that are safe for import *
"""

import platform
import re
import sys
import warnings

from ._version import get_versions
from numba.misc.init_utils import generate_version_info

__version__ = get_versions()['version']
version_info = generate_version_info(__version__)
del get_versions
del generate_version_info

from numba.core import config
from numba.core import types, errors

# Re-export typeof
from numba.misc.special import (
    typeof, prange, pndindex, gdb, gdb_breakpoint, gdb_init,
    literally, literal_unroll,
)

# Re-export error classes
from numba.core.errors import *

# Re-export types itself
import numba.core.types as types

# Re-export all type names
from numba.core.types import *

# Re-export decorators
from numba.core.decorators import (cfunc, generated_jit, jit, njit, stencil,
                                   jit_module)

# Re-export vectorize decorators and the thread layer querying function
from numba.np.ufunc import (vectorize, guvectorize, threading_layer,
                            get_num_threads, set_num_threads)

# Re-export Numpy helpers
from numba.np.numpy_support import carray, farray, from_dtype

# Re-export experimental
from numba import experimental

# Initialize withcontexts
import numba.core.withcontexts
from numba.core.withcontexts import objmode_context as objmode

# Initialize target extensions
import numba.core.target_extension

# Initialize typed containers
import numba.typed

# Keep this for backward compatibility.
def test(argv, **kwds):
    # To speed up the import time, avoid importing `unittest` and other test
    # dependencies unless the user is actually trying to run tests.
    from numba.testing import _runtests as runtests
    return runtests.main(argv, **kwds)


__all__ = """
    cfunc
    from_dtype
    guvectorize
    jit
    experimental
    njit
    stencil
    jit_module
    typeof
    prange
    gdb
    gdb_breakpoint
    gdb_init
    vectorize
    objmode
    literal_unroll
    get_num_threads
    set_num_threads
    """.split() + types.__all__ + errors.__all__


_min_llvmlite_version = (0, 38, 0)
_min_llvm_version = (11, 0, 0)

def _ensure_llvm():
    """
    Make sure llvmlite is operational.
    """
    import warnings
    import llvmlite

    # Only look at the major, minor and bugfix version numbers.
    # Ignore other stuff.
    regex = re.compile(r'(\d+)\.(\d+).(\d+)')
    m = regex.match(llvmlite.__version__)
    if m:
        ver = tuple(map(int, m.groups()))
        if ver < _min_llvmlite_version:
            msg = ("Numba requires at least version %d.%d.%d of llvmlite.\n"
                   "Installed version is %s.\n"
                   "Please update llvmlite." %
                   (_min_llvmlite_version + (llvmlite.__version__,)))
            raise ImportError(msg)
    else:
        # Not matching?
        warnings.warn("llvmlite version format not recognized!")

    from llvmlite.binding import llvm_version_info, check_jit_execution

    if llvm_version_info < _min_llvm_version:
        msg = ("Numba requires at least version %d.%d.%d of LLVM.\n"
               "Installed llvmlite is built against version %d.%d.%d.\n"
               "Please update llvmlite." %
               (_min_llvm_version + llvm_version_info))
        raise ImportError(msg)

    check_jit_execution()

def _ensure_critical_deps():
    """
    Make sure Python, NumPy and SciPy have supported versions.
    """
    from numba.np.numpy_support import numpy_version
    from numba.core.utils import PYVERSION

    if PYVERSION < (3, 7):
        raise ImportError("Numba needs Python 3.7 or greater")

    if numpy_version < (1, 18):
        raise ImportError("Numba needs NumPy 1.18 or greater")
    elif numpy_version > (1, 21):
        raise ImportError("Numba needs NumPy 1.21 or less")

    try:
        import scipy
    except ImportError:
        pass
    else:
        sp_version = tuple(map(int, scipy.__version__.split('.')[:2]))
        if sp_version < (1, 0):
            raise ImportError("Numba requires SciPy version 1.0 or greater")


def _try_enable_svml():
    """
    Tries to enable SVML if configuration permits use and the library is found.
    """
    if not config.DISABLE_INTEL_SVML:
        try:
            if sys.platform.startswith('linux'):
                llvmlite.binding.load_library_permanently("libsvml.so")
            elif sys.platform.startswith('darwin'):
                llvmlite.binding.load_library_permanently("libsvml.dylib")
            elif sys.platform.startswith('win'):
                llvmlite.binding.load_library_permanently("svml_dispmd")
            else:
                return False
            # The SVML library is loaded, therefore SVML *could* be supported.
            # Now see if LLVM has been compiled with the SVML support patch.
            # If llvmlite has the checking function `has_svml` and it returns
            # True, then LLVM was compiled with SVML support and the setup
            # for SVML can proceed. We err on the side of caution and if the
            # checking function is missing, regardless of that being fine for
            # most 0.23.{0,1} llvmlite instances (i.e. conda or pip installed),
            # we assume that SVML was not compiled in. llvmlite 0.23.2 is a
            # bugfix release with the checking function present that will always
            # produce correct behaviour. For context see: #3006.
try: if not getattr(llvmlite.binding.targets, "has_svml")(): # has detection function, but no svml compiled in, therefore # disable SVML return False except AttributeError: if platform.machine() == 'x86_64' and config.DEBUG: msg = ("SVML was found but llvmlite >= 0.23.2 is " "needed to support it.") warnings.warn(msg) # does not have detection function, cannot detect reliably, # disable SVML. return False # All is well, detection function present and reports SVML is # compiled in, set the vector library to SVML. llvmlite.binding.set_option('SVML', '-vector-library=SVML') return True except: if platform.machine() == 'x86_64' and config.DEBUG: warnings.warn("SVML was not found/could not be loaded.") return False _ensure_llvm() _ensure_critical_deps() # we know llvmlite is working as the above tests passed, import it now as SVML # needs to mutate runtime options (sets the `-vector-library`). import llvmlite """ Is set to True if Intel SVML is in use. """ config.USING_SVML = _try_enable_svml() # ---------------------- WARNING WARNING WARNING ---------------------------- # The following imports occur below here (SVML init) because somewhere in their # import sequence they have a `@njit` wrapped function. This triggers too early # a bind to the underlying LLVM libraries which then irretrievably sets the LLVM # SVML state to "no SVML". See https://github.com/numba/numba/issues/4689 for # context. # ---------------------- WARNING WARNING WARNING ---------------------------- numba-0.55.1/numba/__main__.py000664 000000 000000 00000000230 14174536160 016062 0ustar00rootroot000000 000000 """Expose Numba command via ``python -m numba``.""" import sys from numba.misc.numba_entry import main if __name__ == '__main__': sys.exit(main()) numba-0.55.1/numba/_arraystruct.h000664 000000 000000 00000000763 14174536160 016676 0ustar00rootroot000000 000000 #ifndef NUMBA_ARYSTRUCT_H_ #define NUMBA_ARYSTRUCT_H_ /* * Fill in the *arystruct* with information from the Numpy array *obj*. * *arystruct*'s layout is defined in numba.targets.arrayobj (look * for the ArrayTemplate class). */ typedef struct { void *meminfo; /* see _nrt_python.c and nrt.h in numba/core/runtime */ PyObject *parent; npy_intp nitems; npy_intp itemsize; void *data; npy_intp shape_and_strides[]; } arystruct_t; #endif /* NUMBA_ARYSTRUCT_H_ */ numba-0.55.1/numba/_devicearray.cpp000664 000000 000000 00000013037 14174536160 017142 0ustar00rootroot000000 000000 /* This file contains the base class implementation for all device arrays. The * base class is implemented in C so that computing typecodes for device arrays * can be implemented efficiently. */ #include "_pymodule.h" /* Include _devicearray., but make sure we don't get the definitions intended * for consumers of the Device Array API. */ #define NUMBA_IN_DEVICEARRAY_CPP_ #include "_devicearray.h" /* DeviceArray PyObject implementation. Note that adding more members here is * presently prohibited because mapped and managed arrays derive from both * DeviceArray and NumPy's ndarray, which is also a C extension class - the * layout of the object cannot be resolved if this class also has members beyond * PyObject_HEAD. */ class DeviceArray { PyObject_HEAD }; /* Trivial traversal - DeviceArray instances own nothing. */ static int DeviceArray_traverse(DeviceArray *self, visitproc visit, void *arg) { return 0; } /* Trivial clear of all references - DeviceArray instances own nothing. 
*/ static int DeviceArray_clear(DeviceArray *self) { return 0; } /* The _devicearray.DeviceArray type */ PyTypeObject DeviceArrayType = { PyVarObject_HEAD_INIT(NULL, 0) "_devicearray.DeviceArray", /* tp_name */ sizeof(DeviceArray), /* tp_basicsize */ 0, /* tp_itemsize */ 0, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call*/ 0, /* tp_str*/ 0, /* tp_getattro*/ 0, /* tp_setattro*/ 0, /* tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags*/ "DeviceArray object", /* tp_doc */ (traverseproc) DeviceArray_traverse, /* tp_traverse */ (inquiry) DeviceArray_clear, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ 0, /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ 0, /* tp_init */ 0, /* tp_alloc */ 0, /* tp_new */ 0, /* tp_free */ 0, /* tp_is_gc */ 0, /* tp_bases */ 0, /* tp_mro */ 0, /* tp_cache */ 0, /* tp_subclasses */ 0, /* tp_weaklist */ 0, /* tp_del */ 0, /* tp_version_tag */ 0, /* tp_finalize */ #if PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION == 8 0, /* tp_vectorcall */ 0, /* tp_print */ #endif }; /* CUDA device array C API */ static void *_DeviceArray_API[1] = { (void*)&DeviceArrayType }; MOD_INIT(_devicearray) { PyObject *m = nullptr; PyObject *d = nullptr; PyObject *c_api = nullptr; int error = 0; MOD_DEF(m, "_devicearray", "No docs", NULL) if (m == NULL) goto error_occurred; c_api = PyCapsule_New((void *)_DeviceArray_API, "numba._devicearray._DEVICEARRAY_API", NULL); if (c_api == NULL) goto error_occurred; DeviceArrayType.tp_new = PyType_GenericNew; if (PyType_Ready(&DeviceArrayType) < 0) goto error_occurred; Py_INCREF(&DeviceArrayType); error = PyModule_AddObject(m, "DeviceArray", (PyObject*)(&DeviceArrayType)); if (error) goto error_occurred; d = PyModule_GetDict(m); if (d == NULL) goto error_occurred; error = PyDict_SetItemString(d, "_DEVICEARRAY_API", c_api); Py_DECREF(c_api); if (error) goto error_occurred; return MOD_SUCCESS_VAL(m); error_occurred: Py_XDECREF(m); Py_XDECREF(c_api); Py_XDECREF((PyObject*)&DeviceArrayType); return MOD_ERROR_VAL; } numba-0.55.1/numba/_devicearray.h000664 000000 000000 00000001232 14174536160 016601 0ustar00rootroot000000 000000 #ifndef NUMBA_DEVICEARRAY_H_ #define NUMBA_DEVICEARRAY_H_ #ifdef __cplusplus extern "C" { #endif /* These definitions should only be used by consumers of the Device Array API. * Consumers access the API through the opaque pointer stored in * _devicearray._DEVICEARRAY_API. We don't want these definitions in * _devicearray.cpp itself because they would conflict with the actual * implementations there. */ #ifndef NUMBA_IN_DEVICEARRAY_CPP_ extern void **DeviceArray_API; #define DeviceArrayType (*(PyTypeObject*)DeviceArray_API[0]) #endif /* ndef NUMBA_IN_DEVICEARRAY_CPP */ #ifdef __cplusplus } #endif #endif /* NUMBA_DEVICEARRAY_H_ */ numba-0.55.1/numba/_dispatcher.cpp000664 000000 000000 00000121210 14174536160 016763 0ustar00rootroot000000 000000 #include "_pymodule.h" #include #include #include #include #include "_typeof.h" #include "frameobject.h" #include "core/typeconv/typeconv.hpp" #include "_devicearray.h" /* * Notes on the C_TRACE macro: * * The original C_TRACE macro (from ceval.c) would call * PyTrace_C_CALL et al., for which the frame argument wouldn't * be usable. 
Since we explicitly synthesize a frame using the * original Python code object, we call PyTrace_CALL instead so * the profiler can report the correct source location. * * Likewise, while ceval.c would call PyTrace_C_EXCEPTION in case * of error, the profiler would simply expect a RETURN in case of * a Python function, so we generate that here (making sure the * exception state is preserved correctly). * */ /* * NOTE: There is a version split for tracing code. Python 3.10 introduced a * trace_info structure to help make tracing more robust. See: * https://github.com/python/cpython/pull/24726 */ #if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 10) /* * Code originally from: * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L36-L40 */ typedef struct { PyCodeObject *code; // The code object for the bounds. May be NULL. PyCodeAddressRange bounds; // Only valid if code != NULL. CFrame cframe; } PyTraceInfo; /* * Code originally from: * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Objects/codeobject.c#L1257-L1266 * NOTE: The function is renamed. */ static void _nb_PyLineTable_InitAddressRange(const char *linetable, Py_ssize_t length, int firstlineno, PyCodeAddressRange *range) { range->opaque.lo_next = linetable; range->opaque.limit = range->opaque.lo_next + length; range->ar_start = -1; range->ar_end = 0; range->opaque.computed_line = firstlineno; range->ar_line = -1; } /* * Code originally from: * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Objects/codeobject.c#L1269-L1275 * NOTE: The function is renamed. */ static int _nb_PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds) { const char *linetable = PyBytes_AS_STRING(co->co_linetable); Py_ssize_t length = PyBytes_GET_SIZE(co->co_linetable); _nb_PyLineTable_InitAddressRange(linetable, length, co->co_firstlineno, bounds); return bounds->ar_line; } /* * Code originally from: * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5468-L5475 * NOTE: The call to _PyCode_InitAddressRange is renamed. 
*/ static void initialize_trace_info(PyTraceInfo *trace_info, PyFrameObject *frame) { if (trace_info->code != frame->f_code) { trace_info->code = frame->f_code; _nb_PyCode_InitAddressRange(frame->f_code, &trace_info->bounds); } } /* * Code originally from: * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5477-L5501 */ static int call_trace(Py_tracefunc func, PyObject *obj, PyThreadState *tstate, PyFrameObject *frame, PyTraceInfo *trace_info, int what, PyObject *arg) { int result; if (tstate->tracing) return 0; tstate->tracing++; tstate->cframe->use_tracing = 0; if (frame->f_lasti < 0) { frame->f_lineno = frame->f_code->co_firstlineno; } else { initialize_trace_info(trace_info, frame); frame->f_lineno = _PyCode_CheckLineNumber(frame->f_lasti*sizeof(_Py_CODEUNIT), &trace_info->bounds); } result = func(obj, frame, what, arg); frame->f_lineno = 0; tstate->cframe->use_tracing = ((tstate->c_tracefunc != NULL) || (tstate->c_profilefunc != NULL)); tstate->tracing--; return result; } /* * Code originally from: * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5445-L5466 */ static int call_trace_protected(Py_tracefunc func, PyObject *obj, PyThreadState *tstate, PyFrameObject *frame, PyTraceInfo *trace_info, int what, PyObject *arg) { PyObject *type, *value, *traceback; int err; PyErr_Fetch(&type, &value, &traceback); err = call_trace(func, obj, tstate, frame, trace_info, what, arg); if (err == 0) { PyErr_Restore(type, value, traceback); return 0; } else { Py_XDECREF(type); Py_XDECREF(value); Py_XDECREF(traceback); return -1; } } /* * Code originally from: * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5810-L5839 * NOTE: The state test https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5811 * has been removed, it's dealt with in call_cfunc. 
*/ #define C_TRACE(x, call) \ if (call_trace(tstate->c_profilefunc, tstate->c_profileobj, \ tstate, tstate->frame, &trace_info, PyTrace_CALL,\ cfunc)) \ x = NULL; \ else \ { \ x = call; \ if (tstate->c_profilefunc != NULL) \ { \ if (x == NULL) \ { \ call_trace_protected(tstate->c_profilefunc, \ tstate->c_profileobj, \ tstate, tstate->frame, \ &trace_info, \ PyTrace_RETURN, cfunc); \ /* XXX should pass (type, value, tb) */ \ } \ else \ { \ if (call_trace(tstate->c_profilefunc, \ tstate->c_profileobj, \ tstate, tstate->frame, \ &trace_info, \ PyTrace_RETURN, cfunc)) \ { \ Py_DECREF(x); \ x = NULL; \ } \ } \ } \ } #else /* * Code originally from: * https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4242-L4257 */ static int call_trace(Py_tracefunc func, PyObject *obj, PyThreadState *tstate, PyFrameObject *frame, int what, PyObject *arg) { int result; if (tstate->tracing) return 0; tstate->tracing++; tstate->use_tracing = 0; result = func(obj, frame, what, arg); tstate->use_tracing = ((tstate->c_tracefunc != NULL) || (tstate->c_profilefunc != NULL)); tstate->tracing--; return result; } /* * Code originally from: * https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4220-L4240 */ static int call_trace_protected(Py_tracefunc func, PyObject *obj, PyThreadState *tstate, PyFrameObject *frame, int what, PyObject *arg) { PyObject *type, *value, *traceback; int err; PyErr_Fetch(&type, &value, &traceback); err = call_trace(func, obj, tstate, frame, what, arg); if (err == 0) { PyErr_Restore(type, value, traceback); return 0; } else { Py_XDECREF(type); Py_XDECREF(value); Py_XDECREF(traceback); return -1; } } /* * Code originally from: * https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4520-L4549 * NOTE: The state test https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4521 * has been removed, it's dealt with in call_cfunc. */ #define C_TRACE(x, call) \ if (call_trace(tstate->c_profilefunc, tstate->c_profileobj, \ tstate, tstate->frame, PyTrace_CALL, cfunc)) \ x = NULL; \ else \ { \ x = call; \ if (tstate->c_profilefunc != NULL) \ { \ if (x == NULL) \ { \ call_trace_protected(tstate->c_profilefunc, \ tstate->c_profileobj, \ tstate, tstate->frame, \ PyTrace_RETURN, cfunc); \ /* XXX should pass (type, value, tb) */ \ } \ else \ { \ if (call_trace(tstate->c_profilefunc, \ tstate->c_profileobj, \ tstate, tstate->frame, \ PyTrace_RETURN, cfunc)) \ { \ Py_DECREF(x); \ x = NULL; \ } \ } \ } \ } #endif typedef std::vector TypeTable; typedef std::vector Functions; /* The Dispatcher class is the base class of all dispatchers in the CPU and CUDA targets. Its main responsibilities are: - Resolving the best overload to call for a given set of arguments, and - Calling the resolved overload. This logic is implemented within this class for efficiency (lookup of the appropriate overload needs to be fast) and ease of implementation (calling directly into a compiled function using a function pointer is easier within the C++ code where the overload has been resolved). */ class Dispatcher { public: PyObject_HEAD /* Whether compilation of new overloads is permitted */ char can_compile; /* Whether fallback to object mode is permitted */ char can_fallback; /* Whether types must match exactly when resolving overloads. If not, conversions (e.g. float32 -> float64) are permitted when searching for a match. 
    */
    char exact_match_required;

    /* Borrowed reference */
    PyObject *fallbackdef;

    /* Whether to fold named arguments and default values
       (false for lifted loops) */
    int fold_args;

    /* Whether the last positional argument is a stararg */
    int has_stararg;

    /* Tuple of argument names */
    PyObject *argnames;
    /* Tuple of default values */
    PyObject *defargs;

    /* Number of arguments to function */
    int argct;

    /* Used for selecting overloaded function implementations */
    TypeManager *tm;

    /* An array of overloads */
    Functions functions;

    /* A flattened array of argument types to all overloads
     * (invariant: sizeof(overloads) == argct * sizeof(functions)) */
    TypeTable overloads;

    /* Add a new overload.
       Parameters:
       - args: An array of Type objects, one for each parameter
       - callable: The callable implementing this overload.
    */
    void addDefinition(Type args[], PyObject *callable) {
        overloads.reserve(argct + overloads.size());
        for (int i=0; i<argct; ++i) {
            overloads.push_back(args[i]);
        }
        functions.push_back(callable);
    }

    /* Find the overload that best matches the given signature.
       Parameters:
       - sig: An array of Type objects, one for each argument
       - matches: Output parameter; set to the number of matching overloads
       - allow_unsafe: Whether unsafe type conversions are permitted
       - exact_match_required: Whether types must match exactly
    */
    PyObject* resolve(Type sig[], int &matches, bool allow_unsafe,
                      bool exact_match_required) const {
        const int ovct = functions.size();
        int selected;
        matches = 0;
        if (ovct > 0) {
            matches = tm->selectOverload(sig, &overloads[0], selected, argct,
                                         ovct, allow_unsafe,
                                         exact_match_required);
        }
        if (matches == 1) {
            return functions[selected];
        }
        return NULL;
    }

    /* Remove all overloads */
    void clear() {
        functions.clear();
        overloads.clear();
    }
};

static int
Dispatcher_traverse(Dispatcher *self, visitproc visit, void *arg)
{
    Py_VISIT(self->defargs);
    return 0;
}

static void
Dispatcher_dealloc(Dispatcher *self)
{
    Py_XDECREF(self->argnames);
    Py_XDECREF(self->defargs);
    self->clear();
    Py_TYPE(self)->tp_free((PyObject*)self);
}


static int
Dispatcher_init(Dispatcher *self, PyObject *args, PyObject *kwds)
{
    PyObject *tmaddrobj;
    void *tmaddr;
    int argct;
    int can_fallback;
    int has_stararg = 0;
    int exact_match_required = 0;

    if (!PyArg_ParseTuple(args, "OiiO!O!i|ii", &tmaddrobj, &argct,
                          &self->fold_args,
                          &PyTuple_Type, &self->argnames,
                          &PyTuple_Type, &self->defargs,
                          &can_fallback,
                          &has_stararg,
                          &exact_match_required
                          )) {
        return -1;
    }
    Py_INCREF(self->argnames);
    Py_INCREF(self->defargs);
    tmaddr = PyLong_AsVoidPtr(tmaddrobj);
    self->tm = static_cast<TypeManager*>(tmaddr);
    self->argct = argct;
    self->can_compile = 1;
    self->can_fallback = can_fallback;
    self->fallbackdef = NULL;
    self->has_stararg = has_stararg;
    self->exact_match_required = exact_match_required;
    return 0;
}

static PyObject *
Dispatcher_clear(Dispatcher *self, PyObject *args)
{
    self->clear();
    Py_RETURN_NONE;
}

static
PyObject*
Dispatcher_Insert(Dispatcher *self, PyObject *args, PyObject *kwds)
{
    /* The cuda kwarg is a temporary addition until CUDA overloads are compiled
     * functions. Once they are compiled functions, kwargs can be removed from
     * this function. */
    static char *keywords[] = { (char*)"sig", (char*)"func", (char*)"objectmode", (char*)"cuda", NULL };

    PyObject *sigtup, *cfunc;
    int i, sigsz;
    int *sig;
    int objectmode = 0;
    int cuda = 0;

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|ip", keywords, &sigtup,
                                     &cfunc, &objectmode, &cuda)) {
        return NULL;
    }

    if (!cuda && !PyObject_TypeCheck(cfunc, &PyCFunction_Type) ) {
        PyErr_SetString(PyExc_TypeError, "must be builtin_function_or_method");
        return NULL;
    }

    sigsz = PySequence_Fast_GET_SIZE(sigtup);
    sig = new int[sigsz];

    for (i = 0; i < sigsz; ++i) {
        sig[i] = PyLong_AsLong(PySequence_Fast_GET_ITEM(sigtup, i));
    }

    /* The reference to cfunc is borrowed; this only works because the
       derived Python class also stores an (owned) reference to cfunc.
*/ self->addDefinition(sig, cfunc); /* Add pure python fallback */ if (!self->fallbackdef && objectmode){ self->fallbackdef = cfunc; } delete[] sig; Py_RETURN_NONE; } static void explain_issue(PyObject *dispatcher, PyObject *args, PyObject *kws, const char *method_name, const char *default_msg) { PyObject *callback, *result; callback = PyObject_GetAttrString(dispatcher, method_name); if (!callback) { PyErr_SetString(PyExc_TypeError, default_msg); return; } result = PyObject_Call(callback, args, kws); Py_DECREF(callback); if (result != NULL) { PyErr_Format(PyExc_RuntimeError, "%s must raise an exception", method_name); Py_DECREF(result); } } static void explain_ambiguous(PyObject *dispatcher, PyObject *args, PyObject *kws) { explain_issue(dispatcher, args, kws, "_explain_ambiguous", "Ambiguous overloading"); } static void explain_matching_error(PyObject *dispatcher, PyObject *args, PyObject *kws) { explain_issue(dispatcher, args, kws, "_explain_matching_error", "No matching definition"); } static int search_new_conversions(PyObject *dispatcher, PyObject *args, PyObject *kws) { PyObject *callback, *result; int res; callback = PyObject_GetAttrString(dispatcher, "_search_new_conversions"); if (!callback) { return -1; } result = PyObject_Call(callback, args, kws); Py_DECREF(callback); if (result == NULL) { return -1; } if (!PyBool_Check(result)) { Py_DECREF(result); PyErr_SetString(PyExc_TypeError, "_search_new_conversions() should return a boolean"); return -1; } res = (result == Py_True) ? 1 : 0; Py_DECREF(result); return res; } /* A custom, fast, inlinable version of PyCFunction_Call() */ static PyObject * call_cfunc(Dispatcher *self, PyObject *cfunc, PyObject *args, PyObject *kws, PyObject *locals) { PyCFunctionWithKeywords fn; PyThreadState *tstate; assert(PyCFunction_Check(cfunc)); assert(PyCFunction_GET_FLAGS(cfunc) == (METH_VARARGS | METH_KEYWORDS)); fn = (PyCFunctionWithKeywords) PyCFunction_GET_FUNCTION(cfunc); tstate = PyThreadState_GET(); #if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 10) /* * On Python 3.10+ trace_info comes from somewhere up in PyFrameEval et al, * Numba doesn't have access to that so creates an equivalent struct and * wires it up against the cframes. This is passed into the tracing * functions. * * Code originally from: * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L1611-L1622 */ PyTraceInfo trace_info; trace_info.code = NULL; // not initialized CFrame *prev_cframe = tstate->cframe; trace_info.cframe.use_tracing = prev_cframe->use_tracing; trace_info.cframe.previous = prev_cframe; if (trace_info.cframe.use_tracing && tstate->c_profilefunc) #else /* * On Python prior to 3.10, tracing state is a member of the threadstate */ if (tstate->use_tracing && tstate->c_profilefunc) #endif { /* * The following code requires some explaining: * * We want the jit-compiled function to be visible to the profiler, so we * need to synthesize a frame for it. * The PyFrame_New() constructor doesn't do anything with the 'locals' value if the 'code's * 'CO_NEWLOCALS' flag is set (which is always the case nowadays). * So, to get local variables into the frame, we have to manually set the 'f_locals' * member, then call `PyFrame_LocalsToFast`, where a subsequent call to the `frame.f_locals` * property (by virtue of the `frame_getlocals` function in frameobject.c) will find them. 
*/ PyCodeObject *code = (PyCodeObject*)PyObject_GetAttrString((PyObject*)self, "__code__"); PyObject *globals = PyDict_New(); PyObject *builtins = PyEval_GetBuiltins(); PyFrameObject *frame = NULL; PyObject *result = NULL; if (!code) { PyErr_Format(PyExc_RuntimeError, "No __code__ attribute found."); goto error; } /* Populate builtins, which is required by some JITted functions */ if (PyDict_SetItemString(globals, "__builtins__", builtins)) { goto error; } /* unset the CO_OPTIMIZED flag, make the frame get a new locals dict */ code->co_flags &= 0xFFFE; frame = PyFrame_New(tstate, code, globals, locals); if (frame == NULL) { goto error; } /* Populate the 'fast locals' in `frame` */ PyFrame_LocalsToFast(frame, 0); tstate->frame = frame; C_TRACE(result, fn(PyCFunction_GET_SELF(cfunc), args, kws)); /* write changes back to locals? */ PyFrame_FastToLocals(frame); tstate->frame = frame->f_back; error: Py_XDECREF(frame); Py_XDECREF(globals); Py_XDECREF(code); return result; } else { return fn(PyCFunction_GET_SELF(cfunc), args, kws); } } static PyObject* compile_and_invoke(Dispatcher *self, PyObject *args, PyObject *kws, PyObject *locals) { /* Compile a new one */ PyObject *cfa, *cfunc, *retval; cfa = PyObject_GetAttrString((PyObject*)self, "_compile_for_args"); if (cfa == NULL) return NULL; /* NOTE: we call the compiled function ourselves instead of letting the Python derived class do it. This is for proper behaviour of globals() in jitted functions (issue #476). */ cfunc = PyObject_Call(cfa, args, kws); Py_DECREF(cfa); if (cfunc == NULL) return NULL; if (PyObject_TypeCheck(cfunc, &PyCFunction_Type)) { retval = call_cfunc(self, cfunc, args, kws, locals); } else { /* Re-enter interpreter */ retval = PyObject_Call(cfunc, args, kws); } Py_DECREF(cfunc); return retval; } /* A copy of compile_and_invoke, that only compiles. This is needed for CUDA * kernels, because its overloads are Python instances of the _Kernel class, * rather than compiled functions. Once CUDA overloads are compiled functions, * cuda_compile_only can be removed. */ static PyObject* cuda_compile_only(Dispatcher *self, PyObject *args, PyObject *kws, PyObject *locals) { /* Compile a new one */ PyObject *cfa, *cfunc; cfa = PyObject_GetAttrString((PyObject*)self, "_compile_for_args"); if (cfa == NULL) return NULL; cfunc = PyObject_Call(cfa, args, kws); Py_DECREF(cfa); return cfunc; } static int find_named_args(Dispatcher *self, PyObject **pargs, PyObject **pkws) { PyObject *oldargs = *pargs, *newargs; PyObject *kws = *pkws; Py_ssize_t pos_args = PyTuple_GET_SIZE(oldargs); Py_ssize_t named_args, total_args, i; Py_ssize_t func_args = PyTuple_GET_SIZE(self->argnames); Py_ssize_t defaults = PyTuple_GET_SIZE(self->defargs); /* Last parameter with a default value */ Py_ssize_t last_def = (self->has_stararg) ? 
func_args - 2 : func_args - 1; /* First parameter with a default value */ Py_ssize_t first_def = last_def - defaults + 1; /* Minimum number of required arguments */ Py_ssize_t minargs = first_def; if (kws != NULL) named_args = PyDict_Size(kws); else named_args = 0; total_args = pos_args + named_args; if (!self->has_stararg && total_args > func_args) { PyErr_Format(PyExc_TypeError, "too many arguments: expected %d, got %d", (int) func_args, (int) total_args); return -1; } else if (total_args < minargs) { if (minargs == func_args) PyErr_Format(PyExc_TypeError, "not enough arguments: expected %d, got %d", (int) minargs, (int) total_args); else PyErr_Format(PyExc_TypeError, "not enough arguments: expected at least %d, got %d", (int) minargs, (int) total_args); return -1; } newargs = PyTuple_New(func_args); if (!newargs) return -1; /* First pack the stararg */ if (self->has_stararg) { Py_ssize_t stararg_size = Py_MAX(0, pos_args - func_args + 1); PyObject *stararg = PyTuple_New(stararg_size); if (!stararg) { Py_DECREF(newargs); return -1; } for (i = 0; i < stararg_size; i++) { PyObject *value = PyTuple_GET_ITEM(oldargs, func_args - 1 + i); Py_INCREF(value); PyTuple_SET_ITEM(stararg, i, value); } /* Put it in last position */ PyTuple_SET_ITEM(newargs, func_args - 1, stararg); } for (i = 0; i < pos_args; i++) { PyObject *value = PyTuple_GET_ITEM(oldargs, i); if (self->has_stararg && i >= func_args - 1) { /* Skip stararg */ break; } Py_INCREF(value); PyTuple_SET_ITEM(newargs, i, value); } /* Iterate over missing positional arguments, try to find them in named arguments or default values. */ for (i = pos_args; i < func_args; i++) { PyObject *name = PyTuple_GET_ITEM(self->argnames, i); if (self->has_stararg && i >= func_args - 1) { /* Skip stararg */ break; } if (kws != NULL) { /* Named argument? */ PyObject *value = PyDict_GetItem(kws, name); if (value != NULL) { Py_INCREF(value); PyTuple_SET_ITEM(newargs, i, value); named_args--; continue; } } if (i >= first_def && i <= last_def) { /* Argument has a default value? 
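
          Worked example (illustrative, not from the original source): for
          def f(a, b, c=1, d=2) with no stararg, func_args = 4 and
          defaults = 2, hence last_def = 3, first_def = 2 and minargs = 2.
          A call f(5, b=6) binds a positionally, finds b among the named
          arguments, and fills c and d (i = 2 and 3, both within
          [first_def, last_def]) from defargs[i - first_def].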
*/ PyObject *value = PyTuple_GET_ITEM(self->defargs, i - first_def); Py_INCREF(value); PyTuple_SET_ITEM(newargs, i, value); continue; } else if (i < func_args - 1 || !self->has_stararg) { PyErr_Format(PyExc_TypeError, "missing argument '%s'", PyString_AsString(name)); Py_DECREF(newargs); return -1; } } if (named_args) { PyErr_Format(PyExc_TypeError, "some keyword arguments unexpected"); Py_DECREF(newargs); return -1; } *pargs = newargs; *pkws = NULL; return 0; } /* * Management of thread-local */ #ifdef _MSC_VER #define THREAD_LOCAL(ty) __declspec(thread) ty #else /* Non-standard C99 extension that's understood by gcc and clang */ #define THREAD_LOCAL(ty) __thread ty #endif static THREAD_LOCAL(bool) use_tls_target_stack; struct raii_use_tls_target_stack { bool old_setting; raii_use_tls_target_stack(bool new_setting) : old_setting(use_tls_target_stack) { use_tls_target_stack = new_setting; } ~raii_use_tls_target_stack() { use_tls_target_stack = old_setting; } }; static PyObject* Dispatcher_call(Dispatcher *self, PyObject *args, PyObject *kws) { PyObject *tmptype, *retval = NULL; int *tys = NULL; int argct; int i; int prealloc[24]; int matches; PyObject *cfunc; PyThreadState *ts = PyThreadState_Get(); PyObject *locals = NULL; // Check TLS target stack if (use_tls_target_stack) { raii_use_tls_target_stack turn_off(false); PyObject * meth_call_tls_target; meth_call_tls_target = PyObject_GetAttrString((PyObject*)self, "_call_tls_target"); if (!meth_call_tls_target) return NULL; // Transfer control to self._call_tls_target retval = PyObject_Call(meth_call_tls_target, args, kws); Py_DECREF(meth_call_tls_target); return retval; } /* If compilation is enabled, ensure that an exact match is found and if * not compile one */ int exact_match_required = self->can_compile ? 1 : self->exact_match_required; #if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 10) if (ts->tracing && ts->c_profilefunc) { #else if (ts->use_tracing && ts->c_profilefunc) { #endif locals = PyEval_GetLocals(); if (locals == NULL) { goto CLEANUP; } } if (self->fold_args) { if (find_named_args(self, &args, &kws)) return NULL; } else Py_INCREF(args); /* Now we own a reference to args */ argct = PySequence_Fast_GET_SIZE(args); if (argct < (Py_ssize_t) (sizeof(prealloc) / sizeof(int))) tys = prealloc; else tys = new int[argct]; for (i = 0; i < argct; ++i) { tmptype = PySequence_Fast_GET_ITEM(args, i); tys[i] = typeof_typecode((PyObject *) self, tmptype); if (tys[i] == -1) { if (self->can_fallback){ /* We will clear the exception if fallback is allowed. */ PyErr_Clear(); } else { goto CLEANUP; } } } /* We only allow unsafe conversions if compilation of new specializations has been disabled. Note that the number of matches is returned in matches by resolve, which accepts it as a reference. */ cfunc = self->resolve(tys, matches, !self->can_compile, exact_match_required); if (matches == 0 && !self->can_compile) { /* * If we can't compile a new specialization, look for * matching signatures for which conversions haven't been * registered on the C++ TypeManager. 
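
       The Python-level hook _search_new_conversions() returns True when it
       has registered at least one new conversion; in that case resolve()
       is retried exactly once below against the enlarged conversion table.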
*/ int res = search_new_conversions((PyObject *) self, args, kws); if (res < 0) { retval = NULL; goto CLEANUP; } if (res > 0) { /* Retry with the newly registered conversions */ cfunc = self->resolve(tys, matches, !self->can_compile, exact_match_required); } } if (matches == 1) { /* Definition is found */ retval = call_cfunc(self, cfunc, args, kws, locals); } else if (matches == 0) { /* No matching definition */ if (self->can_compile) { retval = compile_and_invoke(self, args, kws, locals); } else if (self->fallbackdef) { /* Have object fallback */ retval = call_cfunc(self, self->fallbackdef, args, kws, locals); } else { /* Raise TypeError */ explain_matching_error((PyObject *) self, args, kws); retval = NULL; } } else if (self->can_compile) { /* Ambiguous, but are allowed to compile */ retval = compile_and_invoke(self, args, kws, locals); } else { /* Ambiguous */ explain_ambiguous((PyObject *) self, args, kws); retval = NULL; } CLEANUP: if (tys != prealloc) delete[] tys; Py_DECREF(args); return retval; } /* Based on Dispatcher_call above, with the following differences: 1. It does not invoke the definition of the function. 2. It returns the definition, instead of a value returned by the function. This is because CUDA functions are, at present, _Kernel objects rather than compiled functions. */ static PyObject* Dispatcher_cuda_call(Dispatcher *self, PyObject *args, PyObject *kws) { PyObject *tmptype, *retval = NULL; int *tys = NULL; int argct; int i; int prealloc[24]; int matches; PyObject *cfunc; PyThreadState *ts = PyThreadState_Get(); PyObject *locals = NULL; /* If compilation is enabled, ensure that an exact match is found and if * not compile one */ int exact_match_required = self->can_compile ? 1 : self->exact_match_required; #if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 10) if (ts->tracing && ts->c_profilefunc) { #else if (ts->use_tracing && ts->c_profilefunc) { #endif locals = PyEval_GetLocals(); if (locals == NULL) { goto CLEANUP; } } if (self->fold_args) { if (find_named_args(self, &args, &kws)) return NULL; } else Py_INCREF(args); /* Now we own a reference to args */ argct = PySequence_Fast_GET_SIZE(args); if (argct < (Py_ssize_t) (sizeof(prealloc) / sizeof(int))) tys = prealloc; else tys = new int[argct]; for (i = 0; i < argct; ++i) { tmptype = PySequence_Fast_GET_ITEM(args, i); tys[i] = typeof_typecode((PyObject *) self, tmptype); if (tys[i] == -1) { if (self->can_fallback){ /* We will clear the exception if fallback is allowed. */ PyErr_Clear(); } else { goto CLEANUP; } } } /* We only allow unsafe conversions if compilation of new specializations has been disabled. */ cfunc = self->resolve(tys, matches, !self->can_compile, exact_match_required); if (matches == 0 && !self->can_compile) { /* * If we can't compile a new specialization, look for * matching signatures for which conversions haven't been * registered on the C++ TypeManager. 
*/ int res = search_new_conversions((PyObject *) self, args, kws); if (res < 0) { retval = NULL; goto CLEANUP; } if (res > 0) { /* Retry with the newly registered conversions */ cfunc = self->resolve(tys, matches, !self->can_compile, exact_match_required); } } if (matches == 1) { /* Definition is found */ retval = cfunc; Py_INCREF(retval); } else if (matches == 0) { /* No matching definition */ if (self->can_compile) { retval = cuda_compile_only(self, args, kws, locals); } else if (self->fallbackdef) { /* Have object fallback */ retval = call_cfunc(self, self->fallbackdef, args, kws, locals); } else { /* Raise TypeError */ explain_matching_error((PyObject *) self, args, kws); retval = NULL; } } else if (self->can_compile) { /* Ambiguous, but are allowed to compile */ retval = cuda_compile_only(self, args, kws, locals); } else { /* Ambiguous */ explain_ambiguous((PyObject *) self, args, kws); retval = NULL; } CLEANUP: if (tys != prealloc) delete[] tys; Py_DECREF(args); return retval; } static int import_devicearray(void) { PyObject *devicearray = PyImport_ImportModule("numba._devicearray"); if (devicearray == NULL) { return -1; } Py_DECREF(devicearray); DeviceArray_API = (void**)PyCapsule_Import("numba._devicearray._DEVICEARRAY_API", 0); if (DeviceArray_API == NULL) { return -1; } return 0; } static PyMethodDef Dispatcher_methods[] = { { "_clear", (PyCFunction)Dispatcher_clear, METH_NOARGS, NULL }, { "_insert", (PyCFunction)Dispatcher_Insert, METH_VARARGS | METH_KEYWORDS, "insert new definition"}, { "_cuda_call", (PyCFunction)Dispatcher_cuda_call, METH_VARARGS | METH_KEYWORDS, "CUDA call resolution" }, { NULL }, }; static PyMemberDef Dispatcher_members[] = { {(char*)"_can_compile", T_BOOL, offsetof(Dispatcher, can_compile), 0, NULL }, {NULL} /* Sentinel */ }; static PyTypeObject DispatcherType = { PyVarObject_HEAD_INIT(NULL, 0) "_dispatcher.Dispatcher", /* tp_name */ sizeof(Dispatcher), /* tp_basicsize */ 0, /* tp_itemsize */ (destructor)Dispatcher_dealloc, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ (PyCFunctionWithKeywords)Dispatcher_call, /* tp_call*/ 0, /* tp_str*/ 0, /* tp_getattro*/ 0, /* tp_setattro*/ 0, /* tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags*/ "Dispatcher object", /* tp_doc */ (traverseproc) Dispatcher_traverse, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ Dispatcher_methods, /* tp_methods */ Dispatcher_members, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ (initproc)Dispatcher_init, /* tp_init */ 0, /* tp_alloc */ 0, /* tp_new */ 0, /* tp_free */ 0, /* tp_is_gc */ 0, /* tp_bases */ 0, /* tp_mro */ 0, /* tp_cache */ 0, /* tp_subclasses */ 0, /* tp_weaklist */ 0, /* tp_del */ 0, /* tp_version_tag */ 0, /* tp_finalize */ #if PY_MAJOR_VERSION == 3 /* Python 3.8 has two slots, 3.9 has one. 
*/
#if PY_MINOR_VERSION > 7
    0,                                           /* tp_vectorcall */
#if PY_MINOR_VERSION == 8
    0,                                           /* tp_print */
#endif
#endif
#endif
};

static PyObject *compute_fingerprint(PyObject *self, PyObject *args)
{
    PyObject *val;
    if (!PyArg_ParseTuple(args, "O:compute_fingerprint", &val))
        return NULL;
    return typeof_compute_fingerprint(val);
}

static PyObject *set_use_tls_target_stack(PyObject *self, PyObject *args)
{
    int val;
    if (!PyArg_ParseTuple(args, "p", &val))
        return NULL;
    bool old = use_tls_target_stack;
    use_tls_target_stack = val;
    /* return the old value */
    if (old) {
        Py_RETURN_TRUE;
    } else {
        Py_RETURN_FALSE;
    }
}

static PyMethodDef ext_methods[] = {
#define declmethod(func) { #func , ( PyCFunction )func , METH_VARARGS , NULL }
    declmethod(typeof_init),
    declmethod(compute_fingerprint),
    declmethod(set_use_tls_target_stack),
    { NULL },
#undef declmethod
};

MOD_INIT(_dispatcher) {
    if (import_devicearray() < 0) {
        PyErr_Print();
        PyErr_SetString(PyExc_ImportError,
                        "numba._devicearray failed to import");
        return MOD_ERROR_VAL;
    }

    PyObject *m;
    MOD_DEF(m, "_dispatcher", "No docs", ext_methods)
    if (m == NULL)
        return MOD_ERROR_VAL;

    DispatcherType.tp_new = PyType_GenericNew;
    if (PyType_Ready(&DispatcherType) < 0) {
        return MOD_ERROR_VAL;
    }
    Py_INCREF(&DispatcherType);
    PyModule_AddObject(m, "Dispatcher", (PyObject*)(&DispatcherType));

    return MOD_SUCCESS_VAL(m);
}
numba-0.55.1/numba/_dynfunc.c000664 000000 000000 00000041147 14174536160 015755 0ustar00rootroot000000 000000 /*
 * Definition of Environment and Closure objects.
 * This module is included by _dynfuncmod.c and by pycc-compiled modules.
 */

#include "_pymodule.h"

#include <string.h>

/* NOTE: EnvironmentObject and ClosureObject must be kept in sync with
 * the definitions in numba/targets/base.py (EnvBody and ClosureBody).
 */

/*
 * EnvironmentObject hosts data needed for execution of compiled functions.
 */
typedef struct {
    PyObject_HEAD
    PyObject *globals;
    /* Assorted "constants" that are needed at runtime to execute
       the compiled function.  This can include frozen closure variables,
       lifted loops, etc.
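
       For instance (an illustrative sketch, not code from this file), a
       compiled function handed this environment could fetch its first
       frozen constant with:

           PyObject *c = PyList_GET_ITEM(env->consts, 0);   // borrowed ref

       env_new() below creates consts as an initially empty list that is
       populated later with whatever the compiled code needs.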
*/ PyObject *consts; } EnvironmentObject; static PyMemberDef env_members[] = { {"globals", T_OBJECT, offsetof(EnvironmentObject, globals), READONLY, NULL}, {"consts", T_OBJECT, offsetof(EnvironmentObject, consts), READONLY, NULL}, {NULL} /* Sentinel */ }; static int env_traverse(EnvironmentObject *env, visitproc visit, void *arg) { Py_VISIT(env->globals); Py_VISIT(env->consts); return 0; } static int env_clear(EnvironmentObject *env) { Py_CLEAR(env->globals); Py_CLEAR(env->consts); return 0; } static void env_dealloc(EnvironmentObject *env) { PyObject_GC_UnTrack((PyObject *) env); env_clear(env); Py_TYPE(env)->tp_free((PyObject *) env); } static EnvironmentObject * env_new_empty(PyTypeObject* type) { return (EnvironmentObject *) PyType_GenericNew(type, NULL, NULL); } static PyObject * env_new(PyTypeObject* type, PyObject* args, PyObject* kwds) { PyObject *globals; EnvironmentObject *env; static char *kwlist[] = {"globals", 0}; if (!PyArg_ParseTupleAndKeywords( args, kwds, "O!:function", kwlist, &PyDict_Type, &globals)) return NULL; env = env_new_empty(type); if (env == NULL) return NULL; Py_INCREF(globals); env->globals = globals; env->consts = PyList_New(0); if (!env->consts) { Py_DECREF(env); return NULL; } return (PyObject *) env; } static PyTypeObject EnvironmentType = { PyVarObject_HEAD_INIT(NULL, 0) "_dynfunc.Environment", /*tp_name*/ sizeof(EnvironmentObject), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor) env_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ 0, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ 0, /*tp_hash */ 0, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ 0, /* tp_doc */ (traverseproc) env_traverse, /* tp_traverse */ (inquiry) env_clear, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ 0, /* tp_methods */ env_members, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ 0, /* tp_init */ 0, /* tp_alloc */ env_new, /* tp_new */ 0, /* tp_free */ 0, /* tp_is_gc */ 0, /* tp_bases */ 0, /* tp_mro */ 0, /* tp_cache */ 0, /* tp_subclasses */ 0, /* tp_weaklist */ 0, /* tp_del */ 0, /* tp_version_tag */ 0, /* tp_finalize */ #if PY_MAJOR_VERSION == 3 /* Python 3.8 has two slots, 3.9 has one. */ #if PY_MINOR_VERSION > 7 0, /* tp_vectorcall */ #if PY_MINOR_VERSION == 8 0, /* tp_print */ #endif #endif #endif }; /* A closure object is created for each call to make_function(), and stored as the resulting PyCFunction object's "self" pointer. It points to an EnvironmentObject which is constructed during compilation. This allows for two things: - lifetime management of dependent data (e.g. lifted loop dispatchers) - access to the execution environment by the compiled function (for example the globals module) */ /* Closure is a variable-sized object for binary compatibility with Generator (see below). */ #define CLOSURE_HEAD \ PyObject_VAR_HEAD \ EnvironmentObject *env; typedef struct { CLOSURE_HEAD /* The dynamically-filled method definition for the PyCFunction object using this closure. */ PyMethodDef def; /* Arbitrary object to keep alive during the closure's lifetime. (put a tuple to put several objects alive). In practice, this helps keep the LLVM module and its generated code alive. 
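
       For example (a sketch with hypothetical objects), several things
       can be kept alive at once by packing them into a tuple:

           PyObject *keep = PyTuple_Pack(2, module_handle, library_obj);
           // pass keep as the keepalive argument of closure_new();
           // closure_dealloc() below drops the reference when the
           // closure itself dies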
*/ PyObject *keepalive; PyObject *weakreflist; } ClosureObject; static int closure_traverse(ClosureObject *clo, visitproc visit, void *arg) { Py_VISIT(clo->env); Py_VISIT(clo->keepalive); return 0; } static void closure_dealloc(ClosureObject *clo) { PyObject_GC_UnTrack((PyObject *) clo); if (clo->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *) clo); PyObject_Free((void *) clo->def.ml_name); PyObject_Free((void *) clo->def.ml_doc); Py_XDECREF(clo->env); Py_XDECREF(clo->keepalive); Py_TYPE(clo)->tp_free((PyObject *) clo); } static PyTypeObject ClosureType = { PyVarObject_HEAD_INIT(NULL, 0) "_dynfunc._Closure", /*tp_name*/ sizeof(ClosureObject), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor) closure_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ 0, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ 0, /*tp_hash */ 0, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ 0, /* tp_doc */ (traverseproc) closure_traverse, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ offsetof(ClosureObject, weakreflist), /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ 0, /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ 0, /* tp_init */ 0, /* tp_alloc */ 0, /* tp_new */ 0, /* tp_free */ 0, /* tp_is_gc */ 0, /* tp_bases */ 0, /* tp_mro */ 0, /* tp_cache */ 0, /* tp_subclasses */ 0, /* tp_weaklist */ 0, /* tp_del */ 0, /* tp_version_tag */ 0, /* tp_finalize */ #if PY_MAJOR_VERSION == 3 /* Python 3.8 has two slots, 3.9 has one. */ #if PY_MINOR_VERSION > 7 0, /* tp_vectorcall */ #if PY_MINOR_VERSION == 8 0, /* tp_print */ #endif #endif #endif }; /* Return an owned piece of character data duplicating a Python string object's value. */ static char * dup_string(PyObject *strobj) { const char *tmp = NULL; char *str; tmp = PyString_AsString(strobj); if (tmp == NULL) return NULL; /* Using PyObject_Malloc allows this memory to be tracked for leaks. */ str = PyObject_Malloc(strlen(tmp) + 1); if (str == NULL) { PyErr_NoMemory(); return NULL; } strcpy(str, tmp); return str; } /* Create and initialize a new Closure object */ static ClosureObject * closure_new(PyObject *name, PyObject *doc, PyCFunction fnaddr, EnvironmentObject *env, PyObject *keepalive) { ClosureObject *clo = (ClosureObject *) PyType_GenericAlloc(&ClosureType, 0); if (clo == NULL) return NULL; clo->def.ml_name = dup_string(name); if (!clo->def.ml_name) { Py_DECREF(clo); return NULL; } clo->def.ml_meth = fnaddr; clo->def.ml_flags = METH_VARARGS | METH_KEYWORDS; clo->def.ml_doc = dup_string(doc); if (!clo->def.ml_doc) { Py_DECREF(clo); return NULL; } Py_INCREF(env); clo->env = env; Py_XINCREF(keepalive); clo->keepalive = keepalive; return clo; } /* Create a new PyCFunction object wrapping a closure defined by the given arguments. 
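
   Typical use (illustrative; my_impl and the surrounding objects are
   hypothetical):

       static PyObject *my_impl(PyObject *self, PyObject *args,
                                PyObject *kws);

       PyObject *f = pycfunction_new(module, name, doc,
                                     (PyCFunction)my_impl, env, NULL);
       // f behaves like a builtin function; calling it runs my_impl with
       // the _Closure instance as "self" (ml_flags is
       // METH_VARARGS | METH_KEYWORDS, see closure_new above)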
*/ static PyObject * pycfunction_new(PyObject *module, PyObject *name, PyObject *doc, PyCFunction fnaddr, EnvironmentObject *env, PyObject *keepalive) { PyObject *funcobj; PyObject *modname = NULL; ClosureObject *closure = NULL; closure = closure_new(name, doc, fnaddr, env, keepalive); if (closure == NULL) goto FAIL; modname = PyObject_GetAttrString(module, "__name__"); if (modname == NULL) goto FAIL; funcobj = PyCFunction_NewEx(&closure->def, (PyObject *) closure, modname); Py_DECREF(closure); Py_DECREF(modname); return funcobj; FAIL: Py_XDECREF(closure); Py_XDECREF(modname); return NULL; } /* * Python-facing wrapper for Numba-compiled generator. * Note the Environment's offset inside the struct is the same as in the * Closure object. This is required to simplify generation of Python wrappers. */ typedef void (*gen_finalizer_t)(void *); typedef struct { CLOSURE_HEAD PyCFunctionWithKeywords nextfunc; gen_finalizer_t finalizer; PyObject *weakreflist; union { double dummy; /* Force alignment */ char state[0]; }; } GeneratorObject; static int generator_traverse(GeneratorObject *gen, visitproc visit, void *arg) { /* XXX this doesn't traverse the state, which can own references to PyObjects */ Py_VISIT(gen->env); return 0; } static int generator_clear(GeneratorObject *gen) { if (gen->finalizer != NULL) { gen->finalizer(gen->state); gen->finalizer = NULL; } Py_CLEAR(gen->env); gen->nextfunc = NULL; return 0; } static void generator_dealloc(GeneratorObject *gen) { PyObject_GC_UnTrack((PyObject *) gen); if (gen->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *) gen); /* XXX The finalizer may be called after the LLVM module has been destroyed (typically at interpreter shutdown) */ #if PY_MAJOR_VERSION >= 3 #if PY_MINOR_VERSION >= 7 if (!_Py_IsFinalizing()) #else if (!_Py_Finalizing) #endif #endif if (gen->finalizer != NULL) gen->finalizer(gen->state); Py_XDECREF(gen->env); Py_TYPE(gen)->tp_free((PyObject *) gen); } static PyObject * generator_iternext(GeneratorObject *gen) { PyObject *res, *args; if (gen->nextfunc == NULL) { PyErr_SetString(PyExc_RuntimeError, "cannot call next() on finalized generator"); return NULL; } args = PyTuple_Pack(1, (PyObject *) gen); if (args == NULL) return NULL; res = (*gen->nextfunc)((PyObject *) gen, args, NULL); Py_DECREF(args); return res; } static PyTypeObject GeneratorType = { PyVarObject_HEAD_INIT(NULL, 0) "_dynfunc._Generator", /* tp_name*/ offsetof(GeneratorObject, state), /* tp_basicsize*/ 1, /* tp_itemsize*/ (destructor) generator_dealloc, /* tp_dealloc*/ 0, /* tp_print*/ 0, /* tp_getattr*/ 0, /* tp_setattr*/ 0, /* tp_compare*/ 0, /* tp_repr*/ 0, /* tp_as_number*/ 0, /* tp_as_sequence*/ 0, /* tp_as_mapping*/ 0, /* tp_hash */ 0, /* tp_call*/ 0, /* tp_str*/ 0, /* tp_getattro*/ 0, /* tp_setattro*/ 0, /* tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, /* tp_flags*/ 0, /* tp_doc */ (traverseproc) generator_traverse, /* tp_traverse */ (inquiry) generator_clear, /* tp_clear */ 0, /* tp_richcompare */ offsetof(GeneratorObject, weakreflist), /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ (iternextfunc) generator_iternext, /* tp_iternext */ 0, /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ 0, /* tp_init */ 0, /* tp_alloc */ 0, /* tp_new */ 0, /* tp_free */ 0, /* tp_is_gc */ 0, /* tp_bases */ 0, /* tp_mro */ 0, /* tp_cache */ 0, /* tp_subclasses */ 0, /* tp_weaklist */ 0, /* tp_del */ 0, /* tp_version_tag */ 0, /* 
tp_finalize */ #if PY_MAJOR_VERSION == 3 /* Python 3.8 has two slots, 3.9 has one. */ #if PY_MINOR_VERSION > 7 0, /* tp_vectorcall */ #if PY_MINOR_VERSION == 8 0, /* tp_print */ #endif #endif #endif }; /* Dynamically create a new generator object */ static PyObject * Numba_make_generator(Py_ssize_t gen_state_size, void *initial_state, PyCFunctionWithKeywords nextfunc, gen_finalizer_t finalizer, EnvironmentObject *env) { GeneratorObject *gen; gen = (GeneratorObject *) PyType_GenericAlloc(&GeneratorType, gen_state_size); if (gen == NULL) return NULL; memcpy(gen->state, initial_state, gen_state_size); gen->nextfunc = nextfunc; Py_XINCREF(env); gen->env = env; gen->finalizer = finalizer; return (PyObject *) gen; } /* Initialization subroutine for use by modules including this */ static int init_dynfunc_module(PyObject *module) { if (PyType_Ready(&ClosureType)) return -1; if (PyType_Ready(&EnvironmentType)) return -1; if (PyType_Ready(&GeneratorType)) return -1; return 0; } numba-0.55.1/numba/_dynfuncmod.c000664 000000 000000 00000005113 14174536160 016446 0ustar00rootroot000000 000000 #include "_dynfunc.c" /* Python-facing function to dynamically create a new C function object */ static PyObject* make_function(PyObject *self, PyObject *args) { PyObject *module, *fname, *fdoc, *fnaddrobj; void *fnaddr; EnvironmentObject *env; PyObject *keepalive; if (!PyArg_ParseTuple(args, "OOOOO!|O", &module, &fname, &fdoc, &fnaddrobj, &EnvironmentType, &env, &keepalive)) { return NULL; } fnaddr = PyLong_AsVoidPtr(fnaddrobj); if (fnaddr == NULL && PyErr_Occurred()) return NULL; return pycfunction_new(module, fname, fdoc, fnaddr, env, keepalive); } static PyMethodDef ext_methods[] = { #define declmethod(func) { #func , ( PyCFunction )func , METH_VARARGS , NULL } declmethod(make_function), { NULL }, #undef declmethod }; static PyObject * build_c_helpers_dict(void) { PyObject *dct = PyDict_New(); if (dct == NULL) goto error; #define _declpointer(name, value) do { \ PyObject *o = PyLong_FromVoidPtr(value); \ if (o == NULL) goto error; \ if (PyDict_SetItemString(dct, name, o)) { \ Py_DECREF(o); \ goto error; \ } \ Py_DECREF(o); \ } while (0) #define declmethod(func) _declpointer(#func, &Numba_##func) #define declpointer(ptr) _declpointer(#ptr, &ptr) declmethod(make_generator); #undef declmethod return dct; error: Py_XDECREF(dct); return NULL; } MOD_INIT(_dynfunc) { PyObject *m, *impl_info; MOD_DEF(m, "_dynfunc", "No docs", ext_methods) if (m == NULL) return MOD_ERROR_VAL; if (init_dynfunc_module(m)) return MOD_ERROR_VAL; impl_info = Py_BuildValue( "{snsnsn}", "offsetof_closure_body", offsetof(ClosureObject, env), "offsetof_env_body", offsetof(EnvironmentObject, globals), "offsetof_generator_state", offsetof(GeneratorObject, state) ); if (impl_info == NULL) return MOD_ERROR_VAL; PyModule_AddObject(m, "_impl_info", impl_info); Py_INCREF(&ClosureType); PyModule_AddObject(m, "_Closure", (PyObject *) (&ClosureType)); Py_INCREF(&EnvironmentType); PyModule_AddObject(m, "Environment", (PyObject *) (&EnvironmentType)); Py_INCREF(&GeneratorType); PyModule_AddObject(m, "_Generator", (PyObject *) (&GeneratorType)); PyModule_AddObject(m, "c_helpers", build_c_helpers_dict()); return MOD_SUCCESS_VAL(m); } numba-0.55.1/numba/_hashtable.c000664 000000 000000 00000036653 14174536160 016250 0ustar00rootroot000000 000000 /* * This file and _hashtable.h are from CPython 3.5. 
The symbols have been * renamed from _Py_hashxxx to _Numba_hashxxx to avoid name clashes with * the CPython definitions (including at runtime through dynamic linking). * Those CPython APIs are private and can change in incompatible ways at * any time. * * Command line used for renaming: * $ sed -i -r 's/\b_Py_(has[h]table)/_Numba_\1/ig' numba/_hashtable.h numba/_hashtable.c */ /* The implementation of the hash table (_Numba_hashtable_t) is based on the cfuhash project: http://sourceforge.net/projects/libcfu/ Copyright of cfuhash: ---------------------------------- Creation date: 2005-06-24 21:22:40 Authors: Don Change log: Copyright (c) 2005 Don Owens All rights reserved. This code is released under the BSD license: Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the author nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
---------------------------------- */ #include "_pymodule.h" #include "_hashtable.h" #define HASHTABLE_MIN_SIZE 16 #define HASHTABLE_HIGH 0.50 #define HASHTABLE_LOW 0.10 #define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH) #define BUCKETS_HEAD(SLIST) \ ((_Numba_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST))) #define TABLE_HEAD(HT, BUCKET) \ ((_Numba_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET])) #define ENTRY_NEXT(ENTRY) \ ((_Numba_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY)) #define HASHTABLE_ITEM_SIZE(HT) \ (sizeof(_Numba_hashtable_entry_t) + (HT)->data_size) /* Forward declaration */ static void hashtable_rehash(_Numba_hashtable_t *ht); static void _Py_slist_init(_Py_slist_t *list) { list->head = NULL; } static void _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item) { item->next = list->head; list->head = item; } static void _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous, _Py_slist_item_t *item) { if (previous != NULL) previous->next = item->next; else list->head = item->next; } Py_uhash_t _Numba_hashtable_hash_int(const void *key) { return (Py_uhash_t)key; } Py_uhash_t _Numba_hashtable_hash_ptr(const void *key) { return (Py_uhash_t)_Py_HashPointer((void *)key); } int _Numba_hashtable_compare_direct(const void *key, const _Numba_hashtable_entry_t *entry) { return entry->key == key; } /* makes sure the real size of the buckets array is a power of 2 */ static size_t round_size(size_t s) { size_t i; if (s < HASHTABLE_MIN_SIZE) return HASHTABLE_MIN_SIZE; i = 1; while (i < s) i <<= 1; return i; } _Numba_hashtable_t * _Numba_hashtable_new_full(size_t data_size, size_t init_size, _Numba_hashtable_hash_func hash_func, _Numba_hashtable_compare_func compare_func, _Numba_hashtable_copy_data_func copy_data_func, _Numba_hashtable_free_data_func free_data_func, _Numba_hashtable_get_data_size_func get_data_size_func, _Numba_hashtable_allocator_t *allocator) { _Numba_hashtable_t *ht; size_t buckets_size; _Numba_hashtable_allocator_t alloc; if (allocator == NULL) { alloc.malloc = PyMem_RawMalloc; alloc.free = PyMem_RawFree; } else alloc = *allocator; ht = (_Numba_hashtable_t *)alloc.malloc(sizeof(_Numba_hashtable_t)); if (ht == NULL) return ht; ht->num_buckets = round_size(init_size); ht->entries = 0; ht->data_size = data_size; buckets_size = ht->num_buckets * sizeof(ht->buckets[0]); ht->buckets = alloc.malloc(buckets_size); if (ht->buckets == NULL) { alloc.free(ht); return NULL; } memset(ht->buckets, 0, buckets_size); ht->hash_func = hash_func; ht->compare_func = compare_func; ht->copy_data_func = copy_data_func; ht->free_data_func = free_data_func; ht->get_data_size_func = get_data_size_func; ht->alloc = alloc; return ht; } _Numba_hashtable_t * _Numba_hashtable_new(size_t data_size, _Numba_hashtable_hash_func hash_func, _Numba_hashtable_compare_func compare_func) { return _Numba_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE, hash_func, compare_func, NULL, NULL, NULL, NULL); } size_t _Numba_hashtable_size(_Numba_hashtable_t *ht) { size_t size; size_t hv; size = sizeof(_Numba_hashtable_t); /* buckets */ size += ht->num_buckets * sizeof(_Numba_hashtable_entry_t *); /* entries */ size += ht->entries * HASHTABLE_ITEM_SIZE(ht); /* data linked from entries */ if (ht->get_data_size_func) { for (hv = 0; hv < ht->num_buckets; hv++) { _Numba_hashtable_entry_t *entry; for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { void *data; data = _Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); size += ht->get_data_size_func(data); } } } return 
size; } #ifdef Py_DEBUG void _Numba_hashtable_print_stats(_Numba_hashtable_t *ht) { size_t size; size_t chain_len, max_chain_len, total_chain_len, nchains; _Numba_hashtable_entry_t *entry; size_t hv; double load; size = _Numba_hashtable_size(ht); load = (double)ht->entries / ht->num_buckets; max_chain_len = 0; total_chain_len = 0; nchains = 0; for (hv = 0; hv < ht->num_buckets; hv++) { entry = TABLE_HEAD(ht, hv); if (entry != NULL) { chain_len = 0; for (; entry; entry = ENTRY_NEXT(entry)) { chain_len++; } if (chain_len > max_chain_len) max_chain_len = chain_len; total_chain_len += chain_len; nchains++; } } printf("hash table %p: entries=%" PY_FORMAT_SIZE_T "u/%" PY_FORMAT_SIZE_T "u (%.0f%%), ", ht, ht->entries, ht->num_buckets, load * 100.0); if (nchains) printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains); printf("max_chain_len=%" PY_FORMAT_SIZE_T "u, %" PY_FORMAT_SIZE_T "u kB\n", max_chain_len, size / 1024); } #endif /* Get an entry. Return NULL if the key does not exist. */ _Numba_hashtable_entry_t * _Numba_hashtable_get_entry(_Numba_hashtable_t *ht, const void *key) { Py_uhash_t key_hash; size_t index; _Numba_hashtable_entry_t *entry; key_hash = ht->hash_func(key); index = key_hash & (ht->num_buckets - 1); for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { if (entry->key_hash == key_hash && ht->compare_func(key, entry)) break; } return entry; } static int _hashtable_pop_entry(_Numba_hashtable_t *ht, const void *key, void *data, size_t data_size) { Py_uhash_t key_hash; size_t index; _Numba_hashtable_entry_t *entry, *previous; key_hash = ht->hash_func(key); index = key_hash & (ht->num_buckets - 1); previous = NULL; for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { if (entry->key_hash == key_hash && ht->compare_func(key, entry)) break; previous = entry; } if (entry == NULL) return 0; _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous, (_Py_slist_item_t *)entry); ht->entries--; if (data != NULL) _Numba_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry); ht->alloc.free(entry); if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW) hashtable_rehash(ht); return 1; } /* Add a new entry to the hash. The key must not be present in the hash table. Return 0 on success, -1 on memory error. */ int _Numba_hashtable_set(_Numba_hashtable_t *ht, const void *key, void *data, size_t data_size) { Py_uhash_t key_hash; size_t index; _Numba_hashtable_entry_t *entry; assert(data != NULL || data_size == 0); #ifndef NDEBUG /* Don't write the assertion on a single line because it is interesting to know the duplicated entry if the assertion failed. The entry can be read using a debugger. */ entry = _Numba_hashtable_get_entry(ht, key); assert(entry == NULL); #endif key_hash = ht->hash_func(key); index = key_hash & (ht->num_buckets - 1); entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht)); if (entry == NULL) { /* memory allocation failed */ return -1; } entry->key = (void *)key; entry->key_hash = key_hash; assert(data_size == ht->data_size); memcpy(_Numba_HASHTABLE_ENTRY_DATA(entry), data, data_size); _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry); ht->entries++; if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH) hashtable_rehash(ht); return 0; } /* Get data from an entry. Copy entry data into data and return 1 if the entry exists, return 0 if the entry does not exist. 
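
   Usage sketch (illustrative; key and some_ptr are hypothetical): a table
   mapping pointer keys to a single void-pointer of per-entry data can be
   driven entirely through this API:

       _Numba_hashtable_t *ht = _Numba_hashtable_new(
           sizeof(void *), _Numba_hashtable_hash_ptr,
           _Numba_hashtable_compare_direct);
       void *value = some_ptr, *out = NULL;
       _Numba_HASHTABLE_SET(ht, key, value);   // copies sizeof(void *) bytes
       if (_Numba_HASHTABLE_GET(ht, key, out)) {
           // out now holds the stored pointer
       }
       _Numba_hashtable_destroy(ht);

   The _Numba_HASHTABLE_SET and _Numba_HASHTABLE_GET convenience macros are
   declared in _hashtable.h further down.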
*/ int _Numba_hashtable_get(_Numba_hashtable_t *ht, const void *key, void *data, size_t data_size) { _Numba_hashtable_entry_t *entry; assert(data != NULL); entry = _Numba_hashtable_get_entry(ht, key); if (entry == NULL) return 0; _Numba_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry); return 1; } int _Numba_hashtable_pop(_Numba_hashtable_t *ht, const void *key, void *data, size_t data_size) { assert(data != NULL); assert(ht->free_data_func == NULL); return _hashtable_pop_entry(ht, key, data, data_size); } /* Delete an entry. The entry must exist. */ void _Numba_hashtable_delete(_Numba_hashtable_t *ht, const void *key) { #ifndef NDEBUG int found = _hashtable_pop_entry(ht, key, NULL, 0); assert(found); #else (void)_hashtable_pop_entry(ht, key, NULL, 0); #endif } /* Prototype for a pointer to a function to be called foreach key/value pair in the hash by hashtable_foreach(). Iteration stops if a non-zero value is returned. */ int _Numba_hashtable_foreach(_Numba_hashtable_t *ht, int (*func) (_Numba_hashtable_entry_t *entry, void *arg), void *arg) { _Numba_hashtable_entry_t *entry; size_t hv; for (hv = 0; hv < ht->num_buckets; hv++) { for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { int res = func(entry, arg); if (res) return res; } } return 0; } static void hashtable_rehash(_Numba_hashtable_t *ht) { size_t buckets_size, new_size, bucket; _Py_slist_t *old_buckets = NULL; size_t old_num_buckets; new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR)); if (new_size == ht->num_buckets) return; old_num_buckets = ht->num_buckets; buckets_size = new_size * sizeof(ht->buckets[0]); old_buckets = ht->buckets; ht->buckets = ht->alloc.malloc(buckets_size); if (ht->buckets == NULL) { /* cancel rehash on memory allocation failure */ ht->buckets = old_buckets ; /* memory allocation failed */ return; } memset(ht->buckets, 0, buckets_size); ht->num_buckets = new_size; for (bucket = 0; bucket < old_num_buckets; bucket++) { _Numba_hashtable_entry_t *entry, *next; for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) { size_t entry_index; assert(ht->hash_func(entry->key) == entry->key_hash); next = ENTRY_NEXT(entry); entry_index = entry->key_hash & (new_size - 1); _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry); } } ht->alloc.free(old_buckets); } void _Numba_hashtable_clear(_Numba_hashtable_t *ht) { _Numba_hashtable_entry_t *entry, *next; size_t i; for (i=0; i < ht->num_buckets; i++) { for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) { next = ENTRY_NEXT(entry); if (ht->free_data_func) ht->free_data_func(_Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry)); ht->alloc.free(entry); } _Py_slist_init(&ht->buckets[i]); } ht->entries = 0; hashtable_rehash(ht); } void _Numba_hashtable_destroy(_Numba_hashtable_t *ht) { size_t i; for (i = 0; i < ht->num_buckets; i++) { _Py_slist_item_t *entry = ht->buckets[i].head; while (entry) { _Py_slist_item_t *entry_next = entry->next; if (ht->free_data_func) ht->free_data_func(_Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry)); ht->alloc.free(entry); entry = entry_next; } } ht->alloc.free(ht->buckets); ht->alloc.free(ht); } /* Return a copy of the hash table */ _Numba_hashtable_t * _Numba_hashtable_copy(_Numba_hashtable_t *src) { _Numba_hashtable_t *dst; _Numba_hashtable_entry_t *entry; size_t bucket; int err; void *data, *new_data; dst = _Numba_hashtable_new_full(src->data_size, src->num_buckets, src->hash_func, src->compare_func, src->copy_data_func, src->free_data_func, src->get_data_size_func, 
&src->alloc); if (dst == NULL) return NULL; for (bucket=0; bucket < src->num_buckets; bucket++) { entry = TABLE_HEAD(src, bucket); for (; entry; entry = ENTRY_NEXT(entry)) { if (src->copy_data_func) { data = _Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); new_data = src->copy_data_func(data); if (new_data != NULL) err = _Numba_hashtable_set(dst, entry->key, &new_data, src->data_size); else err = 1; } else { data = _Numba_HASHTABLE_ENTRY_DATA(entry); err = _Numba_hashtable_set(dst, entry->key, data, src->data_size); } if (err) { _Numba_hashtable_destroy(dst); return NULL; } } } return dst; } numba-0.55.1/numba/_hashtable.h000664 000000 000000 00000010356 14174536160 016245 0ustar00rootroot000000 000000 /* * See _hashtable.c for more information about this file. */ #ifndef Py_HASHTABLE_H #define Py_HASHTABLE_H /* The whole API is private */ #ifndef Py_LIMITED_API typedef struct _Py_slist_item_s { struct _Py_slist_item_s *next; } _Py_slist_item_t; typedef struct { _Py_slist_item_t *head; } _Py_slist_t; #define _Py_SLIST_ITEM_NEXT(ITEM) (((_Py_slist_item_t *)ITEM)->next) #define _Py_SLIST_HEAD(SLIST) (((_Py_slist_t *)SLIST)->head) typedef struct { /* used by _Numba_hashtable_t.buckets to link entries */ _Py_slist_item_t _Py_slist_item; const void *key; Py_uhash_t key_hash; /* data follows */ } _Numba_hashtable_entry_t; #define _Numba_HASHTABLE_ENTRY_DATA(ENTRY) \ ((char *)(ENTRY) + sizeof(_Numba_hashtable_entry_t)) #define _Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(ENTRY) \ (*(void **)_Numba_HASHTABLE_ENTRY_DATA(ENTRY)) #define _Numba_HASHTABLE_ENTRY_READ_DATA(TABLE, DATA, DATA_SIZE, ENTRY) \ do { \ assert((DATA_SIZE) == (TABLE)->data_size); \ memcpy(DATA, _Numba_HASHTABLE_ENTRY_DATA(ENTRY), DATA_SIZE); \ } while (0) typedef Py_uhash_t (*_Numba_hashtable_hash_func) (const void *key); typedef int (*_Numba_hashtable_compare_func) (const void *key, const _Numba_hashtable_entry_t *he); typedef void* (*_Numba_hashtable_copy_data_func)(void *data); typedef void (*_Numba_hashtable_free_data_func)(void *data); typedef size_t (*_Numba_hashtable_get_data_size_func)(void *data); typedef struct { /* allocate a memory block */ void* (*malloc) (size_t size); /* release a memory block */ void (*free) (void *ptr); } _Numba_hashtable_allocator_t; typedef struct { size_t num_buckets; size_t entries; /* Total number of entries in the table. 
*/
    _Py_slist_t *buckets;
    size_t data_size;
    _Numba_hashtable_hash_func hash_func;
    _Numba_hashtable_compare_func compare_func;
    _Numba_hashtable_copy_data_func copy_data_func;
    _Numba_hashtable_free_data_func free_data_func;
    _Numba_hashtable_get_data_size_func get_data_size_func;
    _Numba_hashtable_allocator_t alloc;
} _Numba_hashtable_t;

/* hash and compare functions for integers and pointers */
PyAPI_FUNC(Py_uhash_t) _Numba_hashtable_hash_ptr(const void *key);
PyAPI_FUNC(Py_uhash_t) _Numba_hashtable_hash_int(const void *key);
PyAPI_FUNC(int) _Numba_hashtable_compare_direct(
    const void *key,
    const _Numba_hashtable_entry_t *entry);

PyAPI_FUNC(_Numba_hashtable_t *) _Numba_hashtable_new(
    size_t data_size,
    _Numba_hashtable_hash_func hash_func,
    _Numba_hashtable_compare_func compare_func);
PyAPI_FUNC(_Numba_hashtable_t *) _Numba_hashtable_new_full(
    size_t data_size,
    size_t init_size,
    _Numba_hashtable_hash_func hash_func,
    _Numba_hashtable_compare_func compare_func,
    _Numba_hashtable_copy_data_func copy_data_func,
    _Numba_hashtable_free_data_func free_data_func,
    _Numba_hashtable_get_data_size_func get_data_size_func,
    _Numba_hashtable_allocator_t *allocator);
PyAPI_FUNC(_Numba_hashtable_t *) _Numba_hashtable_copy(_Numba_hashtable_t *src);
PyAPI_FUNC(void) _Numba_hashtable_clear(_Numba_hashtable_t *ht);
PyAPI_FUNC(void) _Numba_hashtable_destroy(_Numba_hashtable_t *ht);

typedef int (*_Numba_hashtable_foreach_func) (_Numba_hashtable_entry_t *entry,
                                              void *arg);

PyAPI_FUNC(int) _Numba_hashtable_foreach(
    _Numba_hashtable_t *ht,
    _Numba_hashtable_foreach_func func, void *arg);
PyAPI_FUNC(size_t) _Numba_hashtable_size(_Numba_hashtable_t *ht);

PyAPI_FUNC(_Numba_hashtable_entry_t*) _Numba_hashtable_get_entry(
    _Numba_hashtable_t *ht,
    const void *key);

PyAPI_FUNC(int) _Numba_hashtable_set(
    _Numba_hashtable_t *ht,
    const void *key,
    void *data,
    size_t data_size);
PyAPI_FUNC(int) _Numba_hashtable_get(
    _Numba_hashtable_t *ht,
    const void *key,
    void *data,
    size_t data_size);
PyAPI_FUNC(int) _Numba_hashtable_pop(
    _Numba_hashtable_t *ht,
    const void *key,
    void *data,
    size_t data_size);
PyAPI_FUNC(void) _Numba_hashtable_delete(
    _Numba_hashtable_t *ht,
    const void *key);

#define _Numba_HASHTABLE_SET(TABLE, KEY, DATA) \
    _Numba_hashtable_set(TABLE, KEY, &(DATA), sizeof(DATA))

#define _Numba_HASHTABLE_GET(TABLE, KEY, DATA) \
    _Numba_hashtable_get(TABLE, KEY, &(DATA), sizeof(DATA))

#endif   /* Py_LIMITED_API */
#endif
numba-0.55.1/numba/_helperlib.c000664 000000 000000 00000077320 14174536160 016257 0ustar00rootroot000000 000000 /*
 * Helper functions used by Numba at runtime.
 * This C file is meant to be included after defining the
 * NUMBA_EXPORT_FUNC() and NUMBA_EXPORT_DATA() macros.
 */

#include "_pymodule.h"

#include <stdio.h>
#include <math.h>
#include <float.h>

#ifdef _MSC_VER
    #define int64_t signed __int64
    #define uint64_t unsigned __int64
    #define uint32_t unsigned __int32
#else
    #include <stdint.h>
#endif

#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION

#include <numpy/arrayobject.h>
#include <numpy/arrayscalars.h>
#include <numpy/npy_math.h>

#include "_arraystruct.h"

/*
 * Other helpers.
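 *
 * A worked example of the frexp wrappers defined further down (values per
 * the C standard): numba_frexp(8.0, &e) returns 0.5 and sets e = 4, since
 * 8.0 == 0.5 * 2**4; for x equal to 0.0, NaN or infinity the wrappers
 * return x unchanged and set e = 0, sidestepping platform differences in
 * those corner cases.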
*/ /* Fix fmod() and fmodf() for windows x64 VC 9.0 (VS 2008) https://support.microsoft.com/en-us/kb/982107 */ static void (*fnclex)(void) = NULL; NUMBA_EXPORT_FUNC(double) numba_fixed_fmod(double x, double y){ fnclex(); /* no inline asm in x64 =( */ return fmod(x, y); } NUMBA_EXPORT_FUNC(float) numba_fixed_fmodf(float x, float y) { fnclex(); /* no inline asm in x64 =( */ return fmodf(x, y); } NUMBA_EXPORT_FUNC(void) numba_set_fnclex(void *fn){ fnclex = fn; } /* provide 64-bit division function to 32-bit platforms */ NUMBA_EXPORT_FUNC(int64_t) numba_sdiv(int64_t a, int64_t b) { return a / b; } NUMBA_EXPORT_FUNC(uint64_t) numba_udiv(uint64_t a, uint64_t b) { return a / b; } /* provide 64-bit remainder function to 32-bit platforms */ NUMBA_EXPORT_FUNC(int64_t) numba_srem(int64_t a, int64_t b) { return a % b; } NUMBA_EXPORT_FUNC(uint64_t) numba_urem(uint64_t a, uint64_t b) { return a % b; } /* provide frexp and ldexp; these wrappers deal with special cases * (zero, nan, infinity) directly, to sidestep platform differences. */ NUMBA_EXPORT_FUNC(double) numba_frexp(double x, int *exp) { if (!Py_IS_FINITE(x) || !x) *exp = 0; else x = frexp(x, exp); return x; } NUMBA_EXPORT_FUNC(float) numba_frexpf(float x, int *exp) { if (Py_IS_NAN(x) || Py_IS_INFINITY(x) || !x) *exp = 0; else x = frexpf(x, exp); return x; } NUMBA_EXPORT_FUNC(double) numba_ldexp(double x, int exp) { if (Py_IS_FINITE(x) && x && exp) x = ldexp(x, exp); return x; } NUMBA_EXPORT_FUNC(float) numba_ldexpf(float x, int exp) { if (Py_IS_FINITE(x) && x && exp) x = ldexpf(x, exp); return x; } /* provide complex power */ NUMBA_EXPORT_FUNC(void) numba_cpow(Py_complex *a, Py_complex *b, Py_complex *out) { errno = 0; *out = _Py_c_pow(*a, *b); if (errno == EDOM) { /* _Py_c_pow() doesn't bother returning the right value in this case, as Python raises ZeroDivisionError */ out->real = out->imag = Py_NAN; } } NUMBA_EXPORT_FUNC(void) numba_cpowf(npy_cfloat *a, npy_cfloat *b, npy_cfloat *out) { Py_complex _a, _b, _out; _a.real = npy_crealf(*a); _a.imag = npy_cimagf(*a); _b.real = npy_crealf(*b); _b.imag = npy_cimagf(*b); numba_cpow(&_a, &_b, &_out); *out = npy_cpackf((float) _out.real, (float) _out.imag); } /* C99 math functions: redirect to system implementations */ NUMBA_EXPORT_FUNC(double) numba_gamma(double x) { return tgamma(x); } NUMBA_EXPORT_FUNC(float) numba_gammaf(float x) { return tgammaf(x); } NUMBA_EXPORT_FUNC(double) numba_lgamma(double x) { return lgamma(x); } NUMBA_EXPORT_FUNC(float) numba_lgammaf(float x) { return lgammaf(x); } NUMBA_EXPORT_FUNC(double) numba_erf(double x) { return erf(x); } NUMBA_EXPORT_FUNC(float) numba_erff(float x) { return erff(x); } NUMBA_EXPORT_FUNC(double) numba_erfc(double x) { return erfc(x); } NUMBA_EXPORT_FUNC(float) numba_erfcf(float x) { return erfcf(x); } /* Note npy_signbit() is actually a polymorphic macro */ NUMBA_EXPORT_FUNC(int) numba_signbitf(float a) { return npy_signbit(a); } NUMBA_EXPORT_FUNC(int) numba_signbit(npy_double a) { return npy_signbit(a); } /* Unpack any Python complex-like object into a Py_complex structure */ NUMBA_EXPORT_FUNC(int) numba_complex_adaptor(PyObject* obj, Py_complex *out) { PyObject* fobj; PyArray_Descr *dtype; double val[2]; // Convert from python complex or numpy complex128 if (PyComplex_Check(obj)) { out->real = PyComplex_RealAsDouble(obj); out->imag = PyComplex_ImagAsDouble(obj); } // Convert from numpy complex64 else if (PyArray_IsScalar(obj, ComplexFloating)) { dtype = PyArray_DescrFromScalar(obj); if (dtype == NULL) { return 0; } if 
(PyArray_CastScalarDirect(obj, dtype, &val[0], NPY_CDOUBLE) < 0) { Py_DECREF(dtype); return 0; } out->real = val[0]; out->imag = val[1]; Py_DECREF(dtype); } else { fobj = PyNumber_Float(obj); if (!fobj) return 0; out->real = PyFloat_AsDouble(fobj); out->imag = 0.; Py_DECREF(fobj); } return 1; } /* Minimum PyBufferObject structure to hack inside it */ typedef struct { PyObject_HEAD PyObject *b_base; void *b_ptr; Py_ssize_t b_size; Py_ssize_t b_offset; } PyBufferObject_Hack; /* Get data address of record data buffer */ NUMBA_EXPORT_FUNC(void *) numba_extract_record_data(PyObject *recordobj, Py_buffer *pbuf) { PyObject *attrdata; void *ptr; attrdata = PyObject_GetAttrString(recordobj, "data"); if (!attrdata) return NULL; if (-1 == PyObject_GetBuffer(attrdata, pbuf, 0)){ Py_DECREF(attrdata); return NULL; } else { ptr = pbuf->buf; } Py_DECREF(attrdata); return ptr; } /* * Return a record instance with dtype as the record type, and backed * by a copy of the memory area pointed to by (pdata, size). */ NUMBA_EXPORT_FUNC(PyObject *) numba_recreate_record(void *pdata, int size, PyObject *dtype) { PyObject *numpy = NULL; PyObject *numpy_record = NULL; PyObject *aryobj = NULL; PyObject *dtypearg = NULL; PyObject *record = NULL; PyArray_Descr *descr = NULL; if (dtype == NULL) { PyErr_Format(PyExc_RuntimeError, "In 'numba_recreate_record', 'dtype' is NULL"); return NULL; } numpy = PyImport_ImportModuleNoBlock("numpy"); if (!numpy) goto CLEANUP; numpy_record = PyObject_GetAttrString(numpy, "record"); if (!numpy_record) goto CLEANUP; dtypearg = PyTuple_Pack(2, numpy_record, dtype); if (!dtypearg || !PyArray_DescrConverter(dtypearg, &descr)) goto CLEANUP; /* This steals a reference to descr, so we don't have to DECREF it */ aryobj = PyArray_FromString(pdata, size, descr, 1, NULL); if (!aryobj) goto CLEANUP; record = PySequence_GetItem(aryobj, 0); CLEANUP: Py_XDECREF(numpy); Py_XDECREF(numpy_record); Py_XDECREF(aryobj); Py_XDECREF(dtypearg); return record; } NUMBA_EXPORT_FUNC(int) numba_adapt_ndarray(PyObject *obj, arystruct_t* arystruct) { PyArrayObject *ndary; int i, ndim; npy_intp *p; if (!PyArray_Check(obj)) { return -1; } ndary = (PyArrayObject*)obj; ndim = PyArray_NDIM(ndary); arystruct->data = PyArray_DATA(ndary); arystruct->nitems = PyArray_SIZE(ndary); arystruct->itemsize = PyArray_ITEMSIZE(ndary); arystruct->parent = obj; p = arystruct->shape_and_strides; for (i = 0; i < ndim; i++, p++) { *p = PyArray_DIM(ndary, i); } for (i = 0; i < ndim; i++, p++) { *p = PyArray_STRIDE(ndary, i); } arystruct->meminfo = NULL; return 0; } NUMBA_EXPORT_FUNC(int) numba_get_buffer(PyObject *obj, Py_buffer *buf) { /* Ask for shape and strides, but no suboffsets */ return PyObject_GetBuffer(obj, buf, PyBUF_RECORDS_RO); } NUMBA_EXPORT_FUNC(void) numba_adapt_buffer(Py_buffer *buf, arystruct_t *arystruct) { int i; npy_intp *p; arystruct->data = buf->buf; arystruct->itemsize = buf->itemsize; arystruct->parent = buf->obj; arystruct->nitems = 1; p = arystruct->shape_and_strides; for (i = 0; i < buf->ndim; i++, p++) { *p = buf->shape[i]; arystruct->nitems *= buf->shape[i]; } for (i = 0; i < buf->ndim; i++, p++) { *p = buf->strides[i]; } arystruct->meminfo = NULL; } NUMBA_EXPORT_FUNC(void) numba_release_buffer(Py_buffer *buf) { PyBuffer_Release(buf); } NUMBA_EXPORT_FUNC(PyObject *) numba_ndarray_new(int nd, npy_intp *dims, /* shape */ npy_intp *strides, void* data, int type_num, int itemsize) { PyObject *ndary; int flags = NPY_ARRAY_BEHAVED; ndary = PyArray_New((PyTypeObject*)&PyArray_Type, nd, dims, type_num, strides, data, 
0, flags, NULL); return ndary; } /* * Handle reshaping of zero-sized array. * See numba_attempt_nocopy_reshape() below. */ static int nocopy_empty_reshape(npy_intp nd, const npy_intp *dims, const npy_intp *strides, npy_intp newnd, const npy_intp *newdims, npy_intp *newstrides, npy_intp itemsize, int is_f_order) { int i; /* Just make the strides vaguely reasonable * (they can have any value in theory). */ for (i = 0; i < newnd; i++) newstrides[i] = itemsize; return 1; /* reshape successful */ } /* * Straight from Numpy's _attempt_nocopy_reshape() * (np/core/src/multiarray/shape.c). * Attempt to reshape an array without copying data * * This function should correctly handle all reshapes, including * axes of length 1. Zero strides should work but are untested. * * If a copy is needed, returns 0 * If no copy is needed, returns 1 and fills `npy_intp *newstrides` * with appropriate strides */ NUMBA_EXPORT_FUNC(int) numba_attempt_nocopy_reshape(npy_intp nd, const npy_intp *dims, const npy_intp *strides, npy_intp newnd, const npy_intp *newdims, npy_intp *newstrides, npy_intp itemsize, int is_f_order) { int oldnd; npy_intp olddims[NPY_MAXDIMS]; npy_intp oldstrides[NPY_MAXDIMS]; npy_intp np, op, last_stride; int oi, oj, ok, ni, nj, nk; oldnd = 0; /* * Remove axes with dimension 1 from the old array. They have no effect * but would need special cases since their strides do not matter. */ for (oi = 0; oi < nd; oi++) { if (dims[oi]!= 1) { olddims[oldnd] = dims[oi]; oldstrides[oldnd] = strides[oi]; oldnd++; } } np = 1; for (ni = 0; ni < newnd; ni++) { np *= newdims[ni]; } op = 1; for (oi = 0; oi < oldnd; oi++) { op *= olddims[oi]; } if (np != op) { /* different total sizes; no hope */ return 0; } if (np == 0) { /* the Numpy code does not handle 0-sized arrays */ return nocopy_empty_reshape(nd, dims, strides, newnd, newdims, newstrides, itemsize, is_f_order); } /* oi to oj and ni to nj give the axis ranges currently worked with */ oi = 0; oj = 1; ni = 0; nj = 1; while (ni < newnd && oi < oldnd) { np = newdims[ni]; op = olddims[oi]; while (np != op) { if (np < op) { /* Misses trailing 1s, these are handled later */ np *= newdims[nj++]; } else { op *= olddims[oj++]; } } /* Check whether the original axes can be combined */ for (ok = oi; ok < oj - 1; ok++) { if (is_f_order) { if (oldstrides[ok+1] != olddims[ok]*oldstrides[ok]) { /* not contiguous enough */ return 0; } } else { /* C order */ if (oldstrides[ok] != olddims[ok+1]*oldstrides[ok+1]) { /* not contiguous enough */ return 0; } } } /* Calculate new strides for all axes currently worked with */ if (is_f_order) { newstrides[ni] = oldstrides[oi]; for (nk = ni + 1; nk < nj; nk++) { newstrides[nk] = newstrides[nk - 1]*newdims[nk - 1]; } } else { /* C order */ newstrides[nj - 1] = oldstrides[oj - 1]; for (nk = nj - 1; nk > ni; nk--) { newstrides[nk - 1] = newstrides[nk]*newdims[nk]; } } ni = nj++; oi = oj++; } /* * Set strides corresponding to trailing 1s of the new shape. */ if (ni >= 1) { last_stride = newstrides[ni - 1]; } else { last_stride = itemsize; } if (is_f_order) { last_stride *= newdims[ni - 1]; } for (nk = ni; nk < newnd; nk++) { newstrides[nk] = last_stride; } return 1; } /* * Cython utilities. 
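 *
 * Usage sketch for the importer below (assuming SciPy is installed; the
 * module and function names are merely one example of a __pyx_capi__
 * export):
 *
 *     void *fn = import_cython_function("scipy.linalg.cython_blas",
 *                                       "dgemm");
 *     // fn is the raw C entry point, or NULL with a Python error set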
*/ /* Fetch the address of the given function, as exposed by a cython module */ static void * import_cython_function(const char *module_name, const char *function_name) { PyObject *module, *capi, *cobj; void *res = NULL; const char *capsule_name; module = PyImport_ImportModule(module_name); if (module == NULL) return NULL; capi = PyObject_GetAttrString(module, "__pyx_capi__"); Py_DECREF(module); if (capi == NULL) return NULL; cobj = PyMapping_GetItemString(capi, (char *)function_name); Py_DECREF(capi); if (cobj == NULL) { PyErr_Clear(); PyErr_Format(PyExc_ValueError, "No function '%s' found in __pyx_capi__ of '%s'", function_name, module_name); return NULL; } /* 2.7+ => Cython exports a PyCapsule */ capsule_name = PyCapsule_GetName(cobj); if (capsule_name != NULL) { res = PyCapsule_GetPointer(cobj, capsule_name); } Py_DECREF(cobj); return res; } NUMBA_EXPORT_FUNC(PyObject *) _numba_import_cython_function(PyObject *self, PyObject *args) { const char *module_name; const char *function_name; void *p = NULL; PyObject *res; if (!PyArg_ParseTuple(args, "ss", &module_name, &function_name)) { return NULL; } p = import_cython_function(module_name, function_name); if (p == NULL) { return NULL; } res = PyLong_FromVoidPtr(p); if (res == NULL) { PyErr_SetString(PyExc_RuntimeError, "Could not convert function address to int"); return NULL; } return res; } /* We use separate functions for datetime64 and timedelta64, to ensure * proper type checking. */ NUMBA_EXPORT_FUNC(npy_int64) numba_extract_np_datetime(PyObject *td) { if (!PyArray_IsScalar(td, Datetime)) { PyErr_SetString(PyExc_TypeError, "expected a numpy.datetime64 object"); return -1; } return PyArrayScalar_VAL(td, Datetime); } NUMBA_EXPORT_FUNC(npy_int64) numba_extract_np_timedelta(PyObject *td) { if (!PyArray_IsScalar(td, Timedelta)) { PyErr_SetString(PyExc_TypeError, "expected a numpy.timedelta64 object"); return -1; } return PyArrayScalar_VAL(td, Timedelta); } NUMBA_EXPORT_FUNC(PyObject *) numba_create_np_datetime(npy_int64 value, int unit_code) { PyDatetimeScalarObject *obj = (PyDatetimeScalarObject *) PyArrayScalar_New(Datetime); if (obj != NULL) { obj->obval = value; obj->obmeta.base = unit_code; obj->obmeta.num = 1; } return (PyObject *) obj; } NUMBA_EXPORT_FUNC(PyObject *) numba_create_np_timedelta(npy_int64 value, int unit_code) { PyTimedeltaScalarObject *obj = (PyTimedeltaScalarObject *) PyArrayScalar_New(Timedelta); if (obj != NULL) { obj->obval = value; obj->obmeta.base = unit_code; obj->obmeta.num = 1; } return (PyObject *) obj; } NUMBA_EXPORT_FUNC(uint64_t) numba_fptoui(double x) { /* First cast to signed int of the full width to make sure sign extension happens (this can make a difference on some platforms...). */ return (uint64_t) (int64_t) x; } NUMBA_EXPORT_FUNC(uint64_t) numba_fptouif(float x) { return (uint64_t) (int64_t) x; } NUMBA_EXPORT_FUNC(void) numba_gil_ensure(PyGILState_STATE *state) { *state = PyGILState_Ensure(); } NUMBA_EXPORT_FUNC(void) numba_gil_release(PyGILState_STATE *state) { PyGILState_Release(*state); } NUMBA_EXPORT_FUNC(PyObject *) numba_py_type(PyObject *obj) { return (PyObject *) Py_TYPE(obj); } /* * Functions for tagging an arbitrary Python object with an arbitrary pointer. * These functions make strong lifetime assumptions, see below.
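 *
 * A hedged usage sketch (payload is a hypothetical caller-owned pointer;
 * obj must stay referenced until the reset call, per the note below):
 *
 *   numba_set_pyobject_private_data(obj, payload);     // tag obj
 *   void *p = numba_get_pyobject_private_data(obj);    // p == payload
 *   numba_reset_pyobject_private_data(obj);            // drop the tag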
*/ static PyObject *private_data_dict = NULL; static PyObject * _get_private_data_dict(void) { if (private_data_dict == NULL) private_data_dict = PyDict_New(); return private_data_dict; } NUMBA_EXPORT_FUNC(void) numba_set_pyobject_private_data(PyObject *obj, void *ptr) { PyObject *dct = _get_private_data_dict(); /* This assumes the reference to obj is kept alive until the call to numba_reset_pyobject_private_data()! */ PyObject *key = PyLong_FromVoidPtr((void *) obj); PyObject *value = PyLong_FromVoidPtr(ptr); if (!dct || !value || !key) goto error; if (PyDict_SetItem(dct, key, value)) goto error; Py_DECREF(key); Py_DECREF(value); return; error: Py_FatalError("unable to set private data"); } NUMBA_EXPORT_FUNC(void *) numba_get_pyobject_private_data(PyObject *obj) { PyObject *dct = _get_private_data_dict(); PyObject *value, *key = PyLong_FromVoidPtr((void *) obj); void *ptr; if (!dct || !key) goto error; value = PyDict_GetItem(dct, key); Py_DECREF(key); if (!value) return NULL; else { ptr = PyLong_AsVoidPtr(value); if (ptr == NULL && PyErr_Occurred()) goto error; return ptr; } error: Py_FatalError("unable to get private data"); return NULL; } NUMBA_EXPORT_FUNC(void) numba_reset_pyobject_private_data(PyObject *obj) { PyObject *dct = _get_private_data_dict(); PyObject *key = PyLong_FromVoidPtr((void *) obj); if (!key) goto error; if (PyDict_DelItem(dct, key)) PyErr_Clear(); Py_DECREF(key); return; error: Py_FatalError("unable to reset private data"); } NUMBA_EXPORT_FUNC(int) numba_unpack_slice(PyObject *obj, Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step) { PySliceObject *slice = (PySliceObject *) obj; if (!PySlice_Check(obj)) { PyErr_Format(PyExc_TypeError, "Expected a slice object, got '%s'", Py_TYPE(slice)->tp_name); return -1; } #define FETCH_MEMBER(NAME, DEFAULT) \ if (slice->NAME != Py_None) { \ Py_ssize_t v = PyNumber_AsSsize_t(slice->NAME, \ PyExc_OverflowError); \ if (v == -1 && PyErr_Occurred()) \ return -1; \ *NAME = v; \ } \ else { \ *NAME = DEFAULT; \ } FETCH_MEMBER(step, 1) FETCH_MEMBER(stop, (*step > 0) ? PY_SSIZE_T_MAX : PY_SSIZE_T_MIN) FETCH_MEMBER(start, (*step > 0) ? 0 : PY_SSIZE_T_MAX) return 0; #undef FETCH_MEMBER } NUMBA_EXPORT_FUNC(int) numba_fatal_error(void) { PyGILState_Ensure(); Py_FatalError("in Numba-compiled function"); return 0; /* unreachable */ } /* Insert a frame into the traceback for (funcname, filename, lineno). */ /* This function is CPython's _PyTraceback_Add, renamed, see: * https://github.com/python/cpython/blob/d545869d084e70d4838310e79b52a25a72a1ca56/Python/traceback.c#L246 * and modified for Python 2.x based on * https://github.com/python/cpython/blob/2e1a34025cde19bddf12a2eac8fedb6afcca8339/Modules/_ctypes/callbacks.c#L151-L174 */ static void traceback_add(const char *funcname, const char *filename, int lineno) { PyObject *globals = NULL; PyCodeObject *code = NULL; PyFrameObject *frame = NULL; PyObject *exc, *val, *tb; /* Save and clear the current exception. Python functions must not be called with an exception set. Calling Python functions happens when the codec of the filesystem encoding is implemented in pure Python.
*/ PyErr_Fetch(&exc, &val, &tb); globals = PyDict_New(); if (!globals) goto error; code = PyCode_NewEmpty(filename, funcname, lineno); if (!code) { goto error; } frame = PyFrame_New(PyThreadState_Get(), code, globals, NULL); Py_DECREF(globals); Py_DECREF(code); if (!frame) goto error; frame->f_lineno = lineno; PyErr_Restore(exc, val, tb); PyTraceBack_Here(frame); Py_DECREF(frame); return; error: _PyErr_ChainExceptions(exc, val, tb); } /* * Add the traceback information from *loc* to the active exception. * loc can be NULL, which causes this function to become a no-op. */ static void traceback_add_loc(PyObject *loc) { const char *function_name_str = NULL, *filename_str = NULL; PyObject *function_name = NULL, *filename = NULL, *lineno = NULL; Py_ssize_t pos; /* When an exception instance is instantiated or an internal exception is * raised, add a frame for it to the traceback if loc is present */ if(loc && loc != Py_None && PyTuple_Check(loc)) { pos = 0; function_name = PyTuple_GET_ITEM(loc, pos); function_name_str = PyString_AsString(function_name); pos = 1; filename = PyTuple_GET_ITEM(loc, pos); filename_str = PyString_AsString(filename); pos = 2; lineno = PyTuple_GET_ITEM(loc, pos); traceback_add(function_name_str, filename_str, \ (int)PyLong_AsLong(lineno)); } } /** * Re-raise the current active exception. * Called internally by process_raise() when *exc* is None. */ static int reraise_exc_is_none(void) { /* Reraise */ PyThreadState *tstate = PyThreadState_GET(); PyObject *tb, *type, *value; #if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 7) _PyErr_StackItem *tstate_exc = tstate->exc_info; #else PyThreadState *tstate_exc = tstate; #endif type = tstate_exc->exc_type; value = tstate_exc->exc_value; tb = tstate_exc->exc_traceback; if (type == Py_None) { PyErr_SetString(PyExc_RuntimeError, "No active exception to reraise"); return 0; } /* incref needed because PyErr_Restore DOES NOT */ Py_XINCREF(type); Py_XINCREF(value); Py_XINCREF(tb); PyErr_Restore(type, value, tb); return 1; } /* * Set exception given the Exception type and the constructor argument. * Equivalent to ``raise exc(value)``. * PyExceptionClass_Check(exc) must be True. * value can be NULL. */ static int process_exception_class(PyObject *exc, PyObject *value) { PyObject *type; /* It is a class; `type` is used here just as a tmp var */ type = PyObject_CallObject(exc, value); if (type == NULL){ return 0; } if (!PyExceptionInstance_Check(type)) { PyErr_SetString(PyExc_TypeError, "exceptions must derive from BaseException"); Py_DECREF(type); return 0; } /* all ok, set type to the exc */ Py_DECREF(type); type = exc; PyErr_SetObject(type, value); return 1; } /* * Internal routine to process exceptions. * exc cannot be NULL. It can be None, an Exception type, or an Exception instance. * value can be NULL for absent, or any PyObject valid for the exception. */ static int process_raise(PyObject *exc, PyObject *value) { /* exc is None */ if (exc == Py_None) { return reraise_exc_is_none(); } /* exc should be an exception class */ else if (PyExceptionClass_Check(exc)) { return process_exception_class(exc, value); } /* exc is an instance of an Exception */ else if (PyExceptionInstance_Check(exc)) { PyObject *type = PyExceptionInstance_Class(exc); PyErr_SetObject(type, exc); return 0; } else { /* Not something you can raise. You get an exception anyway, just not what you specified :-) */ PyErr_SetString(PyExc_TypeError, "exceptions must derive from BaseException"); return 0; } } /* Logic for raising an arbitrary object. Adapted from CPython's ceval.c.
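 *
 * From nopython mode the argument is typically packed as a 3-tuple of
 * (exception class or instance, constructor args, source location). A hedged
 * sketch of building one by hand (reference bookkeeping elided):
 *
 *   PyObject *loc = Py_BuildValue("(ssi)", "myfunc", "myfile.py", 42);
 *   PyObject *packed = Py_BuildValue("(O(s)O)", PyExc_ZeroDivisionError,
 *                                    "division by zero", loc);
 *   numba_do_raise(packed);   // exception + synthetic traceback frame set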
numba_do_raise() *consumes* a reference count to its argument. */ NUMBA_EXPORT_FUNC(int) numba_do_raise(PyObject *exc_packed) { int status; PyObject *exc = NULL, *value = NULL, *loc = NULL; /* We support the following forms of raise: raise raise <instance> raise <type> */ /* could be a tuple from nopython mode (some exception-like thing, args, location) */ if (PyTuple_CheckExact(exc_packed)) { /* Unpack a (class/inst/tuple, arguments, location) tuple. */ if (!PyArg_ParseTuple(exc_packed, "OOO", &exc, &value, &loc)) { traceback_add_loc(loc); return 0; } } else { /* could be a reraise or an exception from objmode */ exc = exc_packed; /* branch exit with value = NULL and loc = NULL */ } /* value is either NULL or borrowed */ status = process_raise(exc, value); traceback_add_loc(loc); Py_DECREF(exc_packed); return status; } #ifdef PYCC_COMPILING /* AOT: avoid the use of `numba.core.serialize` */ NUMBA_EXPORT_FUNC(PyObject *) numba_unpickle(const char *data, int n, const char *hashed) { PyObject *buf, *obj; static PyObject *loads; /* Caching the pickle.loads function shaves a couple µs here. */ if (loads == NULL) { PyObject *picklemod; picklemod = PyImport_ImportModule("pickle"); if (picklemod == NULL) return NULL; loads = PyObject_GetAttrString(picklemod, "loads"); Py_DECREF(picklemod); if (loads == NULL) return NULL; } buf = PyBytes_FromStringAndSize(data, n); if (buf == NULL) return NULL; obj = PyObject_CallFunctionObjArgs(loads, buf, NULL); Py_DECREF(buf); return obj; } #else NUMBA_EXPORT_FUNC(PyObject *) numba_unpickle(const char *data, int n, const char *hashed) { PyObject *buf=NULL, *obj=NULL, *addr=NULL, *hashedbuf=NULL; static PyObject *loads=NULL; /* Caching the pickle.loads function shaves a couple µs here. */ if (loads == NULL) { PyObject *picklemod; picklemod = PyImport_ImportModule("numba.core.serialize"); if (picklemod == NULL) return NULL; loads = PyObject_GetAttrString(picklemod, "_numba_unpickle"); Py_DECREF(picklemod); if (loads == NULL) return NULL; } buf = PyBytes_FromStringAndSize(data, n); if (buf == NULL) return NULL; /* SHA1 produces 160 bits, i.e. 20 bytes */ hashedbuf = PyBytes_FromStringAndSize(hashed, 20); if (hashedbuf == NULL) goto error; addr = PyLong_FromVoidPtr((void*)data); if (addr == NULL) goto error; obj = PyObject_CallFunctionObjArgs(loads, addr, buf, hashedbuf, NULL); error: Py_XDECREF(addr); Py_XDECREF(hashedbuf); Py_DECREF(buf); return obj; } #endif /* * Unicode helpers */ /* Developer note: * * The hash value of unicode objects is obtained via: * ((PyASCIIObject *)(obj))->hash; * The use comes from this definition: * https://github.com/python/cpython/blob/6d43f6f081023b680d9db4542d19b9e382149f0a/Objects/unicodeobject.c#L119-L120 * and it's used extensively throughout the `cpython/Object/unicodeobject.c` * source, not least in `unicode_hash` itself: * https://github.com/python/cpython/blob/6d43f6f081023b680d9db4542d19b9e382149f0a/Objects/unicodeobject.c#L11662-L11679 * * The Unicode string struct layouts are described here: * https://github.com/python/cpython/blob/6d43f6f081023b680d9db4542d19b9e382149f0a/Include/cpython/unicodeobject.h#L82-L161 * essentially, all the unicode string layouts start with a `PyASCIIObject` at * offset 0 (as of commit 6d43f6f081023b680d9db4542d19b9e382149f0a, somewhere * in the 3.8 development cycle). * * For safety against future CPython internal changes, the code checks that the * _base members of the unicode structs are what is expected in 3.7, and that * their offset is 0.
It then walks the struct to the hash location to make sure * the offset is indeed the same as PyASCIIObject->hash. * Note: The large condition in the if should evaluate to a compile time * constant. */ #define MEMBER_SIZE(structure, member) sizeof(((structure *)0)->member) NUMBA_EXPORT_FUNC(void *) numba_extract_unicode(PyObject *obj, Py_ssize_t *length, int *kind, unsigned int *ascii, Py_ssize_t *hash) { if (!PyUnicode_READY(obj)) { *length = PyUnicode_GET_LENGTH(obj); *kind = PyUnicode_KIND(obj); /* could also use PyUnicode_IS_ASCII but it is not publicly advertised in https://docs.python.org/3/c-api/unicode.html */ *ascii = (unsigned int)(PyUnicode_MAX_CHAR_VALUE(obj) == (0x7f)); /* this is here as a crude check for safe casting of all unicode string * structs to a PyASCIIObject */ if (MEMBER_SIZE(PyCompactUnicodeObject, _base) == sizeof(PyASCIIObject) && MEMBER_SIZE(PyUnicodeObject, _base) == sizeof(PyCompactUnicodeObject) && offsetof(PyCompactUnicodeObject, _base) == 0 && offsetof(PyUnicodeObject, _base) == 0 && offsetof(PyCompactUnicodeObject, _base.hash) == offsetof(PyASCIIObject, hash) && offsetof(PyUnicodeObject, _base._base.hash) == offsetof(PyASCIIObject, hash) ) { /* Grab the hash from the type object cache, do not compute it. */ *hash = ((PyASCIIObject *)(obj))->hash; } else { /* cast is not safe, fail */ return NULL; } return PyUnicode_DATA(obj); } else { return NULL; } } /* this is late included as it #defines e.g. SHIFT that should not impact * the above */ #include "_unicodetype_db.h" /* This function is a modified copy of the private function gettyperecord from * CPython's Objects/unicodectype.c * * See: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L45-L59 */ NUMBA_EXPORT_FUNC(void) numba_gettyperecord(Py_UCS4 code, int *upper, int *lower, int *title, unsigned char *decimal, unsigned char *digit, unsigned short *flags) { int index; const numba_PyUnicode_TypeRecord *rec; if (code >= 0x110000) index = 0; else { index = index1[(code>>SHIFT)]; index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))]; } rec = &numba_PyUnicode_TypeRecords[index]; *upper = rec->upper; *lower = rec->lower; *title = rec->title; *decimal = rec->decimal; *digit = rec->digit; *flags = rec->flags; } /* This function provides a consistent access point for the * _PyUnicode_ExtendedCase array defined in CPython's Objects/unicodectype.c * and now also as numba_PyUnicode_ExtendedCase in Numba's _unicodetype_db.h */ NUMBA_EXPORT_FUNC(Py_UCS4) numba_get_PyUnicode_ExtendedCase(int code) { return numba_PyUnicode_ExtendedCase[code]; } /* from _unicodetype_db.h */ #undef SHIFT /* * defined break point for gdb */ NUMBA_EXPORT_FUNC(void) numba_gdb_breakpoint(void) { /* does nothing */ } /* * Define bridge for all math functions */ #define MATH_UNARY(F, R, A) \ NUMBA_EXPORT_FUNC(R) numba_##F(A a) { return F(a); } #define MATH_BINARY(F, R, A, B) \ NUMBA_EXPORT_FUNC(R) numba_##F(A a, B b) { return F(a, b); } #include "mathnames.h" #undef MATH_UNARY #undef MATH_BINARY /* * BLAS and LAPACK wrappers */ #include "_lapack.c" /* * PRNG support */ #include "_random.c" numba-0.55.1/numba/_helpermod.c000664 000000 000000 00000017714 14174536160 016271 0ustar00rootroot000000 000000 /* Expose all functions as pointers in a dedicated C extension.
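
   A C consumer can recover any of the exported pointers at runtime; a hedged
   sketch (error handling elided; "fptoui" is registered below by declmethod,
   i.e. without its "numba_" prefix):

     PyObject *mod = PyImport_ImportModule("numba._helperlib");
     PyObject *helpers = PyObject_GetAttrString(mod, "c_helpers");
     PyObject *addr = PyDict_GetItemString(helpers, "fptoui");   // borrowed
     uint64_t (*fptoui)(double) =
         (uint64_t (*)(double)) PyLong_AsVoidPtr(addr);
     // fptoui(3.7) == 3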
*/ #include "cext/cext.h" /* Import _pymodule.h first, for a recent _POSIX_C_SOURCE */ #include "_pymodule.h" #include <math.h> #ifdef _MSC_VER #define false 0 #define true 1 #define bool int #else #include <stdbool.h> #endif /* Include C-extension here */ #include "cext/cext.h" /* Numba C helpers */ #include "_helperlib.c" /* Numpy C math function exports */ #include "_npymath_exports.c" static PyObject * build_c_helpers_dict(void) { PyObject *dct = PyDict_New(); if (dct == NULL) goto error; #define _declpointer(name, value) do { \ PyObject *o = PyLong_FromVoidPtr(value); \ if (o == NULL) goto error; \ if (PyDict_SetItemString(dct, name, o)) { \ Py_DECREF(o); \ goto error; \ } \ Py_DECREF(o); \ } while (0) #define declmethod(func) _declpointer(#func, &numba_##func) #define declpointer(ptr) _declpointer(#ptr, &numba_##ptr) declmethod(fixed_fmod); declmethod(fixed_fmodf); declmethod(set_fnclex); declmethod(sdiv); declmethod(srem); declmethod(udiv); declmethod(urem); declmethod(frexp); declmethod(frexpf); declmethod(ldexp); declmethod(ldexpf); declmethod(cpow); declmethod(cpowf); declmethod(erf); declmethod(erff); declmethod(erfc); declmethod(erfcf); declmethod(gamma); declmethod(gammaf); declmethod(lgamma); declmethod(lgammaf); declmethod(signbit); declmethod(signbitf); declmethod(complex_adaptor); declmethod(adapt_ndarray); declmethod(ndarray_new); declmethod(extract_record_data); declmethod(get_buffer); declmethod(adapt_buffer); declmethod(release_buffer); declmethod(extract_np_datetime); declmethod(create_np_datetime); declmethod(extract_np_timedelta); declmethod(create_np_timedelta); declmethod(recreate_record); declmethod(fptoui); declmethod(fptouif); declmethod(gil_ensure); declmethod(gil_release); declmethod(fatal_error); declmethod(py_type); declmethod(unpack_slice); declmethod(do_raise); declmethod(unpickle); declmethod(attempt_nocopy_reshape); declmethod(get_pyobject_private_data); declmethod(set_pyobject_private_data); declmethod(reset_pyobject_private_data); /* BLAS / LAPACK */ declmethod(xxgemm); declmethod(xxgemv); declmethod(xxdot); declmethod(xxgetrf); declmethod(ez_xxgetri); declmethod(xxpotrf); declmethod(ez_rgeev); declmethod(ez_cgeev); declmethod(ez_xxxevd); declmethod(ez_gesdd); declmethod(ez_geqrf); declmethod(ez_xxgqr); declmethod(ez_gelsd); declmethod(xgesv); declmethod(xxnrm2); /* PRNG support */ declmethod(get_py_random_state); declmethod(get_np_random_state); declmethod(get_internal_random_state); declmethod(rnd_shuffle); declmethod(rnd_init); declmethod(poisson_ptrs); /* Unicode string support */ declmethod(extract_unicode); declmethod(gettyperecord); declmethod(get_PyUnicode_ExtendedCase); /* for gdb breakpoint */ declmethod(gdb_breakpoint); /* for dictionary support */ declmethod(test_dict); declmethod(dict_new_minsize); declmethod(dict_set_method_table); declmethod(dict_free); declmethod(dict_length); declmethod(dict_lookup); declmethod(dict_insert); declmethod(dict_insert_ez); declmethod(dict_delitem); declmethod(dict_popitem); declmethod(dict_iter_sizeof); declmethod(dict_iter); declmethod(dict_iter_next); declmethod(dict_dump); /* for list support */ declmethod(test_list); declmethod(list_new); declmethod(list_set_method_table); declmethod(list_free); declmethod(list_base_ptr); declmethod(list_size_address); declmethod(list_length); declmethod(list_allocated); declmethod(list_is_mutable); declmethod(list_set_is_mutable); declmethod(list_setitem); declmethod(list_getitem); declmethod(list_append); declmethod(list_delitem); declmethod(list_delete_slice);
declmethod(list_iter_sizeof); declmethod(list_iter); declmethod(list_iter_next); #define MATH_UNARY(F, R, A) declmethod(F); #define MATH_BINARY(F, R, A, B) declmethod(F); #include "mathnames.h" #undef MATH_UNARY #undef MATH_BINARY #undef declmethod return dct; error: Py_XDECREF(dct); return NULL; } static int register_npymath_exports(PyObject *dct) { size_t count = sizeof(npymath_exports) / sizeof(npymath_exports[0]); size_t i; for (i = 0; i < count; ++i) { PyObject *ptr = PyLong_FromVoidPtr(npymath_exports[i].func); if (ptr == NULL) return -1; if (PyDict_SetItemString(dct, npymath_exports[i].name, ptr) < 0) { Py_DECREF(ptr); return -1; } Py_DECREF(ptr); } return 0; } static PyObject * build_npymath_exports_dict(void) { PyObject *dct = PyDict_New(); if (dct != NULL) { if (register_npymath_exports(dct) < 0) Py_CLEAR(dct); } return dct; } /* * Helper to deal with flushing stdout */ PyAPI_FUNC(void) _numba_flush_stdout(void) ; void _numba_flush_stdout(void) { fflush(stdout); } static PyMethodDef ext_methods[] = { { "rnd_get_state", (PyCFunction) _numba_rnd_get_state, METH_O, NULL }, { "rnd_get_py_state_ptr", (PyCFunction) _numba_rnd_get_py_state_ptr, METH_NOARGS, NULL }, { "rnd_get_np_state_ptr", (PyCFunction) _numba_rnd_get_np_state_ptr, METH_NOARGS, NULL }, { "rnd_seed", (PyCFunction) _numba_rnd_seed, METH_VARARGS, NULL }, { "rnd_set_state", (PyCFunction) _numba_rnd_set_state, METH_VARARGS, NULL }, { "rnd_shuffle", (PyCFunction) _numba_rnd_shuffle, METH_O, NULL }, { "_import_cython_function", (PyCFunction) _numba_import_cython_function, METH_VARARGS, NULL }, { NULL }, }; /* * These functions are exported by the module's DLL, to exercise ctypes / cffi * without relying on libc availability (see https://bugs.python.org/issue23606) */ PyAPI_FUNC(double) _numba_test_sin(double x); PyAPI_FUNC(double) _numba_test_cos(double x); PyAPI_FUNC(double) _numba_test_exp(double x); PyAPI_FUNC(void) _numba_test_vsquare(int n, double *x, double *out); PyAPI_FUNC(double) _numba_test_funcptr(double (*func)(double)); PyAPI_FUNC(bool) _numba_test_boolean(void); double _numba_test_sin(double x) { return sin(x); } double _numba_test_cos(double x) { return cos(x); } double _numba_test_exp(double x) { return exp(x); } void _numba_test_vsquare(int n, double *x, double *out) { int i; for (i = 0; i < n; i++) out[i] = pow(x[i], 2.0); } void _numba_test_vcube(int n, double *x, double *out) { int i; for (i = 0; i < n; i++) out[i] = pow(x[i], 3.0); } double _numba_test_funcptr(double (*func)(double)) { return func(1.5); } bool _numba_test_boolean() { return true; } MOD_INIT(_helperlib) { PyObject *m; MOD_DEF(m, "_helperlib", "No docs", ext_methods) if (m == NULL) return MOD_ERROR_VAL; import_array(); PyModule_AddObject(m, "c_helpers", build_c_helpers_dict()); PyModule_AddObject(m, "npymath_exports", build_npymath_exports_dict()); PyModule_AddIntConstant(m, "long_min", LONG_MIN); PyModule_AddIntConstant(m, "long_max", LONG_MAX); PyModule_AddIntConstant(m, "py_buffer_size", sizeof(Py_buffer)); PyModule_AddIntConstant(m, "py_gil_state_size", sizeof(PyGILState_STATE)); PyModule_AddIntConstant(m, "py_unicode_1byte_kind", PyUnicode_1BYTE_KIND); PyModule_AddIntConstant(m, "py_unicode_2byte_kind", PyUnicode_2BYTE_KIND); PyModule_AddIntConstant(m, "py_unicode_4byte_kind", PyUnicode_4BYTE_KIND); PyModule_AddIntConstant(m, "py_unicode_wchar_kind", PyUnicode_WCHAR_KIND); numba_rnd_ensure_global_init(); return MOD_SUCCESS_VAL(m); } numba-0.55.1/numba/_lapack.c000664 000000 000000 00000153447 14174536160 015551 0ustar00rootroot000000 
000000 /* * This file contains wrappers of BLAS and LAPACK functions */ /* * BLAS calling helpers. The helpers can be called without the GIL held. * The caller is responsible for checking arguments (especially dimensions). */ /* Fast getters caching the value of a function's address after the first call to import_cblas_function(). */ #define EMIT_GET_CBLAS_FUNC(name) \ static void *cblas_ ## name = NULL; \ static void *get_cblas_ ## name(void) { \ if (cblas_ ## name == NULL) { \ PyGILState_STATE st = PyGILState_Ensure(); \ const char *mod = "scipy.linalg.cython_blas"; \ cblas_ ## name = import_cython_function(mod, # name); \ PyGILState_Release(st); \ } \ return cblas_ ## name; \ } EMIT_GET_CBLAS_FUNC(dgemm) EMIT_GET_CBLAS_FUNC(sgemm) EMIT_GET_CBLAS_FUNC(cgemm) EMIT_GET_CBLAS_FUNC(zgemm) EMIT_GET_CBLAS_FUNC(dgemv) EMIT_GET_CBLAS_FUNC(sgemv) EMIT_GET_CBLAS_FUNC(cgemv) EMIT_GET_CBLAS_FUNC(zgemv) EMIT_GET_CBLAS_FUNC(ddot) EMIT_GET_CBLAS_FUNC(sdot) EMIT_GET_CBLAS_FUNC(cdotu) EMIT_GET_CBLAS_FUNC(zdotu) EMIT_GET_CBLAS_FUNC(cdotc) EMIT_GET_CBLAS_FUNC(zdotc) EMIT_GET_CBLAS_FUNC(snrm2) EMIT_GET_CBLAS_FUNC(dnrm2) EMIT_GET_CBLAS_FUNC(scnrm2) EMIT_GET_CBLAS_FUNC(dznrm2) #undef EMIT_GET_CBLAS_FUNC /* * NOTE: On return value convention. * For LAPACK wrapper development the following conventions are followed: * Publicly exposed wrapper functions must return:- * STATUS_ERROR : For an unrecoverable error e.g. caught by xerbla, this is so * a Py_FatalError can be raised. * STATUS_SUCCESS: For successful execution * +n : Where n is an integer for a routine specific error * (typically derived from an `info` argument). * * The caller is responsible for checking and handling the error status. */ /* return STATUS_SUCCESS if everything went ok */ #define STATUS_SUCCESS (0) /* return STATUS_ERROR if an unrecoverable error is encountered */ #define STATUS_ERROR (-1) /* * A union of all the types accepted by BLAS/LAPACK for use in cases where * stack based allocation is needed (typically for work space query args length * 1). */ typedef union all_dtypes_ { float s; double d; npy_complex64 c; npy_complex128 z; } all_dtypes; /* * A checked PyMem_RawMalloc, ensures that the var is either NULL * and an exception is raised, or that the allocation was successful. * Returns zero on success for status checking. */ static int checked_PyMem_RawMalloc(void** var, size_t bytes) { *var = NULL; *var = PyMem_RawMalloc(bytes); if (!(*var)) { { PyGILState_STATE st = PyGILState_Ensure(); PyErr_SetString(PyExc_MemoryError, "Insufficient memory for buffer allocation\ required by LAPACK."); PyGILState_Release(st); } return 1; } return 0; } /* * Checks that the char kind is valid (one of [s,d,c,z]) for use in blas/lapack. * Returns zero on success for status checking. */ static int check_kind(char kind) { switch (kind) { case 's': case 'd': case 'c': case 'z': break; default: { PyGILState_STATE st = PyGILState_Ensure(); PyErr_SetString(PyExc_ValueError, "invalid data type (kind) found"); PyGILState_Release(st); } return 1; } return 0; } /* * Guard macro for ensuring a valid data "kind" is being used. * Place at the top of all routines with switches on "kind" that accept * one of [s,d,c,z]. */ #define ENSURE_VALID_KIND(__KIND) \ if (check_kind( __KIND )) \ { \ return STATUS_ERROR; \ } \ /* * Checks that the char kind is valid for the real domain (one of [s,d]) * for use in blas/lapack. * Returns zero on success for status checking. 
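 *
 * To illustrate the kind and status conventions from the caller's side, a
 * hedged sketch against numba_xxdot() defined further below (kind 'd',
 * no conjugation):
 *
 *   double a[3] = {1., 2., 3.}, b[3] = {4., 5., 6.}, out = 0.;
 *   if (numba_xxdot('d', 0, 3, a, b, &out) != STATUS_SUCCESS)
 *       return STATUS_ERROR;   // bad kind or missing scipy symbol
 *   // out == 32.0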
*/ static int check_real_kind(char kind) { switch (kind) { case 's': case 'd': break; default: { PyGILState_STATE st = PyGILState_Ensure(); PyErr_SetString(PyExc_ValueError, "invalid data type (kind) found"); PyGILState_Release(st); } return 1; } return 0; } /* * Guard macro for ensuring a valid data "kind" is being used for the * real domain routines. * Place at the top of all routines with switches on "kind" that accept * one of [s,d]. */ #define ENSURE_VALID_REAL_KIND(__KIND) \ if (check_real_kind( __KIND )) \ { \ return STATUS_ERROR; \ } \ /* * Checks that the char kind is valid for the complex domain (one of [c,z]) * for use in blas/lapack. * Returns zero on success for status checking. */ static int check_complex_kind(char kind) { switch (kind) { case 'c': case 'z': break; default: { PyGILState_STATE st = PyGILState_Ensure(); PyErr_SetString(PyExc_ValueError, "invalid data type (kind) found"); PyGILState_Release(st); } return 1; } return 0; } /* * Guard macro for ensuring a valid data "kind" is being used for the * complex domain routines. * Place at the top of all routines with switches on "kind" that accept * one of [c,z]. */ #define ENSURE_VALID_COMPLEX_KIND(__KIND) \ if (check_complex_kind( __KIND )) \ { \ return STATUS_ERROR; \ } \ /* * Checks that a function is found (i.e. not null) * Returns zero on success for status checking. */ static int check_func(void *func) { if (func == NULL) { PyGILState_STATE st = PyGILState_Ensure(); PyErr_SetString(PyExc_RuntimeError, "Specified LAPACK function could not be found."); PyGILState_Release(st); return STATUS_ERROR; } return STATUS_SUCCESS; } /* * Guard macro for ensuring a valid function is found. */ #define ENSURE_VALID_FUNC(__FUNC) \ if (check_func(__FUNC)) \ { \ return STATUS_ERROR; \ } \ /* * Define what a Fortran "int" is, some LAPACKs have 64 bit integer support * numba presently opts for a 32 bit C int. * This definition allows scope for later configuration time magic to adjust * the size of int at all the call sites. */ #define F_INT int typedef float (*sdot_t)(F_INT *n, void *dx, F_INT *incx, void *dy, F_INT *incy); typedef double (*ddot_t)(F_INT *n, void *dx, F_INT *incx, void *dy, F_INT *incy); typedef npy_complex64 (*cdot_t)(F_INT *n, void *dx, F_INT *incx, void *dy, F_INT *incy); typedef npy_complex128 (*zdot_t)(F_INT *n, void *dx, F_INT *incx, void *dy, F_INT *incy); typedef void (*xxgemv_t)(char *trans, F_INT *m, F_INT *n, void *alpha, void *a, F_INT *lda, void *x, F_INT *incx, void *beta, void *y, F_INT *incy); typedef void (*xxgemm_t)(char *transa, char *transb, F_INT *m, F_INT *n, F_INT *k, void *alpha, void *a, F_INT *lda, void *b, F_INT *ldb, void *beta, void *c, F_INT *ldc); typedef float (*sxnrm2_t) (F_INT *n, void *x, F_INT *incx); typedef double (*dxnrm2_t) (F_INT *n, void *x, F_INT *incx); /* Vector * vector: result = dx * dy */ NUMBA_EXPORT_FUNC(int) numba_xxdot(char kind, char conjugate, Py_ssize_t n, void *dx, void *dy, void *result) { void *raw_func = NULL; F_INT _n; F_INT inc = 1; ENSURE_VALID_KIND(kind) switch (kind) { case 's': raw_func = get_cblas_sdot(); break; case 'd': raw_func = get_cblas_ddot(); break; case 'c': raw_func = conjugate ? get_cblas_cdotc() : get_cblas_cdotu(); break; case 'z': raw_func = conjugate ?
get_cblas_zdotc() : get_cblas_zdotu(); break; } ENSURE_VALID_FUNC(raw_func) _n = (F_INT) n; switch (kind) { case 's': *(float *) result = (*(sdot_t) raw_func)(&_n, dx, &inc, dy, &inc);; break; case 'd': *(double *) result = (*(ddot_t) raw_func)(&_n, dx, &inc, dy, &inc);; break; case 'c': *(npy_complex64 *) result = (*(cdot_t) raw_func)(&_n, dx, &inc, dy,\ &inc);; break; case 'z': *(npy_complex128 *) result = (*(zdot_t) raw_func)(&_n, dx, &inc,\ dy, &inc);; break; } return 0; } /* Matrix * vector: y = alpha * a * x + beta * y */ NUMBA_EXPORT_FUNC(int) numba_xxgemv(char kind, char trans, Py_ssize_t m, Py_ssize_t n, void *alpha, void *a, Py_ssize_t lda, void *x, void *beta, void *y) { void *raw_func = NULL; F_INT _m, _n; F_INT _lda; F_INT inc = 1; ENSURE_VALID_KIND(kind) switch (kind) { case 's': raw_func = get_cblas_sgemv(); break; case 'd': raw_func = get_cblas_dgemv(); break; case 'c': raw_func = get_cblas_cgemv(); break; case 'z': raw_func = get_cblas_zgemv(); break; } ENSURE_VALID_FUNC(raw_func) _m = (F_INT) m; _n = (F_INT) n; _lda = (F_INT) lda; (*(xxgemv_t) raw_func)(&trans, &_m, &_n, alpha, a, &_lda, x, &inc, beta, y, &inc); return 0; } /* Matrix * matrix: c = alpha * a * b + beta * c */ NUMBA_EXPORT_FUNC(int) numba_xxgemm(char kind, char transa, char transb, Py_ssize_t m, Py_ssize_t n, Py_ssize_t k, void *alpha, void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *beta, void *c, Py_ssize_t ldc) { void *raw_func = NULL; F_INT _m, _n, _k; F_INT _lda, _ldb, _ldc; ENSURE_VALID_KIND(kind) switch (kind) { case 's': raw_func = get_cblas_sgemm(); break; case 'd': raw_func = get_cblas_dgemm(); break; case 'c': raw_func = get_cblas_cgemm(); break; case 'z': raw_func = get_cblas_zgemm(); break; } ENSURE_VALID_FUNC(raw_func) _m = (F_INT) m; _n = (F_INT) n; _k = (F_INT) k; _lda = (F_INT) lda; _ldb = (F_INT) ldb; _ldc = (F_INT) ldc; (*(xxgemm_t) raw_func)(&transa, &transb, &_m, &_n, &_k, alpha, a, &_lda, b, &_ldb, beta, c, &_ldc); return 0; } /* L2-norms */ NUMBA_EXPORT_FUNC(F_INT) numba_xxnrm2(char kind, Py_ssize_t n, void * x, Py_ssize_t incx, void * result) { void *raw_func = NULL; F_INT _incx; F_INT _n; ENSURE_VALID_KIND(kind) switch (kind) { case 's': raw_func = get_cblas_snrm2(); break; case 'd': raw_func = get_cblas_dnrm2(); break; case 'c': raw_func = get_cblas_scnrm2(); break; case 'z': raw_func = get_cblas_dznrm2(); break; } ENSURE_VALID_FUNC(raw_func) _n = (F_INT) n; _incx = (F_INT) incx; switch (kind) { case 's': *(float *) result = (*(sxnrm2_t) raw_func)(&_n, x, &_incx);; break; case 'd': *(double *) result = (*(dxnrm2_t) raw_func)(&_n, x, &_incx);; break; case 'c': *(float *) result = (*(sxnrm2_t) raw_func)(&_n, x, &_incx);; break; case 'z': *(double *) result = (*(dxnrm2_t) raw_func)(&_n, x, &_incx);; break; } return 0; } /* * LAPACK calling helpers. The helpers can be called without the GIL held. * The caller is responsible for checking arguments (especially dimensions). */ /* Fast getters caching the value of a function's address after the first call to import_clapack_function(). */ #define EMIT_GET_CLAPACK_FUNC(name) \ static void *clapack_ ## name = NULL; \ static void *get_clapack_ ## name(void) { \ if (clapack_ ## name == NULL) { \ PyGILState_STATE st = PyGILState_Ensure(); \ const char *mod = "scipy.linalg.cython_lapack"; \ clapack_ ## name = import_cython_function(mod, # name); \ PyGILState_Release(st); \ } \ return clapack_ ## name; \ } /* Computes an LU factorization of a general M-by-N matrix A * using partial pivoting with row interchanges. 
*/ EMIT_GET_CLAPACK_FUNC(sgetrf) EMIT_GET_CLAPACK_FUNC(dgetrf) EMIT_GET_CLAPACK_FUNC(cgetrf) EMIT_GET_CLAPACK_FUNC(zgetrf) /* Computes the inverse of a matrix using the LU factorization * computed by xGETRF. */ EMIT_GET_CLAPACK_FUNC(sgetri) EMIT_GET_CLAPACK_FUNC(dgetri) EMIT_GET_CLAPACK_FUNC(cgetri) EMIT_GET_CLAPACK_FUNC(zgetri) /* Compute Cholesky factorizations */ EMIT_GET_CLAPACK_FUNC(spotrf) EMIT_GET_CLAPACK_FUNC(dpotrf) EMIT_GET_CLAPACK_FUNC(cpotrf) EMIT_GET_CLAPACK_FUNC(zpotrf) /* Computes for an N-by-N real nonsymmetric matrix A, the * eigenvalues and, optionally, the left and/or right eigenvectors. */ EMIT_GET_CLAPACK_FUNC(sgeev) EMIT_GET_CLAPACK_FUNC(dgeev) EMIT_GET_CLAPACK_FUNC(cgeev) EMIT_GET_CLAPACK_FUNC(zgeev) /* Computes for an N-by-N Hermitian matrix A, the * eigenvalues and, optionally, the left and/or right eigenvectors. */ EMIT_GET_CLAPACK_FUNC(ssyevd) EMIT_GET_CLAPACK_FUNC(dsyevd) EMIT_GET_CLAPACK_FUNC(cheevd) EMIT_GET_CLAPACK_FUNC(zheevd) /* Computes generalised SVD */ EMIT_GET_CLAPACK_FUNC(sgesdd) EMIT_GET_CLAPACK_FUNC(dgesdd) EMIT_GET_CLAPACK_FUNC(cgesdd) EMIT_GET_CLAPACK_FUNC(zgesdd) /* Computes QR decompositions */ EMIT_GET_CLAPACK_FUNC(sgeqrf) EMIT_GET_CLAPACK_FUNC(dgeqrf) EMIT_GET_CLAPACK_FUNC(cgeqrf) EMIT_GET_CLAPACK_FUNC(zgeqrf) /* Computes columns of Q from elementary reflectors produced by xgeqrf() (QR). */ EMIT_GET_CLAPACK_FUNC(sorgqr) EMIT_GET_CLAPACK_FUNC(dorgqr) EMIT_GET_CLAPACK_FUNC(cungqr) EMIT_GET_CLAPACK_FUNC(zungqr) /* Computes the minimum norm solution to linear least squares problems */ EMIT_GET_CLAPACK_FUNC(sgelsd) EMIT_GET_CLAPACK_FUNC(dgelsd) EMIT_GET_CLAPACK_FUNC(cgelsd) EMIT_GET_CLAPACK_FUNC(zgelsd) // Computes the solution to a system of linear equations EMIT_GET_CLAPACK_FUNC(sgesv) EMIT_GET_CLAPACK_FUNC(dgesv) EMIT_GET_CLAPACK_FUNC(cgesv) EMIT_GET_CLAPACK_FUNC(zgesv) #undef EMIT_GET_CLAPACK_FUNC typedef void (*xxgetrf_t)(F_INT *m, F_INT *n, void *a, F_INT *lda, F_INT *ipiv, F_INT *info); typedef void (*xxgetri_t)(F_INT *n, void *a, F_INT *lda, F_INT *ipiv, void *work, F_INT *lwork, F_INT *info); typedef void (*xxpotrf_t)(char *uplo, F_INT *n, void *a, F_INT *lda, F_INT *info); typedef void (*rgeev_t)(char *jobvl, char *jobvr, F_INT *n, void *a, F_INT *lda, void *wr, void *wi, void *vl, F_INT *ldvl, void *vr, F_INT *ldvr, void *work, F_INT *lwork, F_INT *info); typedef void (*cgeev_t)(char *jobvl, char *jobvr, F_INT *n, void *a, F_INT *lda, void *w, void *vl, F_INT *ldvl, void *vr, F_INT *ldvr, void *work, F_INT *lwork, void *rwork, F_INT *info); typedef void (*rgesdd_t)(char *jobz, F_INT *m, F_INT *n, void *a, F_INT *lda, void *s, void *u, F_INT *ldu, void *vt, F_INT *ldvt, void *work, F_INT *lwork, F_INT *iwork, F_INT *info); typedef void (*cgesdd_t)(char *jobz, F_INT *m, F_INT *n, void *a, F_INT *lda, void *s, void * u, F_INT *ldu, void * vt, F_INT *ldvt, void *work, F_INT *lwork, void *rwork, F_INT *iwork, F_INT *info); typedef void (*xsyevd_t)(char *jobz, char *uplo, F_INT *n, void *a, F_INT *lda, void *w, void *work, F_INT *lwork, F_INT *iwork, F_INT *liwork, F_INT *info); typedef void (*xheevd_t)(char *jobz, char *uplo, F_INT *n, void *a, F_INT *lda, void *w, void *work, F_INT *lwork, void *rwork, F_INT *lrwork, F_INT *iwork, F_INT *liwork, F_INT *info); typedef void (*xgeqrf_t)(F_INT *m, F_INT *n, void *a, F_INT *lda, void *tau, void *work, F_INT *lwork, F_INT *info); typedef void (*xxxgqr_t)(F_INT *m, F_INT *n, F_INT *k, void *a, F_INT *lda, void *tau, void *work, F_INT *lwork, F_INT *info); typedef void (*rgelsd_t)(F_INT *m, F_INT 
*n, F_INT *nrhs, void *a, F_INT *lda, void *b, F_INT *ldb, void *s, void *rcond, F_INT *rank, void *work, F_INT *lwork, F_INT *iwork, F_INT *info); typedef void (*cgelsd_t)(F_INT *m, F_INT *n, F_INT *nrhs, void *a, F_INT *lda, void *b, F_INT *ldb, void *s, void *rcond, F_INT *rank, void *work, F_INT *lwork, void *rwork, F_INT *iwork, F_INT *info); typedef void (*xgesv_t)(F_INT *n, F_INT *nrhs, void *a, F_INT *lda, F_INT *ipiv, void *b, F_INT *ldb, F_INT *info); /* * kind_size() * gets the data size appropriate for a specified kind. * * Input: * kind - the kind, one of: * (s, d, c, z) = (float, double, complex, double complex). * * Returns: * data_size - the appropriate data size. * */ static size_t kind_size(char kind) { size_t data_size = 0; switch (kind) { case 's': data_size = sizeof(float); break; case 'd': data_size = sizeof(double); break; case 'c': data_size = sizeof(npy_complex64); break; case 'z': data_size = sizeof(npy_complex128); break; } return data_size; } /* * underlying_float_kind() * gets the underlying float kind for a given kind. * * Input: * kind - the kind, one of: * (s, d, c, z) = (float, double, complex, double complex). * * Returns: * underlying_float_kind - the underlying float kind, one of: * (s, d) = (float, double). * * This function essentially provides a map between the char kind * of a type and the char kind of the underlying float used in the * type. Essentially: * --------------- * Input -> Output * --------------- * s -> s * d -> d * c -> s * z -> d * --------------- * */ static char underlying_float_kind(char kind) { switch(kind) { case 's': case 'c': return 's'; case 'd': case 'z': return 'd'; default: { PyGILState_STATE st = PyGILState_Ensure(); PyErr_SetString(PyExc_ValueError, "invalid kind in underlying_float_kind()"); PyGILState_Release(st); } } return -1; } /* * cast_from_X() * cast from a kind (s, d, c, z) = (float, double, complex, double complex) * to a Fortran integer. * * Parameters: * kind the kind of val * val a pointer to the value to cast * * Returns: * A Fortran int from a cast of val (in complex case, takes the real part). * * Struct access via non c99 (python only) cmplx types, used for compatibility. */ static F_INT cast_from_X(char kind, void *val) { switch(kind) { case 's': return (F_INT)(*((float *) val)); case 'd': return (F_INT)(*((double *) val)); case 'c': return (F_INT)(*((npy_complex64 *)val)).real; case 'z': return (F_INT)(*((npy_complex128 *)val)).real; default: { PyGILState_STATE st = PyGILState_Ensure(); PyErr_SetString(PyExc_ValueError, "invalid kind in cast"); PyGILState_Release(st); } } return -1; } #define CATCH_LAPACK_INVALID_ARG(__routine, info) \ do { \ if (info < 0) { \ PyGILState_STATE st = PyGILState_Ensure(); \ PyErr_Format(PyExc_RuntimeError, \ "LAPACK Error: Routine " #__routine ". On input %d\n",\ -(int) info); \ PyGILState_Release(st); \ return STATUS_ERROR; \ } \ } while(0) /* Compute LU decomposition of A * NOTE: ipiv is an array of Fortran integers allocated by the caller, * which is therefore expected to use the right dtype. 
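 *
 * A hedged caller sketch (kind 'd', a 2x2 column-major matrix; F_INT is a
 * plain C int per the definition above):
 *
 *   double a[4] = {4., 3., 6., 3.};   // columns (4, 3) and (6, 3)
 *   F_INT ipiv[2];                    // getrf wants min(m, n) pivot slots
 *   int info = numba_xxgetrf('d', 2, 2, a, 2, ipiv);
 *   // info == 0: success; info > 0: U has an exactly zero diagonal entry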
*/ NUMBA_EXPORT_FUNC(int) numba_xxgetrf(char kind, Py_ssize_t m, Py_ssize_t n, void *a, Py_ssize_t lda, F_INT *ipiv) { void *raw_func = NULL; F_INT _m, _n, _lda, info; ENSURE_VALID_KIND(kind) switch (kind) { case 's': raw_func = get_clapack_sgetrf(); break; case 'd': raw_func = get_clapack_dgetrf(); break; case 'c': raw_func = get_clapack_cgetrf(); break; case 'z': raw_func = get_clapack_zgetrf(); break; } ENSURE_VALID_FUNC(raw_func) _m = (F_INT) m; _n = (F_INT) n; _lda = (F_INT) lda; (*(xxgetrf_t) raw_func)(&_m, &_n, a, &_lda, ipiv, &info); CATCH_LAPACK_INVALID_ARG("xxgetrf", info); return (int)info; } /* Compute the inverse of a matrix given its LU decomposition * Args are as per LAPACK. */ static int numba_raw_xxgetri(char kind, F_INT n, void *a, F_INT lda, F_INT *ipiv, void *work, F_INT *lwork, F_INT *info) { void *raw_func = NULL; ENSURE_VALID_KIND(kind) switch (kind) { case 's': raw_func = get_clapack_sgetri(); break; case 'd': raw_func = get_clapack_dgetri(); break; case 'c': raw_func = get_clapack_cgetri(); break; case 'z': raw_func = get_clapack_zgetri(); break; } ENSURE_VALID_FUNC(raw_func) (*(xxgetri_t) raw_func)(&n, a, &lda, ipiv, work, lwork, info); return 0; } /* Compute the inverse of a matrix from the factorization provided by * xxgetrf. (see numba_xxgetrf() about ipiv) * Args are as per LAPACK. */ NUMBA_EXPORT_FUNC(int) numba_ez_xxgetri(char kind, Py_ssize_t n, void *a, Py_ssize_t lda, F_INT *ipiv) { F_INT _n, _lda; F_INT lwork = -1; F_INT info = 0; size_t base_size = -1; void * work = NULL; all_dtypes stack_slot; ENSURE_VALID_KIND(kind) _n = (F_INT)n; _lda = (F_INT)lda; base_size = kind_size(kind); work = &stack_slot; numba_raw_xxgetri(kind, _n, a, _lda, ipiv, work, &lwork, &info); CATCH_LAPACK_INVALID_ARG("xxgetri", info); lwork = cast_from_X(kind, work); if (checked_PyMem_RawMalloc(&work, base_size * lwork)) { return STATUS_ERROR; } numba_raw_xxgetri(kind, _n, a, _lda, ipiv, work, &lwork, &info); PyMem_RawFree(work); CATCH_LAPACK_INVALID_ARG("xxgetri", info); return (int)info; } /* Compute the Cholesky factorization of a matrix. */ NUMBA_EXPORT_FUNC(int) numba_xxpotrf(char kind, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda) { void *raw_func = NULL; F_INT _n, _lda, info; ENSURE_VALID_KIND(kind) switch (kind) { case 's': raw_func = get_clapack_spotrf(); break; case 'd': raw_func = get_clapack_dpotrf(); break; case 'c': raw_func = get_clapack_cpotrf(); break; case 'z': raw_func = get_clapack_zpotrf(); break; } ENSURE_VALID_FUNC(raw_func) _n = (F_INT) n; _lda = (F_INT) lda; (*(xxpotrf_t) raw_func)(&uplo, &_n, a, &_lda, &info); CATCH_LAPACK_INVALID_ARG("xxpotrf", info); return (int)info; } /* real space eigen systems info from dgeev/sgeev */ static int numba_raw_rgeev(char kind, char jobvl, char jobvr, Py_ssize_t n, void *a, Py_ssize_t lda, void *wr, void *wi, void *vl, Py_ssize_t ldvl, void *vr, Py_ssize_t ldvr, void *work, Py_ssize_t lwork, F_INT *info) { void *raw_func = NULL; F_INT _n, _lda, _ldvl, _ldvr, _lwork; ENSURE_VALID_REAL_KIND(kind) switch (kind) { case 's': raw_func = get_clapack_sgeev(); break; case 'd': raw_func = get_clapack_dgeev(); break; } ENSURE_VALID_FUNC(raw_func) _n = (F_INT) n; _lda = (F_INT) lda; _ldvl = (F_INT) ldvl; _ldvr = (F_INT) ldvr; _lwork = (F_INT) lwork; (*(rgeev_t) raw_func)(&jobvl, &jobvr, &_n, a, &_lda, wr, wi, vl, &_ldvl, vr, &_ldvr, work, &_lwork, info); return 0; } /* Real space eigen systems info from dgeev/sgeev * as numba_raw_rgeev but the allocation and error handling is done for the user. * Args are as per LAPACK. 
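 *
 * Like the other ez_ wrappers it relies on LAPACK's two-call work-space
 * protocol; in outline (a sketch of the pattern only, other arguments
 * abbreviated):
 *
 *   F_INT lwork = -1, info = 0;
 *   all_dtypes slot;
 *   void *work = &slot;                        // 1-element stack buffer
 *   numba_raw_rgeev(..., work, lwork, &info);  // query: size written to *work
 *   lwork = cast_from_X(kind, work);           // decode it as an integer
 *   checked_PyMem_RawMalloc(&work, kind_size(kind) * lwork);
 *   numba_raw_rgeev(..., work, lwork, &info);  // the real computation
 *   PyMem_RawFree(work);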
*/ NUMBA_EXPORT_FUNC(int) numba_ez_rgeev(char kind, char jobvl, char jobvr, Py_ssize_t n, void *a, Py_ssize_t lda, void *wr, void *wi, void *vl, Py_ssize_t ldvl, void *vr, Py_ssize_t ldvr) { F_INT info = 0; F_INT lwork = -1; F_INT _n, _lda, _ldvl, _ldvr; size_t base_size = -1; void * work = NULL; all_dtypes stack_slot; ENSURE_VALID_REAL_KIND(kind) _n = (F_INT) n; _lda = (F_INT) lda; _ldvl = (F_INT) ldvl; _ldvr = (F_INT) ldvr; base_size = kind_size(kind); work = &stack_slot; numba_raw_rgeev(kind, jobvl, jobvr, _n, a, _lda, wr, wi, vl, _ldvl, vr, _ldvr, work, lwork, &info); CATCH_LAPACK_INVALID_ARG("numba_raw_rgeev", info); lwork = cast_from_X(kind, work); if (checked_PyMem_RawMalloc(&work, base_size * lwork)) { return STATUS_ERROR; } numba_raw_rgeev(kind, jobvl, jobvr, _n, a, _lda, wr, wi, vl, _ldvl, vr, _ldvr, work, lwork, &info); PyMem_RawFree(work); CATCH_LAPACK_INVALID_ARG("numba_raw_rgeev", info); return (int)info; } /* Complex space eigen systems info from cgeev/zgeev * Args are as per LAPACK. */ static int numba_raw_cgeev(char kind, char jobvl, char jobvr, Py_ssize_t n, void *a, Py_ssize_t lda, void *w, void *vl, Py_ssize_t ldvl, void *vr, Py_ssize_t ldvr, void *work, Py_ssize_t lwork, void *rwork, F_INT *info) { void *raw_func = NULL; F_INT _n, _lda, _ldvl, _ldvr, _lwork; ENSURE_VALID_COMPLEX_KIND(kind) _n = (F_INT) n; _lda = (F_INT) lda; _ldvl = (F_INT) ldvl; _ldvr = (F_INT) ldvr; _lwork = (F_INT) lwork; switch (kind) { case 'c': raw_func = get_clapack_cgeev(); break; case 'z': raw_func = get_clapack_zgeev(); break; } ENSURE_VALID_FUNC(raw_func) (*(cgeev_t) raw_func)(&jobvl, &jobvr, &_n, a, &_lda, w, vl, &_ldvl, vr, &_ldvr, work, &_lwork, rwork, info); return 0; } /* Complex space eigen systems info from cgeev/zgeev * as numba_raw_cgeev but the allocation and error handling is done for the user. * Args are as per LAPACK. 
*/ NUMBA_EXPORT_FUNC(int) numba_ez_cgeev(char kind, char jobvl, char jobvr, Py_ssize_t n, void *a, Py_ssize_t lda, void *w, void *vl, Py_ssize_t ldvl, void *vr, Py_ssize_t ldvr) { F_INT info = 0; F_INT lwork = -1; F_INT _n, _lda, _ldvl, _ldvr; size_t base_size = -1; all_dtypes stack_slot, wk; void * work = NULL; void * rwork = (void *)&wk; ENSURE_VALID_COMPLEX_KIND(kind) _n = (F_INT) n; _lda = (F_INT) lda; _ldvl = (F_INT) ldvl; _ldvr = (F_INT) ldvr; base_size = kind_size(kind); work = &stack_slot; numba_raw_cgeev(kind, jobvl, jobvr, n, a, lda, w, vl, ldvl, vr, ldvr, work, lwork, rwork, &info); CATCH_LAPACK_INVALID_ARG("numba_raw_cgeev", info); lwork = cast_from_X(kind, work); if (checked_PyMem_RawMalloc((void**)&rwork, 2*n*base_size)) { return STATUS_ERROR; } if (checked_PyMem_RawMalloc(&work, base_size * lwork)) { PyMem_RawFree(rwork); return STATUS_ERROR; } numba_raw_cgeev(kind, jobvl, jobvr, _n, a, _lda, w, vl, _ldvl, vr, _ldvr, work, lwork, rwork, &info); PyMem_RawFree(work); PyMem_RawFree(rwork); CATCH_LAPACK_INVALID_ARG("numba_raw_cgeev", info); return (int)info; } /* real space symmetric eigen systems info from ssyevd/dsyevd */ static int numba_raw_rsyevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda, void *w, void *work, Py_ssize_t lwork, F_INT *iwork, Py_ssize_t liwork, F_INT *info) { void *raw_func = NULL; F_INT _n, _lda, _lwork, _liwork; ENSURE_VALID_REAL_KIND(kind) switch (kind) { case 's': raw_func = get_clapack_ssyevd(); break; case 'd': raw_func = get_clapack_dsyevd(); break; } ENSURE_VALID_FUNC(raw_func) _n = (F_INT) n; _lda = (F_INT) lda; _lwork = (F_INT) lwork; _liwork = (F_INT) liwork; (*(xsyevd_t) raw_func)(&jobz, &uplo, &_n, a, &_lda, w, work, &_lwork, iwork, &_liwork, info); return 0; } /* Real space eigen systems info from dsyevd/ssyevd * as numba_raw_rsyevd but the allocation and error handling is done for the user. * Args are as per LAPACK. 
*/ static int numba_ez_rsyevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda, void *w) { F_INT info = 0; F_INT lwork = -1, liwork=-1; F_INT _n, _lda; size_t base_size = -1; void *work = NULL; F_INT *iwork = NULL; all_dtypes stack_slot; int stack_int = -1; ENSURE_VALID_REAL_KIND(kind) _n = (F_INT) n; _lda = (F_INT) lda; base_size = kind_size(kind); work = &stack_slot; iwork = &stack_int; numba_raw_rsyevd(kind, jobz, uplo, _n, a, _lda, w, work, lwork, iwork, liwork, &info); CATCH_LAPACK_INVALID_ARG("numba_raw_rsyevd", info); lwork = cast_from_X(kind, work); if (checked_PyMem_RawMalloc(&work, base_size * lwork)) { return STATUS_ERROR; } liwork = *iwork; if (checked_PyMem_RawMalloc((void**)&iwork, base_size * liwork)) { PyMem_RawFree(work); return STATUS_ERROR; } numba_raw_rsyevd(kind, jobz, uplo, _n, a, _lda, w, work, lwork, iwork, liwork, &info); PyMem_RawFree(work); PyMem_RawFree(iwork); CATCH_LAPACK_INVALID_ARG("numba_raw_rsyevd", info); return (int)info; } /* complex space symmetric eigen systems info from cheevd/zheevd*/ static int numba_raw_cheevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda, void *w, void *work, Py_ssize_t lwork, void *rwork, Py_ssize_t lrwork, F_INT *iwork, Py_ssize_t liwork, F_INT *info) { void *raw_func = NULL; F_INT _n, _lda, _lwork, _lrwork, _liwork; ENSURE_VALID_COMPLEX_KIND(kind) switch (kind) { case 'c': raw_func = get_clapack_cheevd(); break; case 'z': raw_func = get_clapack_zheevd(); break; } ENSURE_VALID_FUNC(raw_func) _n = (F_INT) n; _lda = (F_INT) lda; _lwork = (F_INT) lwork; _lrwork = (F_INT) lrwork; _liwork = (F_INT) liwork; (*(xheevd_t) raw_func)(&jobz, &uplo, &_n, a, &_lda, w, work, &_lwork, rwork, &_lrwork, iwork, &_liwork, info); return 0; } /* complex space eigen systems info from cheevd/zheevd * as numba_raw_cheevd but the allocation and error handling is done for the user. * Args are as per LAPACK. */ static int numba_ez_cheevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda, void *w) { F_INT info = 0; F_INT lwork = -1, lrwork = -1, liwork=-1; F_INT _n, _lda; size_t base_size = -1, underlying_float_size = -1; void *work = NULL, *rwork = NULL; F_INT *iwork = NULL; all_dtypes stack_slot1, stack_slot2; char uf_kind; int stack_int = -1; ENSURE_VALID_COMPLEX_KIND(kind) _n = (F_INT) n; _lda = (F_INT) lda; base_size = kind_size(kind); uf_kind = underlying_float_kind(kind); underlying_float_size = kind_size(uf_kind); work = &stack_slot1; rwork = &stack_slot2; iwork = &stack_int; numba_raw_cheevd(kind, jobz, uplo, _n, a, _lda, w, work, lwork, rwork, lrwork, iwork, liwork, &info); CATCH_LAPACK_INVALID_ARG("numba_raw_cheevd", info); lwork = cast_from_X(uf_kind, work); if (checked_PyMem_RawMalloc(&work, base_size * lwork)) { return STATUS_ERROR; } lrwork = cast_from_X(uf_kind, rwork); if (checked_PyMem_RawMalloc(&rwork, underlying_float_size * lrwork)) { PyMem_RawFree(work); return STATUS_ERROR; } liwork = *iwork; if (checked_PyMem_RawMalloc((void**)&iwork, base_size * liwork)) { PyMem_RawFree(work); PyMem_RawFree(rwork); return STATUS_ERROR; } numba_raw_cheevd(kind, jobz, uplo, _n, a, _lda, w, work, lwork, rwork, lrwork, iwork, liwork, &info); PyMem_RawFree(work); PyMem_RawFree(rwork); PyMem_RawFree(iwork); CATCH_LAPACK_INVALID_ARG("numba_raw_cheevd", info); return (int)info; } /* Hermitian eigenvalue systems info from *syevd and *heevd. * This routine hides the type and general complexity involved with making the * calls. The work space computation and error handling etc is hidden. 
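 *
 * A hedged caller sketch (kind 'd': eigenvalues only of a symmetric 2x2
 * column-major matrix):
 *
 *   double a[4] = {2., 1., 1., 2.};
 *   double w[2];   // receives the eigenvalues in ascending order: 1., 3.
 *   int info = numba_ez_xxxevd('d', 'N', 'L', 2, a, 2, w);
 *   // info == 0 on success; jobz 'V' would overwrite a with eigenvectors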
* Args are as per LAPACK. */ NUMBA_EXPORT_FUNC(int) numba_ez_xxxevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda, void *w) { ENSURE_VALID_KIND(kind) switch (kind) { case 's': case 'd': return numba_ez_rsyevd(kind, jobz, uplo, n, a, lda, w); case 'c': case 'z': return numba_ez_cheevd(kind, jobz, uplo, n, a, lda, w); } return STATUS_ERROR; /* unreachable */ } /* Real space svd systems info from dgesdd/sgesdd * Args are as per LAPACK. */ static int numba_raw_rgesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a, Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt, Py_ssize_t ldvt, void *work, Py_ssize_t lwork, F_INT *iwork, F_INT *info) { void *raw_func = NULL; F_INT _m, _n, _lda, _ldu, _ldvt, _lwork; ENSURE_VALID_REAL_KIND(kind) _m = (F_INT) m; _n = (F_INT) n; _lda = (F_INT) lda; _ldu = (F_INT) ldu; _ldvt = (F_INT) ldvt; _lwork = (F_INT) lwork; switch (kind) { case 's': raw_func = get_clapack_sgesdd(); break; case 'd': raw_func = get_clapack_dgesdd(); break; } ENSURE_VALID_FUNC(raw_func) (*(rgesdd_t) raw_func)(&jobz, &_m, &_n, a, &_lda, s, u, &_ldu, vt, &_ldvt, work, &_lwork, iwork, info); return 0; } /* Real space svd info from dgesdd/sgesdd. * As numba_raw_rgesdd but the allocation and error handling is done for the * user. * Args are as per LAPACK. */ static int numba_ez_rgesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a, Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt, Py_ssize_t ldvt) { F_INT info = 0; Py_ssize_t minmn = -1; Py_ssize_t lwork = -1; all_dtypes stack_slot, wk; size_t base_size = -1; F_INT *iwork = (F_INT *)&wk; void *work = NULL; ENSURE_VALID_REAL_KIND(kind) base_size = kind_size(kind); work = &stack_slot; /* Compute optimal work size (lwork) */ numba_raw_rgesdd(kind, jobz, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, iwork, &info); CATCH_LAPACK_INVALID_ARG("numba_raw_rgesdd", info); /* Allocate work array */ lwork = cast_from_X(kind, work); if (checked_PyMem_RawMalloc(&work, base_size * lwork)) return -1; minmn = m > n ? n : m; if (checked_PyMem_RawMalloc((void**) &iwork, 8 * minmn * sizeof(F_INT))) { PyMem_RawFree(work); return STATUS_ERROR; } numba_raw_rgesdd(kind, jobz, m, n, a, lda, s, u ,ldu, vt, ldvt, work, lwork, iwork, &info); PyMem_RawFree(work); PyMem_RawFree(iwork); CATCH_LAPACK_INVALID_ARG("numba_raw_rgesdd", info); return (int)info; } /* Complex space svd systems info from cgesdd/zgesdd * Args are as per LAPACK. */ static int numba_raw_cgesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a, Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt, Py_ssize_t ldvt, void *work, Py_ssize_t lwork, void *rwork, F_INT *iwork, F_INT *info) { void *raw_func = NULL; F_INT _m, _n, _lda, _ldu, _ldvt, _lwork; ENSURE_VALID_COMPLEX_KIND(kind) _m = (F_INT) m; _n = (F_INT) n; _lda = (F_INT) lda; _ldu = (F_INT) ldu; _ldvt = (F_INT) ldvt; _lwork = (F_INT) lwork; switch (kind) { case 'c': raw_func = get_clapack_cgesdd(); break; case 'z': raw_func = get_clapack_zgesdd(); break; } ENSURE_VALID_FUNC(raw_func) (*(cgesdd_t) raw_func)(&jobz, &_m, &_n, a, &_lda, s, u, &_ldu, vt, &_ldvt, work, &_lwork, rwork, iwork, info); return 0; } /* complex space svd info from cgesdd/zgesdd. * As numba_raw_cgesdd but the allocation and error handling is done for the * user. * Args are as per LAPACK. 
*/ static int numba_ez_cgesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a, Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt, Py_ssize_t ldvt) { F_INT info = 0; Py_ssize_t lwork = -1; Py_ssize_t lrwork = -1; Py_ssize_t minmn = -1; Py_ssize_t tmp1, tmp2; Py_ssize_t maxmn = -1; size_t real_base_size = -1; size_t complex_base_size = -1; all_dtypes stack_slot, wk1, wk2; void *work = NULL; void *rwork = (void *)&wk1; F_INT *iwork = (F_INT *)&wk2; ENSURE_VALID_COMPLEX_KIND(kind) switch (kind) { case 'c': real_base_size = sizeof(float); complex_base_size = sizeof(npy_complex64); break; case 'z': real_base_size = sizeof(double); complex_base_size = sizeof(npy_complex128); break; default: { PyGILState_STATE st = PyGILState_Ensure(); PyErr_SetString(PyExc_ValueError,\ "Invalid kind in numba_ez_cgesdd"); PyGILState_Release(st); } return STATUS_ERROR; } work = &stack_slot; /* Compute optimal work size (lwork) */ numba_raw_cgesdd(kind, jobz, m, n, a, lda, s, u ,ldu, vt, ldvt, work, lwork, rwork, iwork, &info); CATCH_LAPACK_INVALID_ARG("numba_raw_cgesdd", info); /* Allocate work array */ lwork = cast_from_X(kind, work); if (checked_PyMem_RawMalloc(&work, complex_base_size * lwork)) return STATUS_ERROR; minmn = m > n ? n : m; if (jobz == 'n') { lrwork = 7 * minmn; } else { maxmn = m > n ? m : n; tmp1 = 5 * minmn + 7; tmp2 = 2 * maxmn + 2 * minmn + 1; lrwork = minmn * (tmp1 > tmp2 ? tmp1: tmp2); } if (checked_PyMem_RawMalloc(&rwork, real_base_size * (lrwork > 1 ? lrwork : 1))) { PyMem_RawFree(work); return STATUS_ERROR; } if (checked_PyMem_RawMalloc((void **) &iwork, 8 * minmn * sizeof(F_INT))) { PyMem_RawFree(work); PyMem_RawFree(rwork); return STATUS_ERROR; } numba_raw_cgesdd(kind, jobz, m, n, a, lda, s, u ,ldu, vt, ldvt, work, lwork, rwork, iwork, &info); PyMem_RawFree(work); PyMem_RawFree(rwork); PyMem_RawFree(iwork); CATCH_LAPACK_INVALID_ARG("numba_raw_cgesdd", info); return (int)info; } /* SVD systems info from *gesdd. * This routine hides the type and general complexity involved with making the * calls to *gesdd. The work space computation and error handling etc is hidden. * Args are as per LAPACK. */ NUMBA_EXPORT_FUNC(int) numba_ez_gesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a, Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt, Py_ssize_t ldvt) { ENSURE_VALID_KIND(kind) switch (kind) { case 's': case 'd': return numba_ez_rgesdd(kind, jobz, m, n, a, lda, s, u, ldu, vt, ldvt); case 'c': case 'z': return numba_ez_cgesdd(kind, jobz, m, n, a, lda, s, u, ldu, vt, ldvt); } return STATUS_ERROR; /* unreachable */ } /* * Compute the QR factorization of a matrix. * Return -1 on internal error, 0 on success, > 0 on failure. */ static int numba_raw_xgeqrf(char kind, Py_ssize_t m, Py_ssize_t n, void *a, Py_ssize_t lda, void *tau, void *work, Py_ssize_t lwork, F_INT *info) { void *raw_func = NULL; F_INT _m, _n, _lda, _lwork; ENSURE_VALID_KIND(kind) switch (kind) { case 's': raw_func = get_clapack_sgeqrf(); break; case 'd': raw_func = get_clapack_dgeqrf(); break; case 'c': raw_func = get_clapack_cgeqrf(); break; case 'z': raw_func = get_clapack_zgeqrf(); break; } ENSURE_VALID_FUNC(raw_func) _m = (F_INT) m; _n = (F_INT) n; _lda = (F_INT) lda; _lwork = (F_INT) lwork; (*(xgeqrf_t) raw_func)(&_m, &_n, a, &_lda, tau, work, &_lwork, info); return 0; } /* * Compute the QR factorization of a matrix. * This routine hides the type and general complexity involved with making the * xgeqrf calls. The work space computation and error handling etc is hidden.
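 *
 * Together with numba_ez_xxgqr() below this yields a thin QR; a hedged
 * sketch (kind 'd', 3x2 column-major input):
 *
 *   double a[6] = {1., 1., 1., 0., 1., 2.};   // overwritten by the routine
 *   double tau[2];                            // min(m, n) reflector scalars
 *   if (numba_ez_geqrf('d', 3, 2, a, 3, tau) != 0)
 *       return STATUS_ERROR;
 *   // R now sits in the upper triangle of a; expand Q's first 2 columns:
 *   if (numba_ez_xxgqr('d', 3, 2, 2, a, 3, tau) != 0)
 *       return STATUS_ERROR;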
 * Args are as per LAPACK.
 */
NUMBA_EXPORT_FUNC(int)
numba_ez_geqrf(char kind, Py_ssize_t m, Py_ssize_t n, void *a, Py_ssize_t lda,
               void *tau)
{
    F_INT info = 0;
    Py_ssize_t lwork = -1;
    size_t base_size = -1;
    all_dtypes stack_slot;
    void *work = NULL;

    base_size = kind_size(kind);
    work = &stack_slot;

    /* Compute optimal work size (lwork) */
    numba_raw_xgeqrf(kind, m, n, a, lda, tau, work, lwork, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_xgeqrf", info);

    /* Allocate work array */
    lwork = cast_from_X(kind, work);
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
        return STATUS_ERROR;

    numba_raw_xgeqrf(kind, m, n, a, lda, tau, work, lwork, &info);
    PyMem_RawFree(work);
    CATCH_LAPACK_INVALID_ARG("numba_raw_xgeqrf", info);
    return 0; /* info cannot be >0 */
}

/*
 * Compute the orthogonal Q matrix (in QR) from elementary reflectors.
 */
static int
numba_raw_xxxgqr(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t k, void *a,
                 Py_ssize_t lda, void *tau, void *work, Py_ssize_t lwork,
                 F_INT *info)
{
    void *raw_func = NULL;
    F_INT _m, _n, _k, _lda, _lwork;

    ENSURE_VALID_KIND(kind)

    switch (kind)
    {
        case 's':
            raw_func = get_clapack_sorgqr();
            break;
        case 'd':
            raw_func = get_clapack_dorgqr();
            break;
        case 'c':
            raw_func = get_clapack_cungqr();
            break;
        case 'z':
            raw_func = get_clapack_zungqr();
            break;
    }
    ENSURE_VALID_FUNC(raw_func)

    _m = (F_INT) m;
    _n = (F_INT) n;
    _k = (F_INT) k;
    _lda = (F_INT) lda;
    _lwork = (F_INT) lwork;

    (*(xxxgqr_t) raw_func)(&_m, &_n, &_k, a, &_lda, tau, work, &_lwork, info);
    return 0;
}

/*
 * Compute the orthogonal Q matrix (in QR) from elementary reflectors.
 * This routine hides the type and general complexity involved with making the
 * x{or,un}gqr calls. The work space computation and error handling etc is
 * hidden. Args are as per LAPACK.
 */
NUMBA_EXPORT_FUNC(int)
numba_ez_xxgqr(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t k, void *a,
               Py_ssize_t lda, void *tau)
{
    F_INT info = 0;
    Py_ssize_t lwork = -1;
    size_t base_size = -1;
    all_dtypes stack_slot;
    void *work = NULL;

    work = &stack_slot;

    /* Compute optimal work size (lwork) */
    numba_raw_xxxgqr(kind, m, n, k, a, lda, tau, work, lwork, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_xxxgqr", info);

    base_size = kind_size(kind);

    /* Allocate work array */
    lwork = cast_from_X(kind, work);
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
        return STATUS_ERROR;

    numba_raw_xxxgqr(kind, m, n, k, a, lda, tau, work, lwork, &info);
    PyMem_RawFree(work);
    CATCH_LAPACK_INVALID_ARG("numba_raw_xxxgqr", info);
    return 0; /* info cannot be >0 */
}

/*
 * Compute the minimum-norm solution to a real linear least squares problem.
 */
static int
numba_raw_rgelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs,
                 void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S,
                 void *rcond, Py_ssize_t *rank, void *work, Py_ssize_t lwork,
                 F_INT *iwork, F_INT *info)
{
    void *raw_func = NULL;
    F_INT _m, _n, _nrhs, _lda, _ldb, _rank, _lwork;

    ENSURE_VALID_REAL_KIND(kind)

    switch (kind)
    {
        case 's':
            raw_func = get_clapack_sgelsd();
            break;
        case 'd':
            raw_func = get_clapack_dgelsd();
            break;
    }
    ENSURE_VALID_FUNC(raw_func)

    _m = (F_INT) m;
    _n = (F_INT) n;
    _nrhs = (F_INT) nrhs;
    _lda = (F_INT) lda;
    _ldb = (F_INT) ldb;
    _lwork = (F_INT) lwork;

    (*(rgelsd_t) raw_func)(&_m, &_n, &_nrhs, a, &_lda, b, &_ldb, S, rcond,
                           &_rank, work, &_lwork, iwork, info);
    *rank = (Py_ssize_t) _rank;
    return 0;
}

/*
 * Compute the minimum-norm solution to a real linear least squares problem.
 * This routine hides the type and general complexity involved with making the
 * {s,d}gelsd calls.
The work space computation and error handling etc is * hidden. Args are as per LAPACK. */ static int numba_ez_rgelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs, void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S, double rcond, Py_ssize_t * rank) { F_INT info = 0; Py_ssize_t lwork = -1; size_t base_size = -1; all_dtypes stack_slot; void *work = NULL, *rcond_cast = NULL; F_INT *iwork = NULL; F_INT iwork_tmp; float tmpf; ENSURE_VALID_REAL_KIND(kind) base_size = kind_size(kind); work = &stack_slot; rcond_cast = work; /* stop checks on null ptr complaining */ /* Compute optimal work size (lwork) */ numba_raw_rgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond_cast, rank, work, lwork, &iwork_tmp, &info); CATCH_LAPACK_INVALID_ARG("numba_raw_rgelsd", info); /* Allocate work array */ lwork = cast_from_X(kind, work); if (checked_PyMem_RawMalloc(&work, base_size * lwork)) return STATUS_ERROR; /* Allocate iwork array */ if (checked_PyMem_RawMalloc((void **)&iwork, sizeof(F_INT) * iwork_tmp)) { PyMem_RawFree(work); return STATUS_ERROR; } /* cast rcond to the right type */ switch (kind) { case 's': tmpf = (float)rcond; rcond_cast = (void * )&tmpf; break; case 'd': rcond_cast = (void * )&rcond; break; } numba_raw_rgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond_cast, rank, work, lwork, iwork, &info); PyMem_RawFree(work); PyMem_RawFree(iwork); CATCH_LAPACK_INVALID_ARG("numba_raw_rgelsd", info); return (int)info; } /* * Compute the minimum-norm solution to a complex linear least squares problem. */ static int numba_raw_cgelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs, void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S, void *rcond, Py_ssize_t * rank, void * work, Py_ssize_t lwork, void * rwork, F_INT *iwork, F_INT *info) { void *raw_func = NULL; F_INT _m, _n, _nrhs, _lda, _ldb, _rank, _lwork; ENSURE_VALID_COMPLEX_KIND(kind) switch (kind) { case 'c': raw_func = get_clapack_cgelsd(); break; case 'z': raw_func = get_clapack_zgelsd(); break; } ENSURE_VALID_FUNC(raw_func) _m = (F_INT) m; _n = (F_INT) n; _nrhs = (F_INT) nrhs; _lda = (F_INT) lda; _ldb = (F_INT) ldb; _lwork = (F_INT) lwork; (*(cgelsd_t) raw_func)(&_m, &_n, &_nrhs, a, &_lda, b, &_ldb, S, rcond, &_rank, work, &_lwork, rwork, iwork, info); *rank = (Py_ssize_t) _rank; return 0; } /* * Compute the minimum-norm solution to a complex linear least squares problem. * This routine hides the type and general complexity involved with making the * {c,z}gelsd calls. The work space computation and error handling etc is * hidden. Args are as per LAPACK. 
 */
static int
numba_ez_cgelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs,
                void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S,
                double rcond, Py_ssize_t *rank)
{
    F_INT info = 0;
    Py_ssize_t lwork = -1;
    size_t base_size = -1;
    all_dtypes stack_slot1, stack_slot2;
    size_t real_base_size = 0;
    void *work = NULL, *rwork = NULL, *rcond_cast = NULL;
    Py_ssize_t lrwork;
    F_INT *iwork = NULL;
    F_INT iwork_tmp;
    char real_kind = '-';
    float tmpf;

    ENSURE_VALID_COMPLEX_KIND(kind)

    base_size = kind_size(kind);
    work = &stack_slot1;
    rwork = &stack_slot2;
    rcond_cast = work; /* stop checks on null ptr complaining */

    /* Compute optimal work size */
    numba_raw_cgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond_cast, rank,
                     work, lwork, rwork, &iwork_tmp, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_cgelsd", info);

    /* Allocate work array */
    lwork = cast_from_X(kind, work);
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
        return STATUS_ERROR;

    /* Allocate iwork array */
    if (checked_PyMem_RawMalloc((void **)&iwork, sizeof(F_INT) * iwork_tmp))
    {
        PyMem_RawFree(work);
        return STATUS_ERROR;
    }

    /* cast rcond to the right type */
    switch (kind)
    {
        case 'c':
            real_kind = 's';
            tmpf = (float)rcond;
            rcond_cast = (void *)&tmpf;
            break;
        case 'z':
            real_kind = 'd';
            rcond_cast = (void *)&rcond;
            break;
    }

    real_base_size = kind_size(real_kind);
    lrwork = cast_from_X(real_kind, rwork);
    if (checked_PyMem_RawMalloc((void **)&rwork, real_base_size * lrwork))
    {
        PyMem_RawFree(work);
        PyMem_RawFree(iwork);
        return STATUS_ERROR;
    }

    numba_raw_cgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond_cast, rank,
                     work, lwork, rwork, iwork, &info);
    PyMem_RawFree(work);
    PyMem_RawFree(rwork);
    PyMem_RawFree(iwork);
    CATCH_LAPACK_INVALID_ARG("numba_raw_cgelsd", info);
    return (int)info;
}

/*
 * Compute the minimum-norm solution to a linear least squares problem.
 * This routine hides the type and general complexity involved with making the
 * calls to *gelsd. The work space computation and error handling etc is
 * hidden.
 * Args are as per LAPACK.
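 *
 * rcond follows the LAPACK xgelsd convention: singular values S(i) with
 * S(i) <= rcond * S(1) are treated as zero when forming the solution, and
 * *rank receives the resulting effective rank (rcond < 0 selects machine
 * precision). A usage sketch (kind 'd', one right-hand side, caller
 * allocates b of length max(m, n) and S of length min(m, n); error
 * handling elided):
 *
 *     Py_ssize_t rank;
 *     if (numba_ez_gelsd('d', m, n, 1, a, m, b, m > n ? m : n,
 *                        S, -1.0, &rank) != 0)
 *         (handle failure)
 *     (on success, the first n entries of b hold the minimum-norm solution)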
*/ NUMBA_EXPORT_FUNC(int) numba_ez_gelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs, void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S, double rcond, Py_ssize_t * rank) { ENSURE_VALID_KIND(kind) switch (kind) { case 's': case 'd': return numba_ez_rgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond, rank); case 'c': case 'z': return numba_ez_cgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond, rank); } return STATUS_ERROR; /* unreachable */ } /* * Compute the solution to a system of linear equations */ NUMBA_EXPORT_FUNC(int) numba_xgesv(char kind, Py_ssize_t n, Py_ssize_t nrhs, void *a, Py_ssize_t lda, F_INT *ipiv, void *b, Py_ssize_t ldb) { void *raw_func = NULL; F_INT _n, _nrhs, _lda, _ldb, info; ENSURE_VALID_KIND(kind) switch (kind) { case 's': raw_func = get_clapack_sgesv(); break; case 'd': raw_func = get_clapack_dgesv(); break; case 'c': raw_func = get_clapack_cgesv(); break; case 'z': raw_func = get_clapack_zgesv(); break; } ENSURE_VALID_FUNC(raw_func) _n = (F_INT) n; _nrhs = (F_INT) nrhs; _lda = (F_INT) lda; _ldb = (F_INT) ldb; (*(xgesv_t) raw_func)(&_n, &_nrhs, a, &_lda, ipiv, b, &_ldb, &info); CATCH_LAPACK_INVALID_ARG("xgesv", info); return (int)info; } /* undef defines and macros */ #undef STATUS_SUCCESS #undef STATUS_ERROR #undef ENSURE_VALID_KIND #undef ENSURE_VALID_REAL_KIND #undef ENSURE_VALID_COMPLEX_KIND #undef ENSURE_VALID_FUNC #undef F_INT #undef EMIT_GET_CLAPACK_FUNC #undef CATCH_LAPACK_INVALID_ARG numba-0.55.1/numba/_npymath_exports.c000664 000000 000000 00000001573 14174536160 017552 0ustar00rootroot000000 000000 /* * This file contains exports of Numpy math functions needed by numba. */ #include "_pymodule.h" #include #include /* * Map Numpy C function symbols to their addresses. */ struct npymath_entry { const char *name; void *func; }; #define NPYMATH_SYMBOL(name) \ { "npy_" #name, (void*) npy_##name } static struct npymath_entry npymath_exports[] = { /* double functions */ NPYMATH_SYMBOL(exp2), NPYMATH_SYMBOL(log2), NPYMATH_SYMBOL(logaddexp), NPYMATH_SYMBOL(logaddexp2), NPYMATH_SYMBOL(nextafter), NPYMATH_SYMBOL(spacing), NPYMATH_SYMBOL(modf), /* float functions */ NPYMATH_SYMBOL(exp2f), NPYMATH_SYMBOL(log2f), NPYMATH_SYMBOL(logaddexpf), NPYMATH_SYMBOL(logaddexp2f), NPYMATH_SYMBOL(nextafterf), NPYMATH_SYMBOL(spacingf), NPYMATH_SYMBOL(modff), }; #undef NPYMATH_SYMBOL numba-0.55.1/numba/_numba_common.h000664 000000 000000 00000002664 14174536160 016767 0ustar00rootroot000000 000000 #ifndef NUMBA_COMMON_H_ #define NUMBA_COMMON_H_ /* __has_attribute() is a clang / gcc-5 macro */ #ifndef __has_attribute # define __has_attribute(x) 0 #endif /* This attribute marks symbols that can be shared across C objects * but are not exposed outside of a shared library or executable. * Note this is default behaviour for global symbols under Windows. */ #if (__has_attribute(visibility) || \ (defined(__GNUC__) && __GNUC__ >= 4)) #define VISIBILITY_HIDDEN __attribute__ ((visibility("hidden"))) #else #define VISIBILITY_HIDDEN #endif /* * Numba's version of the PyArray_DescrCheck macro from NumPy, use it as a * direct replacement of NumPy's PyArray_DescrCheck to ensure binary * compatibility. 
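 *
 * It is used exactly like NumPy's own macro, e.g. (a sketch, with obj any
 * PyObject pointer):
 *
 *     if (NUMBA_PyArray_DescrCheck(obj)) {
 *         PyArray_Descr *descr = (PyArray_Descr *) obj;
 *         (use descr->type_num etc.)
 *     }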
* * Details of why this is needed: * NumPy 1.18 changed the definition of the PyArray_DescrCheck macro here: * https://github.com/numpy/numpy/commit/6108b5d1e138d07e3c9f2a4e3b1933749ad0e698 * the result of this being that building against NumPy <1.18 would prevent * Numba running against NumPy >= 1.20 as noted here: * https://github.com/numba/numba/issues/6041#issuecomment-665132199 * * This macro definition is copied from: * https://github.com/numpy/numpy/commit/6108b5d1e138d07e3c9f2a4e3b1933749ad0e698#diff-ad2213da23136c5fc5883d9eb2d88666R26 * * NOTE: This is the NumPy 1.18 and above version of the macro. */ #define NUMBA_PyArray_DescrCheck(op) PyObject_TypeCheck(op, &PyArrayDescr_Type) #endif /* NUMBA_COMMON_H_ */ numba-0.55.1/numba/_pymodule.h000664 000000 000000 00000002141 14174536160 016141 0ustar00rootroot000000 000000 #ifndef NUMBA_PY_MODULE_H_ #define NUMBA_PY_MODULE_H_ #define PY_SSIZE_T_CLEAN #include #include #include #define MOD_ERROR_VAL NULL #define MOD_SUCCESS_VAL(val) val #define MOD_INIT(name) PyMODINIT_FUNC PyInit_##name(void) #define MOD_DEF(ob, name, doc, methods) { \ static struct PyModuleDef moduledef = { \ PyModuleDef_HEAD_INIT, name, doc, -1, methods, NULL, NULL, NULL, NULL }; \ ob = PyModule_Create(&moduledef); } #define MOD_INIT_EXEC(name) PyInit_##name(); #define PyString_AsString PyUnicode_AsUTF8 #define PyString_Check PyUnicode_Check #define PyString_FromFormat PyUnicode_FromFormat #define PyString_FromString PyUnicode_FromString #define PyString_InternFromString PyUnicode_InternFromString #define PyInt_Type PyLong_Type #define PyInt_Check PyLong_Check #define PyInt_CheckExact PyLong_CheckExact #define SetAttrStringFromVoidPointer(m, name) do { \ PyObject *tmp = PyLong_FromVoidPtr((void *) &name); \ PyObject_SetAttrString(m, #name, tmp); \ Py_DECREF(tmp); } while (0) #endif /* NUMBA_PY_MODULE_H_ */ numba-0.55.1/numba/_random.c000664 000000 000000 00000031421 14174536160 015561 0ustar00rootroot000000 000000 /* * PRNG support. */ #ifdef _MSC_VER #define HAVE_PTHREAD_ATFORK 0 #else #define HAVE_PTHREAD_ATFORK 1 #include #endif /* Magic Mersenne Twister constants */ #define MT_N 624 #define MT_M 397 #define MT_MATRIX_A 0x9908b0dfU #define MT_UPPER_MASK 0x80000000U #define MT_LOWER_MASK 0x7fffffffU /* * Note this structure is accessed in numba.targets.randomimpl, * any changes here should be reflected there too. */ typedef struct { int index; /* unsigned int is sufficient on modern machines as we only need 32 bits */ unsigned int mt[MT_N]; int has_gauss; double gauss; int is_initialized; } rnd_state_t; /* Some code portions below from CPython's _randommodule.c, some others from Numpy's and Jean-Sebastien Roy's randomkit.c. 
*/ NUMBA_EXPORT_FUNC(void) numba_rnd_shuffle(rnd_state_t *state) { int i; unsigned int y; for (i = 0; i < MT_N - MT_M; i++) { y = (state->mt[i] & MT_UPPER_MASK) | (state->mt[i+1] & MT_LOWER_MASK); state->mt[i] = state->mt[i+MT_M] ^ (y >> 1) ^ (-(int) (y & 1) & MT_MATRIX_A); } for (; i < MT_N - 1; i++) { y = (state->mt[i] & MT_UPPER_MASK) | (state->mt[i+1] & MT_LOWER_MASK); state->mt[i] = state->mt[i+(MT_M-MT_N)] ^ (y >> 1) ^ (-(int) (y & 1) & MT_MATRIX_A); } y = (state->mt[MT_N - 1] & MT_UPPER_MASK) | (state->mt[0] & MT_LOWER_MASK); state->mt[MT_N - 1] = state->mt[MT_M - 1] ^ (y >> 1) ^ (-(int) (y & 1) & MT_MATRIX_A); } /* Initialize mt[] with an integer seed */ NUMBA_EXPORT_FUNC(void) numba_rnd_init(rnd_state_t *state, unsigned int seed) { unsigned int pos; seed &= 0xffffffffU; /* Knuth's PRNG as used in the Mersenne Twister reference implementation */ for (pos = 0; pos < MT_N; pos++) { state->mt[pos] = seed; seed = (1812433253U * (seed ^ (seed >> 30)) + pos + 1) & 0xffffffffU; } state->index = MT_N; state->has_gauss = 0; state->gauss = 0.0; state->is_initialized = 1; } /* Perturb mt[] with a key array */ static void rnd_init_by_array(rnd_state_t *state, unsigned int init_key[], size_t key_length) { size_t i, j, k; unsigned int *mt = state->mt; numba_rnd_init(state, 19650218U); i = 1; j = 0; k = (MT_N > key_length ? MT_N : key_length); for (; k; k--) { mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1664525U)) + init_key[j] + (unsigned int) j; /* non linear */ mt[i] &= 0xffffffffU; i++; j++; if (i >= MT_N) { mt[0] = mt[MT_N - 1]; i = 1; } if (j >= key_length) j = 0; } for (k = MT_N - 1; k; k--) { mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1566083941U)) - (unsigned int) i; /* non linear */ mt[i] &= 0xffffffffU; i++; if (i >= MT_N) { mt[0] = mt[MT_N - 1]; i=1; } } mt[0] = 0x80000000U; /* MSB is 1; ensuring non-zero initial array */ state->index = MT_N; state->has_gauss = 0; state->gauss = 0.0; state->is_initialized = 1; } /* * Management of thread-local random state. */ static int rnd_globally_initialized; #ifdef _MSC_VER #define THREAD_LOCAL(ty) __declspec(thread) ty #else /* Non-standard C99 extension that's understood by gcc and clang */ #define THREAD_LOCAL(ty) __thread ty #endif static THREAD_LOCAL(rnd_state_t) numba_py_random_state; static THREAD_LOCAL(rnd_state_t) numba_np_random_state; static THREAD_LOCAL(rnd_state_t) numba_internal_random_state; /* Seed the state with random bytes */ static int rnd_seed_with_bytes(rnd_state_t *state, Py_buffer *buf) { unsigned int *keys; unsigned char *bytes; size_t i, nkeys; nkeys = buf->len / sizeof(unsigned int); keys = (unsigned int *) PyMem_Malloc(nkeys * sizeof(unsigned int)); if (keys == NULL) { PyBuffer_Release(buf); return -1; } bytes = (unsigned char *) buf->buf; /* Convert input bytes to int32 keys, without violating alignment * constraints. */ for (i = 0; i < nkeys; i++, bytes += 4) { keys[i] = (bytes[3] << 24) + (bytes[2] << 16) + (bytes[1] << 8) + (bytes[0] << 0); } PyBuffer_Release(buf); rnd_init_by_array(state, keys, nkeys); PyMem_Free(keys); return 0; } #if HAVE_PTHREAD_ATFORK /* After a fork(), the child should reseed its random states. * Since only the main thread survives in the child, it's enough to mark * the current thread-local states as uninitialized. */ static void rnd_atfork_child(void) { numba_py_random_state.is_initialized = 0; numba_np_random_state.is_initialized = 0; numba_internal_random_state.is_initialized = 0; } #endif /* Global initialization routine. It must be called as early as possible. 
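 * A typical call sequence (a sketch; in Numba proper this is driven from
 * the extension module's init code, not shown in this file):
 *
 *     numba_rnd_ensure_global_init();   (once, as early as possible)
 *     rnd_state_t *st = numba_get_np_random_state();   (lazily seeded)
 *     double u = get_next_double(st);   (uniform double in [0, 1))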
*/ NUMBA_EXPORT_FUNC(void) numba_rnd_ensure_global_init(void) { if (!rnd_globally_initialized) { #if HAVE_PTHREAD_ATFORK pthread_atfork(NULL, NULL, rnd_atfork_child); #endif numba_py_random_state.is_initialized = 0; numba_np_random_state.is_initialized = 0; numba_internal_random_state.is_initialized = 0; rnd_globally_initialized = 1; } } /* First-time init a random state */ static void rnd_implicit_init(rnd_state_t *state) { /* Initialize with random bytes. The easiest way to get good-quality * cross-platform random bytes is still to call os.urandom() * using the Python interpreter... */ PyObject *module, *bufobj; Py_buffer buf; PyGILState_STATE gilstate = PyGILState_Ensure(); module = PyImport_ImportModuleNoBlock("os"); if (module == NULL) goto error; /* Read as many bytes as necessary to get the full entropy * exploitable by the MT generator. */ bufobj = PyObject_CallMethod(module, "urandom", "i", (int) (MT_N * sizeof(unsigned int))); Py_DECREF(module); if (bufobj == NULL) goto error; if (PyObject_GetBuffer(bufobj, &buf, PyBUF_SIMPLE)) goto error; Py_DECREF(bufobj); if (rnd_seed_with_bytes(state, &buf)) goto error; /* state->is_initialized is set now */ PyGILState_Release(gilstate); return; error: /* In normal conditions, os.urandom() and PyMem_Malloc() shouldn't fail, * and we don't want the caller to deal with errors, so just bail out. */ if (PyErr_Occurred()) PyErr_Print(); Py_FatalError(NULL); } /* Functions returning the thread-local random state pointer. * The LLVM JIT doesn't support thread-local variables so we rely * on the C compiler instead. */ NUMBA_EXPORT_FUNC(rnd_state_t *) numba_get_py_random_state(void) { rnd_state_t *state = &numba_py_random_state; if (!state->is_initialized) rnd_implicit_init(state); return state; } NUMBA_EXPORT_FUNC(rnd_state_t *) numba_get_np_random_state(void) { rnd_state_t *state = &numba_np_random_state; if (!state->is_initialized) rnd_implicit_init(state); return state; } NUMBA_EXPORT_FUNC(rnd_state_t *) numba_get_internal_random_state(void) { rnd_state_t *state = &numba_internal_random_state; if (!state->is_initialized) rnd_implicit_init(state); return state; } /* * Python-exposed helpers for state management and testing. 
*/ static int rnd_state_converter(PyObject *obj, rnd_state_t **state) { *state = (rnd_state_t *) PyLong_AsVoidPtr(obj); return (*state != NULL || !PyErr_Occurred()); } NUMBA_EXPORT_FUNC(PyObject *) _numba_rnd_get_py_state_ptr(PyObject *self) { return PyLong_FromVoidPtr(numba_get_py_random_state()); } NUMBA_EXPORT_FUNC(PyObject *) _numba_rnd_get_np_state_ptr(PyObject *self) { return PyLong_FromVoidPtr(numba_get_np_random_state()); } NUMBA_EXPORT_FUNC(PyObject *) _numba_rnd_shuffle(PyObject *self, PyObject *arg) { rnd_state_t *state; if (!rnd_state_converter(arg, &state)) return NULL; numba_rnd_shuffle(state); Py_RETURN_NONE; } NUMBA_EXPORT_FUNC(PyObject *) _numba_rnd_set_state(PyObject *self, PyObject *args) { int i, index; rnd_state_t *state; PyObject *tuplearg, *intlist; if (!PyArg_ParseTuple(args, "O&O!:rnd_set_state", rnd_state_converter, &state, &PyTuple_Type, &tuplearg)) return NULL; if (!PyArg_ParseTuple(tuplearg, "iO!", &index, &PyList_Type, &intlist)) return NULL; if (PyList_GET_SIZE(intlist) != MT_N) { PyErr_SetString(PyExc_ValueError, "list object has wrong size"); return NULL; } state->index = index; for (i = 0; i < MT_N; i++) { PyObject *v = PyList_GET_ITEM(intlist, i); unsigned long x = PyLong_AsUnsignedLong(v); if (x == (unsigned long) -1 && PyErr_Occurred()) return NULL; state->mt[i] = (unsigned int) x; } state->has_gauss = 0; state->gauss = 0.0; state->is_initialized = 1; Py_RETURN_NONE; } NUMBA_EXPORT_FUNC(PyObject *) _numba_rnd_get_state(PyObject *self, PyObject *arg) { PyObject *intlist; int i; rnd_state_t *state; if (!rnd_state_converter(arg, &state)) return NULL; intlist = PyList_New(MT_N); if (intlist == NULL) return NULL; for (i = 0; i < MT_N; i++) { PyObject *v = PyLong_FromUnsignedLong(state->mt[i]); if (v == NULL) { Py_DECREF(intlist); return NULL; } PyList_SET_ITEM(intlist, i, v); } return Py_BuildValue("iN", state->index, intlist); } NUMBA_EXPORT_FUNC(PyObject *) _numba_rnd_seed(PyObject *self, PyObject *args) { unsigned int seed; rnd_state_t *state; if (!PyArg_ParseTuple(args, "O&I:rnd_seed", rnd_state_converter, &state, &seed)) { /* rnd_seed_*(bytes-like object) */ Py_buffer buf; PyErr_Clear(); if (!PyArg_ParseTuple(args, "O&s*:rnd_seed", rnd_state_converter, &state, &buf)) return NULL; if (rnd_seed_with_bytes(state, &buf)) return NULL; else Py_RETURN_NONE; } else { /* rnd_seed_*(int32) */ numba_rnd_init(state, seed); Py_RETURN_NONE; } } /* * Random distribution helpers. * Most code straight from Numpy's distributions.c. 
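 *
 * A note on get_next_double() below: it assembles a 53-bit uniform double
 * the same way NumPy does, from the top 27 bits (a) and the top 26 bits
 * (b) of two consecutive 32-bit draws:
 *
 *     (a * 2^26 + b) / 2^53 == (a * 67108864.0 + b) / 9007199254740992.0
 *
 * which is uniform on [0, 1) at full double precision.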
*/ #ifndef M_PI #define M_PI 3.14159265358979323846264338328 #endif NUMBA_EXPORT_FUNC(unsigned int) get_next_int32(rnd_state_t *state) { unsigned int y; if (state->index == MT_N) { numba_rnd_shuffle(state); state->index = 0; } y = state->mt[state->index++]; /* Tempering */ y ^= (y >> 11); y ^= (y << 7) & 0x9d2c5680U; y ^= (y << 15) & 0xefc60000U; y ^= (y >> 18); return y; } NUMBA_EXPORT_FUNC(double) get_next_double(rnd_state_t *state) { double a = get_next_int32(state) >> 5; double b = get_next_int32(state) >> 6; return (a * 67108864.0 + b) / 9007199254740992.0; } NUMBA_EXPORT_FUNC(double) loggam(double x) { double x0, x2, xp, gl, gl0; long k, n; static double a[10] = {8.333333333333333e-02,-2.777777777777778e-03, 7.936507936507937e-04,-5.952380952380952e-04, 8.417508417508418e-04,-1.917526917526918e-03, 6.410256410256410e-03,-2.955065359477124e-02, 1.796443723688307e-01,-1.39243221690590e+00}; x0 = x; n = 0; if ((x == 1.0) || (x == 2.0)) { return 0.0; } else if (x <= 7.0) { n = (long)(7 - x); x0 = x + n; } x2 = 1.0/(x0*x0); xp = 2*M_PI; gl0 = a[9]; for (k=8; k>=0; k--) { gl0 *= x2; gl0 += a[k]; } gl = gl0/x0 + 0.5*log(xp) + (x0-0.5)*log(x0) - x0; if (x <= 7.0) { for (k=1; k<=n; k++) { gl -= log(x0-1.0); x0 -= 1.0; } } return gl; } NUMBA_EXPORT_FUNC(int64_t) numba_poisson_ptrs(rnd_state_t *state, double lam) { /* This method is invoked only if the parameter lambda of this * distribution is big enough ( >= 10 ). The algorithm used is * described in "Hörmann, W. 1992. 'The Transformed Rejection * Method for Generating Poisson Random Variables'. * The implementation comes straight from Numpy. */ int64_t k; double U, V, slam, loglam, a, b, invalpha, vr, us; slam = sqrt(lam); loglam = log(lam); b = 0.931 + 2.53*slam; a = -0.059 + 0.02483*b; invalpha = 1.1239 + 1.1328/(b-3.4); vr = 0.9277 - 3.6224/(b-2); while (1) { U = get_next_double(state) - 0.5; V = get_next_double(state); us = 0.5 - fabs(U); k = (int64_t) floor((2*a/us + b)*U + lam + 0.43); if ((us >= 0.07) && (V <= vr)) { return k; } if ((k < 0) || ((us < 0.013) && (V > us))) { continue; } if ((log(V) + log(invalpha) - log(a/(us*us)+b)) <= (-lam + (double) k*loglam - loggam((double) k+1))) { return k; } } } numba-0.55.1/numba/_typeof.c000664 000000 000000 00000105466 14174536160 015622 0ustar00rootroot000000 000000 #include "_pymodule.h" #include #include #include #include "_numba_common.h" #include "_typeof.h" #include "_hashtable.h" #include "_devicearray.h" #include "pyerrors.h" #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #include /* Cached typecodes for basic scalar types */ static int tc_int8; static int tc_int16; static int tc_int32; static int tc_int64; static int tc_uint8; static int tc_uint16; static int tc_uint32; static int tc_uint64; static int tc_float32; static int tc_float64; static int tc_complex64; static int tc_complex128; static int BASIC_TYPECODES[12]; static int tc_intp; /* The type object for the numba .dispatcher.OmittedArg class * that wraps omitted arguments. */ static PyObject *omittedarg_type; static PyObject *typecache; static PyObject *ndarray_typecache; static PyObject *structured_dtypes; static PyObject *str_typeof_pyval = NULL; static PyObject *str_value = NULL; static PyObject *str_numba_type = NULL; /* CUDA device array API */ void **DeviceArray_API; /* * Type fingerprint computation. 
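 *
 * A fingerprint is a compact byte string summarizing a value's type. As an
 * illustration (following compute_fingerprint() below), a 2d C-contiguous
 * writable float64 ndarray is encoded as:
 *
 *     'A'                 (OP_NP_ARRAY)
 *     int32(2)            (number of dimensions)
 *     'C'                 (layout: 'C', 'F' or 'A' for "any")
 *     'W'                 (writable; 'R' for read-only)
 *     char(NPY_DOUBLE)    (dtype, via compute_dtype_fingerprint())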
*/ typedef struct { /* A buffer the fingerprint will be written to */ char *buf; size_t n; size_t allocated; /* A preallocated buffer, sufficient to fit the fingerprint for most types */ char static_buf[40]; } string_writer_t; static void string_writer_init(string_writer_t *w) { w->buf = w->static_buf; w->n = 0; w->allocated = sizeof(w->static_buf) / sizeof(unsigned char); } static void string_writer_clear(string_writer_t *w) { if (w->buf != w->static_buf) free(w->buf); } static void string_writer_move(string_writer_t *dest, const string_writer_t *src) { dest->n = src->n; dest->allocated = src->allocated; if (src->buf == src->static_buf) { dest->buf = dest->static_buf; memcpy(dest->buf, src->buf, src->n); } else { dest->buf = src->buf; } } /* Ensure at least *bytes* can be appended to the string writer's buffer. */ static int string_writer_ensure(string_writer_t *w, size_t bytes) { size_t newsize; bytes += w->n; if (bytes <= w->allocated) return 0; newsize = (w->allocated << 2) + 1; if (newsize < bytes) newsize = bytes; if (w->buf == w->static_buf) w->buf = malloc(newsize); else w->buf = realloc(w->buf, newsize); if (w->buf) { w->allocated = newsize; return 0; } else { PyErr_NoMemory(); return -1; } } static int string_writer_put_char(string_writer_t *w, unsigned char c) { if (string_writer_ensure(w, 1)) return -1; w->buf[w->n++] = c; return 0; } static int string_writer_put_int32(string_writer_t *w, unsigned int v) { if (string_writer_ensure(w, 4)) return -1; w->buf[w->n] = v & 0xff; w->buf[w->n + 1] = (v >> 8) & 0xff; w->buf[w->n + 2] = (v >> 16) & 0xff; w->buf[w->n + 3] = (v >> 24) & 0xff; w->n += 4; return 0; } static int string_writer_put_intp(string_writer_t *w, npy_intp v) { if (string_writer_ensure(w, NPY_SIZEOF_PY_INTPTR_T)) return -1; w->buf[w->n] = v & 0xff; w->buf[w->n + 1] = (v >> 8) & 0xff; w->buf[w->n + 2] = (v >> 16) & 0xff; w->buf[w->n + 3] = (v >> 24) & 0xff; #if NPY_SIZEOF_PY_INTPTR_T == 8 w->buf[w->n + 4] = (v >> 32) & 0xff; w->buf[w->n + 5] = (v >> 40) & 0xff; w->buf[w->n + 6] = (v >> 48) & 0xff; w->buf[w->n + 7] = (v >> 56) & 0xff; #endif w->n += NPY_SIZEOF_PY_INTPTR_T; return 0; } static int string_writer_put_string(string_writer_t *w, const char *s) { if (s == NULL) { return string_writer_put_char(w, 0); } else { size_t N = strlen(s) + 1; if (string_writer_ensure(w, N)) return -1; memcpy(w->buf + w->n, s, N); w->n += N; return 0; } } enum opcode { OP_START_TUPLE = '(', OP_END_TUPLE = ')', OP_INT = 'i', OP_FLOAT = 'f', OP_COMPLEX = 'c', OP_BOOL = '?', OP_OMITTED = '!', OP_BYTEARRAY = 'a', OP_BYTES = 'b', OP_NONE = 'n', OP_LIST = '[', OP_SET = '{', OP_BUFFER = 'B', OP_NP_SCALAR = 'S', OP_NP_ARRAY = 'A', OP_NP_DTYPE = 'D' }; #define TRY(func, w, arg) \ do { \ if (func(w, arg)) return -1; \ } while (0) static int fingerprint_unrecognized(void) { PyErr_SetString(PyExc_NotImplementedError, "cannot compute type fingerprint for value"); return -1; } static int compute_dtype_fingerprint(string_writer_t *w, PyArray_Descr *descr) { int typenum = descr->type_num; if (typenum < NPY_OBJECT) return string_writer_put_char(w, (char) typenum); if (typenum == NPY_VOID) { /* Structured dtype: serialize the dtype pointer. Unfortunately, * some structured dtypes can be ephemeral, so we have to * intern them to avoid pointer reuse and fingerprint collisions. * (e.g. 
np.recarray(dtype=some_dtype) creates a new dtype * equal to some_dtype) */ PyObject *interned = PyDict_GetItem(structured_dtypes, (PyObject *) descr); if (interned == NULL) { interned = (PyObject *) descr; if (PyDict_SetItem(structured_dtypes, interned, interned)) return -1; } TRY(string_writer_put_char, w, (char) typenum); return string_writer_put_intp(w, (npy_intp) interned); } #if NPY_API_VERSION >= 0x00000007 if (PyTypeNum_ISDATETIME(typenum)) { PyArray_DatetimeMetaData *md; md = &(((PyArray_DatetimeDTypeMetaData *)descr->c_metadata)->meta); TRY(string_writer_put_char, w, (char) typenum); TRY(string_writer_put_char, w, (char) md->base); return string_writer_put_int32(w, (char) md->num); } #endif return fingerprint_unrecognized(); } static int compute_fingerprint(string_writer_t *w, PyObject *val) { /* * Implementation note: for performance, we start with common * types that can be tested with fast checks. */ if (val == Py_None) return string_writer_put_char(w, OP_NONE); if (PyBool_Check(val)) return string_writer_put_char(w, OP_BOOL); /* Note we avoid matching int subclasses such as IntEnum */ if (PyInt_CheckExact(val) || PyLong_CheckExact(val)) return string_writer_put_char(w, OP_INT); if (PyFloat_Check(val)) return string_writer_put_char(w, OP_FLOAT); if (PyComplex_CheckExact(val)) return string_writer_put_char(w, OP_COMPLEX); if (PyTuple_Check(val)) { if(PyTuple_CheckExact(val)) { Py_ssize_t i, n; n = PyTuple_GET_SIZE(val); TRY(string_writer_put_char, w, OP_START_TUPLE); for (i = 0; i < n; i++) TRY(compute_fingerprint, w, PyTuple_GET_ITEM(val, i)); TRY(string_writer_put_char, w, OP_END_TUPLE); return 0; } /* as per typeof.py, check "_asdict" for namedtuple. */ else if(PyObject_HasAttrString(val, "_asdict")) { /* * This encodes the class name and field names of a namedtuple into * the fingerprint on the condition that the number of fields is * small (<10) and that the class name and field names are encodable * as ASCII. 
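 *
 * As an illustration (names hypothetical), Point = namedtuple('Point',
 * ['x', 'y']) with the value Point(1, 2.0) contributes roughly:
 *
 *     'P' 'o' 'i' 'n' 't'        (class name, ASCII bytes)
 *     '('                        (OP_START_TUPLE)
 *     'x' <fingerprint of 1>     (each field name, then its item)
 *     'y' <fingerprint of 2.0>
 *     ')'                        (OP_END_TUPLE)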
*/ PyObject * clazz = NULL; PyObject * name = NULL; PyObject * _fields = PyObject_GetAttrString(val, "_fields"); PyObject * field = NULL; PyObject * ascii_str = NULL; Py_ssize_t i, n, j, flen; char * buf = NULL; int ret; clazz = PyObject_GetAttrString(val, "__class__"); if (clazz == NULL) return -1; name = PyObject_GetAttrString(clazz, "__name__"); Py_DECREF(clazz); if (name == NULL) return -1; ascii_str = PyUnicode_AsEncodedString(name, "ascii", "ignore"); Py_DECREF(name); if (ascii_str == NULL) return -1; ret = PyBytes_AsStringAndSize(ascii_str, &buf, &flen); if (ret == -1) return -1; for(j = 0; j < flen; j++) { TRY(string_writer_put_char, w, buf[j]); } Py_DECREF(ascii_str); if (_fields == NULL) return -1; n = PyTuple_GET_SIZE(val); TRY(string_writer_put_char, w, OP_START_TUPLE); for (i = 0; i < n; i++) { field = PyTuple_GET_ITEM(_fields, i); if (field == NULL) return -1; ascii_str = PyUnicode_AsEncodedString(field, "ascii", "ignore"); if (ascii_str == NULL) return -1; ret = PyBytes_AsStringAndSize(ascii_str, &buf, &flen); if (ret == -1) return -1; for(j = 0; j < flen; j++) { TRY(string_writer_put_char, w, buf[j]); } Py_DECREF(ascii_str); TRY(compute_fingerprint, w, PyTuple_GET_ITEM(val, i)); } TRY(string_writer_put_char, w, OP_END_TUPLE); Py_DECREF(_fields); return 0; } } if (PyBytes_Check(val)) return string_writer_put_char(w, OP_BYTES); if (PyByteArray_Check(val)) return string_writer_put_char(w, OP_BYTEARRAY); if ((PyObject *) Py_TYPE(val) == omittedarg_type) { PyObject *default_val = PyObject_GetAttr(val, str_value); if (default_val == NULL) return -1; TRY(string_writer_put_char, w, OP_OMITTED); TRY(compute_fingerprint, w, default_val); Py_DECREF(default_val); return 0; } if (PyArray_IsScalar(val, Generic)) { /* Note: PyArray_DescrFromScalar() may be a bit slow on non-trivial types. 
*/ PyArray_Descr *descr = PyArray_DescrFromScalar(val); if (descr == NULL) return -1; TRY(string_writer_put_char, w, OP_NP_SCALAR); TRY(compute_dtype_fingerprint, w, descr); Py_DECREF(descr); return 0; } if (PyArray_Check(val)) { PyArrayObject *ary = (PyArrayObject *) val; int ndim = PyArray_NDIM(ary); TRY(string_writer_put_char, w, OP_NP_ARRAY); TRY(string_writer_put_int32, w, ndim); if (PyArray_IS_C_CONTIGUOUS(ary)) TRY(string_writer_put_char, w, 'C'); else if (PyArray_IS_F_CONTIGUOUS(ary)) TRY(string_writer_put_char, w, 'F'); else TRY(string_writer_put_char, w, 'A'); if (PyArray_ISWRITEABLE(ary)) TRY(string_writer_put_char, w, 'W'); else TRY(string_writer_put_char, w, 'R'); return compute_dtype_fingerprint(w, PyArray_DESCR(ary)); } if (PyList_Check(val)) { Py_ssize_t n = PyList_GET_SIZE(val); if (n == 0) { PyErr_SetString(PyExc_ValueError, "cannot compute fingerprint of empty list"); return -1; } /* Only the first item is considered, as in typeof.py */ TRY(string_writer_put_char, w, OP_LIST); TRY(compute_fingerprint, w, PyList_GET_ITEM(val, 0)); return 0; } /* Note we only accept sets, not frozensets */ if (Py_TYPE(val) == &PySet_Type) { Py_hash_t h; PyObject *item; Py_ssize_t pos = 0; /* Only one item is considered, as in typeof.py */ if (!_PySet_NextEntry(val, &pos, &item, &h)) { /* Empty set */ PyErr_SetString(PyExc_ValueError, "cannot compute fingerprint of empty set"); return -1; } TRY(string_writer_put_char, w, OP_SET); TRY(compute_fingerprint, w, item); return 0; } if (PyObject_CheckBuffer(val)) { Py_buffer buf; int flags = PyBUF_ND | PyBUF_STRIDES | PyBUF_FORMAT; char contig; int ndim; char readonly; /* Attempt to get a writable buffer, then fallback on read-only */ if (PyObject_GetBuffer(val, &buf, flags | PyBUF_WRITABLE)) { PyErr_Clear(); if (PyObject_GetBuffer(val, &buf, flags)) goto _unrecognized; } if (PyBuffer_IsContiguous(&buf, 'C')) contig = 'C'; else if (PyBuffer_IsContiguous(&buf, 'F')) contig = 'F'; else contig = 'A'; ndim = buf.ndim; readonly = buf.readonly ? 'R' : 'W'; if (string_writer_put_char(w, OP_BUFFER) || string_writer_put_int32(w, ndim) || string_writer_put_char(w, contig) || string_writer_put_char(w, readonly) || string_writer_put_string(w, buf.format) || /* We serialize the object's Python type as well, to distinguish between types which have Numba specializations (e.g. array.array() vs. memoryview) */ string_writer_put_intp(w, (npy_intp) Py_TYPE(val))) { PyBuffer_Release(&buf); return -1; } PyBuffer_Release(&buf); return 0; } if (NUMBA_PyArray_DescrCheck(val)) { TRY(string_writer_put_char, w, OP_NP_DTYPE); return compute_dtype_fingerprint(w, (PyArray_Descr *) val); } _unrecognized: /* Type not recognized */ return fingerprint_unrecognized(); } PyObject * typeof_compute_fingerprint(PyObject *val) { PyObject *res; string_writer_t w; string_writer_init(&w); if (compute_fingerprint(&w, val)) goto error; res = PyBytes_FromStringAndSize(w.buf, w.n); string_writer_clear(&w); return res; error: string_writer_clear(&w); return NULL; } /* * Getting the typecode from a Type object. */ static int _typecode_from_type_object(PyObject *tyobj) { int typecode; PyObject *tmpcode = PyObject_GetAttrString(tyobj, "_code"); if (tmpcode == NULL) { return -1; } typecode = PyLong_AsLong(tmpcode); Py_DECREF(tmpcode); return typecode; } /* When we want to cache the type's typecode for later lookup, we need to keep a reference to the returned type object so that it cannot be deleted. 
This is because of the following events occurring when first using a @jit function for a given set of types: 1. typecode_fallback requests a new typecode for an arbitrary Python value; this implies creating a Numba type object (on the first dispatcher call); the typecode cache is then populated. 2. matching of the typecode list in _dispatcherimpl.cpp fails, since the typecode is new. 3. we have to compile: compile_and_invoke() is called, it will invoke Dispatcher_Insert to register the new signature. The reference to the Numba type object returned in step 1 is deleted as soon as we call Py_DECREF() on it, since we are holding the only reference. If this happens and we use the typecode we got to populate the cache, then the cache won't ever return the correct typecode, and the dispatcher will never successfully match the typecodes with those of some already-compiled instance. So we need to make sure that we don't call Py_DECREF() on objects whose typecode will be used to populate the cache. This is ensured by calling _typecode_fallback with retain_reference == 0. Note that technically we are leaking the reference, since we do not continue to hold a pointer to the type object that we get back from typeof_pyval. However, we don't need to refer to it again, we just need to make sure that it is never deleted. */ static int _typecode_fallback(PyObject *dispatcher, PyObject *val, int retain_reference) { PyObject *numba_type; int typecode; /* * For values that define "_numba_type_", which holds a numba Type * instance that should be used as the type of the value. * Note this is done here, not in typeof_typecode(), so that * some values can still benefit from fingerprint caching. */ if (PyObject_HasAttr(val, str_numba_type)) { numba_type = PyObject_GetAttrString(val, "_numba_type_"); if (!numba_type) return -1; } else { // Go back to the interpreter numba_type = PyObject_CallMethodObjArgs((PyObject *) dispatcher, str_typeof_pyval, val, NULL); } if (!numba_type) return -1; typecode = _typecode_from_type_object(numba_type); if (!retain_reference) Py_DECREF(numba_type); return typecode; } /* Variations on _typecode_fallback for convenience */ static int typecode_fallback(PyObject *dispatcher, PyObject *val) { return _typecode_fallback(dispatcher, val, 0); } static int typecode_fallback_keep_ref(PyObject *dispatcher, PyObject *val) { return _typecode_fallback(dispatcher, val, 1); } /* A cache mapping fingerprints (string_writer_t *) to typecodes (int). */ static _Numba_hashtable_t *fingerprint_hashtable = NULL; static Py_uhash_t hash_writer(const void *key) { string_writer_t *writer = (string_writer_t *) key; Py_uhash_t x = 0; /* The old FNV algorithm used by Python 2 */ if (writer->n > 0) { unsigned char *p = (unsigned char *) writer->buf; Py_ssize_t len = writer->n; x ^= *p << 7; while (--len >= 0) x = (1000003*x) ^ *p++; x ^= writer->n; if (x == (Py_uhash_t) -1) x = -2; } return x; } static int compare_writer(const void *key, const _Numba_hashtable_entry_t *entry) { string_writer_t *v = (string_writer_t *) key; string_writer_t *w = (string_writer_t *) entry->key; if (v->n != w->n) return 0; return memcmp(v->buf, w->buf, v->n) == 0; } /* Try to compute *val*'s typecode using its fingerprint and the * fingerprint->typecode cache. 
*/ static int typecode_using_fingerprint(PyObject *dispatcher, PyObject *val) { int typecode; string_writer_t w; string_writer_init(&w); if (compute_fingerprint(&w, val)) { string_writer_clear(&w); if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) { /* Can't compute a type fingerprint for the given value, fall back on typeof() without caching. */ PyErr_Clear(); return typecode_fallback(dispatcher, val); } return -1; } if (_Numba_HASHTABLE_GET(fingerprint_hashtable, &w, typecode) > 0) { /* Cache hit */ string_writer_clear(&w); return typecode; } /* Not found in cache: invoke pure Python typeof() and cache result. * Note we have to keep the type alive forever as explained * above in _typecode_fallback(). */ typecode = typecode_fallback_keep_ref(dispatcher, val); if (typecode >= 0) { string_writer_t *key = (string_writer_t *) malloc(sizeof(string_writer_t)); if (key == NULL) { string_writer_clear(&w); PyErr_NoMemory(); return -1; } /* Ownership of the string writer's buffer will be transferred * to the hash table. */ string_writer_move(key, &w); if (_Numba_HASHTABLE_SET(fingerprint_hashtable, key, typecode)) { string_writer_clear(&w); PyErr_NoMemory(); return -1; } } return typecode; } /* * Direct lookup table for extra-fast typecode resolution of simple array types. */ #define N_DTYPES 12 #define N_NDIM 5 /* Fast path for up to 5D array */ #define N_LAYOUT 3 static int cached_arycode[N_NDIM][N_LAYOUT][N_DTYPES]; /* Convert a Numpy dtype number to an internal index into cached_arycode. The returned value must also be a valid index into BASIC_TYPECODES. */ static int dtype_num_to_typecode(int type_num) { int dtype; switch(type_num) { case NPY_INT8: dtype = 0; break; case NPY_INT16: dtype = 1; break; case NPY_INT32: dtype = 2; break; case NPY_INT64: dtype = 3; break; case NPY_UINT8: dtype = 4; break; case NPY_UINT16: dtype = 5; break; case NPY_UINT32: dtype = 6; break; case NPY_UINT64: dtype = 7; break; case NPY_FLOAT32: dtype = 8; break; case NPY_FLOAT64: dtype = 9; break; case NPY_COMPLEX64: dtype = 10; break; case NPY_COMPLEX128: dtype = 11; break; default: /* Type not included in the global lookup table */ dtype = -1; } return dtype; } static int get_cached_typecode(PyArray_Descr* descr) { PyObject* tmpobject = PyDict_GetItem(typecache, (PyObject*)descr); if (tmpobject == NULL) return -1; return PyLong_AsLong(tmpobject); } static void cache_typecode(PyArray_Descr* descr, int typecode) { PyObject* value = PyLong_FromLong(typecode); PyDict_SetItem(typecache, (PyObject*)descr, value); Py_DECREF(value); } static PyObject* ndarray_key(int ndim, int layout, PyArray_Descr* descr) { PyObject* tmpndim = PyLong_FromLong(ndim); PyObject* tmplayout = PyLong_FromLong(layout); PyObject* key = PyTuple_Pack(3, tmpndim, tmplayout, descr); Py_DECREF(tmpndim); Py_DECREF(tmplayout); return key; } static int get_cached_ndarray_typecode(int ndim, int layout, PyArray_Descr* descr) { PyObject* key = ndarray_key(ndim, layout, descr); PyObject *tmpobject = PyDict_GetItem(ndarray_typecache, key); if (tmpobject == NULL) return -1; Py_DECREF(key); return PyLong_AsLong(tmpobject); } static void cache_ndarray_typecode(int ndim, int layout, PyArray_Descr* descr, int typecode) { PyObject* key = ndarray_key(ndim, layout, descr); PyObject* value = PyLong_FromLong(typecode); PyDict_SetItem(ndarray_typecache, key, value); Py_DECREF(key); Py_DECREF(value); } static int typecode_ndarray(PyObject *dispatcher, PyArrayObject *ary) { int typecode; int dtype; int ndim = PyArray_NDIM(ary); int layout = 0; /* The order in which we 
check for the right contiguous-ness is important. The order must match the order by numba.numpy_support.map_layout. Further, only *contiguous-ness* is checked, not alignment, byte order or write permissions. */ if (PyArray_IS_C_CONTIGUOUS(ary)){ layout = 1; } else if (PyArray_IS_F_CONTIGUOUS(ary)) { layout = 2; } /* the typecode cache by convention is for "behaved" arrays (aligned and * writeable), all others must be forced to the fall back */ if (!PyArray_ISBEHAVED(ary)) goto FALLBACK; if (ndim <= 0 || ndim > N_NDIM) goto FALLBACK; dtype = dtype_num_to_typecode(PyArray_TYPE(ary)); if (dtype == -1) goto FALLBACK; /* Fast path, using direct table lookup */ assert(layout < N_LAYOUT); assert(ndim <= N_NDIM); assert(dtype < N_DTYPES); typecode = cached_arycode[ndim - 1][layout][dtype]; if (typecode == -1) { /* First use of this table entry, so it requires populating */ typecode = typecode_fallback_keep_ref(dispatcher, (PyObject*)ary); cached_arycode[ndim - 1][layout][dtype] = typecode; } return typecode; FALLBACK: /* Slower path, for non-trivial array types */ /* If this isn't a structured array then we can't use the cache */ if (PyArray_TYPE(ary) != NPY_VOID) return typecode_using_fingerprint(dispatcher, (PyObject *) ary); /* Check type cache */ typecode = get_cached_ndarray_typecode(ndim, layout, PyArray_DESCR(ary)); if (typecode == -1) { /* First use of this type, use fallback and populate the cache */ typecode = typecode_fallback_keep_ref(dispatcher, (PyObject*)ary); cache_ndarray_typecode(ndim, layout, PyArray_DESCR(ary), typecode); } return typecode; } static int typecode_arrayscalar(PyObject *dispatcher, PyObject* aryscalar) { int typecode; PyArray_Descr *descr; descr = PyArray_DescrFromScalar(aryscalar); if (!descr) return typecode_using_fingerprint(dispatcher, aryscalar); /* Is it a structured scalar? */ if (descr->type_num == NPY_VOID) { typecode = get_cached_typecode(descr); if (typecode == -1) { /* Resolve through fallback then populate cache */ typecode = typecode_fallback_keep_ref(dispatcher, aryscalar); cache_typecode(descr, typecode); } Py_DECREF(descr); return typecode; } /* Is it one of the well-known basic types? */ typecode = dtype_num_to_typecode(descr->type_num); Py_DECREF(descr); if (typecode == -1) return typecode_using_fingerprint(dispatcher, aryscalar); return BASIC_TYPECODES[typecode]; } static int typecode_devicendarray(PyObject *dispatcher, PyObject *ary) { int typecode; int dtype; int ndim; int layout = 0; PyObject* flags = PyObject_GetAttrString(ary, "flags"); if (flags == NULL) { PyErr_Clear(); goto FALLBACK; } if (PyDict_GetItemString(flags, "C_CONTIGUOUS") == Py_True) { layout = 1; } else if (PyDict_GetItemString(flags, "F_CONTIGUOUS") == Py_True) { layout = 2; } Py_DECREF(flags); PyObject *ndim_obj = PyObject_GetAttrString(ary, "ndim"); if (ndim_obj == NULL) { /* If there's no ndim, try to proceed by clearing the error and using the * fallback. */ PyErr_Clear(); goto FALLBACK; } ndim = PyLong_AsLong(ndim_obj); Py_DECREF(ndim_obj); if (PyErr_Occurred()) { /* ndim wasn't an integer for some reason - unlikely to happen, but try * the fallback. */ PyErr_Clear(); goto FALLBACK; } if (ndim <= 0 || ndim > N_NDIM) goto FALLBACK; PyObject* dtype_obj = PyObject_GetAttrString(ary, "dtype"); if (dtype_obj == NULL) { /* No dtype: try the fallback. */ PyErr_Clear(); goto FALLBACK; } PyObject* num_obj = PyObject_GetAttrString(dtype_obj, "num"); Py_DECREF(dtype_obj); if (num_obj == NULL) { /* This strange dtype has no num - try the fallback. 
*/ PyErr_Clear(); goto FALLBACK; } int dtype_num = PyLong_AsLong(num_obj); Py_DECREF(num_obj); if (PyErr_Occurred()) { /* num wasn't an integer for some reason - unlikely to happen, but try * the fallback. */ PyErr_Clear(); goto FALLBACK; } dtype = dtype_num_to_typecode(dtype_num); if (dtype == -1) { /* Not a dtype we have in the global lookup table. */ goto FALLBACK; } /* Fast path, using direct table lookup */ assert(layout < N_LAYOUT); assert(ndim <= N_NDIM); assert(dtype < N_DTYPES); typecode = cached_arycode[ndim - 1][layout][dtype]; if (typecode == -1) { /* First use of this table entry, so it requires populating */ typecode = typecode_fallback_keep_ref(dispatcher, (PyObject*)ary); cached_arycode[ndim - 1][layout][dtype] = typecode; } return typecode; FALLBACK: /* Slower path, for non-trivial array types. At present this always uses the fingerprinting to get the typecode. Future optimization might implement a cache, but this would require some fast equivalent of PyArray_DESCR for a device array. */ return typecode_using_fingerprint(dispatcher, (PyObject *) ary); } int typeof_typecode(PyObject *dispatcher, PyObject *val) { PyTypeObject *tyobj = Py_TYPE(val); int subtype_attr; /* This needs to be kept in sync with Dispatcher.typeof_pyval(), * otherwise funny things may happen. */ if (tyobj == &PyInt_Type || tyobj == &PyLong_Type) { #if SIZEOF_VOID_P < 8 /* On 32-bit platforms, choose between tc_intp (32-bit) and tc_int64 */ PY_LONG_LONG ll = PyLong_AsLongLong(val); if (ll == -1 && PyErr_Occurred()) { /* The integer is too large, let us truncate it */ PyErr_Clear(); return tc_int64; } if ((ll & 0xffffffff) != ll) return tc_int64; #endif return tc_intp; } else if (tyobj == &PyFloat_Type) return tc_float64; else if (tyobj == &PyComplex_Type) return tc_complex128; /* Array scalar handling */ else if (PyArray_CheckScalar(val)) { return typecode_arrayscalar(dispatcher, val); } /* Array handling */ else if (tyobj == &PyArray_Type) { return typecode_ndarray(dispatcher, (PyArrayObject*)val); } /* Subtype of CUDA device array */ else if (PyType_IsSubtype(tyobj, &DeviceArrayType)) { return typecode_devicendarray(dispatcher, val); } /* Subtypes of Array handling */ else if (PyType_IsSubtype(tyobj, &PyArray_Type)) { /* By default, Numba will treat all numpy.ndarray subtypes as if they were the base numpy.ndarray type. In this way, ndarray subtypes can easily use all of the support that Numba has for ndarray methods. EXPERIMENTAL: There may be cases where a programmer would NOT want ndarray subtypes to be treated exactly like the base numpy.ndarray. For this purpose, a currently experimental feature allows a programmer to add an attribute named __numba_array_subtype_dispatch__ to their ndarray subtype. This attribute can have any value as Numba only checks for the presence of the attribute and not its value. When present, a ndarray subtype will NOT be typed by Numba as a regular ndarray but this code will fallthrough to the typecode_using_fingerprint call, which will create a new unique Numba typecode for this ndarray subtype. This behavior has several significant effects. First, since this ndarray subtype will be treated as a different type by Numba, the Numba dispatcher would then specialize on this type. So, if there was a function that had several parameters that were expected to be either numpy.ndarray or a subtype of ndarray, then Numba would compile a custom version of this function for each combination of base and subtypes that were actually passed to the function. 
   Second, because this subtype would now be treated as a totally separate
   type, it will cease to function in Numba unless an implementation of that
   type is provided to Numba through the Numba type extension mechanisms
   (e.g., overload). This would typically start with defining a Numba type
   corresponding to the ndarray subtype. This is the same concept as how
   Numba has a counterpart of numpy.ndarray in its type system as
   types.Array. Next, one would typically define boxing and unboxing routines
   and the associated memory model. Then, overloads for NumPy functions on
   that type would be created. However, if the same default array memory
   model is used then there are tricks one can do to look at Numba's internal
   types.Array registries and to quickly apply those to the subtype as well.
   In this manner, new custom functions need to be written only for those
   cases where the base ndarray and the ndarray subtype behavior differ.
   Finally, after adding support for the new type, you would have a separate
   ndarray subtype that could operate with other objects of the same subtype
   but would not support interoperation with regular NumPy ndarrays. In
   standard Python, this interoperation is provided through the
   __array_ufunc__ magic method in the ndarray subtype class and in that
   case the function operates on ndarrays or their subtypes. This idea is
   extended into Numba such that __array_ufunc__ can be present in a Numba
   array type object. In this case, this function is consulted during Numba
   typing and so the arguments to __array_ufunc__ are Numba types instead of
   ndarray subtypes. The array type __array_ufunc__ returns the type of the
   output of the given ufunc.
        */
        subtype_attr = PyObject_HasAttrString(val, "__numba_array_subtype_dispatch__");
        if (!subtype_attr) {
            return typecode_ndarray(dispatcher, (PyArrayObject*)val);
        }
    }

    return typecode_using_fingerprint(dispatcher, val);
}

static void* wrap_import_array(void) {
    import_array(); /* import array returns NULL on failure */
    return (void*)1;
}

static int init_numpy(void) {
    return wrap_import_array() != NULL;
}

/*
 * typeof_init(omittedarg_type, typecode_dict)
 * (called from dispatcher.py to fill in missing information)
 */
PyObject *
typeof_init(PyObject *self, PyObject *args)
{
    PyObject *tmpobj;
    PyObject *dict;
    int index = 0;

    if (!PyArg_ParseTuple(args, "O!O!:typeof_init",
                          &PyType_Type, &omittedarg_type,
                          &PyDict_Type, &dict))
        return NULL;

    /* Initialize Numpy API */
    if ( !
init_numpy() ) { return NULL; } #define UNWRAP_TYPE(S) \ if(!(tmpobj = PyDict_GetItemString(dict, #S))) return NULL; \ else { tc_##S = PyLong_AsLong(tmpobj); \ BASIC_TYPECODES[index++] = tc_##S; } UNWRAP_TYPE(int8) UNWRAP_TYPE(int16) UNWRAP_TYPE(int32) UNWRAP_TYPE(int64) UNWRAP_TYPE(uint8) UNWRAP_TYPE(uint16) UNWRAP_TYPE(uint32) UNWRAP_TYPE(uint64) UNWRAP_TYPE(float32) UNWRAP_TYPE(float64) UNWRAP_TYPE(complex64) UNWRAP_TYPE(complex128) switch(sizeof(void*)) { case 4: tc_intp = tc_int32; break; case 8: tc_intp = tc_int64; break; default: PyErr_SetString(PyExc_AssertionError, "sizeof(void*) != {4, 8}"); return NULL; } #undef UNWRAP_TYPE typecache = PyDict_New(); ndarray_typecache = PyDict_New(); structured_dtypes = PyDict_New(); if (typecache == NULL || ndarray_typecache == NULL || structured_dtypes == NULL) { PyErr_SetString(PyExc_RuntimeError, "failed to create type cache"); return NULL; } fingerprint_hashtable = _Numba_hashtable_new(sizeof(int), hash_writer, compare_writer); if (fingerprint_hashtable == NULL) { PyErr_NoMemory(); return NULL; } /* initialize cached_arycode to all ones (in bits) */ memset(cached_arycode, 0xFF, sizeof(cached_arycode)); str_typeof_pyval = PyString_InternFromString("typeof_pyval"); str_value = PyString_InternFromString("value"); str_numba_type = PyString_InternFromString("_numba_type_"); if (!str_value || !str_typeof_pyval || !str_numba_type) return NULL; Py_RETURN_NONE; } numba-0.55.1/numba/_typeof.h000664 000000 000000 00000000530 14174536160 015611 0ustar00rootroot000000 000000 #ifndef NUMBA_TYPEOF_H_ #define NUMBA_TYPEOF_H_ #ifdef __cplusplus extern "C" { #endif extern PyObject *typeof_init(PyObject *self, PyObject *args); extern int typeof_typecode(PyObject *dispatcher, PyObject *val); extern PyObject *typeof_compute_fingerprint(PyObject *val); #ifdef __cplusplus } #endif #endif /* NUMBA_TYPEOF_H_ */ numba-0.55.1/numba/_unicodetype_db.h000664 000000 000000 00000745446 14174536160 017326 0ustar00rootroot000000 000000 /* This file is from CPython: * https://github.com/python/cpython/blob/3.7/Objects/unicodetype_db.h * As of Commit SHA: 1d4b6ba19466aba0eb91c4ba01ba509acf18c723 * * Changes made include: * - Renaming all functions and structures with a `numba` prefix to prevent * collisions. * * NOTE: Numba devs, this may need updating from time to time as the unicode * standard is updated. */ #ifndef _UNICODETYPE_DB_H #define _UNICODETYPE_DB_H /*Py_UCS4 definition from Include/unicodeobject.h */ #define Py_UCS4 uint32_t typedef struct { /* These are either deltas to the character or offsets in _PyUnicode_ExtendedCase. */ const int upper; const int lower; const int title; /* Note if more flag space is needed, decimal and digit could be unified. 
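 *
 * As a reading aid (an illustration, not part of the generated data): a
 * record of {-32, 0, -32, 0, 0, 9993}, as used by lowercase ASCII letters,
 * says that the uppercase and titlecase forms sit 32 code points below the
 * character while lowercase is the character itself; {0, 32, 0, 0, 0,
 * 10113}, used by 'A'..'Z', is the mirror image.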
*/ const unsigned char decimal; const unsigned char digit; const unsigned short flags; } numba_PyUnicode_TypeRecord; /* -------------------------------------------------------------------------- */ /* CPython unicodetype_db.h definitions start here */ /* -------------------------------------------------------------------------- */ /* this file was generated by Tools/unicode/makeunicodedata.py 3.2 */ /* a list of unique character type descriptors */ const numba_PyUnicode_TypeRecord numba_PyUnicode_TypeRecords[] = { {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 32}, {0, 0, 0, 0, 0, 48}, {0, 0, 0, 0, 0, 1056}, {0, 0, 0, 0, 0, 1024}, {0, 0, 0, 0, 0, 5120}, {0, 0, 0, 0, 0, 3590}, {0, 0, 0, 1, 1, 3590}, {0, 0, 0, 2, 2, 3590}, {0, 0, 0, 3, 3, 3590}, {0, 0, 0, 4, 4, 3590}, {0, 0, 0, 5, 5, 3590}, {0, 0, 0, 6, 6, 3590}, {0, 0, 0, 7, 7, 3590}, {0, 0, 0, 8, 8, 3590}, {0, 0, 0, 9, 9, 3590}, {0, 32, 0, 0, 0, 10113}, {0, 0, 0, 0, 0, 1536}, {-32, 0, -32, 0, 0, 9993}, {0, 0, 0, 0, 0, 9993}, {0, 0, 0, 0, 0, 4096}, {0, 0, 0, 0, 2, 3076}, {0, 0, 0, 0, 3, 3076}, {16777218, 17825792, 16777218, 0, 0, 26377}, {0, 0, 0, 0, 0, 5632}, {0, 0, 0, 0, 1, 3076}, {0, 0, 0, 0, 0, 3072}, {33554438, 18874371, 33554440, 0, 0, 26377}, {121, 0, 121, 0, 0, 9993}, {0, 1, 0, 0, 0, 10113}, {-1, 0, -1, 0, 0, 9993}, {16777228, 33554442, 16777228, 0, 0, 26497}, {-232, 0, -232, 0, 0, 9993}, {33554448, 18874381, 33554448, 0, 0, 26377}, {0, -121, 0, 0, 0, 10113}, {16777236, 17825810, 16777236, 0, 0, 26377}, {195, 0, 195, 0, 0, 9993}, {0, 210, 0, 0, 0, 10113}, {0, 206, 0, 0, 0, 10113}, {0, 205, 0, 0, 0, 10113}, {0, 79, 0, 0, 0, 10113}, {0, 202, 0, 0, 0, 10113}, {0, 203, 0, 0, 0, 10113}, {0, 207, 0, 0, 0, 10113}, {97, 0, 97, 0, 0, 9993}, {0, 211, 0, 0, 0, 10113}, {0, 209, 0, 0, 0, 10113}, {163, 0, 163, 0, 0, 9993}, {0, 213, 0, 0, 0, 10113}, {130, 0, 130, 0, 0, 9993}, {0, 214, 0, 0, 0, 10113}, {0, 218, 0, 0, 0, 10113}, {0, 217, 0, 0, 0, 10113}, {0, 219, 0, 0, 0, 10113}, {0, 0, 0, 0, 0, 1793}, {56, 0, 56, 0, 0, 9993}, {0, 2, 1, 0, 0, 10113}, {-1, 1, 0, 0, 0, 10049}, {-2, 0, -1, 0, 0, 9993}, {-79, 0, -79, 0, 0, 9993}, {33554456, 18874389, 33554456, 0, 0, 26377}, {0, -97, 0, 0, 0, 10113}, {0, -56, 0, 0, 0, 10113}, {0, -130, 0, 0, 0, 10113}, {0, 10795, 0, 0, 0, 10113}, {0, -163, 0, 0, 0, 10113}, {0, 10792, 0, 0, 0, 10113}, {10815, 0, 10815, 0, 0, 9993}, {0, -195, 0, 0, 0, 10113}, {0, 69, 0, 0, 0, 10113}, {0, 71, 0, 0, 0, 10113}, {10783, 0, 10783, 0, 0, 9993}, {10780, 0, 10780, 0, 0, 9993}, {10782, 0, 10782, 0, 0, 9993}, {-210, 0, -210, 0, 0, 9993}, {-206, 0, -206, 0, 0, 9993}, {-205, 0, -205, 0, 0, 9993}, {-202, 0, -202, 0, 0, 9993}, {-203, 0, -203, 0, 0, 9993}, {42319, 0, 42319, 0, 0, 9993}, {42315, 0, 42315, 0, 0, 9993}, {-207, 0, -207, 0, 0, 9993}, {42280, 0, 42280, 0, 0, 9993}, {42308, 0, 42308, 0, 0, 9993}, {-209, 0, -209, 0, 0, 9993}, {-211, 0, -211, 0, 0, 9993}, {10743, 0, 10743, 0, 0, 9993}, {42305, 0, 42305, 0, 0, 9993}, {10749, 0, 10749, 0, 0, 9993}, {-213, 0, -213, 0, 0, 9993}, {-214, 0, -214, 0, 0, 9993}, {10727, 0, 10727, 0, 0, 9993}, {-218, 0, -218, 0, 0, 9993}, {42282, 0, 42282, 0, 0, 9993}, {-69, 0, -69, 0, 0, 9993}, {-217, 0, -217, 0, 0, 9993}, {-71, 0, -71, 0, 0, 9993}, {-219, 0, -219, 0, 0, 9993}, {42261, 0, 42261, 0, 0, 9993}, {42258, 0, 42258, 0, 0, 9993}, {0, 0, 0, 0, 0, 14089}, {0, 0, 0, 0, 0, 5889}, {16777244, 17825818, 16777244, 0, 0, 30216}, {0, 0, 0, 0, 0, 13321}, {0, 116, 0, 0, 0, 10113}, {0, 38, 0, 0, 0, 10113}, {0, 37, 0, 0, 0, 10113}, {0, 64, 0, 0, 0, 10113}, {0, 63, 0, 0, 0, 10113}, {50331681, 19922973, 
50331681, 0, 0, 26377}, {-38, 0, -38, 0, 0, 9993}, {-37, 0, -37, 0, 0, 9993}, {50331688, 19922980, 50331688, 0, 0, 26377}, {16777261, 17825835, 16777261, 0, 0, 26377}, {-64, 0, -64, 0, 0, 9993}, {-63, 0, -63, 0, 0, 9993}, {0, 8, 0, 0, 0, 10113}, {16777264, 17825838, 16777264, 0, 0, 26377}, {16777267, 17825841, 16777267, 0, 0, 26377}, {0, 0, 0, 0, 0, 10113}, {16777270, 17825844, 16777270, 0, 0, 26377}, {16777273, 17825847, 16777273, 0, 0, 26377}, {-8, 0, -8, 0, 0, 9993}, {16777276, 17825850, 16777276, 0, 0, 26377}, {16777279, 17825853, 16777279, 0, 0, 26377}, {7, 0, 7, 0, 0, 9993}, {-116, 0, -116, 0, 0, 9993}, {0, -60, 0, 0, 0, 10113}, {16777282, 17825856, 16777282, 0, 0, 26377}, {0, -7, 0, 0, 0, 10113}, {0, 80, 0, 0, 0, 10113}, {-80, 0, -80, 0, 0, 9993}, {0, 15, 0, 0, 0, 10113}, {-15, 0, -15, 0, 0, 9993}, {0, 48, 0, 0, 0, 10113}, {-48, 0, -48, 0, 0, 9993}, {33554502, 18874435, 33554504, 0, 0, 26377}, {0, 0, 0, 0, 0, 1537}, {0, 7264, 0, 0, 0, 10113}, {3008, 0, 0, 0, 0, 9993}, {0, 0, 0, 0, 1, 3588}, {0, 0, 0, 0, 2, 3588}, {0, 0, 0, 0, 3, 3588}, {0, 0, 0, 0, 4, 3588}, {0, 0, 0, 0, 5, 3588}, {0, 0, 0, 0, 6, 3588}, {0, 0, 0, 0, 7, 3588}, {0, 0, 0, 0, 8, 3588}, {0, 0, 0, 0, 9, 3588}, {16777292, 17825866, 16777292, 0, 0, 26497}, {16777295, 17825869, 16777295, 0, 0, 26497}, {16777298, 17825872, 16777298, 0, 0, 26497}, {16777301, 17825875, 16777301, 0, 0, 26497}, {16777304, 17825878, 16777304, 0, 0, 26497}, {16777307, 17825881, 16777307, 0, 0, 26497}, {16777310, 17825884, 16777310, 0, 0, 26497}, {16777313, 17825887, 16777313, 0, 0, 26497}, {16777316, 17825890, 16777316, 0, 0, 26497}, {16777319, 17825893, 16777319, 0, 0, 26497}, {16777322, 17825896, 16777322, 0, 0, 26497}, {16777325, 17825899, 16777325, 0, 0, 26497}, {16777328, 17825902, 16777328, 0, 0, 26497}, {16777331, 17825905, 16777331, 0, 0, 26497}, {16777334, 17825908, 16777334, 0, 0, 26497}, {16777337, 17825911, 16777337, 0, 0, 26497}, {16777340, 17825914, 16777340, 0, 0, 26497}, {16777343, 17825917, 16777343, 0, 0, 26497}, {16777346, 17825920, 16777346, 0, 0, 26497}, {16777349, 17825923, 16777349, 0, 0, 26497}, {16777352, 17825926, 16777352, 0, 0, 26497}, {16777355, 17825929, 16777355, 0, 0, 26497}, {16777358, 17825932, 16777358, 0, 0, 26497}, {16777361, 17825935, 16777361, 0, 0, 26497}, {16777364, 17825938, 16777364, 0, 0, 26497}, {16777367, 17825941, 16777367, 0, 0, 26497}, {16777370, 17825944, 16777370, 0, 0, 26497}, {16777373, 17825947, 16777373, 0, 0, 26497}, {16777376, 17825950, 16777376, 0, 0, 26497}, {16777379, 17825953, 16777379, 0, 0, 26497}, {16777382, 17825956, 16777382, 0, 0, 26497}, {16777385, 17825959, 16777385, 0, 0, 26497}, {16777388, 17825962, 16777388, 0, 0, 26497}, {16777391, 17825965, 16777391, 0, 0, 26497}, {16777394, 17825968, 16777394, 0, 0, 26497}, {16777397, 17825971, 16777397, 0, 0, 26497}, {16777400, 17825974, 16777400, 0, 0, 26497}, {16777403, 17825977, 16777403, 0, 0, 26497}, {16777406, 17825980, 16777406, 0, 0, 26497}, {16777409, 17825983, 16777409, 0, 0, 26497}, {16777412, 17825986, 16777412, 0, 0, 26497}, {16777415, 17825989, 16777415, 0, 0, 26497}, {16777418, 17825992, 16777418, 0, 0, 26497}, {16777421, 17825995, 16777421, 0, 0, 26497}, {16777424, 17825998, 16777424, 0, 0, 26497}, {16777427, 17826001, 16777427, 0, 0, 26497}, {16777430, 17826004, 16777430, 0, 0, 26497}, {16777433, 17826007, 16777433, 0, 0, 26497}, {16777436, 17826010, 16777436, 0, 0, 26497}, {16777439, 17826013, 16777439, 0, 0, 26497}, {16777442, 17826016, 16777442, 0, 0, 26497}, {16777445, 17826019, 16777445, 0, 0, 26497}, {16777448, 
17826022, 16777448, 0, 0, 26497}, {16777451, 17826025, 16777451, 0, 0, 26497}, {16777454, 17826028, 16777454, 0, 0, 26497}, {16777457, 17826031, 16777457, 0, 0, 26497}, {16777460, 17826034, 16777460, 0, 0, 26497}, {16777463, 17826037, 16777463, 0, 0, 26497}, {16777466, 17826040, 16777466, 0, 0, 26497}, {16777469, 17826043, 16777469, 0, 0, 26497}, {16777472, 17826046, 16777472, 0, 0, 26497}, {16777475, 17826049, 16777475, 0, 0, 26497}, {16777478, 17826052, 16777478, 0, 0, 26497}, {16777481, 17826055, 16777481, 0, 0, 26497}, {16777484, 17826058, 16777484, 0, 0, 26497}, {16777487, 17826061, 16777487, 0, 0, 26497}, {16777490, 17826064, 16777490, 0, 0, 26497}, {16777493, 17826067, 16777493, 0, 0, 26497}, {16777496, 17826070, 16777496, 0, 0, 26497}, {16777499, 17826073, 16777499, 0, 0, 26497}, {16777502, 17826076, 16777502, 0, 0, 26497}, {16777505, 17826079, 16777505, 0, 0, 26497}, {16777508, 17826082, 16777508, 0, 0, 26497}, {16777511, 17826085, 16777511, 0, 0, 26497}, {16777514, 17826088, 16777514, 0, 0, 26497}, {16777517, 17826091, 16777517, 0, 0, 26497}, {16777520, 17826094, 16777520, 0, 0, 26497}, {16777523, 17826097, 16777523, 0, 0, 26497}, {16777526, 17826100, 16777526, 0, 0, 26497}, {16777529, 17826103, 16777529, 0, 0, 26497}, {16777532, 17826106, 16777532, 0, 0, 26497}, {16777535, 17826109, 16777535, 0, 0, 26497}, {16777538, 17826112, 16777538, 0, 0, 26497}, {16777541, 17826115, 16777541, 0, 0, 26497}, {16777544, 17826118, 16777544, 0, 0, 26497}, {16777547, 17826121, 16777547, 0, 0, 26497}, {16777550, 17826124, 16777550, 0, 0, 26377}, {16777553, 17826127, 16777553, 0, 0, 26377}, {16777556, 17826130, 16777556, 0, 0, 26377}, {16777559, 17826133, 16777559, 0, 0, 26377}, {16777562, 17826136, 16777562, 0, 0, 26377}, {16777565, 17826139, 16777565, 0, 0, 26377}, {0, 0, 0, 0, 0, 3840}, {0, 0, 0, 0, 0, 5888}, {16777568, 17826142, 16777568, 0, 0, 26377}, {16777571, 17826145, 16777571, 0, 0, 26377}, {16777574, 17826148, 16777574, 0, 0, 26377}, {16777577, 17826151, 16777577, 0, 0, 26377}, {16777580, 17826154, 16777580, 0, 0, 26377}, {16777583, 17826157, 16777583, 0, 0, 26377}, {16777586, 17826160, 16777586, 0, 0, 26377}, {16777589, 17826163, 16777589, 0, 0, 26377}, {16777592, 17826166, 16777592, 0, 0, 26377}, {0, -3008, 0, 0, 0, 10113}, {35332, 0, 35332, 0, 0, 9993}, {3814, 0, 3814, 0, 0, 9993}, {33554812, 18874745, 33554812, 0, 0, 26377}, {33554817, 18874750, 33554817, 0, 0, 26377}, {33554822, 18874755, 33554822, 0, 0, 26377}, {33554827, 18874760, 33554827, 0, 0, 26377}, {33554832, 18874765, 33554832, 0, 0, 26377}, {16777620, 17826194, 16777620, 0, 0, 26377}, {16777624, 18874773, 16777624, 0, 0, 26497}, {8, 0, 8, 0, 0, 9993}, {0, -8, 0, 0, 0, 10113}, {33554844, 18874777, 33554844, 0, 0, 26377}, {50332066, 19923358, 50332066, 0, 0, 26377}, {50332073, 19923365, 50332073, 0, 0, 26377}, {50332080, 19923372, 50332080, 0, 0, 26377}, {74, 0, 74, 0, 0, 9993}, {86, 0, 86, 0, 0, 9993}, {100, 0, 100, 0, 0, 9993}, {128, 0, 128, 0, 0, 9993}, {112, 0, 112, 0, 0, 9993}, {126, 0, 126, 0, 0, 9993}, {33554870, 18874803, 16777656, 0, 0, 26377}, {33554876, 18874809, 16777662, 0, 0, 26377}, {33554882, 18874815, 16777668, 0, 0, 26377}, {33554888, 18874821, 16777674, 0, 0, 26377}, {33554894, 18874827, 16777680, 0, 0, 26377}, {33554900, 18874833, 16777686, 0, 0, 26377}, {33554906, 18874839, 16777692, 0, 0, 26377}, {33554912, 18874845, 16777698, 0, 0, 26377}, {33554918, 18874851, 16777704, 0, 0, 26433}, {33554924, 18874857, 16777710, 0, 0, 26433}, {33554930, 18874863, 16777716, 0, 0, 26433}, {33554936, 18874869, 16777722, 
0, 0, 26433}, {33554942, 18874875, 16777728, 0, 0, 26433}, {33554948, 18874881, 16777734, 0, 0, 26433}, {33554954, 18874887, 16777740, 0, 0, 26433}, {33554960, 18874893, 16777746, 0, 0, 26433}, {33554966, 18874899, 16777752, 0, 0, 26377}, {33554972, 18874905, 16777758, 0, 0, 26377}, {33554978, 18874911, 16777764, 0, 0, 26377}, {33554984, 18874917, 16777770, 0, 0, 26377}, {33554990, 18874923, 16777776, 0, 0, 26377}, {33554996, 18874929, 16777782, 0, 0, 26377}, {33555002, 18874935, 16777788, 0, 0, 26377}, {33555008, 18874941, 16777794, 0, 0, 26377}, {33555014, 18874947, 16777800, 0, 0, 26433}, {33555020, 18874953, 16777806, 0, 0, 26433}, {33555026, 18874959, 16777812, 0, 0, 26433}, {33555032, 18874965, 16777818, 0, 0, 26433}, {33555038, 18874971, 16777824, 0, 0, 26433}, {33555044, 18874977, 16777830, 0, 0, 26433}, {33555050, 18874983, 16777836, 0, 0, 26433}, {33555056, 18874989, 16777842, 0, 0, 26433}, {33555062, 18874995, 16777848, 0, 0, 26377}, {33555068, 18875001, 16777854, 0, 0, 26377}, {33555074, 18875007, 16777860, 0, 0, 26377}, {33555080, 18875013, 16777866, 0, 0, 26377}, {33555086, 18875019, 16777872, 0, 0, 26377}, {33555092, 18875025, 16777878, 0, 0, 26377}, {33555098, 18875031, 16777884, 0, 0, 26377}, {33555104, 18875037, 16777890, 0, 0, 26377}, {33555110, 18875043, 16777896, 0, 0, 26433}, {33555116, 18875049, 16777902, 0, 0, 26433}, {33555122, 18875055, 16777908, 0, 0, 26433}, {33555128, 18875061, 16777914, 0, 0, 26433}, {33555134, 18875067, 16777920, 0, 0, 26433}, {33555140, 18875073, 16777926, 0, 0, 26433}, {33555146, 18875079, 16777932, 0, 0, 26433}, {33555152, 18875085, 16777938, 0, 0, 26433}, {33555158, 18875091, 33555160, 0, 0, 26377}, {33555165, 18875098, 16777951, 0, 0, 26377}, {33555171, 18875104, 33555173, 0, 0, 26377}, {33555178, 18875111, 33555178, 0, 0, 26377}, {50332400, 19923692, 50332403, 0, 0, 26377}, {0, -74, 0, 0, 0, 10113}, {33555193, 18875126, 16777979, 0, 0, 26433}, {16777982, 17826556, 16777982, 0, 0, 26377}, {33555202, 18875135, 33555204, 0, 0, 26377}, {33555209, 18875142, 16777995, 0, 0, 26377}, {33555215, 18875148, 33555217, 0, 0, 26377}, {33555222, 18875155, 33555222, 0, 0, 26377}, {50332444, 19923736, 50332447, 0, 0, 26377}, {0, -86, 0, 0, 0, 10113}, {33555237, 18875170, 16778023, 0, 0, 26433}, {50332460, 19923752, 50332460, 0, 0, 26377}, {50332467, 19923759, 50332467, 0, 0, 26377}, {33555257, 18875190, 33555257, 0, 0, 26377}, {50332479, 19923771, 50332479, 0, 0, 26377}, {0, -100, 0, 0, 0, 10113}, {50332486, 19923778, 50332486, 0, 0, 26377}, {50332493, 19923785, 50332493, 0, 0, 26377}, {33555283, 18875216, 33555283, 0, 0, 26377}, {33555288, 18875221, 33555288, 0, 0, 26377}, {50332510, 19923802, 50332510, 0, 0, 26377}, {0, -112, 0, 0, 0, 10113}, {33555300, 18875233, 33555302, 0, 0, 26377}, {33555307, 18875240, 16778093, 0, 0, 26377}, {33555313, 18875246, 33555315, 0, 0, 26377}, {33555320, 18875253, 33555320, 0, 0, 26377}, {50332542, 19923834, 50332545, 0, 0, 26377}, {0, -128, 0, 0, 0, 10113}, {0, -126, 0, 0, 0, 10113}, {33555335, 18875268, 16778121, 0, 0, 26433}, {0, 0, 0, 0, 0, 3076}, {0, 0, 0, 0, 4, 3076}, {0, 0, 0, 0, 5, 3076}, {0, 0, 0, 0, 6, 3076}, {0, 0, 0, 0, 7, 3076}, {0, 0, 0, 0, 8, 3076}, {0, 0, 0, 0, 9, 3076}, {0, 0, 0, 0, 0, 1792}, {0, -7517, 0, 0, 0, 10113}, {0, -8383, 0, 0, 0, 10113}, {0, -8262, 0, 0, 0, 10113}, {0, 28, 0, 0, 0, 10113}, {-28, 0, -28, 0, 0, 9993}, {0, 16, 0, 0, 0, 12160}, {-16, 0, -16, 0, 0, 12040}, {0, 26, 0, 0, 0, 9344}, {-26, 0, -26, 0, 0, 9224}, {0, -10743, 0, 0, 0, 10113}, {0, -3814, 0, 0, 0, 10113}, {0, -10727, 0, 0, 
0, 10113}, {-10795, 0, -10795, 0, 0, 9993}, {-10792, 0, -10792, 0, 0, 9993}, {0, -10780, 0, 0, 0, 10113}, {0, -10749, 0, 0, 0, 10113}, {0, -10783, 0, 0, 0, 10113}, {0, -10782, 0, 0, 0, 10113}, {0, -10815, 0, 0, 0, 10113}, {-7264, 0, -7264, 0, 0, 9993}, {0, 0, 0, 0, 0, 5121}, {0, 0, 0, 0, 0, 3841}, {0, -35332, 0, 0, 0, 10113}, {0, -42280, 0, 0, 0, 10113}, {0, -42308, 0, 0, 0, 10113}, {0, -42319, 0, 0, 0, 10113}, {0, -42315, 0, 0, 0, 10113}, {0, -42305, 0, 0, 0, 10113}, {0, -42258, 0, 0, 0, 10113}, {0, -42282, 0, 0, 0, 10113}, {0, -42261, 0, 0, 0, 10113}, {0, 928, 0, 0, 0, 10113}, {-928, 0, -928, 0, 0, 9993}, {16778124, 17826698, 16778124, 0, 0, 26377}, {16778127, 17826701, 16778127, 0, 0, 26377}, {16778130, 17826704, 16778130, 0, 0, 26377}, {16778133, 17826707, 16778133, 0, 0, 26377}, {16778136, 17826710, 16778136, 0, 0, 26377}, {16778139, 17826713, 16778139, 0, 0, 26377}, {16778142, 17826716, 16778142, 0, 0, 26377}, {16778145, 17826719, 16778145, 0, 0, 26377}, {16778148, 17826722, 16778148, 0, 0, 26377}, {16778151, 17826725, 16778151, 0, 0, 26377}, {16778154, 17826728, 16778154, 0, 0, 26377}, {16778157, 17826731, 16778157, 0, 0, 26377}, {16778160, 17826734, 16778160, 0, 0, 26377}, {16778163, 17826737, 16778163, 0, 0, 26377}, {16778166, 17826740, 16778166, 0, 0, 26377}, {16778169, 17826743, 16778169, 0, 0, 26377}, {16778172, 17826746, 16778172, 0, 0, 26377}, {16778175, 17826749, 16778175, 0, 0, 26377}, {16778178, 17826752, 16778178, 0, 0, 26377}, {16778181, 17826755, 16778181, 0, 0, 26377}, {16778184, 17826758, 16778184, 0, 0, 26377}, {16778187, 17826761, 16778187, 0, 0, 26377}, {16778190, 17826764, 16778190, 0, 0, 26377}, {16778193, 17826767, 16778193, 0, 0, 26377}, {16778196, 17826770, 16778196, 0, 0, 26377}, {16778199, 17826773, 16778199, 0, 0, 26377}, {16778202, 17826776, 16778202, 0, 0, 26377}, {16778205, 17826779, 16778205, 0, 0, 26377}, {16778208, 17826782, 16778208, 0, 0, 26377}, {16778211, 17826785, 16778211, 0, 0, 26377}, {16778214, 17826788, 16778214, 0, 0, 26377}, {16778217, 17826791, 16778217, 0, 0, 26377}, {16778220, 17826794, 16778220, 0, 0, 26377}, {16778223, 17826797, 16778223, 0, 0, 26377}, {16778226, 17826800, 16778226, 0, 0, 26377}, {16778229, 17826803, 16778229, 0, 0, 26377}, {16778232, 17826806, 16778232, 0, 0, 26377}, {16778235, 17826809, 16778235, 0, 0, 26377}, {16778238, 17826812, 16778238, 0, 0, 26377}, {16778241, 17826815, 16778241, 0, 0, 26377}, {16778244, 17826818, 16778244, 0, 0, 26377}, {16778247, 17826821, 16778247, 0, 0, 26377}, {16778250, 17826824, 16778250, 0, 0, 26377}, {16778253, 17826827, 16778253, 0, 0, 26377}, {16778256, 17826830, 16778256, 0, 0, 26377}, {16778259, 17826833, 16778259, 0, 0, 26377}, {16778262, 17826836, 16778262, 0, 0, 26377}, {16778265, 17826839, 16778265, 0, 0, 26377}, {16778268, 17826842, 16778268, 0, 0, 26377}, {16778271, 17826845, 16778271, 0, 0, 26377}, {16778274, 17826848, 16778274, 0, 0, 26377}, {16778277, 17826851, 16778277, 0, 0, 26377}, {16778280, 17826854, 16778280, 0, 0, 26377}, {16778283, 17826857, 16778283, 0, 0, 26377}, {16778286, 17826860, 16778286, 0, 0, 26377}, {16778289, 17826863, 16778289, 0, 0, 26377}, {16778292, 17826866, 16778292, 0, 0, 26377}, {16778295, 17826869, 16778295, 0, 0, 26377}, {16778298, 17826872, 16778298, 0, 0, 26377}, {16778301, 17826875, 16778301, 0, 0, 26377}, {16778304, 17826878, 16778304, 0, 0, 26377}, {16778307, 17826881, 16778307, 0, 0, 26377}, {16778310, 17826884, 16778310, 0, 0, 26377}, {16778313, 17826887, 16778313, 0, 0, 26377}, {16778316, 17826890, 16778316, 0, 0, 26377}, {16778319, 
17826893, 16778319, 0, 0, 26377}, {16778322, 17826896, 16778322, 0, 0, 26377}, {16778325, 17826899, 16778325, 0, 0, 26377}, {16778328, 17826902, 16778328, 0, 0, 26377}, {16778331, 17826905, 16778331, 0, 0, 26377}, {16778334, 17826908, 16778334, 0, 0, 26377}, {16778337, 17826911, 16778337, 0, 0, 26377}, {16778340, 17826914, 16778340, 0, 0, 26377}, {16778343, 17826917, 16778343, 0, 0, 26377}, {16778346, 17826920, 16778346, 0, 0, 26377}, {16778349, 17826923, 16778349, 0, 0, 26377}, {16778352, 17826926, 16778352, 0, 0, 26377}, {16778355, 17826929, 16778355, 0, 0, 26377}, {16778358, 17826932, 16778358, 0, 0, 26377}, {16778361, 17826935, 16778361, 0, 0, 26377}, {33555581, 18875514, 33555583, 0, 0, 26377}, {33555588, 18875521, 33555590, 0, 0, 26377}, {33555595, 18875528, 33555597, 0, 0, 26377}, {50332819, 19924111, 50332822, 0, 0, 26377}, {50332829, 19924121, 50332832, 0, 0, 26377}, {33555622, 18875555, 33555624, 0, 0, 26377}, {33555629, 18875562, 33555631, 0, 0, 26377}, {33555636, 18875569, 33555638, 0, 0, 26377}, {33555643, 18875576, 33555645, 0, 0, 26377}, {33555650, 18875583, 33555652, 0, 0, 26377}, {33555657, 18875590, 33555659, 0, 0, 26377}, {33555664, 18875597, 33555666, 0, 0, 26377}, {0, 0, 0, 0, 0, 1025}, {0, 0, 0, 0, 0, 5633}, {0, 40, 0, 0, 0, 10113}, {-40, 0, -40, 0, 0, 9993}, {0, 34, 0, 0, 0, 10113}, {-34, 0, -34, 0, 0, 9993}, {0, 0, 0, 0, 0, 9344}, }; /* extended case mappings */ const Py_UCS4 numba_PyUnicode_ExtendedCase[] = { 181, 956, 924, 223, 115, 115, 83, 83, 83, 115, 105, 775, 304, 329, 700, 110, 700, 78, 383, 115, 83, 496, 106, 780, 74, 780, 837, 953, 921, 912, 953, 776, 769, 921, 776, 769, 944, 965, 776, 769, 933, 776, 769, 962, 963, 931, 976, 946, 914, 977, 952, 920, 981, 966, 934, 982, 960, 928, 1008, 954, 922, 1009, 961, 929, 1013, 949, 917, 1415, 1381, 1410, 1333, 1362, 1333, 1410, 43888, 5024, 5024, 43889, 5025, 5025, 43890, 5026, 5026, 43891, 5027, 5027, 43892, 5028, 5028, 43893, 5029, 5029, 43894, 5030, 5030, 43895, 5031, 5031, 43896, 5032, 5032, 43897, 5033, 5033, 43898, 5034, 5034, 43899, 5035, 5035, 43900, 5036, 5036, 43901, 5037, 5037, 43902, 5038, 5038, 43903, 5039, 5039, 43904, 5040, 5040, 43905, 5041, 5041, 43906, 5042, 5042, 43907, 5043, 5043, 43908, 5044, 5044, 43909, 5045, 5045, 43910, 5046, 5046, 43911, 5047, 5047, 43912, 5048, 5048, 43913, 5049, 5049, 43914, 5050, 5050, 43915, 5051, 5051, 43916, 5052, 5052, 43917, 5053, 5053, 43918, 5054, 5054, 43919, 5055, 5055, 43920, 5056, 5056, 43921, 5057, 5057, 43922, 5058, 5058, 43923, 5059, 5059, 43924, 5060, 5060, 43925, 5061, 5061, 43926, 5062, 5062, 43927, 5063, 5063, 43928, 5064, 5064, 43929, 5065, 5065, 43930, 5066, 5066, 43931, 5067, 5067, 43932, 5068, 5068, 43933, 5069, 5069, 43934, 5070, 5070, 43935, 5071, 5071, 43936, 5072, 5072, 43937, 5073, 5073, 43938, 5074, 5074, 43939, 5075, 5075, 43940, 5076, 5076, 43941, 5077, 5077, 43942, 5078, 5078, 43943, 5079, 5079, 43944, 5080, 5080, 43945, 5081, 5081, 43946, 5082, 5082, 43947, 5083, 5083, 43948, 5084, 5084, 43949, 5085, 5085, 43950, 5086, 5086, 43951, 5087, 5087, 43952, 5088, 5088, 43953, 5089, 5089, 43954, 5090, 5090, 43955, 5091, 5091, 43956, 5092, 5092, 43957, 5093, 5093, 43958, 5094, 5094, 43959, 5095, 5095, 43960, 5096, 5096, 43961, 5097, 5097, 43962, 5098, 5098, 43963, 5099, 5099, 43964, 5100, 5100, 43965, 5101, 5101, 43966, 5102, 5102, 43967, 5103, 5103, 5112, 5104, 5104, 5113, 5105, 5105, 5114, 5106, 5106, 5115, 5107, 5107, 5116, 5108, 5108, 5117, 5109, 5109, 5112, 5104, 5104, 5113, 5105, 5105, 5114, 5106, 5106, 5115, 5107, 5107, 5116, 5108, 5108, 
5117, 5109, 5109, 7296, 1074, 1042, 7297, 1076, 1044, 7298, 1086, 1054, 7299, 1089, 1057, 7300, 1090, 1058, 7301, 1090, 1058, 7302, 1098, 1066, 7303, 1123, 1122, 7304, 42571, 42570, 7830, 104, 817, 72, 817, 7831, 116, 776, 84, 776, 7832, 119, 778, 87, 778, 7833, 121, 778, 89, 778, 7834, 97, 702, 65, 702, 7835, 7777, 7776, 223, 115, 115, 7838, 8016, 965, 787, 933, 787, 8018, 965, 787, 768, 933, 787, 768, 8020, 965, 787, 769, 933, 787, 769, 8022, 965, 787, 834, 933, 787, 834, 8064, 7936, 953, 7944, 921, 8072, 8065, 7937, 953, 7945, 921, 8073, 8066, 7938, 953, 7946, 921, 8074, 8067, 7939, 953, 7947, 921, 8075, 8068, 7940, 953, 7948, 921, 8076, 8069, 7941, 953, 7949, 921, 8077, 8070, 7942, 953, 7950, 921, 8078, 8071, 7943, 953, 7951, 921, 8079, 8064, 7936, 953, 7944, 921, 8072, 8065, 7937, 953, 7945, 921, 8073, 8066, 7938, 953, 7946, 921, 8074, 8067, 7939, 953, 7947, 921, 8075, 8068, 7940, 953, 7948, 921, 8076, 8069, 7941, 953, 7949, 921, 8077, 8070, 7942, 953, 7950, 921, 8078, 8071, 7943, 953, 7951, 921, 8079, 8080, 7968, 953, 7976, 921, 8088, 8081, 7969, 953, 7977, 921, 8089, 8082, 7970, 953, 7978, 921, 8090, 8083, 7971, 953, 7979, 921, 8091, 8084, 7972, 953, 7980, 921, 8092, 8085, 7973, 953, 7981, 921, 8093, 8086, 7974, 953, 7982, 921, 8094, 8087, 7975, 953, 7983, 921, 8095, 8080, 7968, 953, 7976, 921, 8088, 8081, 7969, 953, 7977, 921, 8089, 8082, 7970, 953, 7978, 921, 8090, 8083, 7971, 953, 7979, 921, 8091, 8084, 7972, 953, 7980, 921, 8092, 8085, 7973, 953, 7981, 921, 8093, 8086, 7974, 953, 7982, 921, 8094, 8087, 7975, 953, 7983, 921, 8095, 8096, 8032, 953, 8040, 921, 8104, 8097, 8033, 953, 8041, 921, 8105, 8098, 8034, 953, 8042, 921, 8106, 8099, 8035, 953, 8043, 921, 8107, 8100, 8036, 953, 8044, 921, 8108, 8101, 8037, 953, 8045, 921, 8109, 8102, 8038, 953, 8046, 921, 8110, 8103, 8039, 953, 8047, 921, 8111, 8096, 8032, 953, 8040, 921, 8104, 8097, 8033, 953, 8041, 921, 8105, 8098, 8034, 953, 8042, 921, 8106, 8099, 8035, 953, 8043, 921, 8107, 8100, 8036, 953, 8044, 921, 8108, 8101, 8037, 953, 8045, 921, 8109, 8102, 8038, 953, 8046, 921, 8110, 8103, 8039, 953, 8047, 921, 8111, 8114, 8048, 953, 8122, 921, 8122, 837, 8115, 945, 953, 913, 921, 8124, 8116, 940, 953, 902, 921, 902, 837, 8118, 945, 834, 913, 834, 8119, 945, 834, 953, 913, 834, 921, 913, 834, 837, 8115, 945, 953, 913, 921, 8124, 8126, 953, 921, 8130, 8052, 953, 8138, 921, 8138, 837, 8131, 951, 953, 919, 921, 8140, 8132, 942, 953, 905, 921, 905, 837, 8134, 951, 834, 919, 834, 8135, 951, 834, 953, 919, 834, 921, 919, 834, 837, 8131, 951, 953, 919, 921, 8140, 8146, 953, 776, 768, 921, 776, 768, 8147, 953, 776, 769, 921, 776, 769, 8150, 953, 834, 921, 834, 8151, 953, 776, 834, 921, 776, 834, 8162, 965, 776, 768, 933, 776, 768, 8163, 965, 776, 769, 933, 776, 769, 8164, 961, 787, 929, 787, 8166, 965, 834, 933, 834, 8167, 965, 776, 834, 933, 776, 834, 8178, 8060, 953, 8186, 921, 8186, 837, 8179, 969, 953, 937, 921, 8188, 8180, 974, 953, 911, 921, 911, 837, 8182, 969, 834, 937, 834, 8183, 969, 834, 953, 937, 834, 921, 937, 834, 837, 8179, 969, 953, 937, 921, 8188, 43888, 5024, 5024, 43889, 5025, 5025, 43890, 5026, 5026, 43891, 5027, 5027, 43892, 5028, 5028, 43893, 5029, 5029, 43894, 5030, 5030, 43895, 5031, 5031, 43896, 5032, 5032, 43897, 5033, 5033, 43898, 5034, 5034, 43899, 5035, 5035, 43900, 5036, 5036, 43901, 5037, 5037, 43902, 5038, 5038, 43903, 5039, 5039, 43904, 5040, 5040, 43905, 5041, 5041, 43906, 5042, 5042, 43907, 5043, 5043, 43908, 5044, 5044, 43909, 5045, 5045, 43910, 5046, 5046, 43911, 5047, 5047, 43912, 5048, 5048, 43913, 
5049, 5049, 43914, 5050, 5050, 43915, 5051, 5051, 43916, 5052, 5052, 43917, 5053, 5053, 43918, 5054, 5054, 43919, 5055, 5055, 43920, 5056, 5056, 43921, 5057, 5057, 43922, 5058, 5058, 43923, 5059, 5059, 43924, 5060, 5060, 43925, 5061, 5061, 43926, 5062, 5062, 43927, 5063, 5063, 43928, 5064, 5064, 43929, 5065, 5065, 43930, 5066, 5066, 43931, 5067, 5067, 43932, 5068, 5068, 43933, 5069, 5069, 43934, 5070, 5070, 43935, 5071, 5071, 43936, 5072, 5072, 43937, 5073, 5073, 43938, 5074, 5074, 43939, 5075, 5075, 43940, 5076, 5076, 43941, 5077, 5077, 43942, 5078, 5078, 43943, 5079, 5079, 43944, 5080, 5080, 43945, 5081, 5081, 43946, 5082, 5082, 43947, 5083, 5083, 43948, 5084, 5084, 43949, 5085, 5085, 43950, 5086, 5086, 43951, 5087, 5087, 43952, 5088, 5088, 43953, 5089, 5089, 43954, 5090, 5090, 43955, 5091, 5091, 43956, 5092, 5092, 43957, 5093, 5093, 43958, 5094, 5094, 43959, 5095, 5095, 43960, 5096, 5096, 43961, 5097, 5097, 43962, 5098, 5098, 43963, 5099, 5099, 43964, 5100, 5100, 43965, 5101, 5101, 43966, 5102, 5102, 43967, 5103, 5103, 64256, 102, 102, 70, 70, 70, 102, 64257, 102, 105, 70, 73, 70, 105, 64258, 102, 108, 70, 76, 70, 108, 64259, 102, 102, 105, 70, 70, 73, 70, 102, 105, 64260, 102, 102, 108, 70, 70, 76, 70, 102, 108, 64261, 115, 116, 83, 84, 83, 116, 64262, 115, 116, 83, 84, 83, 116, 64275, 1396, 1398, 1348, 1350, 1348, 1398, 64276, 1396, 1381, 1348, 1333, 1348, 1381, 64277, 1396, 1387, 1348, 1339, 1348, 1387, 64278, 1406, 1398, 1358, 1350, 1358, 1398, 64279, 1396, 1389, 1348, 1341, 1348, 1389, }; /* type indexes */ #define SHIFT 7 static unsigned short index1[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 34, 35, 36, 37, 38, 39, 34, 34, 34, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 64, 64, 64, 65, 66, 64, 64, 64, 64, 67, 68, 64, 64, 64, 64, 64, 64, 69, 70, 71, 72, 73, 74, 75, 76, 64, 77, 78, 79, 80, 81, 82, 83, 64, 64, 84, 85, 34, 34, 34, 34, 34, 34, 86, 34, 34, 34, 34, 34, 87, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 88, 89, 90, 91, 34, 34, 34, 92, 34, 34, 34, 93, 94, 34, 34, 34, 34, 34, 95, 34, 34, 34, 96, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 97, 98, 99, 34, 34, 34, 34, 34, 34, 100, 101, 34, 34, 34, 34, 34, 34, 34, 34, 102, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 103, 34, 34, 34, 34, 34, 34, 34, 34, 104, 34, 34, 34, 34, 100, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 103, 34, 34, 34, 34, 34, 34, 105, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 106, 107, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 108, 109, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 110, 111, 34, 34, 34, 34, 34, 34, 34, 34, 112, 34, 34, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 125, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 
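/*
 * Illustrative note (not part of the generated data): SHIFT = 7 means the
 * index tables divide the code-point space into 128-entry blocks. index1,
 * begun above, maps a block number to a second-level table (conventionally
 * index2 in CPython's unicodetype_db layout, emitted after index1), which in
 * turn yields an offset into numba_PyUnicode_TypeRecords. A minimal sketch of
 * such a lookup, modelled on CPython's gettyperecord (the function name below
 * is hypothetical, for illustration only):
 *
 *     static const numba_PyUnicode_TypeRecord *
 *     typerecord_lookup(Py_UCS4 code)
 *     {
 *         int index = 0;
 *         if (code < 0x110000) {                           // past max code point -> record 0
 *             index = index1[code >> SHIFT];               // which 128-entry block
 *             index = index2[(index << SHIFT) +
 *                            (code & ((1 << SHIFT) - 1))]; // position within the block
 *         }
 *         return &numba_PyUnicode_TypeRecords[index];
 *     }
 *
 * Similarly, upper/lower/title values such as 16777218 in the records above
 * appear to follow CPython's extended-case convention, packing
 * (count << 24) | offset into numba_PyUnicode_ExtendedCase when the
 * record's extended-case flag bit is set.
 */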
126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 127, 128, 129, 130, 131, 132, 133, 34, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 144, 34, 34, 151, 144, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 144, 163, 144, 164, 144, 165, 166, 167, 168, 169, 170, 171, 144, 172, 173, 144, 174, 175, 176, 177, 144, 178, 179, 144, 144, 180, 181, 144, 144, 182, 183, 184, 185, 144, 186, 144, 144, 34, 34, 34, 34, 34, 34, 34, 187, 188, 34, 189, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 34, 34, 34, 34, 34, 34, 34, 34, 190, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 34, 34, 34, 34, 191, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 34, 34, 34, 34, 192, 193, 194, 195, 144, 144, 144, 144, 196, 197, 198, 199, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 200, 34, 34, 34, 34, 34, 201, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 34, 34, 202, 34, 34, 203, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 204, 205, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 64, 206, 207, 208, 209, 210, 211, 144, 212, 213, 214, 215, 216, 217, 218, 219, 64, 64, 64, 64, 220, 221, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 222, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 34, 223, 224, 144, 144, 144, 144, 144, 225, 226, 144, 144, 227, 228, 144, 144, 229, 230, 231, 232, 233, 144, 64, 234, 64, 64, 64, 64, 64, 235, 236, 237, 238, 239, 240, 241, 242, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 243, 244, 245, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 86, 246, 34, 247, 248, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 249, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 250, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 251, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 252, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 
34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 253, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 254, 34, 255, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 256, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 257, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 34, 249, 34, 34, 258, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 
144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 
144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 
144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 
144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 
144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 
144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 
144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 
144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 259, 144, 260, 261, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 
126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 262, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 
126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 262, }; static unsigned short index2[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 2, 4, 5, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 6, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 6, 5, 5, 5, 5, 5, 5, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 5, 5, 5, 6, 18, 6, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 5, 5, 5, 5, 5, 5, 5, 6, 5, 20, 5, 5, 21, 5, 6, 5, 5, 22, 23, 6, 24, 5, 25, 6, 26, 20, 5, 27, 27, 27, 5, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 5, 17, 17, 17, 17, 17, 17, 17, 28, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 5, 19, 19, 19, 19, 19, 19, 19, 29, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 32, 33, 30, 31, 30, 31, 30, 31, 20, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 34, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 35, 30, 31, 30, 31, 30, 31, 36, 37, 38, 30, 31, 30, 31, 39, 30, 31, 40, 40, 30, 31, 20, 41, 42, 43, 30, 31, 40, 44, 45, 46, 47, 30, 31, 48, 20, 46, 49, 50, 51, 30, 31, 30, 31, 30, 31, 52, 30, 31, 52, 20, 20, 30, 31, 52, 30, 31, 53, 53, 30, 31, 30, 31, 54, 30, 31, 20, 55, 30, 31, 20, 56, 55, 55, 55, 55, 57, 58, 59, 57, 58, 59, 57, 58, 59, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 60, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 61, 57, 58, 59, 30, 31, 62, 63, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 
31, 30, 31, 30, 31, 30, 31, 64, 20, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 20, 20, 20, 20, 20, 20, 65, 30, 31, 66, 67, 68, 68, 30, 31, 69, 70, 71, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 72, 73, 74, 75, 76, 20, 77, 77, 20, 78, 20, 79, 80, 20, 20, 20, 77, 81, 20, 82, 20, 83, 84, 20, 85, 86, 84, 87, 88, 20, 20, 86, 20, 89, 90, 20, 20, 91, 20, 20, 20, 20, 20, 20, 20, 92, 20, 20, 93, 20, 20, 93, 20, 20, 20, 94, 93, 95, 96, 96, 97, 20, 20, 20, 20, 20, 98, 20, 55, 20, 20, 20, 20, 20, 20, 20, 20, 99, 100, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 101, 101, 101, 101, 101, 101, 101, 101, 101, 102, 102, 102, 102, 102, 102, 102, 101, 101, 6, 6, 6, 6, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 101, 101, 101, 101, 101, 6, 6, 6, 6, 6, 6, 6, 102, 6, 102, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 103, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 30, 31, 30, 31, 102, 6, 30, 31, 0, 0, 104, 50, 50, 50, 5, 105, 0, 0, 0, 0, 6, 6, 106, 25, 107, 107, 107, 0, 108, 0, 109, 109, 110, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 0, 17, 17, 17, 17, 17, 17, 17, 17, 17, 111, 112, 112, 112, 113, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 114, 19, 19, 19, 19, 19, 19, 19, 19, 19, 115, 116, 116, 117, 118, 119, 120, 120, 120, 121, 122, 123, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 124, 125, 126, 127, 128, 129, 5, 30, 31, 130, 30, 31, 20, 64, 64, 64, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 5, 25, 25, 25, 25, 25, 6, 6, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 133, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 134, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 0, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 0, 0, 102, 5, 5, 5, 5, 5, 5, 20, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 
136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 137, 20, 5, 5, 0, 0, 5, 5, 5, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 5, 25, 5, 25, 25, 5, 25, 25, 5, 25, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 55, 55, 55, 55, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 5, 21, 0, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 102, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 5, 5, 5, 5, 55, 55, 25, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 5, 55, 25, 25, 25, 25, 25, 25, 25, 21, 5, 25, 25, 25, 25, 25, 25, 102, 102, 25, 25, 5, 25, 25, 25, 25, 55, 55, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 55, 55, 55, 5, 5, 55, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 21, 55, 25, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 25, 25, 25, 25, 25, 25, 25, 102, 102, 5, 5, 5, 5, 102, 0, 0, 25, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 25, 25, 102, 25, 25, 25, 25, 25, 25, 25, 25, 25, 102, 25, 25, 25, 102, 25, 25, 25, 25, 25, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 25, 0, 0, 5, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 21, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 18, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 18, 25, 55, 18, 18, 18, 25, 25, 25, 25, 25, 25, 25, 25, 18, 18, 18, 18, 25, 18, 18, 55, 25, 25, 25, 25, 25, 25, 25, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 5, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 5, 102, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 18, 18, 0, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 55, 55, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 0, 55, 0, 0, 0, 55, 55, 55, 55, 0, 0, 25, 55, 18, 18, 18, 25, 25, 25, 25, 0, 0, 18, 18, 0, 0, 18, 18, 25, 55, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0, 0, 0, 0, 55, 55, 0, 55, 55, 55, 25, 25, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 55, 55, 5, 5, 27, 27, 27, 27, 27, 27, 5, 5, 55, 5, 25, 0, 0, 25, 25, 18, 0, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 55, 55, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 0, 55, 55, 0, 55, 55, 0, 0, 25, 0, 18, 18, 18, 25, 25, 0, 0, 0, 0, 25, 25, 0, 0, 25, 25, 25, 0, 0, 0, 25, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 0, 55, 0, 0, 0, 0, 0, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 25, 25, 55, 55, 55, 25, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 18, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 0, 55, 55, 55, 55, 55, 0, 0, 25, 55, 18, 18, 18, 25, 25, 25, 25, 25, 0, 25, 25, 18, 0, 18, 18, 25, 0, 0, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 25, 25, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 5, 5, 0, 0, 0, 0, 0, 0, 0, 55, 25, 25, 25, 25, 25, 25, 0, 25, 18, 18, 0, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 55, 55, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 0, 55, 55, 55, 55, 55, 0, 0, 25, 55, 18, 25, 18, 25, 25, 25, 25, 0, 0, 18, 18, 0, 0, 18, 18, 25, 0, 0, 0, 0, 0, 0, 0, 0, 25, 18, 0, 0, 0, 0, 55, 55, 0, 55, 55, 55, 25, 25, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 5, 55, 27, 27, 27, 27, 27, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 55, 0, 55, 55, 55, 55, 55, 55, 0, 0, 0, 55, 55, 55, 0, 55, 55, 55, 55, 0, 0, 0, 55, 55, 0, 55, 0, 55, 55, 0, 0, 0, 55, 55, 0, 0, 0, 55, 55, 55, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 18, 18, 25, 18, 18, 0, 0, 0, 18, 18, 18, 0, 18, 18, 18, 25, 0, 0, 55, 0, 0, 0, 0, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 27, 27, 27, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 25, 18, 18, 18, 25, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 55, 25, 25, 25, 18, 18, 18, 18, 0, 25, 25, 25, 0, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 25, 25, 0, 55, 55, 55, 0, 0, 0, 0, 0, 55, 55, 25, 25, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 27, 27, 27, 27, 27, 27, 27, 5, 55, 25, 18, 18, 5, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 0, 0, 25, 55, 18, 25, 18, 18, 18, 18, 18, 0, 25, 18, 18, 0, 18, 18, 25, 25, 0, 0, 0, 0, 0, 0, 0, 18, 18, 0, 0, 0, 0, 0, 0, 0, 55, 0, 55, 55, 
25, 25, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 18, 18, 0, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 55, 18, 18, 18, 25, 25, 25, 25, 0, 18, 18, 18, 0, 18, 18, 18, 25, 55, 5, 0, 0, 0, 0, 55, 55, 55, 18, 27, 27, 27, 27, 27, 27, 27, 55, 55, 55, 25, 25, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 27, 27, 27, 27, 27, 27, 27, 27, 27, 5, 55, 55, 55, 55, 55, 55, 0, 0, 18, 18, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 0, 0, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 25, 0, 0, 0, 0, 18, 18, 18, 25, 25, 25, 0, 25, 0, 18, 18, 18, 18, 18, 18, 18, 18, 0, 0, 0, 0, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 18, 18, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 55, 138, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 5, 55, 55, 55, 55, 55, 55, 102, 25, 25, 25, 25, 25, 25, 25, 25, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 0, 55, 0, 0, 55, 55, 0, 55, 0, 0, 55, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 0, 55, 0, 55, 0, 0, 55, 55, 0, 55, 55, 55, 55, 25, 55, 138, 25, 25, 25, 25, 25, 25, 0, 25, 25, 55, 0, 0, 55, 55, 55, 55, 55, 0, 102, 0, 25, 25, 25, 25, 25, 25, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 25, 25, 5, 5, 5, 5, 5, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 5, 25, 5, 25, 5, 25, 5, 5, 5, 5, 18, 18, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 18, 25, 25, 25, 25, 25, 5, 25, 25, 55, 55, 55, 55, 55, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 5, 5, 5, 5, 5, 5, 5, 5, 25, 5, 5, 5, 5, 5, 5, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 18, 18, 25, 25, 25, 25, 18, 25, 25, 25, 25, 25, 25, 18, 25, 25, 18, 18, 25, 25, 55, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 5, 5, 5, 5, 5, 5, 55, 55, 55, 55, 55, 55, 18, 18, 25, 25, 55, 55, 55, 55, 25, 25, 25, 55, 18, 18, 18, 55, 55, 18, 18, 18, 18, 18, 18, 18, 55, 55, 55, 25, 25, 25, 25, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 18, 18, 25, 25, 18, 18, 18, 18, 18, 18, 25, 55, 18, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 18, 18, 25, 5, 5, 139, 139, 139, 139, 139, 139, 
139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 0, 139, 0, 0, 0, 0, 0, 139, 0, 0, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 5, 102, 140, 140, 140, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 0, 0, 55, 55, 55, 55, 55, 55, 55, 0, 55, 0, 55, 55, 55, 55, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 0, 0, 55, 55, 55, 55, 55, 55, 55, 0, 55, 0, 55, 55, 55, 55, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 25, 25, 25, 5, 5, 5, 5, 5, 5, 5, 5, 5, 141, 142, 143, 144, 145, 146, 147, 148, 149, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 0, 0, 236, 237, 238, 239, 240, 241, 0, 0, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 2, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 5, 5, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 5, 5, 5, 242, 242, 242, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 25, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 0, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 18, 25, 25, 25, 25, 25, 25, 25, 18, 18, 18, 18, 18, 18, 18, 18, 25, 18, 18, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 5, 5, 5, 102, 5, 5, 5, 5, 55, 25, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 25, 25, 25, 21, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 102, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 243, 243, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 55, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 25, 25, 25, 18, 18, 18, 18, 25, 25, 18, 18, 18, 0, 0, 0, 0, 18, 18, 25, 18, 18, 18, 18, 18, 18, 25, 25, 25, 0, 0, 0, 0, 5, 0, 0, 0, 5, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 141, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 18, 18, 25, 0, 0, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 18, 25, 18, 25, 25, 25, 25, 25, 25, 25, 0, 25, 18, 25, 18, 18, 25, 25, 25, 25, 25, 25, 25, 25, 18, 18, 18, 18, 18, 18, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 25, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 102, 5, 5, 5, 5, 5, 5, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 18, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 18, 25, 25, 25, 25, 25, 18, 25, 18, 18, 18, 18, 18, 25, 18, 18, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 25, 25, 25, 25, 25, 25, 25, 25, 25, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 25, 25, 18, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 18, 25, 25, 25, 25, 18, 18, 25, 25, 18, 25, 25, 25, 55, 55, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 18, 25, 25, 18, 18, 18, 25, 18, 25, 25, 25, 18, 18, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 18, 18, 18, 18, 18, 18, 18, 18, 25, 25, 25, 25, 25, 25, 25, 25, 18, 18, 25, 25, 0, 0, 0, 5, 5, 5, 5, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 55, 55, 55, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 102, 102, 102, 102, 102, 102, 5, 5, 244, 245, 246, 247, 248, 249, 250, 251, 252, 0, 0, 0, 0, 0, 0, 0, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 0, 0, 253, 253, 253, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 5, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 18, 25, 25, 25, 25, 25, 25, 25, 55, 55, 55, 55, 25, 55, 55, 55, 55, 18, 18, 25, 55, 55, 18, 25, 25, 0, 0, 0, 0, 0, 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 
101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 101, 254, 20, 20, 20, 255, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 25, 25, 25, 25, 25, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 256, 257, 258, 259, 260, 261, 20, 20, 262, 20, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 263, 263, 263, 263, 263, 263, 263, 263, 264, 264, 264, 264, 264, 264, 264, 264, 263, 263, 263, 263, 263, 263, 0, 0, 264, 264, 264, 264, 264, 264, 0, 0, 263, 263, 263, 263, 263, 263, 263, 263, 264, 264, 264, 264, 264, 264, 264, 264, 263, 263, 263, 263, 263, 263, 263, 263, 264, 264, 264, 264, 264, 264, 264, 264, 263, 263, 263, 263, 263, 263, 0, 0, 264, 264, 264, 264, 264, 264, 0, 0, 265, 263, 266, 263, 267, 263, 268, 263, 0, 264, 0, 264, 0, 264, 0, 264, 263, 263, 263, 263, 263, 263, 263, 263, 264, 264, 264, 264, 264, 264, 264, 264, 269, 269, 270, 270, 270, 270, 271, 271, 272, 272, 273, 273, 274, 274, 0, 0, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 263, 263, 323, 324, 325, 0, 326, 327, 264, 264, 328, 328, 329, 6, 330, 6, 6, 6, 331, 332, 333, 0, 334, 335, 336, 336, 336, 336, 337, 6, 6, 6, 263, 263, 338, 339, 0, 0, 340, 341, 264, 264, 342, 342, 0, 6, 6, 6, 263, 263, 343, 344, 345, 126, 346, 347, 264, 264, 348, 348, 130, 6, 6, 6, 0, 0, 349, 350, 351, 0, 352, 353, 354, 354, 355, 355, 356, 6, 6, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 21, 21, 21, 21, 21, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 6, 3, 3, 21, 21, 21, 21, 21, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 18, 18, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 18, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 21, 21, 21, 21, 21, 0, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 357, 101, 0, 0, 358, 359, 360, 361, 362, 363, 5, 5, 5, 5, 5, 101, 357, 26, 22, 23, 358, 359, 360, 361, 362, 363, 5, 5, 5, 5, 
5, 0, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 6, 6, 6, 6, 25, 6, 6, 6, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 120, 5, 5, 5, 5, 120, 5, 5, 20, 120, 120, 120, 20, 20, 120, 120, 120, 20, 5, 120, 5, 5, 364, 120, 120, 120, 120, 120, 5, 5, 5, 5, 5, 5, 120, 5, 365, 5, 120, 5, 366, 367, 120, 120, 364, 20, 120, 120, 368, 120, 20, 55, 55, 55, 55, 20, 5, 5, 20, 20, 120, 120, 5, 5, 5, 5, 5, 120, 20, 20, 20, 20, 5, 5, 5, 5, 369, 5, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 370, 370, 370, 370, 370, 370, 370, 370, 370, 370, 370, 370, 370, 370, 370, 370, 371, 371, 371, 371, 371, 371, 371, 371, 371, 371, 371, 371, 371, 371, 371, 371, 242, 242, 242, 30, 31, 242, 242, 242, 242, 27, 5, 5, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 22, 23, 358, 359, 360, 361, 362, 363, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 26, 22, 23, 358, 359, 360, 361, 362, 363, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 26, 22, 23, 358, 359, 360, 361, 362, 363, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 372, 372, 372, 372, 372, 372, 372, 372, 372, 372, 372, 372, 372, 372, 372, 372, 372, 372, 372, 372, 372, 372, 372, 372, 372, 372, 373, 373, 373, 373, 373, 373, 373, 373, 373, 373, 373, 373, 373, 373, 373, 373, 373, 373, 373, 373, 373, 373, 373, 373, 373, 373, 357, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 26, 22, 23, 358, 359, 360, 361, 362, 363, 27, 357, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 26, 22, 23, 358, 359, 360, 361, 362, 363, 27, 26, 22, 23, 358, 359, 360, 361, 362, 363, 27, 26, 22, 23, 358, 359, 360, 361, 362, 363, 27, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 0, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 0, 30, 31, 374, 375, 376, 377, 378, 30, 31, 30, 31, 30, 31, 379, 380, 381, 382, 20, 30, 31, 20, 30, 31, 20, 20, 20, 20, 20, 101, 101, 383, 383, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 20, 5, 5, 5, 5, 5, 5, 30, 31, 30, 31, 25, 25, 25, 30, 31, 0, 0, 0, 0, 0, 5, 5, 5, 5, 27, 5, 5, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 384, 0, 384, 0, 0, 0, 0, 0, 384, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 102, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 385, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 2, 5, 5, 5, 5, 102, 55, 242, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 242, 242, 242, 242, 242, 242, 242, 242, 242, 25, 25, 25, 25, 18, 18, 5, 102, 102, 102, 102, 102, 5, 5, 242, 242, 242, 102, 55, 5, 5, 5, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 25, 25, 6, 6, 102, 102, 55, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 5, 102, 102, 102, 55, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 5, 5, 27, 27, 27, 27, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 27, 27, 27, 27, 27, 27, 27, 27, 5, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 386, 55, 55, 386, 55, 55, 55, 386, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 386, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 386, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 386, 386, 386, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 386, 386, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
[large machine-generated table of integer constants from an archived data file; raw values elided]
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, }; /* Returns the numeric value as double for Unicode characters * having this property, -1.0 otherwise. 
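 * For example: numba_PyUnicode_ToNumeric(0x0030) (DIGIT ZERO) returns 0.0,
 * numba_PyUnicode_ToNumeric(0x00BD) (VULGAR FRACTION ONE HALF) returns 0.5,
 * and a character with no numeric value, such as 0x0041 ('A'), returns -1.0.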
*/ double numba_PyUnicode_ToNumeric(Py_UCS4 ch) { switch (ch) { case 0x0F33: return (double) -1.0/2.0; case 0x0030: case 0x0660: case 0x06F0: case 0x07C0: case 0x0966: case 0x09E6: case 0x0A66: case 0x0AE6: case 0x0B66: case 0x0BE6: case 0x0C66: case 0x0C78: case 0x0CE6: case 0x0D66: case 0x0DE6: case 0x0E50: case 0x0ED0: case 0x0F20: case 0x1040: case 0x1090: case 0x17E0: case 0x17F0: case 0x1810: case 0x1946: case 0x19D0: case 0x1A80: case 0x1A90: case 0x1B50: case 0x1BB0: case 0x1C40: case 0x1C50: case 0x2070: case 0x2080: case 0x2189: case 0x24EA: case 0x24FF: case 0x3007: case 0x96F6: case 0xA620: case 0xA6EF: case 0xA8D0: case 0xA900: case 0xA9D0: case 0xA9F0: case 0xAA50: case 0xABF0: case 0xF9B2: case 0xFF10: case 0x1018A: case 0x104A0: case 0x10D30: case 0x11066: case 0x110F0: case 0x11136: case 0x111D0: case 0x112F0: case 0x11450: case 0x114D0: case 0x11650: case 0x116C0: case 0x11730: case 0x118E0: case 0x11C50: case 0x11D50: case 0x11DA0: case 0x16A60: case 0x16B50: case 0x16E80: case 0x1D2E0: case 0x1D7CE: case 0x1D7D8: case 0x1D7E2: case 0x1D7EC: case 0x1D7F6: case 0x1E950: case 0x1F100: case 0x1F101: case 0x1F10B: case 0x1F10C: return (double) 0.0; case 0x0031: case 0x00B9: case 0x0661: case 0x06F1: case 0x07C1: case 0x0967: case 0x09E7: case 0x0A67: case 0x0AE7: case 0x0B67: case 0x0BE7: case 0x0C67: case 0x0C79: case 0x0C7C: case 0x0CE7: case 0x0D67: case 0x0DE7: case 0x0E51: case 0x0ED1: case 0x0F21: case 0x1041: case 0x1091: case 0x1369: case 0x17E1: case 0x17F1: case 0x1811: case 0x1947: case 0x19D1: case 0x19DA: case 0x1A81: case 0x1A91: case 0x1B51: case 0x1BB1: case 0x1C41: case 0x1C51: case 0x2081: case 0x215F: case 0x2160: case 0x2170: case 0x2460: case 0x2474: case 0x2488: case 0x24F5: case 0x2776: case 0x2780: case 0x278A: case 0x3021: case 0x3192: case 0x3220: case 0x3280: case 0x4E00: case 0x58F1: case 0x58F9: case 0x5E7A: case 0x5F0C: case 0xA621: case 0xA6E6: case 0xA8D1: case 0xA901: case 0xA9D1: case 0xA9F1: case 0xAA51: case 0xABF1: case 0xFF11: case 0x10107: case 0x10142: case 0x10158: case 0x10159: case 0x1015A: case 0x102E1: case 0x10320: case 0x103D1: case 0x104A1: case 0x10858: case 0x10879: case 0x108A7: case 0x108FB: case 0x10916: case 0x109C0: case 0x10A40: case 0x10A7D: case 0x10A9D: case 0x10AEB: case 0x10B58: case 0x10B78: case 0x10BA9: case 0x10CFA: case 0x10D31: case 0x10E60: case 0x10F1D: case 0x10F51: case 0x11052: case 0x11067: case 0x110F1: case 0x11137: case 0x111D1: case 0x111E1: case 0x112F1: case 0x11451: case 0x114D1: case 0x11651: case 0x116C1: case 0x11731: case 0x118E1: case 0x11C51: case 0x11C5A: case 0x11D51: case 0x11DA1: case 0x12415: case 0x1241E: case 0x1242C: case 0x12434: case 0x1244F: case 0x12458: case 0x16A61: case 0x16B51: case 0x16E81: case 0x16E94: case 0x1D2E1: case 0x1D360: case 0x1D372: case 0x1D377: case 0x1D7CF: case 0x1D7D9: case 0x1D7E3: case 0x1D7ED: case 0x1D7F7: case 0x1E8C7: case 0x1E951: case 0x1EC71: case 0x1ECA3: case 0x1ECB1: case 0x1F102: case 0x2092A: return (double) 1.0; case 0x0D5C: case 0x2152: return (double) 1.0/10.0; case 0x109F6: return (double) 1.0/12.0; case 0x09F4: case 0x0B75: case 0x0D76: case 0xA833: return (double) 1.0/16.0; case 0x0D58: return (double) 1.0/160.0; case 0x00BD: case 0x0B73: case 0x0D74: case 0x0F2A: case 0x2CFD: case 0xA831: case 0x10141: case 0x10175: case 0x10176: case 0x109BD: case 0x10A48: case 0x10E7B: case 0x10F26: case 0x12464: case 0x1ECAE: return (double) 1.0/2.0; case 0x0D5B: return (double) 1.0/20.0; case 0x2153: case 0x10E7D: case 0x1245A: case 0x1245D: case 
0x12465: return (double) 1.0/3.0; case 0x00BC: case 0x09F7: case 0x0B72: case 0x0D73: case 0xA830: case 0x10140: case 0x1018B: case 0x10E7C: case 0x12460: case 0x12462: case 0x12463: case 0x1ECAD: return (double) 1.0/4.0; case 0x0D59: return (double) 1.0/40.0; case 0x0D5E: case 0x2155: return (double) 1.0/5.0; case 0x2159: case 0x12461: return (double) 1.0/6.0; case 0x2150: return (double) 1.0/7.0; case 0x09F5: case 0x0B76: case 0x0D77: case 0x215B: case 0xA834: case 0x1245F: return (double) 1.0/8.0; case 0x2151: return (double) 1.0/9.0; case 0x0BF0: case 0x0D70: case 0x1372: case 0x2169: case 0x2179: case 0x2469: case 0x247D: case 0x2491: case 0x24FE: case 0x277F: case 0x2789: case 0x2793: case 0x3038: case 0x3229: case 0x3248: case 0x3289: case 0x4EC0: case 0x5341: case 0x62FE: case 0xF973: case 0xF9FD: case 0x10110: case 0x10149: case 0x10150: case 0x10157: case 0x10160: case 0x10161: case 0x10162: case 0x10163: case 0x10164: case 0x102EA: case 0x10322: case 0x103D3: case 0x1085B: case 0x1087E: case 0x108AD: case 0x108FD: case 0x10917: case 0x109C9: case 0x10A44: case 0x10A9E: case 0x10AED: case 0x10B5C: case 0x10B7C: case 0x10BAD: case 0x10CFC: case 0x10E69: case 0x10F22: case 0x10F52: case 0x1105B: case 0x111EA: case 0x1173A: case 0x118EA: case 0x11C63: case 0x16B5B: case 0x16E8A: case 0x1D2EA: case 0x1D369: case 0x1EC7A: return (double) 10.0; case 0x109FF: return (double) 10.0/12.0; case 0x0BF1: case 0x0D71: case 0x137B: case 0x216D: case 0x217D: case 0x4F70: case 0x767E: case 0x964C: case 0x10119: case 0x1014B: case 0x10152: case 0x1016A: case 0x102F3: case 0x103D5: case 0x1085D: case 0x108AF: case 0x108FF: case 0x10919: case 0x109D2: case 0x10A46: case 0x10AEF: case 0x10B5E: case 0x10B7E: case 0x10BAF: case 0x10CFE: case 0x10E72: case 0x10F25: case 0x10F54: case 0x11064: case 0x111F3: case 0x11C6C: case 0x16B5C: case 0x1EC83: return (double) 100.0; case 0x0BF2: case 0x0D72: case 0x216F: case 0x217F: case 0x2180: case 0x4EDF: case 0x5343: case 0x9621: case 0x10122: case 0x1014D: case 0x10154: case 0x10171: case 0x1085E: case 0x109DB: case 0x10A47: case 0x10B5F: case 0x10B7F: case 0x10CFF: case 0x11065: case 0x111F4: case 0x1EC8C: return (double) 1000.0; case 0x137C: case 0x2182: case 0x4E07: case 0x842C: case 0x1012B: case 0x10155: case 0x1085F: case 0x109E4: case 0x16B5D: case 0x1EC95: case 0x1ECB3: return (double) 10000.0; case 0x2188: case 0x109ED: case 0x1EC9E: case 0x1ECA0: case 0x1ECB4: return (double) 100000.0; case 0x16B5E: return (double) 1000000.0; case 0x1ECA1: return (double) 10000000.0; case 0x4EBF: case 0x5104: case 0x16B5F: return (double) 100000000.0; case 0x16B60: return (double) 10000000000.0; case 0x5146: case 0x16B61: return (double) 1000000000000.0; case 0x216A: case 0x217A: case 0x246A: case 0x247E: case 0x2492: case 0x24EB: case 0x16E8B: case 0x1D2EB: return (double) 11.0; case 0x109BC: return (double) 11.0/12.0; case 0x0F2F: return (double) 11.0/2.0; case 0x216B: case 0x217B: case 0x246B: case 0x247F: case 0x2493: case 0x24EC: case 0x16E8C: case 0x1D2EC: return (double) 12.0; case 0x246C: case 0x2480: case 0x2494: case 0x24ED: case 0x16E8D: case 0x1D2ED: return (double) 13.0; case 0x0F30: return (double) 13.0/2.0; case 0x246D: case 0x2481: case 0x2495: case 0x24EE: case 0x16E8E: case 0x1D2EE: return (double) 14.0; case 0x246E: case 0x2482: case 0x2496: case 0x24EF: case 0x16E8F: case 0x1D2EF: return (double) 15.0; case 0x0F31: return (double) 15.0/2.0; case 0x09F9: case 0x246F: case 0x2483: case 0x2497: case 0x24F0: case 0x16E90: case 0x1D2F0: return (double) 
16.0; case 0x16EE: case 0x2470: case 0x2484: case 0x2498: case 0x24F1: case 0x16E91: case 0x1D2F1: return (double) 17.0; case 0x0F32: return (double) 17.0/2.0; case 0x16EF: case 0x2471: case 0x2485: case 0x2499: case 0x24F2: case 0x16E92: case 0x1D2F2: return (double) 18.0; case 0x16F0: case 0x2472: case 0x2486: case 0x249A: case 0x24F3: case 0x16E93: case 0x1D2F3: return (double) 19.0; case 0x0032: case 0x00B2: case 0x0662: case 0x06F2: case 0x07C2: case 0x0968: case 0x09E8: case 0x0A68: case 0x0AE8: case 0x0B68: case 0x0BE8: case 0x0C68: case 0x0C7A: case 0x0C7D: case 0x0CE8: case 0x0D68: case 0x0DE8: case 0x0E52: case 0x0ED2: case 0x0F22: case 0x1042: case 0x1092: case 0x136A: case 0x17E2: case 0x17F2: case 0x1812: case 0x1948: case 0x19D2: case 0x1A82: case 0x1A92: case 0x1B52: case 0x1BB2: case 0x1C42: case 0x1C52: case 0x2082: case 0x2161: case 0x2171: case 0x2461: case 0x2475: case 0x2489: case 0x24F6: case 0x2777: case 0x2781: case 0x278B: case 0x3022: case 0x3193: case 0x3221: case 0x3281: case 0x3483: case 0x4E8C: case 0x5169: case 0x5F0D: case 0x5F10: case 0x8CAE: case 0x8CB3: case 0x8D30: case 0xA622: case 0xA6E7: case 0xA8D2: case 0xA902: case 0xA9D2: case 0xA9F2: case 0xAA52: case 0xABF2: case 0xF978: case 0xFF12: case 0x10108: case 0x1015B: case 0x1015C: case 0x1015D: case 0x1015E: case 0x102E2: case 0x103D2: case 0x104A2: case 0x10859: case 0x1087A: case 0x108A8: case 0x1091A: case 0x109C1: case 0x10A41: case 0x10B59: case 0x10B79: case 0x10BAA: case 0x10D32: case 0x10E61: case 0x10F1E: case 0x11053: case 0x11068: case 0x110F2: case 0x11138: case 0x111D2: case 0x111E2: case 0x112F2: case 0x11452: case 0x114D2: case 0x11652: case 0x116C2: case 0x11732: case 0x118E2: case 0x11C52: case 0x11C5B: case 0x11D52: case 0x11DA2: case 0x12400: case 0x12416: case 0x1241F: case 0x12423: case 0x1242D: case 0x12435: case 0x1244A: case 0x12450: case 0x12456: case 0x12459: case 0x16A62: case 0x16B52: case 0x16E82: case 0x16E95: case 0x1D2E2: case 0x1D361: case 0x1D373: case 0x1D7D0: case 0x1D7DA: case 0x1D7E4: case 0x1D7EE: case 0x1D7F8: case 0x1E8C8: case 0x1E952: case 0x1EC72: case 0x1ECA4: case 0x1ECB2: case 0x1F103: case 0x22390: return (double) 2.0; case 0x109F7: return (double) 2.0/12.0; case 0x2154: case 0x10177: case 0x10E7E: case 0x1245B: case 0x1245E: case 0x12466: return (double) 2.0/3.0; case 0x2156: return (double) 2.0/5.0; case 0x1373: case 0x2473: case 0x2487: case 0x249B: case 0x24F4: case 0x3039: case 0x3249: case 0x5344: case 0x5EFF: case 0x10111: case 0x102EB: case 0x103D4: case 0x1085C: case 0x1087F: case 0x108AE: case 0x108FE: case 0x10918: case 0x109CA: case 0x10A45: case 0x10A9F: case 0x10AEE: case 0x10B5D: case 0x10B7D: case 0x10BAE: case 0x10E6A: case 0x10F23: case 0x10F53: case 0x1105C: case 0x111EB: case 0x1173B: case 0x118EB: case 0x11C64: case 0x1D36A: case 0x1EC7B: return (double) 20.0; case 0x1011A: case 0x102F4: case 0x109D3: case 0x10E73: case 0x1EC84: return (double) 200.0; case 0x10123: case 0x109DC: case 0x1EC8D: return (double) 2000.0; case 0x1012C: case 0x109E5: case 0x1EC96: return (double) 20000.0; case 0x109EE: case 0x1EC9F: return (double) 200000.0; case 0x1ECA2: return (double) 20000000.0; case 0x3251: return (double) 21.0; case 0x12432: return (double) 216000.0; case 0x3252: return (double) 22.0; case 0x3253: return (double) 23.0; case 0x3254: return (double) 24.0; case 0x3255: return (double) 25.0; case 0x3256: return (double) 26.0; case 0x3257: return (double) 27.0; case 0x3258: return (double) 28.0; case 0x3259: return (double) 29.0; case 
0x0033: case 0x00B3: case 0x0663: case 0x06F3: case 0x07C3: case 0x0969: case 0x09E9: case 0x0A69: case 0x0AE9: case 0x0B69: case 0x0BE9: case 0x0C69: case 0x0C7B: case 0x0C7E: case 0x0CE9: case 0x0D69: case 0x0DE9: case 0x0E53: case 0x0ED3: case 0x0F23: case 0x1043: case 0x1093: case 0x136B: case 0x17E3: case 0x17F3: case 0x1813: case 0x1949: case 0x19D3: case 0x1A83: case 0x1A93: case 0x1B53: case 0x1BB3: case 0x1C43: case 0x1C53: case 0x2083: case 0x2162: case 0x2172: case 0x2462: case 0x2476: case 0x248A: case 0x24F7: case 0x2778: case 0x2782: case 0x278C: case 0x3023: case 0x3194: case 0x3222: case 0x3282: case 0x4E09: case 0x4EE8: case 0x53C1: case 0x53C2: case 0x53C3: case 0x53C4: case 0x5F0E: case 0xA623: case 0xA6E8: case 0xA8D3: case 0xA903: case 0xA9D3: case 0xA9F3: case 0xAA53: case 0xABF3: case 0xF96B: case 0xFF13: case 0x10109: case 0x102E3: case 0x104A3: case 0x1085A: case 0x1087B: case 0x108A9: case 0x1091B: case 0x109C2: case 0x10A42: case 0x10B5A: case 0x10B7A: case 0x10BAB: case 0x10D33: case 0x10E62: case 0x10F1F: case 0x11054: case 0x11069: case 0x110F3: case 0x11139: case 0x111D3: case 0x111E3: case 0x112F3: case 0x11453: case 0x114D3: case 0x11653: case 0x116C3: case 0x11733: case 0x118E3: case 0x11C53: case 0x11C5C: case 0x11D53: case 0x11DA3: case 0x12401: case 0x12408: case 0x12417: case 0x12420: case 0x12424: case 0x12425: case 0x1242E: case 0x1242F: case 0x12436: case 0x12437: case 0x1243A: case 0x1243B: case 0x1244B: case 0x12451: case 0x12457: case 0x16A63: case 0x16B53: case 0x16E83: case 0x16E96: case 0x1D2E3: case 0x1D362: case 0x1D374: case 0x1D7D1: case 0x1D7DB: case 0x1D7E5: case 0x1D7EF: case 0x1D7F9: case 0x1E8C9: case 0x1E953: case 0x1EC73: case 0x1ECA5: case 0x1F104: case 0x20AFD: case 0x20B19: case 0x22998: case 0x23B1B: return (double) 3.0; case 0x109F8: return (double) 3.0/12.0; case 0x09F6: case 0x0B77: case 0x0D78: case 0xA835: return (double) 3.0/16.0; case 0x0F2B: return (double) 3.0/2.0; case 0x0D5D: return (double) 3.0/20.0; case 0x00BE: case 0x09F8: case 0x0B74: case 0x0D75: case 0xA832: case 0x10178: case 0x1ECAF: return (double) 3.0/4.0; case 0x2157: return (double) 3.0/5.0; case 0x215C: return (double) 3.0/8.0; case 0x0D5A: return (double) 3.0/80.0; case 0x1374: case 0x303A: case 0x324A: case 0x325A: case 0x5345: case 0x10112: case 0x10165: case 0x102EC: case 0x109CB: case 0x10E6B: case 0x10F24: case 0x1105D: case 0x111EC: case 0x118EC: case 0x11C65: case 0x1D36B: case 0x1EC7C: case 0x20983: return (double) 30.0; case 0x1011B: case 0x1016B: case 0x102F5: case 0x109D4: case 0x10E74: case 0x1EC85: return (double) 300.0; case 0x10124: case 0x109DD: case 0x1EC8E: return (double) 3000.0; case 0x1012D: case 0x109E6: case 0x1EC97: return (double) 30000.0; case 0x109EF: return (double) 300000.0; case 0x325B: return (double) 31.0; case 0x325C: return (double) 32.0; case 0x325D: return (double) 33.0; case 0x325E: return (double) 34.0; case 0x325F: return (double) 35.0; case 0x32B1: return (double) 36.0; case 0x32B2: return (double) 37.0; case 0x32B3: return (double) 38.0; case 0x32B4: return (double) 39.0; case 0x0034: case 0x0664: case 0x06F4: case 0x07C4: case 0x096A: case 0x09EA: case 0x0A6A: case 0x0AEA: case 0x0B6A: case 0x0BEA: case 0x0C6A: case 0x0CEA: case 0x0D6A: case 0x0DEA: case 0x0E54: case 0x0ED4: case 0x0F24: case 0x1044: case 0x1094: case 0x136C: case 0x17E4: case 0x17F4: case 0x1814: case 0x194A: case 0x19D4: case 0x1A84: case 0x1A94: case 0x1B54: case 0x1BB4: case 0x1C44: case 0x1C54: case 0x2074: case 0x2084: case 0x2163: case 
0x2173: case 0x2463: case 0x2477: case 0x248B: case 0x24F8: case 0x2779: case 0x2783: case 0x278D: case 0x3024: case 0x3195: case 0x3223: case 0x3283: case 0x4E96: case 0x56DB: case 0x8086: case 0xA624: case 0xA6E9: case 0xA8D4: case 0xA904: case 0xA9D4: case 0xA9F4: case 0xAA54: case 0xABF4: case 0xFF14: case 0x1010A: case 0x102E4: case 0x104A4: case 0x1087C: case 0x108AA: case 0x108AB: case 0x109C3: case 0x10A43: case 0x10B5B: case 0x10B7B: case 0x10BAC: case 0x10D34: case 0x10E63: case 0x10F20: case 0x11055: case 0x1106A: case 0x110F4: case 0x1113A: case 0x111D4: case 0x111E4: case 0x112F4: case 0x11454: case 0x114D4: case 0x11654: case 0x116C4: case 0x11734: case 0x118E4: case 0x11C54: case 0x11C5D: case 0x11D54: case 0x11DA4: case 0x12402: case 0x12409: case 0x1240F: case 0x12418: case 0x12421: case 0x12426: case 0x12430: case 0x12438: case 0x1243C: case 0x1243D: case 0x1243E: case 0x1243F: case 0x1244C: case 0x12452: case 0x12453: case 0x12469: case 0x16A64: case 0x16B54: case 0x16E84: case 0x1D2E4: case 0x1D363: case 0x1D375: case 0x1D7D2: case 0x1D7DC: case 0x1D7E6: case 0x1D7F0: case 0x1D7FA: case 0x1E8CA: case 0x1E954: case 0x1EC74: case 0x1ECA6: case 0x1F105: case 0x20064: case 0x200E2: case 0x2626D: return (double) 4.0; case 0x109F9: return (double) 4.0/12.0; case 0x2158: return (double) 4.0/5.0; case 0x1375: case 0x324B: case 0x32B5: case 0x534C: case 0x10113: case 0x102ED: case 0x109CC: case 0x10E6C: case 0x1105E: case 0x111ED: case 0x118ED: case 0x11C66: case 0x12467: case 0x1D36C: case 0x1EC7D: case 0x2098C: case 0x2099C: return (double) 40.0; case 0x1011C: case 0x102F6: case 0x109D5: case 0x10E75: case 0x1EC86: return (double) 400.0; case 0x10125: case 0x109DE: case 0x1EC8F: return (double) 4000.0; case 0x1012E: case 0x109E7: case 0x1EC98: return (double) 40000.0; case 0x109F0: return (double) 400000.0; case 0x32B6: return (double) 41.0; case 0x32B7: return (double) 42.0; case 0x32B8: return (double) 43.0; case 0x12433: return (double) 432000.0; case 0x32B9: return (double) 44.0; case 0x32BA: return (double) 45.0; case 0x32BB: return (double) 46.0; case 0x32BC: return (double) 47.0; case 0x32BD: return (double) 48.0; case 0x32BE: return (double) 49.0; case 0x0035: case 0x0665: case 0x06F5: case 0x07C5: case 0x096B: case 0x09EB: case 0x0A6B: case 0x0AEB: case 0x0B6B: case 0x0BEB: case 0x0C6B: case 0x0CEB: case 0x0D6B: case 0x0DEB: case 0x0E55: case 0x0ED5: case 0x0F25: case 0x1045: case 0x1095: case 0x136D: case 0x17E5: case 0x17F5: case 0x1815: case 0x194B: case 0x19D5: case 0x1A85: case 0x1A95: case 0x1B55: case 0x1BB5: case 0x1C45: case 0x1C55: case 0x2075: case 0x2085: case 0x2164: case 0x2174: case 0x2464: case 0x2478: case 0x248C: case 0x24F9: case 0x277A: case 0x2784: case 0x278E: case 0x3025: case 0x3224: case 0x3284: case 0x3405: case 0x382A: case 0x4E94: case 0x4F0D: case 0xA625: case 0xA6EA: case 0xA8D5: case 0xA905: case 0xA9D5: case 0xA9F5: case 0xAA55: case 0xABF5: case 0xFF15: case 0x1010B: case 0x10143: case 0x10148: case 0x1014F: case 0x1015F: case 0x10173: case 0x102E5: case 0x10321: case 0x104A5: case 0x1087D: case 0x108AC: case 0x108FC: case 0x109C4: case 0x10AEC: case 0x10CFB: case 0x10D35: case 0x10E64: case 0x10F21: case 0x11056: case 0x1106B: case 0x110F5: case 0x1113B: case 0x111D5: case 0x111E5: case 0x112F5: case 0x11455: case 0x114D5: case 0x11655: case 0x116C5: case 0x11735: case 0x118E5: case 0x11C55: case 0x11C5E: case 0x11D55: case 0x11DA5: case 0x12403: case 0x1240A: case 0x12410: case 0x12419: case 0x12422: case 0x12427: case 0x12431: case 
0x12439: case 0x1244D: case 0x12454: case 0x12455: case 0x1246A: case 0x16A65: case 0x16B55: case 0x16E85: case 0x1D2E5: case 0x1D364: case 0x1D376: case 0x1D378: case 0x1D7D3: case 0x1D7DD: case 0x1D7E7: case 0x1D7F1: case 0x1D7FB: case 0x1E8CB: case 0x1E955: case 0x1EC75: case 0x1ECA7: case 0x1F106: case 0x20121: return (double) 5.0; case 0x109FA: return (double) 5.0/12.0; case 0x0F2C: return (double) 5.0/2.0; case 0x215A: case 0x1245C: return (double) 5.0/6.0; case 0x215D: return (double) 5.0/8.0; case 0x1376: case 0x216C: case 0x217C: case 0x2186: case 0x324C: case 0x32BF: case 0x10114: case 0x10144: case 0x1014A: case 0x10151: case 0x10166: case 0x10167: case 0x10168: case 0x10169: case 0x10174: case 0x102EE: case 0x10323: case 0x109CD: case 0x10A7E: case 0x10CFD: case 0x10E6D: case 0x1105F: case 0x111EE: case 0x118EE: case 0x11C67: case 0x12468: case 0x1D36D: case 0x1EC7E: return (double) 50.0; case 0x216E: case 0x217E: case 0x1011D: case 0x10145: case 0x1014C: case 0x10153: case 0x1016C: case 0x1016D: case 0x1016E: case 0x1016F: case 0x10170: case 0x102F7: case 0x109D6: case 0x10E76: case 0x1EC87: return (double) 500.0; case 0x2181: case 0x10126: case 0x10146: case 0x1014E: case 0x10172: case 0x109DF: case 0x1EC90: return (double) 5000.0; case 0x2187: case 0x1012F: case 0x10147: case 0x10156: case 0x109E8: case 0x1EC99: return (double) 50000.0; case 0x109F1: return (double) 500000.0; case 0x0036: case 0x0666: case 0x06F6: case 0x07C6: case 0x096C: case 0x09EC: case 0x0A6C: case 0x0AEC: case 0x0B6C: case 0x0BEC: case 0x0C6C: case 0x0CEC: case 0x0D6C: case 0x0DEC: case 0x0E56: case 0x0ED6: case 0x0F26: case 0x1046: case 0x1096: case 0x136E: case 0x17E6: case 0x17F6: case 0x1816: case 0x194C: case 0x19D6: case 0x1A86: case 0x1A96: case 0x1B56: case 0x1BB6: case 0x1C46: case 0x1C56: case 0x2076: case 0x2086: case 0x2165: case 0x2175: case 0x2185: case 0x2465: case 0x2479: case 0x248D: case 0x24FA: case 0x277B: case 0x2785: case 0x278F: case 0x3026: case 0x3225: case 0x3285: case 0x516D: case 0x9646: case 0x9678: case 0xA626: case 0xA6EB: case 0xA8D6: case 0xA906: case 0xA9D6: case 0xA9F6: case 0xAA56: case 0xABF6: case 0xF9D1: case 0xF9D3: case 0xFF16: case 0x1010C: case 0x102E6: case 0x104A6: case 0x109C5: case 0x10D36: case 0x10E65: case 0x11057: case 0x1106C: case 0x110F6: case 0x1113C: case 0x111D6: case 0x111E6: case 0x112F6: case 0x11456: case 0x114D6: case 0x11656: case 0x116C6: case 0x11736: case 0x118E6: case 0x11C56: case 0x11C5F: case 0x11D56: case 0x11DA6: case 0x12404: case 0x1240B: case 0x12411: case 0x1241A: case 0x12428: case 0x12440: case 0x1244E: case 0x1246B: case 0x16A66: case 0x16B56: case 0x16E86: case 0x1D2E6: case 0x1D365: case 0x1D7D4: case 0x1D7DE: case 0x1D7E8: case 0x1D7F2: case 0x1D7FC: case 0x1E8CC: case 0x1E956: case 0x1EC76: case 0x1ECA8: case 0x1F107: case 0x20AEA: return (double) 6.0; case 0x109FB: return (double) 6.0/12.0; case 0x1377: case 0x324D: case 0x10115: case 0x102EF: case 0x109CE: case 0x10E6E: case 0x11060: case 0x111EF: case 0x118EF: case 0x11C68: case 0x1D36E: case 0x1EC7F: return (double) 60.0; case 0x1011E: case 0x102F8: case 0x109D7: case 0x10E77: case 0x1EC88: return (double) 600.0; case 0x10127: case 0x109E0: case 0x1EC91: return (double) 6000.0; case 0x10130: case 0x109E9: case 0x1EC9A: return (double) 60000.0; case 0x109F2: return (double) 600000.0; case 0x0037: case 0x0667: case 0x06F7: case 0x07C7: case 0x096D: case 0x09ED: case 0x0A6D: case 0x0AED: case 0x0B6D: case 0x0BED: case 0x0C6D: case 0x0CED: case 0x0D6D: case 0x0DED: case 
0x0E57: case 0x0ED7: case 0x0F27: case 0x1047: case 0x1097: case 0x136F: case 0x17E7: case 0x17F7: case 0x1817: case 0x194D: case 0x19D7: case 0x1A87: case 0x1A97: case 0x1B57: case 0x1BB7: case 0x1C47: case 0x1C57: case 0x2077: case 0x2087: case 0x2166: case 0x2176: case 0x2466: case 0x247A: case 0x248E: case 0x24FB: case 0x277C: case 0x2786: case 0x2790: case 0x3027: case 0x3226: case 0x3286: case 0x3B4D: case 0x4E03: case 0x67D2: case 0x6F06: case 0xA627: case 0xA6EC: case 0xA8D7: case 0xA907: case 0xA9D7: case 0xA9F7: case 0xAA57: case 0xABF7: case 0xFF17: case 0x1010D: case 0x102E7: case 0x104A7: case 0x109C6: case 0x10D37: case 0x10E66: case 0x11058: case 0x1106D: case 0x110F7: case 0x1113D: case 0x111D7: case 0x111E7: case 0x112F7: case 0x11457: case 0x114D7: case 0x11657: case 0x116C7: case 0x11737: case 0x118E7: case 0x11C57: case 0x11C60: case 0x11D57: case 0x11DA7: case 0x12405: case 0x1240C: case 0x12412: case 0x1241B: case 0x12429: case 0x12441: case 0x12442: case 0x12443: case 0x1246C: case 0x16A67: case 0x16B57: case 0x16E87: case 0x1D2E7: case 0x1D366: case 0x1D7D5: case 0x1D7DF: case 0x1D7E9: case 0x1D7F3: case 0x1D7FD: case 0x1E8CD: case 0x1E957: case 0x1EC77: case 0x1ECA9: case 0x1F108: case 0x20001: return (double) 7.0; case 0x109FC: return (double) 7.0/12.0; case 0x0F2D: return (double) 7.0/2.0; case 0x215E: return (double) 7.0/8.0; case 0x1378: case 0x324E: case 0x10116: case 0x102F0: case 0x109CF: case 0x10E6F: case 0x11061: case 0x111F0: case 0x118F0: case 0x11C69: case 0x1D36F: case 0x1EC80: return (double) 70.0; case 0x1011F: case 0x102F9: case 0x109D8: case 0x10E78: case 0x1EC89: return (double) 700.0; case 0x10128: case 0x109E1: case 0x1EC92: return (double) 7000.0; case 0x10131: case 0x109EA: case 0x1EC9B: return (double) 70000.0; case 0x109F3: return (double) 700000.0; case 0x0038: case 0x0668: case 0x06F8: case 0x07C8: case 0x096E: case 0x09EE: case 0x0A6E: case 0x0AEE: case 0x0B6E: case 0x0BEE: case 0x0C6E: case 0x0CEE: case 0x0D6E: case 0x0DEE: case 0x0E58: case 0x0ED8: case 0x0F28: case 0x1048: case 0x1098: case 0x1370: case 0x17E8: case 0x17F8: case 0x1818: case 0x194E: case 0x19D8: case 0x1A88: case 0x1A98: case 0x1B58: case 0x1BB8: case 0x1C48: case 0x1C58: case 0x2078: case 0x2088: case 0x2167: case 0x2177: case 0x2467: case 0x247B: case 0x248F: case 0x24FC: case 0x277D: case 0x2787: case 0x2791: case 0x3028: case 0x3227: case 0x3287: case 0x516B: case 0x634C: case 0xA628: case 0xA6ED: case 0xA8D8: case 0xA908: case 0xA9D8: case 0xA9F8: case 0xAA58: case 0xABF8: case 0xFF18: case 0x1010E: case 0x102E8: case 0x104A8: case 0x109C7: case 0x10D38: case 0x10E67: case 0x11059: case 0x1106E: case 0x110F8: case 0x1113E: case 0x111D8: case 0x111E8: case 0x112F8: case 0x11458: case 0x114D8: case 0x11658: case 0x116C8: case 0x11738: case 0x118E8: case 0x11C58: case 0x11C61: case 0x11D58: case 0x11DA8: case 0x12406: case 0x1240D: case 0x12413: case 0x1241C: case 0x1242A: case 0x12444: case 0x12445: case 0x1246D: case 0x16A68: case 0x16B58: case 0x16E88: case 0x1D2E8: case 0x1D367: case 0x1D7D6: case 0x1D7E0: case 0x1D7EA: case 0x1D7F4: case 0x1D7FE: case 0x1E8CE: case 0x1E958: case 0x1EC78: case 0x1ECAA: case 0x1F109: return (double) 8.0; case 0x109FD: return (double) 8.0/12.0; case 0x1379: case 0x324F: case 0x10117: case 0x102F1: case 0x10E70: case 0x11062: case 0x111F1: case 0x118F1: case 0x11C6A: case 0x1D370: case 0x1EC81: return (double) 80.0; case 0x10120: case 0x102FA: case 0x109D9: case 0x10E79: case 0x1EC8A: return (double) 800.0; case 0x10129: case 
0x109E2: case 0x1EC93: return (double) 8000.0; case 0x10132: case 0x109EB: case 0x1EC9C: return (double) 80000.0; case 0x109F4: return (double) 800000.0; case 0x0039: case 0x0669: case 0x06F9: case 0x07C9: case 0x096F: case 0x09EF: case 0x0A6F: case 0x0AEF: case 0x0B6F: case 0x0BEF: case 0x0C6F: case 0x0CEF: case 0x0D6F: case 0x0DEF: case 0x0E59: case 0x0ED9: case 0x0F29: case 0x1049: case 0x1099: case 0x1371: case 0x17E9: case 0x17F9: case 0x1819: case 0x194F: case 0x19D9: case 0x1A89: case 0x1A99: case 0x1B59: case 0x1BB9: case 0x1C49: case 0x1C59: case 0x2079: case 0x2089: case 0x2168: case 0x2178: case 0x2468: case 0x247C: case 0x2490: case 0x24FD: case 0x277E: case 0x2788: case 0x2792: case 0x3029: case 0x3228: case 0x3288: case 0x4E5D: case 0x5EFE: case 0x7396: case 0xA629: case 0xA6EE: case 0xA8D9: case 0xA909: case 0xA9D9: case 0xA9F9: case 0xAA59: case 0xABF9: case 0xFF19: case 0x1010F: case 0x102E9: case 0x104A9: case 0x109C8: case 0x10D39: case 0x10E68: case 0x1105A: case 0x1106F: case 0x110F9: case 0x1113F: case 0x111D9: case 0x111E9: case 0x112F9: case 0x11459: case 0x114D9: case 0x11659: case 0x116C9: case 0x11739: case 0x118E9: case 0x11C59: case 0x11C62: case 0x11D59: case 0x11DA9: case 0x12407: case 0x1240E: case 0x12414: case 0x1241D: case 0x1242B: case 0x12446: case 0x12447: case 0x12448: case 0x12449: case 0x1246E: case 0x16A69: case 0x16B59: case 0x16E89: case 0x1D2E9: case 0x1D368: case 0x1D7D7: case 0x1D7E1: case 0x1D7EB: case 0x1D7F5: case 0x1D7FF: case 0x1E8CF: case 0x1E959: case 0x1EC79: case 0x1ECAB: case 0x1F10A: case 0x2F890: return (double) 9.0; case 0x109FE: return (double) 9.0/12.0; case 0x0F2E: return (double) 9.0/2.0; case 0x137A: case 0x10118: case 0x102F2: case 0x10341: case 0x10E71: case 0x11063: case 0x111F2: case 0x118F2: case 0x11C6B: case 0x1D371: case 0x1EC82: return (double) 90.0; case 0x10121: case 0x102FB: case 0x1034A: case 0x109DA: case 0x10E7A: case 0x1EC8B: return (double) 900.0; case 0x1012A: case 0x109E3: case 0x1EC94: return (double) 9000.0; case 0x10133: case 0x109EC: case 0x1EC9D: return (double) 90000.0; case 0x109F5: return (double) 900000.0; } return -1.0; } /* Returns 1 for Unicode characters having the bidirectional * type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise. */ int numba_PyUnicode_IsWhitespace(const Py_UCS4 ch) { switch (ch) { case 0x0009: case 0x000A: case 0x000B: case 0x000C: case 0x000D: case 0x001C: case 0x001D: case 0x001E: case 0x001F: case 0x0020: case 0x0085: case 0x00A0: case 0x1680: case 0x2000: case 0x2001: case 0x2002: case 0x2003: case 0x2004: case 0x2005: case 0x2006: case 0x2007: case 0x2008: case 0x2009: case 0x200A: case 0x2028: case 0x2029: case 0x202F: case 0x205F: case 0x3000: return 1; } return 0; } /* Returns 1 for Unicode characters having the line break * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional * type 'B', 0 otherwise. */ int numba_PyUnicode_IsLinebreak(const Py_UCS4 ch) { switch (ch) { case 0x000A: case 0x000B: case 0x000C: case 0x000D: case 0x001C: case 0x001D: case 0x001E: case 0x0085: case 0x2028: case 0x2029: return 1; } return 0; } #endif /* _UNICODETYPE_DB_H */ numba-0.55.1/numba/_version.py000664 000000 000000 00000021715 14174536160 016201 0ustar00rootroot000000 000000 # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). 
Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. # This file is released into the public domain. Generated by # versioneer-0.14 (https://github.com/warner/python-versioneer) import errno import os import re import subprocess import sys # these strings will be replaced by git during git-archive git_refnames = " (tag: 0.55.1, release0.55)" git_full = "76720bf88350c70b99ab3d4272d01bd9f14eeace" # these strings are filled in when 'setup.py versioneer' creates _version.py tag_prefix = "" parentdir_prefix = "numba-" versionfile_source = "numba/_version.py" def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): assert isinstance(commands, list) p = None for c in commands: try: # remember shell=False, so use git.cmd on windows, not just git p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None)) break except EnvironmentError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue if verbose: print("unable to run %s" % args[0]) print(e) return None else: if verbose: print("unable to find command, tried %s" % (commands,)) return None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: stdout = stdout.decode() if p.returncode != 0: if verbose: print("unable to run %s (error)" % args[0]) return None return stdout def versions_from_parentdir(parentdir_prefix, root, verbose=False): # Source tarballs conventionally unpack into a directory that includes # both the project name and a version string. dirname = os.path.basename(root) if not dirname.startswith(parentdir_prefix): if verbose: print("guessing rootdir is '%s', but '%s' doesn't start with " "prefix '%s'" % (root, dirname, parentdir_prefix)) return None return {"version": dirname[len(parentdir_prefix):], "full": ""} def git_get_keywords(versionfile_abs): # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. keywords = {} try: f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) f.close() except EnvironmentError: pass return keywords def git_versions_from_keywords(keywords, tag_prefix, verbose=False): if not keywords: return {} # keyword-finding function failed to find keywords refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") return {} # unexpanded, so not in an unpacked git-archive tarball refs = set([r.strip() for r in refnames.strip("()").split(",")]) # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. 
By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = set([r for r in refs if re.search(r'\d', r)]) if verbose: print("discarding '%s', no digits" % ",".join(refs-tags)) if verbose: print("likely tags: %s" % ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] if verbose: print("picking %s" % r) return {"version": r, "full": keywords["full"].strip()} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full": keywords["full"].strip()} def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False): # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens. # dirty dirty = git_describe.endswith("-dirty") if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] dirty_suffix = ".dirty" if dirty else "" # now we have TAG-NUM-gHEX or HEX if "-" not in git_describe: # just HEX return "0+untagged.g"+git_describe+dirty_suffix, dirty # just TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? return "0+unparseable"+dirty_suffix, dirty # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) return None, dirty tag = full_tag[len(tag_prefix):] # distance: number of commits since tag distance = int(mo.group(2)) # commit: short hex revision ID commit = mo.group(3) # now build up version string, with post-release "local version # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . Note that if you get a # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you # can always test version.endswith(".dirty"). version = tag if distance or dirty: version += "+%d.g%s" % (distance, commit) + dirty_suffix return version, dirty def git_versions_from_vcs(tag_prefix, root, verbose=False): # this runs 'git' from the root of the source tree. This only gets called # if the git-archive 'subst' keywords were *not* expanded, and # _version.py hasn't already been rewritten with a short version string, # meaning we're inside a checked out source tree. if not os.path.exists(os.path.join(root, ".git")): if verbose: print("no .git in %s" % root) return {} # get_versions() will try next method GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] # if there is a tag, this yields TAG-NUM-gHEX[-dirty] # if there are no tags, this yields HEX[-dirty] (no NUM) stdout = run_command(GITS, ["describe", "--tags", "--dirty", "--always", "--long"], cwd=root) # --long was added in git-1.5.5 if stdout is None: return {} # try next method version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose) # build "full", which is FULLHEX[.dirty] stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) if stdout is None: return {} full = stdout.strip() if dirty: full += ".dirty" return {"version": version, "full": full} def get_versions(default={"version": "0+unknown", "full": ""}, verbose=False): # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which # case we can only use expanded keywords. 
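# As an editor's illustration (the dirty/untagged sample strings below are
# hypothetical, not taken from this repository), git_parse_vcs_describe()
# above turns `git describe` output into a (version, dirty) pair like so,
# given the empty tag_prefix used here:
#     "0.55.1-0-g76720bf"        -> ("0.55.1", False)
#     "0.55.0-12-gabc1234-dirty" -> ("0.55.0+12.gabc1234.dirty", True)
#     "abc1234"                  -> ("0+untagged.gabc1234", False)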
keywords = {"refnames": git_refnames, "full": git_full} ver = git_versions_from_keywords(keywords, tag_prefix, verbose) if ver: return ver try: root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. for i in versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return default return (git_versions_from_vcs(tag_prefix, root, verbose) or versions_from_parentdir(parentdir_prefix, root, verbose) or default) numba-0.55.1/numba/capsulethunk.h000664 000000 000000 00000004771 14174536160 016665 0ustar00rootroot000000 000000 /** This is a modified version of capsulethunk.h for use in llvmpy **/ #ifndef __CAPSULETHUNK_H #define __CAPSULETHUNK_H #if ( (PY_VERSION_HEX < 0x02070000) \ || ((PY_VERSION_HEX >= 0x03000000) \ && (PY_VERSION_HEX < 0x03010000)) ) //#define Assert(X) do_assert(!!(X), #X, __FILE__, __LINE__) #define Assert(X) static void do_assert(int cond, const char * msg, const char *file, unsigned line){ if (!cond) { fprintf(stderr, "Assertion failed %s:%d\n%s\n", file, line, msg); exit(1); } } typedef void (*PyCapsule_Destructor)(PyObject *); struct FakePyCapsule_Desc { const char *name; void *context; PyCapsule_Destructor dtor; PyObject *parent; FakePyCapsule_Desc() : name(0), context(0), dtor(0) {} }; static FakePyCapsule_Desc* get_pycobj_desc(PyObject *p){ void *desc = ((PyCObject*)p)->desc; Assert(desc && "No desc in PyCObject"); return static_cast(desc); } static void pycobject_pycapsule_dtor(void *p, void *desc){ Assert(desc); Assert(p); FakePyCapsule_Desc *fpc_desc = static_cast(desc); Assert(fpc_desc->parent); Assert(PyCObject_Check(fpc_desc->parent)); fpc_desc->dtor(static_cast(fpc_desc->parent)); delete fpc_desc; } static PyObject* PyCapsule_New(void* ptr, const char *name, PyCapsule_Destructor dtor) { FakePyCapsule_Desc *desc = new FakePyCapsule_Desc; desc->name = name; desc->context = NULL; desc->dtor = dtor; PyObject *p = PyCObject_FromVoidPtrAndDesc(ptr, desc, pycobject_pycapsule_dtor); desc->parent = p; return p; } static int PyCapsule_CheckExact(PyObject *p) { return PyCObject_Check(p); } static void* PyCapsule_GetPointer(PyObject *p, const char *name) { Assert(PyCapsule_CheckExact(p)); if (strcmp(get_pycobj_desc(p)->name, name) != 0) { PyErr_SetString(PyExc_ValueError, "Invalid PyCapsule object"); } return PyCObject_AsVoidPtr(p); } static void* PyCapsule_GetContext(PyObject *p) { Assert(p); Assert(PyCapsule_CheckExact(p)); return get_pycobj_desc(p)->context; } static int PyCapsule_SetContext(PyObject *p, void *context) { Assert(PyCapsule_CheckExact(p)); get_pycobj_desc(p)->context = context; return 0; } static const char * PyCapsule_GetName(PyObject *p) { // Assert(PyCapsule_CheckExact(p)); return get_pycobj_desc(p)->name; } #endif /* #if PY_VERSION_HEX < 0x02070000 */ #endif /* __CAPSULETHUNK_H */ numba-0.55.1/numba/cext/000775 000000 000000 00000000000 14174536160 014740 5ustar00rootroot000000 000000 numba-0.55.1/numba/cext/__init__.py000664 000000 000000 00000000773 14174536160 017060 0ustar00rootroot000000 000000 """ Utilities for getting information about Numba C extensions """ import os def get_extension_libs(): """Return the .c files in the `numba.cext` directory. """ libs = [] base = get_path() for fn in os.listdir(base): if fn.endswith('.c'): fn = os.path.join(base, fn) libs.append(fn) return libs def get_path(): """Returns the path to the directory for `numba.cext`. 
""" return os.path.abspath(os.path.join(os.path.dirname(__file__))) numba-0.55.1/numba/cext/cext.h000664 000000 000000 00000001012 14174536160 016046 0ustar00rootroot000000 000000 #ifndef NUMBA_EXTENSION_HELPER_H_ #define NUMBA_EXTENSION_HELPER_H_ #include "Python.h" #include "../_numba_common.h" /* Define all runtime-required symbols in this C module, but do not export them outside the shared library if possible. */ #define NUMBA_EXPORT_FUNC(_rettype) VISIBILITY_HIDDEN _rettype #define NUMBA_EXPORT_DATA(_vartype) VISIBILITY_HIDDEN _vartype NUMBA_EXPORT_FUNC(Py_ssize_t) aligned_size(Py_ssize_t sz); #include "dictobject.h" #include "listobject.h" #endif // end NUMBA_EXTENSION_HELPER_H_ numba-0.55.1/numba/cext/dictobject.c000664 000000 000000 00000106305 14174536160 017223 0ustar00rootroot000000 000000 /* The following is adapted from CPython3.7. The exact commit is: - https://github.com/python/cpython/blob/44467e8ea4cea390b0718702291b4cfe8ddd67ed/Objects/dictobject.c */ /* Dictionary object implementation using a hash table */ /* The distribution includes a separate file, Objects/dictnotes.txt, describing explorations into dictionary design and optimization. It covers typical dictionary use patterns, the parameters for tuning dictionaries, and several ideas for possible optimizations. */ /* PyDictKeysObject This implements the dictionary's hashtable. As of Python 3.6, this is compact and ordered. Basic idea is described here: * https://mail.python.org/pipermail/python-dev/2012-December/123028.html * https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html layout: +---------------+ | dk_refcnt | | dk_size | | dk_lookup | | dk_usable | | dk_nentries | +---------------+ | dk_indices | | | +---------------+ | dk_entries | | | +---------------+ dk_indices is actual hashtable. It holds index in entries, or DKIX_EMPTY(-1) or DKIX_DUMMY(-2). Size of indices is dk_size. Type of each index in indices is vary on dk_size: * int8 for dk_size <= 128 * int16 for 256 <= dk_size <= 2**15 * int32 for 2**16 <= dk_size <= 2**31 * int64 for 2**32 <= dk_size dk_entries is array of PyDictKeyEntry. It's size is USABLE_FRACTION(dk_size). DK_ENTRIES(dk) can be used to get pointer to entries. NOTE: Since negative value is used for DKIX_EMPTY and DKIX_DUMMY, type of dk_indices entry is signed integer and int16 is used for table which dk_size == 256. */ /* The DictObject can be in one of two forms. Either: A combined table: ma_values == NULL, dk_refcnt == 1. Values are stored in the me_value field of the PyDictKeysObject. Or: (Numba dev notes: split table logic is removed) A split table: ma_values != NULL, dk_refcnt >= 1 Values are stored in the ma_values array. Only string (unicode) keys are allowed. All dicts sharing same key must have same insertion order. There are four kinds of slots in the table (slot is index, and DK_ENTRIES(keys)[index] if index >= 0): 1. Unused. index == DKIX_EMPTY Does not hold an active (key, value) pair now and never did. Unused can transition to Active upon key insertion. This is each slot's initial state. 2. Active. index >= 0, me_key != NULL and me_value != NULL Holds an active (key, value) pair. Active can transition to Dummy or Pending upon key deletion (for combined and split tables respectively). This is the only case in which me_value != NULL. 3. Dummy. index == DKIX_DUMMY (combined only) Previously held an active (key, value) pair, but that was deleted and an active pair has not yet overwritten the slot. Dummy can transition to Active upon key insertion. 
Dummy slots cannot be made Unused again else the probe sequence in case of collision would have no way to know they were once active. 4. Pending. index >= 0, key != NULL, and value == NULL (split only) Not yet inserted in split-table. */ /* Preserving insertion order It's simple for combined table. Since dk_entries is mostly append only, we can get insertion order by just iterating dk_entries. One exception is .popitem(). It removes last item in dk_entries and decrement dk_nentries to achieve amortized O(1). Since there are DKIX_DUMMY remains in dk_indices, we can't increment dk_usable even though dk_nentries is decremented. In split table, inserting into pending entry is allowed only for dk_entries[ix] where ix == mp->ma_used. Inserting into other index and deleting item cause converting the dict to the combined table. */ /* D_MINSIZE (adapted from PyDict_MINSIZE) * is the starting size for any new dict. * 8 allows dicts with no more than 5 active entries; experiments suggested * this suffices for the majority of dicts (consisting mostly of usually-small * dicts created to pass keyword arguments). * Making this 8, rather than 4 reduces the number of resizes for most * dictionaries, without any significant extra memory use. */ #define D_MINSIZE 8 #include "dictobject.h" #if defined(_MSC_VER) # if _MSC_VER <= 1900 /* Visual Studio 2014 */ typedef __int8 int8_t; typedef __int16 int16_t; typedef __int32 int32_t; typedef __int64 int64_t; # endif /* Use _alloca() to dynamically allocate on the stack on MSVC */ #define STACK_ALLOC(Type, Name, Size) Type * const Name = _alloca(Size); #else #define STACK_ALLOC(Type, Name, Size) Type Name[Size]; #endif /*[clinic input] class dict "PyDictObject *" "&PyDict_Type" [clinic start generated code]*/ /*[clinic end generated code: output=da39a3ee5e6b4b0d input=f157a5a0ce9589d6]*/ /* To ensure the lookup algorithm terminates, there must be at least one Unused slot (NULL key) in the table. To avoid slowing down lookups on a near-full table, we resize the table when it's USABLE_FRACTION (currently two-thirds) full. */ #define PERTURB_SHIFT 5 /* Major subtleties ahead: Most hash schemes depend on having a "good" hash function, in the sense of simulating randomness. Python doesn't: its most important hash functions (for ints) are very regular in common cases: >>>[hash(i) for i in range(4)] [0, 1, 2, 3] This isn't necessarily bad! To the contrary, in a table of size 2**i, taking the low-order i bits as the initial table index is extremely fast, and there are no collisions at all for dicts indexed by a contiguous range of ints. So this gives better-than-random behavior in common cases, and that's very desirable. OTOH, when collisions occur, the tendency to fill contiguous slices of the hash table makes a good collision resolution strategy crucial. Taking only the last i bits of the hash code is also vulnerable: for example, consider the list [i << 16 for i in range(20000)] as a set of keys. Since ints are their own hash codes, and this fits in a dict of size 2**15, the last 15 bits of every hash code are all 0: they *all* map to the same table index. But catering to unusual cases should not slow the usual ones, so we just take the last i bits anyway. It's up to collision resolution to do the rest. If we *usually* find the key we're looking for on the first try (and, it turns out, we usually do -- the table load factor is kept under 2/3, so the odds are solidly in our favor), then it makes best sense to keep the initial index computation dirt cheap. 
The first half of collision resolution is to visit table indices via this recurrence: j = ((5*j) + 1) mod 2**i For any initial j in range(2**i), repeating that 2**i times generates each int in range(2**i) exactly once (see any text on random-number generation for proof). By itself, this doesn't help much: like linear probing (setting j += 1, or j -= 1, on each loop trip), it scans the table entries in a fixed order. This would be bad, except that's not the only thing we do, and it's actually *good* in the common cases where hash keys are consecutive. In an example that's really too small to make this entirely clear, for a table of size 2**3 the order of indices is: 0 -> 1 -> 6 -> 7 -> 4 -> 5 -> 2 -> 3 -> 0 [and here it's repeating] If two things come in at index 5, the first place we look after is index 2, not 6, so if another comes in at index 6 the collision at 5 didn't hurt it. Linear probing is deadly in this case because there the fixed probe order is the *same* as the order consecutive keys are likely to arrive. But it's extremely unlikely hash codes will follow a 5*j+1 recurrence by accident, and certain that consecutive hash codes do not. The other half of the strategy is to get the other bits of the hash code into play. This is done by initializing a (unsigned) vrbl "perturb" to the full hash code, and changing the recurrence to: perturb >>= PERTURB_SHIFT; j = (5*j) + 1 + perturb; use j % 2**i as the next table index; Now the probe sequence depends (eventually) on every bit in the hash code, and the pseudo-scrambling property of recurring on 5*j+1 is more valuable, because it quickly magnifies small differences in the bits that didn't affect the initial index. Note that because perturb is unsigned, if the recurrence is executed often enough perturb eventually becomes and remains 0. At that point (very rarely reached) the recurrence is on (just) 5*j+1 again, and that's certain to find an empty slot eventually (since it generates every int in range(2**i), and we make sure there's always at least one empty slot). Selecting a good value for PERTURB_SHIFT is a balancing act. You want it small so that the high bits of the hash code continue to affect the probe sequence across iterations; but you want it large so that in really bad cases the high-order hash bits have an effect on early iterations. 5 was "the best" in minimizing total collisions across experiments Tim Peters ran (on both normal and pathological cases), but 4 and 6 weren't significantly worse. Historical: Reimer Behrends contributed the idea of using a polynomial-based approach, using repeated multiplication by x in GF(2**n) where an irreducible polynomial for each table size was chosen such that x was a primitive root. Christian Tismer later extended that to use division by x instead, as an efficient way to get the high bits of the hash code into play. This scheme also gave excellent collision statistics, but was more expensive: two if-tests were required inside the loop; computing "the next" index took about the same number of operations but without as much potential parallelism (e.g., computing 5*j can go on at the same time as computing 1+perturb in the above, and then shifting perturb can be done while the table index is being masked); and the PyDictObject struct required a member to hold the table's polynomial. In Tim's experiments the current scheme ran faster, produced equally good collision statistics, needed less code & used less memory. 
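 * To make the above concrete, here is the probe loop in outline (an
 * editor's sketch, not code from this file; "slot_is_free_or_match" stands
 * in for the real index lookup and key comparison):
 *
 *     size_t perturb = (size_t)hash;
 *     size_t mask = (size_t)dk->size - 1;    // size is a power of two
 *     size_t j = (size_t)hash & mask;
 *     while (!slot_is_free_or_match(dk, j)) {
 *         perturb >>= PERTURB_SHIFT;
 *         j = (5*j + 1 + perturb) & mask;    // the recurrence described above
 *     }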
*/ #define DKIX_EMPTY (-1) #define DKIX_DUMMY (-2) /* Used internally */ #define DKIX_ERROR (-3) typedef enum { OK = 0, OK_REPLACED = 1, ERR_NO_MEMORY = -1, ERR_DICT_MUTATED = -2, ERR_ITER_EXHAUSTED = -3, ERR_DICT_EMPTY = -4, ERR_CMP_FAILED = -5, } Status; #ifndef NDEBUG static int mem_cmp_zeros(void *obj, size_t n){ int diff = 0; char *mem = obj; char *it; for (it = mem; it < mem + n; ++it) { if (*it != 0) diff += 1; } return diff; } #endif #define D_MASK(dk) ((dk)->size-1) #define D_GROWTH_RATE(d) ((d)->used*3) static int ix_size(Py_ssize_t size) { if ( size < 0xff ) return 1; if ( size < 0xffff ) return 2; if ( size < 0xffffffff ) return 4; return sizeof(int64_t); } #ifndef NDEBUG /* NOTE: This function is only used in assert()s */ /* Align pointer *ptr* to pointer size */ static void* aligned_pointer(void *ptr) { return (void*)aligned_size((size_t)ptr); } #endif /* lookup indices. returns DKIX_EMPTY, DKIX_DUMMY, or ix >=0 */ static Py_ssize_t get_index(NB_DictKeys *dk, Py_ssize_t i) { Py_ssize_t s = dk->size; Py_ssize_t ix; if (s <= 0xff) { int8_t *indices = (int8_t*)(dk->indices); assert (i < dk->size); ix = indices[i]; } else if (s <= 0xffff) { int16_t *indices = (int16_t*)(dk->indices); ix = indices[i]; } #if SIZEOF_VOID_P > 4 else if (s > 0xffffffff) { int64_t *indices = (int64_t*)(dk->indices); ix = indices[i]; } #endif else { int32_t *indices = (int32_t*)(dk->indices); ix = indices[i]; } assert(ix >= DKIX_DUMMY); return ix; } /* write to indices. */ static void set_index(NB_DictKeys *dk, Py_ssize_t i, Py_ssize_t ix) { Py_ssize_t s = dk->size; assert(ix >= DKIX_DUMMY); if (s <= 0xff) { int8_t *indices = (int8_t*)(dk->indices); assert(ix <= 0x7f); indices[i] = (char)ix; } else if (s <= 0xffff) { int16_t *indices = (int16_t*)(dk->indices); assert(ix <= 0x7fff); indices[i] = (int16_t)ix; } #if SIZEOF_VOID_P > 4 else if (s > 0xffffffff) { int64_t *indices = (int64_t*)(dk->indices); indices[i] = ix; } #endif else { int32_t *indices = (int32_t*)(dk->indices); assert(ix <= 0x7fffffff); indices[i] = (int32_t)ix; } } /* USABLE_FRACTION is the maximum dictionary load. * Increasing this ratio makes dictionaries more dense resulting in more * collisions. Decreasing it improves sparseness at the expense of spreading * indices over more cache lines and at the cost of total memory consumed. * * USABLE_FRACTION must obey the following: * (0 < USABLE_FRACTION(n) < n) for all n >= 2 * * USABLE_FRACTION should be quick to calculate. * Fractions around 1/2 to 2/3 seem to work well in practice. */ #define USABLE_FRACTION(n) (((n) << 1)/3) /* Alternative fraction that is otherwise close enough to 2n/3 to make * little difference. 8 * 2/3 == 8 * 5/8 == 5. 16 * 2/3 == 16 * 5/8 == 10. * 32 * 2/3 = 21, 32 * 5/8 = 20. * Its advantage is that it is faster to compute on machines with slow division. * #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3)) */ /* GROWTH_RATE. Growth rate upon hitting maximum load. * Currently set to used*3. * This means that dicts double in size when growing without deletions, * but have more head room when the number of deletions is on a par with the * number of insertions. See also bpo-17563 and bpo-33205. * * GROWTH_RATE was set to used*4 up to version 3.2. * GROWTH_RATE was set to used*2 in version 3.3.0 * GROWTH_RATE was set to used*2 + capacity/2 in 3.4.0-3.6.0. 
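* As a worked example of the variant used in this port (D_GROWTH_RATE above,
* i.e. used*3): a dict holding 5 items that has exhausted its usable slots
* requests a table of at least 5*3 == 15 slots, and numba_dict_resize()
* rounds that up to the next power of two, 16, so a dict that only ever
* grows keeps doubling in size.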
*/ #define GROWTH_RATE(d) ((d)->ma_used*3) static NB_DictEntry* get_entry(NB_DictKeys *dk, Py_ssize_t idx) { Py_ssize_t offset; char *ptr; assert (idx < dk->size); offset = idx * dk->entry_size; ptr = dk->indices + dk->entry_offset + offset; return (NB_DictEntry*)ptr; } static void zero_key(NB_DictKeys *dk, char *data){ memset(data, 0, dk->key_size); } static void zero_val(NB_DictKeys *dk, char *data){ memset(data, 0, dk->val_size); } static void copy_key(NB_DictKeys *dk, char *dst, const char *src){ memcpy(dst, src, dk->key_size); } static void copy_val(NB_DictKeys *dk, char *dst, const char *src){ memcpy(dst, src, dk->val_size); } /* Returns -1 for error; 0 for not equal; 1 for equal */ static int key_equal(NB_DictKeys *dk, const char *lhs, const char *rhs) { if ( dk->methods.key_equal ) { return dk->methods.key_equal(lhs, rhs); } else { return memcmp(lhs, rhs, dk->key_size) == 0; } } static char * entry_get_key(NB_DictKeys *dk, NB_DictEntry* entry) { char * out = entry->keyvalue; assert (out == aligned_pointer(out)); return out; } static char * entry_get_val(NB_DictKeys *dk, NB_DictEntry* entry) { char * out = entry_get_key(dk, entry) + aligned_size(dk->key_size); assert (out == aligned_pointer(out)); return out; } static void dk_incref_key(NB_DictKeys *dk, const char *key) { if ( dk->methods.key_incref ) { dk->methods.key_incref(key); } } static void dk_decref_key(NB_DictKeys *dk, const char *key) { if ( dk->methods.key_decref ) { dk->methods.key_decref(key); } } static void dk_incref_val(NB_DictKeys *dk, const char *val) { if ( dk->methods.value_incref ) { dk->methods.value_incref(val); } } static void dk_decref_val(NB_DictKeys *dk, const char *val) { if ( dk->methods.value_decref ) { dk->methods.value_decref(val); } } void numba_dictkeys_free(NB_DictKeys *dk) { /* Clear all references from the entries */ Py_ssize_t i; NB_DictEntry *ep; for (i = 0; i < dk->nentries; i++) { ep = get_entry(dk, i); if (ep->hash != DKIX_EMPTY) { dk_decref_key(dk, entry_get_key(dk, ep)); dk_decref_val(dk, entry_get_val(dk, ep)); } } /* Deallocate */ free(dk); } void numba_dict_free(NB_Dict *d) { numba_dictkeys_free(d->keys); free(d); } Py_ssize_t numba_dict_length(NB_Dict *d) { return d->used; } /* Allocate new dictionary keys Adapted from CPython's new_keys_object(). 
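A sizing sketch with illustrative numbers (assuming a 64-bit build, where aligned_size() rounds up to a multiple of the 8-byte pointer size): numba_dictkeys_new(&dk, 8, 4, 8) yields index_size = 1 (8 < 0xff), entry_offset = aligned_size(1 * 8) = 8, entry_size = aligned_size(sizeof(NB_DictEntry) + aligned_size(4) + aligned_size(8)) = aligned_size(8 + 8 + 8) = 24 and usable = USABLE_FRACTION(8) = 5, so the flexible tail of the allocation holds 8 index bytes followed by 5 * 24 = 120 bytes of entries.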
*/ int numba_dictkeys_new(NB_DictKeys **out, Py_ssize_t size, Py_ssize_t key_size, Py_ssize_t val_size) { Py_ssize_t usable = USABLE_FRACTION(size); Py_ssize_t index_size = ix_size(size); Py_ssize_t entry_size = aligned_size(sizeof(NB_DictEntry) + aligned_size(key_size) + aligned_size(val_size)); Py_ssize_t entry_offset = aligned_size(index_size * size); Py_ssize_t alloc_size = sizeof(NB_DictKeys) + entry_offset + entry_size * usable; NB_DictKeys *dk = malloc(aligned_size(alloc_size)); if (!dk) return ERR_NO_MEMORY; assert ( size >= D_MINSIZE ); dk->size = size; dk->usable = usable; dk->nentries = 0; dk->key_size = key_size; dk->val_size = val_size; dk->entry_offset = entry_offset; dk->entry_size = entry_size; assert (aligned_pointer(dk->indices) == dk->indices ); /* Ensure that the method table is all nulls */ memset(&dk->methods, 0x00, sizeof(type_based_methods_table)); /* Ensure hash is (-1) for empty entry */ memset(dk->indices, 0xff, entry_offset + entry_size * usable); *out = dk; return OK; } /* Allocate new dictionary */ int numba_dict_new(NB_Dict **out, Py_ssize_t size, Py_ssize_t key_size, Py_ssize_t val_size) { NB_DictKeys* dk; NB_Dict *d; int status = numba_dictkeys_new(&dk, size, key_size, val_size); if (status != OK) return status; d = malloc(sizeof(NB_Dict)); if (!d) { numba_dictkeys_free(dk); return ERR_NO_MEMORY; } d->used = 0; d->keys = dk; *out = d; return OK; } /* Adapted from CPython lookdict_index(). Search the hash table for the slot whose index entry equals *index* (the offset into the entry table). */ static Py_ssize_t lookdict_index(NB_DictKeys *dk, Py_hash_t hash, Py_ssize_t index) { size_t mask = D_MASK(dk); size_t perturb = (size_t)hash; size_t i = (size_t)hash & mask; for (;;) { Py_ssize_t ix = get_index(dk, i); if (ix == index) { return i; } if (ix == DKIX_EMPTY) { return DKIX_EMPTY; } perturb >>= PERTURB_SHIFT; i = mask & (i*5 + perturb + 1); } assert(0 && "unreachable"); } /* Adapted from the CPython3.7 lookdict(). The basic lookup function used by all operations. This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4. Open addressing is preferred over chaining since the link overhead for chaining would be substantial (100% with typical malloc overhead). The initial probe index is computed as hash mod the table size. Subsequent probe indices are computed as explained earlier. All arithmetic on hash should ignore overflow. The details in this version are due to Tim Peters, building on many past contributions by Reimer Behrends, Jyrki Alakuijala, Vladimir Marangozov and Christian Tismer. lookdict() is general-purpose, and may return DKIX_ERROR if (and only if) a comparison raises an exception. In CPython, lookdict_unicode() is specialized to string keys, comparison of which can never raise an exception; that function can never return DKIX_ERROR when the key is a string. Otherwise, it falls back to lookdict(). lookdict_unicode_nodummy is further specialized for string keys that cannot be the <dummy> value. For both, when the key isn't found a DKIX_EMPTY is returned.
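A minimal call sketch (illustrative; the caller supplies the precomputed hash and a result buffer of val_size bytes, 8 here): char oldval[8]; Py_ssize_t ix = numba_dict_lookup(d, key_bytes, hash, oldval); ix == DKIX_ERROR means the key comparison failed, ix == DKIX_EMPTY means the key was not found (oldval is zeroed), and ix >= 0 means found, with oldval holding a copy of the stored value.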
*/ Py_ssize_t numba_dict_lookup(NB_Dict *d, const char *key_bytes, Py_hash_t hash, char *oldval_bytes) { NB_DictKeys *dk = d->keys; size_t mask = D_MASK(dk); size_t perturb = hash; size_t i = (size_t)hash & mask; for (;;) { Py_ssize_t ix = get_index(dk, i); if (ix == DKIX_EMPTY) { zero_val(dk, oldval_bytes); return ix; } if (ix >= 0) { NB_DictEntry *ep = get_entry(dk, ix); const char *startkey = NULL; if (ep->hash == hash) { int cmp; startkey = entry_get_key(dk, ep); cmp = key_equal(dk, startkey, key_bytes); if (cmp < 0) { // error'ed in comparison memset(oldval_bytes, 0, dk->val_size); return DKIX_ERROR; } if (cmp > 0) { // key is equal; retrieve the value. copy_val(dk, oldval_bytes, entry_get_val(dk, ep)); return ix; } } } perturb >>= PERTURB_SHIFT; i = (i*5 + perturb + 1) & mask; } assert(0 && "unreachable"); } /* Internal function to find slot for an item from its hash when it is known that the key is not present in the dict. The dict must be combined. */ static Py_ssize_t find_empty_slot(NB_DictKeys *dk, Py_hash_t hash){ size_t mask; size_t i; Py_ssize_t ix; size_t perturb; assert(dk != NULL); mask = D_MASK(dk); i = hash & mask; ix = get_index(dk, i); for (perturb = hash; ix >= 0;) { perturb >>= PERTURB_SHIFT; i = (i*5 + perturb + 1) & mask; ix = get_index(dk, i); } return i; } static int insertion_resize(NB_Dict *d) { return numba_dict_resize(d, D_GROWTH_RATE(d)); } int numba_dict_insert( NB_Dict *d, const char *key_bytes, Py_hash_t hash, const char *val_bytes, char *oldval_bytes ) { NB_DictKeys *dk = d->keys; Py_ssize_t ix = numba_dict_lookup(d, key_bytes, hash, oldval_bytes); if (ix == DKIX_ERROR) { // exception in key comparison in lookup. return ERR_CMP_FAILED; } if (ix == DKIX_EMPTY) { /* Insert into new slot */ Py_ssize_t hashpos; NB_DictEntry *ep; if (dk->usable <= 0) { /* Need to resize */ if (insertion_resize(d) != OK) return ERR_NO_MEMORY; else dk = d->keys; // reload } hashpos = find_empty_slot(dk, hash); ep = get_entry(dk, dk->nentries); set_index(dk, hashpos, dk->nentries); copy_key(dk, entry_get_key(dk, ep), key_bytes); assert ( hash != -1 ); ep->hash = hash; copy_val(dk, entry_get_val(dk, ep), val_bytes); /* incref */ dk_incref_key(dk, key_bytes); dk_incref_val(dk, val_bytes); d->used += 1; dk->usable -= 1; dk->nentries += 1; assert (dk->usable >= 0); return OK; } else { /* Replace existing value in the slot at ix */ /* decref old value */ dk_decref_val(dk, oldval_bytes); // Replace the previous value copy_val(dk, entry_get_val(dk, get_entry(dk, ix)), val_bytes); /* incref */ dk_incref_val(dk, val_bytes); return OK_REPLACED; } } /* Adapted from build_indices(). Internal routine used by dictresize() to build a hashtable of entries. */ void build_indices(NB_DictKeys *keys, Py_ssize_t n) { size_t mask = (size_t)D_MASK(keys); Py_ssize_t ix; for (ix = 0; ix != n; ix++) { size_t perturb; Py_hash_t hash = get_entry(keys, ix)->hash; size_t i = hash & mask; for (perturb = hash; get_index(keys, i) != DKIX_EMPTY;) { perturb >>= PERTURB_SHIFT; i = mask & (i*5 + perturb + 1); } set_index(keys, i, ix); } } /* Adapted from CPython dictresize(). Restructure the table by allocating a new table and reinserting all items again. When entries have been deleted, the new table may actually be smaller than the old one. If a table is split (its keys and hashes are shared, its values are not), then the values are temporarily copied into the table, it is resized as a combined table, then the me_value slots in the old table are NULLed out. 
After resizing, a table is always combined (in CPython it can be resplit by make_keys_shared(); this port only uses combined tables). */ int numba_dict_resize(NB_Dict *d, Py_ssize_t minsize) { Py_ssize_t newsize, numentries; NB_DictKeys *oldkeys; int status; /* Find the smallest usable table size >= minsize. */ for (newsize = D_MINSIZE; newsize < minsize && newsize > 0; newsize <<= 1) ; if (newsize <= 0) { return ERR_NO_MEMORY; } oldkeys = d->keys; /* NOTE: CPython's odict checks mp->ma_keys to detect that a resize happened. * So we can't reuse oldkeys even if oldkeys->dk_size == newsize. * TODO: Try reusing oldkeys when reimplementing odict. */ /* Allocate a new table. */ status = numba_dictkeys_new( &d->keys, newsize, oldkeys->key_size, oldkeys->val_size ); if (status != OK) { d->keys = oldkeys; return status; } // New table must be large enough. assert(d->keys->usable >= d->used); // Copy method table memcpy(&d->keys->methods, &oldkeys->methods, sizeof(type_based_methods_table)); numentries = d->used; if (oldkeys->nentries == numentries) { NB_DictEntry *oldentries, *newentries; oldentries = get_entry(oldkeys, 0); newentries = get_entry(d->keys, 0); memcpy(newentries, oldentries, numentries * oldkeys->entry_size); // to avoid decref memset(oldentries, 0xff, numentries * oldkeys->entry_size); } else { Py_ssize_t i; size_t epi = 0; for (i = 0; i < numentries; i++) { /* Skip deleted entries. A deleted entry has hash == (-1) (DKIX_EMPTY), which marks it as empty. Here, we skip until a non-empty entry is encountered. */ while( get_entry(oldkeys, epi)->hash == DKIX_EMPTY ) { assert( mem_cmp_zeros(entry_get_val(oldkeys, get_entry(oldkeys, epi)), oldkeys->val_size) == 0 ); epi += 1; } memcpy( get_entry(d->keys, i), get_entry(oldkeys, epi), oldkeys->entry_size ); get_entry(oldkeys, epi)->hash = DKIX_EMPTY; // to avoid decref epi += 1; } } numba_dictkeys_free(oldkeys); build_indices(d->keys, numentries); d->keys->usable -= numentries; d->keys->nentries = numentries; return OK; } /* Adapted from CPython delitem_common */ int numba_dict_delitem(NB_Dict *d, Py_hash_t hash, Py_ssize_t ix) { Py_ssize_t hashpos; NB_DictEntry *ep; NB_DictKeys *dk = d->keys; hashpos = lookdict_index(dk, hash, ix); assert(hashpos >= 0); d->used -= 1; ep = get_entry(dk, ix); set_index(dk, hashpos, DKIX_DUMMY); /* decref */ dk_decref_key(dk, entry_get_key(dk, ep)); dk_decref_val(dk, entry_get_val(dk, ep)); /* zero the entries */ zero_key(dk, entry_get_key(dk, ep)); zero_val(dk, entry_get_val(dk, ep)); ep->hash = DKIX_EMPTY; // mark it as empty return OK; } /** * Adapted from dict_popitem * */ int numba_dict_popitem(NB_Dict *d, char *key_bytes, char *val_bytes) { Py_ssize_t i, j; char *key_ptr, *val_ptr; NB_DictEntry *ep = NULL; if (d->used == 0) { return ERR_DICT_EMPTY; } /* Pop last item */ i = d->keys->nentries - 1; while (i >= 0 && (ep = get_entry(d->keys, i))->hash == DKIX_EMPTY ) { i--; } assert(i >= 0); j = lookdict_index(d->keys, ep->hash, i); assert(j >= 0); assert(get_index(d->keys, j) == i); set_index(d->keys, j, DKIX_DUMMY); key_ptr = entry_get_key(d->keys, ep); val_ptr = entry_get_val(d->keys, ep); copy_key(d->keys, key_bytes, key_ptr); copy_val(d->keys, val_bytes, val_ptr); zero_key(d->keys, key_ptr); zero_val(d->keys, val_ptr); /* We can't do dk_usable++ since there are DKIX_DUMMY slots left in the indices */ d->keys->nentries = i; d->used--; return OK; } void numba_dict_dump(NB_Dict *d) { long long i, j, k; long long size, n; char *cp; NB_DictEntry *ep; NB_DictKeys *dk = d->keys; n = d->used; size = dk->nentries; printf("Dict dump\n"); printf(" key_size = %lld\n", (long long)d->keys->key_size); printf(" val_size = %lld\n", (long long)d->keys->val_size); for (i = 0, j = 0; i <
size; i++) { ep = get_entry(dk, i); if (ep->hash != DKIX_EMPTY) { long long hash = ep->hash; printf(" key="); for (cp=entry_get_key(dk, ep), k=0; k < d->keys->key_size; ++k, ++cp){ printf("%02x ", ((int)*cp) & 0xff); } printf(" hash=%lld value=", hash); for (cp=entry_get_val(dk, ep), k=0; k < d->keys->val_size; ++k, ++cp){ printf("%02x ", ((int)*cp) & 0xff); } printf("\n"); j++; } } printf("j = %lld; n = %lld\n", j, n); assert(j == n); } size_t numba_dict_iter_sizeof(void) { return sizeof(NB_DictIter); } void numba_dict_iter(NB_DictIter *it, NB_Dict *d) { it->parent = d; it->parent_keys = d->keys; it->size = d->used; it->pos = 0; } int numba_dict_iter_next(NB_DictIter *it, const char **key_ptr, const char **val_ptr) { /* Detect dictionary mutation during iteration */ NB_DictKeys *dk; if (it->parent->keys != it->parent_keys || it->parent->used != it->size) { return ERR_DICT_MUTATED; } dk = it->parent_keys; while ( it->pos < dk->nentries ) { NB_DictEntry *ep = get_entry(dk, it->pos++); if ( ep->hash != DKIX_EMPTY ) { *key_ptr = entry_get_key(dk, ep); *val_ptr = entry_get_val(dk, ep); return OK; } } return ERR_ITER_EXHAUSTED; } int numba_dict_insert_ez( NB_Dict *d, const char *key_bytes, Py_hash_t hash, const char *val_bytes ) { STACK_ALLOC(char, old, d->keys->val_size); return numba_dict_insert(d, key_bytes, hash, val_bytes, old); } int numba_dict_new_minsize(NB_Dict **out, Py_ssize_t key_size, Py_ssize_t val_size) { return numba_dict_new(out, D_MINSIZE, key_size, val_size); } void numba_dict_set_method_table(NB_Dict *d, type_based_methods_table *methods) { memcpy(&d->keys->methods, methods, sizeof(type_based_methods_table)); } #define CHECK(CASE) { \ if ( !(CASE) ) { \ printf("'%s' failed file %s:%d\n", #CASE, __FILE__, __LINE__); \ return 1; \ } \ } int numba_test_dict(void) { NB_Dict *d; int status; Py_ssize_t ix; Py_ssize_t usable; Py_ssize_t it_count; const char *it_key, *it_val; NB_DictIter iter; #if defined(_MSC_VER) /* So that the VS2008 compiler is happy */ char *got_key, *got_value; got_key = _alloca(4); got_value = _alloca(8); #else char got_key[4]; char got_value[8]; #endif puts("test_dict"); status = numba_dict_new(&d, D_MINSIZE, 4, 8); CHECK(status == OK); CHECK(d->keys->size == D_MINSIZE); CHECK(d->keys->key_size == 4); CHECK(d->keys->val_size == 8); CHECK(ix_size(d->keys->size) == 1); printf("aligned_size(index_size * size) = %d\n", (int)(aligned_size(ix_size(d->keys->size) * d->keys->size))); printf("d %p\n", (void*)d); printf("d->usable = %d\n", (int)d->keys->usable); usable = d->keys->usable; printf("d[0] %d\n", (int)((char*)get_entry(d->keys, 0) - (char*)d->keys)); CHECK ((char*)get_entry(d->keys, 0) - (char*)d->keys->indices == d->keys->entry_offset); printf("d[1] %d\n", (int)((char*)get_entry(d->keys, 1) - (char*)d->keys)); CHECK ((char*)get_entry(d->keys, 1) - (char*)d->keys->indices == d->keys->entry_offset + d->keys->entry_size); ix = numba_dict_lookup(d, "bef", 0xbeef, got_value); printf("ix = %d\n", (int)ix); CHECK (ix == DKIX_EMPTY); // insert 1st key status = numba_dict_insert(d, "bef", 0xbeef, "1234567", got_value); CHECK (status == OK); CHECK (d->used == 1); CHECK (d->keys->usable == usable - d->used); // insert same key status = numba_dict_insert(d, "bef", 0xbeef, "1234567", got_value); CHECK (status == OK_REPLACED); printf("got_value %s\n", got_value); CHECK (d->used == 1); CHECK (d->keys->usable == usable - d->used); // insert 2nd key status = numba_dict_insert(d, "beg", 0xbeef, "1234568", got_value); CHECK (status == OK); CHECK (d->used == 2); CHECK (d->keys->usable ==
usable - d->used); // insert 3rd key status = numba_dict_insert(d, "beh", 0xcafe, "1234569", got_value); CHECK (status == OK); CHECK (d->used == 3); CHECK (d->keys->usable == usable - d->used); // replace key "bef"'s value status = numba_dict_insert(d, "bef", 0xbeef, "7654321", got_value); CHECK (status == OK_REPLACED); CHECK (d->used == 3); CHECK (d->keys->usable == usable - d->used); // insert 4th key status = numba_dict_insert(d, "bei", 0xcafe, "0_0_0_1", got_value); CHECK (status == OK); CHECK (d->used == 4); CHECK (d->keys->usable == usable - d->used); // insert 5th key status = numba_dict_insert(d, "bej", 0xcafe, "0_0_0_2", got_value); CHECK (status == OK); CHECK (d->used == 5); CHECK (d->keys->usable == usable - d->used); // insert 6th key & trigger a resize status = numba_dict_insert(d, "bek", 0xcafe, "0_0_0_3", got_value); CHECK (status == OK); CHECK (d->used == 6); CHECK (d->keys->usable == USABLE_FRACTION(d->keys->size) - d->used); // Dump numba_dict_dump(d); // Make sure everything is still in there ix = numba_dict_lookup(d, "bef", 0xbeef, got_value); CHECK (ix >= 0); CHECK (memcmp(got_value, "7654321", d->keys->val_size) == 0); ix = numba_dict_lookup(d, "beg", 0xbeef, got_value); CHECK (ix >= 0); CHECK (memcmp(got_value, "1234568", d->keys->val_size) == 0); ix = numba_dict_lookup(d, "beh", 0xcafe, got_value); printf("ix = %d\n", (int)ix); CHECK (ix >= 0); CHECK (memcmp(got_value, "1234569", d->keys->val_size) == 0); ix = numba_dict_lookup(d, "bei", 0xcafe, got_value); CHECK (ix >= 0); CHECK (memcmp(got_value, "0_0_0_1", d->keys->val_size) == 0); ix = numba_dict_lookup(d, "bej", 0xcafe, got_value); CHECK (ix >= 0); CHECK (memcmp(got_value, "0_0_0_2", d->keys->val_size) == 0); ix = numba_dict_lookup(d, "bek", 0xcafe, got_value); CHECK (ix >= 0); CHECK (memcmp(got_value, "0_0_0_3", d->keys->val_size) == 0); // Test delete ix = numba_dict_lookup(d, "beg", 0xbeef, got_value); status = numba_dict_delitem(d, 0xbeef, ix); CHECK (status == OK); ix = numba_dict_lookup(d, "beg", 0xbeef, got_value); CHECK (ix == DKIX_EMPTY); // not found ix = numba_dict_lookup(d, "bef", 0xbeef, got_value); CHECK (ix >= 0); ix = numba_dict_lookup(d, "beh", 0xcafe, got_value); CHECK (ix >= 0); // Test popitem; it always removes the last item status = numba_dict_popitem(d, got_key, got_value); CHECK(status == OK); CHECK(memcmp("bek", got_key, d->keys->key_size) == 0); CHECK(memcmp("0_0_0_3", got_value, d->keys->val_size) == 0); status = numba_dict_popitem(d, got_key, got_value); CHECK(status == OK); CHECK(memcmp("bej", got_key, d->keys->key_size) == 0); CHECK(memcmp("0_0_0_2", got_value, d->keys->val_size) == 0); // Test iterator CHECK( d->used > 0 ); numba_dict_iter(&iter, d); it_count = 0; while ( (status = numba_dict_iter_next(&iter, &it_key, &it_val)) == OK) { it_count += 1; // valid items CHECK(it_key != NULL); CHECK(it_val != NULL); } CHECK(status == ERR_ITER_EXHAUSTED); CHECK(d->used == it_count); numba_dict_free(d); return 0; } #undef CHECK numba-0.55.1/numba/cext/dictobject.h000664 000000 000000 00000013211 14174536160 017221 0ustar00rootroot000000 000000 /* Adapted from CPython3.7 Objects/dict-common.h */ #include "Python.h" #include "../_pymodule.h" #include "cext.h" #ifndef NUMBA_DICT_COMMON_H #define NUMBA_DICT_COMMON_H typedef struct { /* Uses Py_ssize_t instead of Py_hash_t to guarantee word size alignment */ Py_ssize_t hash; char keyvalue[]; } NB_DictEntry; typedef int (*dict_key_comparator_t)(const char *lhs, const char *rhs); typedef void (*dict_refcount_op_t)(const void*); typedef struct { dict_key_comparator_t
key_equal; dict_refcount_op_t key_incref; dict_refcount_op_t key_decref; dict_refcount_op_t value_incref; dict_refcount_op_t value_decref; } type_based_methods_table; typedef struct { /* hash table size */ Py_ssize_t size; /* Usable size of the hash table. Also, size of the entries */ Py_ssize_t usable; /* hash table used entries */ Py_ssize_t nentries; /* Entry info - key_size is the sizeof key type - val_size is the sizeof value type - entry_size is key_size + val_size + alignment */ Py_ssize_t key_size, val_size, entry_size; /* Byte offset from indices to the first entry. */ Py_ssize_t entry_offset; /* Method table for type-dependent operations. */ type_based_methods_table methods; /* hash table */ char indices[]; } NB_DictKeys; typedef struct { /* num of elements in the hashtable */ Py_ssize_t used; NB_DictKeys *keys; } NB_Dict; typedef struct { /* parent dictionary */ NB_Dict *parent; /* parent keys object */ NB_DictKeys *parent_keys; /* dict size */ Py_ssize_t size; /* iterator position; indicates the next position to read */ Py_ssize_t pos; } NB_DictIter; /* A test function for the dict Returns 0 for OK; 1 for failure. */ NUMBA_EXPORT_FUNC(int) numba_test_dict(void); /* Allocate a new dict Parameters - NB_Dict **out Output for the new dictionary. - Py_ssize_t size Hashtable size. Must be power of two. - Py_ssize_t key_size Size of a key entry. - Py_ssize_t val_size Size of a value entry. */ NUMBA_EXPORT_FUNC(int) numba_dict_new(NB_Dict **out, Py_ssize_t size, Py_ssize_t key_size, Py_ssize_t val_size); /* Free a dict */ NUMBA_EXPORT_FUNC(void) numba_dict_free(NB_Dict *d); /* Returns length of a dict */ NUMBA_EXPORT_FUNC(Py_ssize_t) numba_dict_length(NB_Dict *d); /* Allocates a new dict at the minimal size See numba_dict_new(). */ NUMBA_EXPORT_FUNC(int) numba_dict_new_minsize(NB_Dict **out, Py_ssize_t key_size, Py_ssize_t val_size); /* Set the method table for type specific operations */ NUMBA_EXPORT_FUNC(void) numba_dict_set_method_table(NB_Dict *d, type_based_methods_table *methods); /* Lookup a key Parameters - NB_Dict *d The dictionary object. - const char *key_bytes The key as a byte buffer. - Py_hash_t hash The precomputed hash of the key. - char *oldval_bytes An output parameter to store the associated value if the key is found. Must point to memory of sufficient size to store the value. */ NUMBA_EXPORT_FUNC(Py_ssize_t) numba_dict_lookup(NB_Dict *d, const char *key_bytes, Py_hash_t hash, char *oldval_bytes); /* Resize the dict to at least *minsize*. */ NUMBA_EXPORT_FUNC(int) numba_dict_resize(NB_Dict *d, Py_ssize_t minsize); /* Insert to the dict Parameters - NB_Dict *d The dictionary object. - const char *key_bytes The key as a byte buffer. - Py_hash_t hash The precomputed hash of key. - const char *val_bytes The value as a byte buffer. - char *oldval_bytes An output buffer to store the replaced value. Must point to memory of sufficient size to store the value. Returns - < 0 for error - 0 for ok - 1 for ok and oldval_bytes has a copy of the replaced value. */ NUMBA_EXPORT_FUNC(int) numba_dict_insert(NB_Dict *d, const char *key_bytes, Py_hash_t hash, const char *val_bytes, char *oldval_bytes); /* Same as numba_dict_insert() but oldval_bytes is not needed */ NUMBA_EXPORT_FUNC(int) numba_dict_insert_ez(NB_Dict *d, const char *key_bytes, Py_hash_t hash, const char *val_bytes); /* Delete an entry from the dict Parameters - NB_Dict *d The dictionary - Py_hash_t hash Precomputed hash of the key to be deleted - Py_ssize_t ix Precomputed entry index of the key to be deleted. 
Usually the result of numba_dict_lookup(). */ NUMBA_EXPORT_FUNC(int) numba_dict_delitem(NB_Dict *d, Py_hash_t hash, Py_ssize_t ix); /* Remove an item from the dict Parameters - NB_Dict *d The dictionary - char *key_bytes Output. The key as a byte buffer - char *val_bytes Output. The value as a byte buffer */ NUMBA_EXPORT_FUNC(int) numba_dict_popitem(NB_Dict *d, char *key_bytes, char *val_bytes); /* Returns the size (in bytes) of a dictionary iterator */ NUMBA_EXPORT_FUNC(size_t) numba_dict_iter_sizeof(void); /* Fill a NB_DictIter for a dictionary to begin iteration Parameters - NB_DictIter *it Output. Must point to memory of size at least `numba_dict_iter_sizeof()`. - NB_Dict *d The dictionary to be iterated. */ NUMBA_EXPORT_FUNC(void) numba_dict_iter(NB_DictIter *it, NB_Dict *d); /* Advance the iterator Parameters - NB_DictIter *it The iterator - const char **key_ptr Output pointer for the key. Points to data in the dictionary. - const char **val_ptr Output pointer for the value. Points to data in the dictionary. Returns - 0 for success; valid key_ptr and val_ptr - ERR_ITER_EXHAUSTED for end of iterator. - ERR_DICT_MUTATED for detected dictionary mutation. */ NUMBA_EXPORT_FUNC(int) numba_dict_iter_next(NB_DictIter *it, const char **key_ptr, const char **val_ptr); NUMBA_EXPORT_FUNC(void) numba_dict_dump(NB_Dict *); #endif numba-0.55.1/numba/cext/listobject.c000664 000000 000000 00000076266 14174536160 017263 0ustar00rootroot000000 000000 #include "listobject.h" /* This implements the C component of the Numba typed list. It is loosely * inspired by the list implementation of the cpython list with some parts * taken from the cpython slice implementation. The exact commit-id of the * relevant files are: * * https://github.com/python/cpython/blob/51ddab8dae056867f3595ab3400bffc93f67c8d4/Objects/listobject.c * https://github.com/python/cpython/blob/51ddab8dae056867f3595ab3400bffc93f67c8d4/Objects/sliceobject.c * * Algorithmically, this list is very similar to the cpython implementation so * it should have the same performance (Big-O) characteristics for accessing, * adding and removing elements/items. Specifically, it implements the same * algorithms for list overallocation and growth. However, it never deals with * PyObject types and instead must be typed with a type-size. As a result, the * typed-list is type homogeneous and in contrast to the cpython version can * not store a mixture of arbitrarily typed objects. Reference counting via the * Numba Runtime (NRT) is supported, and incrementing and decrementing functions * are stored as part of the struct and can be set up from the compiler level. * * Importantly, only a very limited subset of the cpython c functions has been * ported over and the rest has been implemented (in Python) at the compiler * level using the c functions provided. Additionally, initialization of, and * iteration over, a ListIter is provided. * * The following functions are implemented for the list: * * - Check valid index valid_index * - Creation numba_list_new * - Deletion numba_list_free * - Accessing the length numba_list_length * - Appending to the list numba_list_append * - Getting an item numba_list_getitem * - Setting an item numba_list_setitem * - Resizing the list numba_list_resize * - Deleting an item numba_list_delitem * - Deleting a slice numba_list_delete_slice * * As you can see, only a single function for slices is implemented. The rest * is all done entirely at the compiler level which then calls the c functions * to mutate the list accordingly.
Since slicing allows for replace, insert and * delete operations over multiple items, we can simply implement those using * the basic functions above. * * The following additional functions are implemented for the list, these are * needed to make the list work within Numba. * * - Accessing the allocation numba_list_allocated * - Copying an item copy_item * - Calling incref on item list_incref_item * - Calling decref on item list_decref_item * - Set method table numba_list_set_method_table * * The following functions are implemented for the iterator: * * - Size of the iterator numba_list_iter_size * - Initialization of iter numba_list_iter * - Get next item from iter numba_list_iter_next * * Two methods are provided to query and set the 'is_mutable': * * - Query numba_list_is_mutable * - Set numba_list_set_is_mutable * * Lastly a set of pure C level tests are provided which come in handy when * needing to use valgrind and friends. * */ /* Return status for the list functions. */ typedef enum { LIST_OK = 0, LIST_ERR_INDEX = -1, LIST_ERR_NO_MEMORY = -2, LIST_ERR_MUTATED = -3, LIST_ERR_ITER_EXHAUSTED = -4, LIST_ERR_IMMUTABLE = -5, } ListStatus; /* Copy an item from a list. * * lp: a list * dst: destination pointer * src: source pointer */ static void copy_item(NB_List *lp, char *dst, const char *src){ memcpy(dst, src, lp->item_size); } /* Increment a reference to an item in a list. * * lp: a list * item: the item to increment the reference for */ static void list_incref_item(NB_List *lp, const char *item){ if (lp->methods.item_incref) { lp->methods.item_incref(item); } } /* Decrement a reference to an item in a list. * * lp: a list * item: the item to decrement the reference for */ static void list_decref_item(NB_List *lp, const char *item){ if (lp->methods.item_decref) { lp->methods.item_decref(item); } } /* Setup the method table for a list. * * This function is used from the compiler level to initialize the internal * method table. * * lp: a list * methods: the methods table to set up */ void numba_list_set_method_table(NB_List *lp, list_type_based_methods_table *methods) { memcpy(&lp->methods, methods, sizeof(list_type_based_methods_table)); } /* Check if a list index is valid. * * i: the index to check * limit: the size of a list * * Adapted from CPython's valid_index(). * * FIXME: need to find a way to inline this, even for Python 2.7 on Windows */ static int valid_index(Py_ssize_t i, Py_ssize_t limit){ /* The cast to size_t lets us use just a single comparison to check whether i is in the range: 0 <= i < limit. See: Section 14.2 "Bounds Checking" in the Agner Fog optimization manual found at: https://www.agner.org/optimize/optimizing_cpp.pdf */ return (size_t) i < (size_t) limit; } /* Initialize a new list. * * out: pointer to hold an initialized list * item_size: the size in bytes of the items in the list * allocated: preallocation of the list in items * * This will allocate sufficient memory to hold the list structure and any * items if requested (allocated != 0). See _listobject.h for more information * on the NB_List struct. 
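 * A minimal usage sketch (illustrative):
 *
 *   NB_List *lp;
 *   int v = 42;
 *   char got[sizeof(int)];
 *   if (numba_list_new(&lp, sizeof(int), 0) == LIST_OK) {
 *       numba_list_append(lp, (const char *)&v);
 *       numba_list_getitem(lp, 0, got);   // got now holds the bytes of 42
 *       numba_list_free(lp);
 *   }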
*/ int numba_list_new(NB_List **out, Py_ssize_t item_size, Py_ssize_t allocated){ NB_List *lp; char *items; // allocate memory to hold the struct lp = malloc(aligned_size(sizeof(NB_List))); if (lp == NULL) { return LIST_ERR_NO_MEMORY; } // set up members lp->size = 0; lp->item_size = item_size; lp->allocated = allocated; lp->is_mutable = 1; // set method table to zero memset(&lp->methods, 0x00, sizeof(list_type_based_methods_table)); // allocate memory to hold items, if requested if (allocated != 0) { items = malloc(aligned_size(lp->item_size * allocated)); // allocated was definitely not zero, if malloc returns NULL // this is definitely an error if (items == NULL) { // free previously allocated struct to avoid leaking memory free(lp); return LIST_ERR_NO_MEMORY; } lp->items = items; } else { // be explicit lp->items = NULL; } *out = lp; return LIST_OK; } /* Free the memory associated with a list. * * lp: a list */ void numba_list_free(NB_List *lp) { // decref all items, if needed Py_ssize_t i; if (lp->methods.item_decref) { for (i = 0; i < lp->size; i++) { char *item = lp->items + lp->item_size * i; list_decref_item(lp, item); } } // free items and list if (lp->items != NULL) { free(lp->items); } free(lp); } /* Return the base pointer of the list items. */ char * numba_list_base_ptr(NB_List *lp) { return lp->items; } /* Return the address of the list size. */ Py_ssize_t numba_list_size_address(NB_List *lp) { return (Py_ssize_t)&lp->size; } /* Return the length of a list. * * lp: a list */ Py_ssize_t numba_list_length(NB_List *lp) { return lp->size; } /* Return the current allocation of a list. * * lp: a list */ Py_ssize_t numba_list_allocated(NB_List *lp) { return lp->allocated; } /* Return the mutability status of the list * * lp: a list * */ int numba_list_is_mutable(NB_List *lp){ return lp->is_mutable; } /* Set the is_mutable attribute * * lp: a list * is_mutable: an int, 0 (False) or 1 (True) * */ void numba_list_set_is_mutable(NB_List *lp, int is_mutable){ lp->is_mutable = is_mutable; } /* Set an item in a list. * * lp: a list * index: the index of the item to set (must be in range 0 <= index < len(list)) * item: the item to set * * This assumes there is already an element at the given index that will be * overwritten and thereby have its reference decremented. DO NOT use this to * write to an unassigned location. */ int numba_list_setitem(NB_List *lp, Py_ssize_t index, const char *item) { char *loc; // check for mutability if (!lp->is_mutable) { return LIST_ERR_IMMUTABLE; } // check index is valid // FIXME: this can be (and probably is) checked at the compiler level if (!valid_index(index, lp->size)) { return LIST_ERR_INDEX; } // set item at desired location loc = lp->items + lp->item_size * index; list_decref_item(lp, loc); copy_item(lp, loc, item); list_incref_item(lp, loc); return LIST_OK; } /* Get an item from a list. * * lp: a list * index: the index of the item to get (must be in range 0 <= index < len(list)) * out: a pointer to hold the item */ int numba_list_getitem(NB_List *lp, Py_ssize_t index, char *out) { char *loc; // check index is valid // FIXME: this can be (and probably is) checked at the compiler level if (!valid_index(index, lp->size)) { return LIST_ERR_INDEX; } // get item at desired location loc = lp->items + lp->item_size * index; copy_item(lp, out, loc); return LIST_OK; } /* Append an item to the end of a list. * * lp: a list * item: the item to append.
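 * For a feel of the growth arithmetic (see numba_list_resize below), an
 * illustrative trace: appending a 5th item to a list with allocated == 4
 * calls numba_list_resize(lp, 5), which over-allocates to
 * 5 + (5 >> 3) + 3 == 8 slots; the 17th append grows the allocation from
 * 16 to 17 + (17 >> 3) + 6 == 25.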
*/ int numba_list_append(NB_List *lp, const char *item) { char *loc; // check for mutability if (!lp->is_mutable) { return LIST_ERR_IMMUTABLE; } // resize by one, will change list size int result = numba_list_resize(lp, lp->size + 1); if(result < LIST_OK) { return result; } // insert item at index: original size before resize loc = lp->items + lp->item_size * (lp->size - 1); copy_item(lp, loc, item); list_incref_item(lp, loc); return LIST_OK; } /* Resize a list. * * lp: a list * newsize: the desired new size of the list. * * This will increase or decrease the size of the list, including reallocating * the required memory and increasing the total allocation (additional free * space to hold new items). * * * Adapted from CPython's list_resize(). * * Ensure lp->items has room for at least newsize elements, and set * lp->size to newsize. If newsize > lp->size on entry, the content * of the new slots at exit is undefined heap trash; it's the caller's * responsibility to overwrite them with sane values. * The number of allocated elements may grow, shrink, or stay the same. * Failure is impossible if newsize <= lp->allocated on entry, although * that partly relies on an assumption that the system realloc() never * fails when passed a number of bytes <= the number of bytes last * allocated (the C standard doesn't guarantee this, but it's hard to * imagine a realloc implementation where it wouldn't be true). * Note that lp->items may change, and even if newsize is less * than lp->size on entry. */ int numba_list_resize(NB_List *lp, Py_ssize_t newsize) { char * items; // check for mutability if (!lp->is_mutable) { return LIST_ERR_IMMUTABLE; } size_t new_allocated, num_allocated_bytes; /* Bypass realloc() when a previous overallocation is large enough to accommodate the newsize. If the newsize falls lower than half the allocated size, then proceed with the realloc() to shrink the list. */ if (lp->allocated >= newsize && newsize >= (lp->allocated >> 1)) { assert(lp->items != NULL || newsize == 0); lp->size = newsize; return LIST_OK; } /* This over-allocates proportional to the list size, making room * for additional growth. The over-allocation is mild, but is * enough to give linear-time amortized behavior over a long * sequence of appends() in the presence of a poorly-performing * system realloc(). * The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ... * Note: new_allocated won't overflow because the largest possible value * is PY_SSIZE_T_MAX * (9 / 8) + 6 which always fits in a size_t. */ new_allocated = (size_t)newsize + (newsize >> 3) + (newsize < 9 ? 3 : 6); if (new_allocated > (size_t)PY_SSIZE_T_MAX / lp->item_size) { return LIST_ERR_NO_MEMORY; } if (newsize == 0) new_allocated = 0; num_allocated_bytes = new_allocated * lp->item_size; items = realloc(lp->items, aligned_size(num_allocated_bytes)); // realloc may return NULL if requested size is 0 if (num_allocated_bytes != 0 && items == NULL) { return LIST_ERR_NO_MEMORY; } lp->items = items; lp->size = newsize; lp->allocated = (Py_ssize_t)new_allocated; return LIST_OK; } /* Delete a single item. 
* * lp: a list * index: the index of the item to delete * (must be in range 0 <= index < len(list)) * * */ int numba_list_delitem(NB_List *lp, Py_ssize_t index) { int result; char *loc, *new_loc; Py_ssize_t leftover_bytes; // check for mutability if (!lp->is_mutable) { return LIST_ERR_IMMUTABLE; } // check index is valid // FIXME: this can be (and probably is) checked at the compiler level if (!valid_index(index, lp->size)) { return LIST_ERR_INDEX; } // obtain item and decref if needed loc = lp->items + lp->item_size * index; list_decref_item(lp, loc); if (index != lp->size - 1) { // delitem from somewhere other than the end, incur the memory copy leftover_bytes = (lp->size - 1 - index) * lp->item_size; new_loc = lp->items + (lp->item_size * (index + 1)); // use memmove instead of memcpy since we may be dealing with // overlapping regions of memory and the behaviour of memcpy is // undefined in such situations (C99). memmove(loc, new_loc, leftover_bytes); } // finally, shrink list by one result = numba_list_resize(lp, lp->size - 1); if(result < LIST_OK) { // Since we are decreasing the size, this should never happen return result; } return LIST_OK; } /* Delete a slice * * start: the start index of the slice * stop: the stop index of the slice (not included) * step: the step to take * * This function assumes that the start and stop were clipped appropriately. * I.e. if step > 0 then start >= 0 and stop <= len(l); * if step < 0 then start <= length and stop >= -1. * Also step != 0, and no Python negative indexing is allowed. * * This code was copied and edited from the relevant section in * list_ass_subscript from the cpython implementation, see the top of this file * for the exact source */ int numba_list_delete_slice(NB_List *lp, Py_ssize_t start, Py_ssize_t stop, Py_ssize_t step) { int result, i, slicelength, new_length; char *loc, *new_loc; Py_ssize_t leftover_bytes, cur, lim; // check for mutability if (!lp->is_mutable) { return LIST_ERR_IMMUTABLE; } // calculate the slicelength, taken from PySlice_AdjustIndices, see the top // of this file for the exact source if (step > 0) { slicelength = start < stop ? (stop - start - 1) / step + 1 : 0; } else { slicelength = stop < start ? (start - stop - 1) / -step + 1 : 0; } if (slicelength <= 0){ return LIST_OK; } new_length = lp->size - slicelength; // reverse step and indices if (step < 0) { stop = start + 1; start = stop + step * (slicelength - 1) - 1; step = -step; } if (step == 1) { // decref if needed if (lp->methods.item_decref) { for (i = start ; i < stop ; i++){ loc = lp->items + lp->item_size * i; lp->methods.item_decref(loc); } } // memmove items into place leftover_bytes = (lp->size - stop) * lp->item_size; loc = lp->items + lp->item_size * start; new_loc = lp->items + lp->item_size * stop; memmove(loc, new_loc, leftover_bytes); } else { // step != 1 /* drawing pictures might help understand these for * loops.
Basically, we memmove the parts of the * list that are *not* part of the slice: step-1 * items for each item that is part of the slice, * and then the tail end of the list that was not * covered by the slice * * */ for (cur = start, // index of item to be deleted i = 0; // counter of total items deleted so far cur < stop; cur += step, i++) { lim = step - 1; // number of leftover items after deletion of item // clip limit, in case we are at the end of the slice, and there // are now less than step-1 items to be moved if (cur + step >= lp->size) { lim = lp->size - cur - 1; } // decref item being removed loc = lp->items + lp->item_size * cur; list_decref_item(lp, loc); /* memmove the aforementioned step-1 (or fewer) items * dst : index of deleted item minus total deleted so far * src : index of deleted item plus one (next item) */ memmove(lp->items + lp->item_size * (cur - i), lp->items + lp->item_size * (cur + 1), lim * lp->item_size); } // memmove tail of the list cur = start + slicelength * step; if (cur < lp->size) { memmove(lp->items + lp->item_size * (cur - slicelength), lp->items + lp->item_size * cur, (lp->size - cur) * lp->item_size); } } // resize to correct size result = numba_list_resize(lp, new_length); if(result < LIST_OK) { // Since we are decreasing the size, this should never happen return result; } return LIST_OK; } /* Return the size of the list iterator (NB_ListIter) struct. */ size_t numba_list_iter_sizeof(void) { return sizeof(NB_ListIter); } /* Initialize a list iterator (NB_ListIter). * * it: an iterator * lp: a list to iterate over */ void numba_list_iter(NB_ListIter *it, NB_List *lp) { // set members of iterator it->parent = lp; it->size = lp->size; it->pos = 0; } /* Obtain the next item from a list iterator. * * it: an iterator * item_ptr: pointer to hold the next item */ int numba_list_iter_next(NB_ListIter *it, const char **item_ptr) { NB_List *lp; lp = it->parent; /* FIXME: only size changes are detected as list mutation during iteration */ if (lp->size != it->size) { return LIST_ERR_MUTATED; } // get next element if (it->pos < lp->size) { *item_ptr = lp->items + lp->item_size * it->pos++; return LIST_OK; } else { return LIST_ERR_ITER_EXHAUSTED; } } #define CHECK(CASE) { \ if ( !(CASE) ) { \ printf("'%s' failed file %s:%d\n", #CASE, __FILE__, __LINE__); \ return -1; \ } \ } /* Basic C based tests for the list.
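 * One worked slice computation that the tests below exercise (illustrative):
 * numba_list_delete_slice(lp, 0, 17, 2) on a 17-item list computes
 * slicelength = (17 - 0 - 1)/2 + 1 == 9, removing the items at indices
 * 0, 2, ..., 16 and keeping the 8 odd-indexed ones.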
*/ int numba_test_list(void) { NB_List *lp = NULL; int status, i; Py_ssize_t it_count; const char *it_item = NULL; NB_ListIter iter; char got_item[4] = "\x00\x00\x00\x00"; const char *test_items_1 = NULL, *test_items_2 = NULL; char *test_items_3 = NULL; puts("test_list"); status = numba_list_new(&lp, 4, 0); CHECK(status == LIST_OK); CHECK(lp->item_size == 4); CHECK(lp->size == 0); CHECK(lp->allocated == 0); CHECK(lp->is_mutable == 1); // flip and check the is_mutable bit CHECK(numba_list_is_mutable(lp) == 1); numba_list_set_is_mutable(lp, 0); CHECK(numba_list_is_mutable(lp) == 0); numba_list_set_is_mutable(lp, 1); CHECK(numba_list_is_mutable(lp) == 1); // append 1st item, this will cause a realloc status = numba_list_append(lp, "abc"); CHECK(status == LIST_OK); CHECK(lp->size == 1); CHECK(lp->allocated == 4); status = numba_list_getitem(lp, 0, got_item); CHECK(status == LIST_OK); CHECK(memcmp(got_item, "abc", 4) == 0); // append 2nd item status = numba_list_append(lp, "def"); CHECK(status == LIST_OK); CHECK(lp->size == 2); CHECK(lp->allocated == 4); status = numba_list_getitem(lp, 1, got_item); CHECK(status == LIST_OK); CHECK(memcmp(got_item, "def", 4) == 0); // append 3rd item status = numba_list_append(lp, "ghi"); CHECK(status == LIST_OK); CHECK(lp->size == 3); CHECK(lp->allocated == 4); status = numba_list_getitem(lp, 2, got_item); CHECK(status == LIST_OK); CHECK(memcmp(got_item, "ghi", 4) == 0); // append 4th item status = numba_list_append(lp, "jkl"); CHECK(status == LIST_OK); CHECK(lp->size == 4); CHECK(lp->allocated == 4); status = numba_list_getitem(lp, 3, got_item); CHECK(status == LIST_OK); CHECK(memcmp(got_item, "jkl", 4) == 0); // append 5th item, this will cause another realloc status = numba_list_append(lp, "mno"); CHECK(status == LIST_OK); CHECK(lp->size == 5); CHECK(lp->allocated == 8); status = numba_list_getitem(lp, 4, got_item); CHECK(status == LIST_OK); CHECK(memcmp(got_item, "mno", 4) == 0); // overwrite 1st item status = numba_list_setitem(lp, 0, "pqr"); CHECK(status == LIST_OK); CHECK(lp->size == 5); CHECK(lp->allocated == 8); status = numba_list_getitem(lp, 0, got_item); CHECK(status == LIST_OK); CHECK(memcmp(got_item, "pqr", 4) == 0); // get and del 1st item, check item shift status = numba_list_getitem(lp, 0, got_item); status = numba_list_delitem(lp, 0); CHECK(status == LIST_OK); CHECK(lp->size == 4); CHECK(lp->allocated == 8); CHECK(memcmp(got_item, "pqr", 4) == 0); CHECK(memcmp(lp->items, "def\x00ghi\x00jkl\x00mno\x00", 16) == 0); // get and del last (4th) item, no shift since only last item affected status = numba_list_getitem(lp, 3, got_item); status = numba_list_delitem(lp, 3); CHECK(status == LIST_OK); CHECK(lp->size == 3); CHECK(lp->allocated == 6); // this also shrinks the allocation CHECK(memcmp(got_item, "mno", 4) == 0); CHECK(memcmp(lp->items, "def\x00ghi\x00jkl\x00", 12) == 0); // flip and check the is_mutable member CHECK(numba_list_is_mutable(lp) == 1); numba_list_set_is_mutable(lp, 0); CHECK(numba_list_is_mutable(lp) == 0); // ensure that any attempts to mutate an immutable list fail CHECK(numba_list_setitem(lp, 0, "zzz") == LIST_ERR_IMMUTABLE); CHECK(numba_list_append(lp, "zzz") == LIST_ERR_IMMUTABLE); CHECK(numba_list_delitem(lp, 0) == LIST_ERR_IMMUTABLE); CHECK(numba_list_resize(lp, 23) == LIST_ERR_IMMUTABLE); CHECK(numba_list_delete_slice(lp, 0, 3, 1) == LIST_ERR_IMMUTABLE); // ensure that all attempts to query/read from and immutable list succeed CHECK(numba_list_length(lp) == 3); status = numba_list_getitem(lp, 0, got_item); CHECK(status == 
LIST_OK); CHECK(memcmp(got_item, "def", 4) == 0); // flip the is_mutable member back and check numba_list_set_is_mutable(lp, 1); CHECK(numba_list_is_mutable(lp) == 1); // test iterator CHECK(lp->size > 0); numba_list_iter(&iter, lp); it_count = 0; CHECK(iter.parent == lp); CHECK(iter.pos == it_count); // current contents of list test_items_1 = "def\x00ghi\x00jkl\x00"; while ( (status = numba_list_iter_next(&iter, &it_item)) == LIST_OK) { it_count += 1; CHECK(iter.pos == it_count); // check iterator position CHECK(it_item != NULL); // quick check item is non-null // go fishing in test_items_1 CHECK(memcmp((const char *)test_items_1 + ((it_count - 1) * 4), it_item, 4) == 0); } CHECK(status == LIST_ERR_ITER_EXHAUSTED); CHECK(lp->size == it_count); // free existing list numba_list_free(lp); // test growth upon append and shrink during delitem status = numba_list_new(&lp, 1, 0); CHECK(status == LIST_OK); CHECK(lp->item_size == 1); CHECK(lp->size == 0); CHECK(lp->allocated == 0); // first, grow the list // Use exactly 17 elements, should go through the allocation pattern: // 0, 4, 8, 16, 25 for (i = 0; i < 17 ; i++) { switch(i) { // Check the allocation before case 0: CHECK(lp->allocated == 0); break; case 4: CHECK(lp->allocated == 4); break; case 8: CHECK(lp->allocated == 8); break; case 16: CHECK(lp->allocated == 16); break; } status = numba_list_append(lp, (const char*)&i); CHECK(status == LIST_OK); switch(i) { // Check that the growth happened accordingly case 0: CHECK(lp->allocated == 4); break; case 4: CHECK(lp->allocated == 8); break; case 8: CHECK(lp->allocated == 16); break; case 16: CHECK(lp->allocated == 25); break; } } CHECK(lp->size == 17); // Check current contents of list test_items_2 = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"; CHECK(memcmp(lp->items, test_items_2, 17) == 0); // Now, delete them again and check that list shrinks for (i = 17; i > 0 ; i--) { switch(i) { // Check the allocation before delitem case 17: CHECK(lp->allocated == 25); break; case 12: CHECK(lp->allocated == 25); break; case 9: CHECK(lp->allocated == 18); break; case 6: CHECK(lp->allocated == 12); break; case 4: CHECK(lp->allocated == 8); break; case 3: CHECK(lp->allocated == 6); break; case 2: CHECK(lp->allocated == 5); break; case 1: CHECK(lp->allocated == 4); break; } status = numba_list_getitem(lp, i-1, got_item); status = numba_list_delitem(lp, i-1); CHECK(status == LIST_OK); switch(i) { // Check that the shrink happened accordingly case 17: CHECK(lp->allocated == 25); break; case 12: CHECK(lp->allocated == 18); break; case 9: CHECK(lp->allocated == 12); break; case 6: CHECK(lp->allocated == 8); break; case 4: CHECK(lp->allocated == 6); break; case 3: CHECK(lp->allocated == 5); break; case 2: CHECK(lp->allocated == 4); break; case 1: CHECK(lp->allocated == 0); break; } } // free existing list numba_list_free(lp); // Setup list for testing delete_slice status = numba_list_new(&lp, 1, 0); CHECK(status == LIST_OK); CHECK(lp->item_size == 1); CHECK(lp->size == 0); CHECK(lp->allocated == 0); for (i = 0; i < 17 ; i++) { status = numba_list_append(lp, (const char*)&i); CHECK(status == LIST_OK); } CHECK(lp->size == 17); test_items_3 = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"; CHECK(memcmp(lp->items, test_items_3, 17) == 0); // delete multiple elements from the middle status = numba_list_delete_slice(lp, 2, 5, 1); CHECK(status == LIST_OK); CHECK(lp->size == 14); test_items_3 = "\x00\x01\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"; 
CHECK(memcmp(lp->items, test_items_3, 14) == 0); // delete single element from start status = numba_list_delete_slice(lp, 0, 1, 1); CHECK(status == LIST_OK); CHECK(lp->size == 13); test_items_3 = "\x01\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"; CHECK(memcmp(lp->items, test_items_3, 13) == 0); // delete single element from end status = numba_list_delete_slice(lp, 12, 13, 1); CHECK(status == LIST_OK); CHECK(lp->size == 12); test_items_3 = "\x01\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"; CHECK(memcmp(lp->items, test_items_3, 12) == 0); // delete single element from middle status = numba_list_delete_slice(lp, 4, 5, 1); CHECK(status == LIST_OK); CHECK(lp->size == 11); test_items_3 = "\x01\x05\x06\x07\x09\x0a\x0b\x0c\x0d\x0e\x0f"; CHECK(memcmp(lp->items, test_items_3, 11) == 0); // delete all elements except first and last status = numba_list_delete_slice(lp, 1, 10, 1); CHECK(status == LIST_OK); CHECK(lp->size == 2); test_items_3 = "\x01\x0f"; CHECK(memcmp(lp->items, test_items_3, 2) == 0); // delete all remaining elements status = numba_list_delete_slice(lp, 0, lp->size, 1); CHECK(status == LIST_OK); CHECK(lp->size == 0); test_items_3 = ""; CHECK(memcmp(lp->items, test_items_3, 0) == 0); // free existing list numba_list_free(lp); // Setup list for testing delete_slice with non-unit step status = numba_list_new(&lp, 1, 0); CHECK(status == LIST_OK); CHECK(lp->item_size == 1); CHECK(lp->size == 0); CHECK(lp->allocated == 0); for (i = 0; i < 17 ; i++) { status = numba_list_append(lp, (const char*)&i); CHECK(status == LIST_OK); } CHECK(lp->size == 17); // delete the items at even indices (keeping the odd-indexed ones) status = numba_list_delete_slice(lp, 0, 17, 2); CHECK(status == LIST_OK); CHECK(lp->size == 8); test_items_3 = "\x01\x03\x05\x07\x09\x0b\x0d\x0f"; CHECK(memcmp(lp->items, test_items_3, 8) == 0); // delete with a step of 4, starting at index 1 status = numba_list_delete_slice(lp, 1, 8, 4); CHECK(status == LIST_OK); CHECK(lp->size == 6); test_items_3 = "\x01\x05\x07\x09\x0d\x0f"; CHECK(memcmp(lp->items, test_items_3, 6) == 0); // delete with a step of 2, but finish before end of list status = numba_list_delete_slice(lp, 0, 4, 2); CHECK(status == LIST_OK); CHECK(lp->size == 4); test_items_3 = "\x05\x09\x0d\x0f"; CHECK(memcmp(lp->items, test_items_3, 4) == 0); // no-op on empty slice status = numba_list_delete_slice(lp, 0, 0, 1); CHECK(status == LIST_OK); CHECK(lp->size == 4); test_items_3 = "\x05\x09\x0d\x0f"; CHECK(memcmp(lp->items, test_items_3, 4) == 0); // no-op on empty slice, non-zero index status = numba_list_delete_slice(lp, 2, 2, 1); CHECK(status == LIST_OK); CHECK(lp->size == 4); test_items_3 = "\x05\x09\x0d\x0f"; CHECK(memcmp(lp->items, test_items_3, 4) == 0); // free existing list numba_list_free(lp); // Setup list for testing delete_slice with negative step status = numba_list_new(&lp, 1, 0); CHECK(status == LIST_OK); CHECK(lp->item_size == 1); CHECK(lp->size == 0); CHECK(lp->allocated == 0); for (i = 0; i < 17 ; i++) { status = numba_list_append(lp, (const char*)&i); CHECK(status == LIST_OK); } CHECK(lp->size == 17); // delete all items using a unit negative step status = numba_list_delete_slice(lp, 16, -1, -1); CHECK(status == LIST_OK); CHECK(lp->size == 0); // refill list for (i = 0; i < 17 ; i++) { status = numba_list_append(lp, (const char*)&i); CHECK(status == LIST_OK); } // delete every other item using a negative step // need to start at index of the last item (16) and // go beyond the first item, i.e.
-1 in C status = numba_list_delete_slice(lp, 16, -1, -2); CHECK(status == LIST_OK); CHECK(lp->size == 8); test_items_3 = "\x01\x03\x05\x07\x09\x0b\x0d\x0f"; CHECK(memcmp(lp->items, test_items_3, 8) == 0); // free list and return 0 numba_list_free(lp); return 0; } #undef CHECK numba-0.55.1/numba/cext/listobject.h000664 000000 000000 00000007535 14174536160 017265 0ustar00rootroot000000 000000 /* Adapted from CPython3.7 Include/listobject.h * * The exact commit-id of the relevant file is: * * https://github.com/python/cpython/blob/51ddab8dae056867f3595ab3400bffc93f67c8d4/Include/listobject.h * * * */ #ifndef NUMBA_LIST_H #define NUMBA_LIST_H #include "Python.h" #include "cext.h" typedef void (*list_refcount_op_t)(const void*); typedef struct { list_refcount_op_t item_incref; list_refcount_op_t item_decref; } list_type_based_methods_table; /* This is the struct for the Numba typed list. It is largely inspired by the * CPython list struct in listobject.h. In essence the list is a homogeneously * typed container that can grow and shrink upon insertion and deletion. This * means that appending an item to, or removing an item from, the end of the * list has an O(1) amortized runtime. This matches the * behaviour of the CPython list type and it will grow with the same * increments. * * 'items' contains space for 'allocated' elements. The number * currently in use is 'size'. The size in bytes of the items stored in the * list is given by 'item_size'. * * Invariants: * 0 <= size <= allocated * len(list) == size * items == NULL implies size == allocated == 0 * * FIXME: list.sort() temporarily sets allocated to -1 to detect mutations. * * Items must normally not be NULL, except during construction when * the list is not yet visible outside the function that builds it. * * Additionally, this list has a boolean member 'is_mutable' that can be used to * set a list as immutable. Two functions to query and set this member are * provided. Any attempt to mutate an immutable list will result in a status * of LIST_ERR_IMMUTABLE. * */ typedef struct { /* size of the list in items */ Py_ssize_t size; /* size of the list items in bytes */ Py_ssize_t item_size; /* total allocated slots in items */ Py_ssize_t allocated; /* is the list mutable */ int is_mutable; /* method table for type-dependent operations */ list_type_based_methods_table methods; /* array/pointer for items.
Interpretation is governed by item_size */ char * items; } NB_List; typedef struct { /* parent list */ NB_List *parent; /* list size */ Py_ssize_t size; /* iterator position; indicates the next position to read */ Py_ssize_t pos; } NB_ListIter; NUMBA_EXPORT_FUNC(void) numba_list_set_method_table(NB_List *lp, list_type_based_methods_table *methods); NUMBA_EXPORT_FUNC(int) numba_list_new(NB_List **out, Py_ssize_t item_size, Py_ssize_t allocated); NUMBA_EXPORT_FUNC(void) numba_list_free(NB_List *lp); NUMBA_EXPORT_FUNC(char *) numba_list_base_ptr(NB_List *lp); NUMBA_EXPORT_FUNC(Py_ssize_t) numba_list_size_address(NB_List *lp); NUMBA_EXPORT_FUNC(Py_ssize_t) numba_list_length(NB_List *lp); NUMBA_EXPORT_FUNC(Py_ssize_t) numba_list_allocated(NB_List *lp); NUMBA_EXPORT_FUNC(int) numba_list_is_mutable(NB_List *lp); NUMBA_EXPORT_FUNC(void) numba_list_set_is_mutable(NB_List *lp, int is_mutable); NUMBA_EXPORT_FUNC(int) numba_list_setitem(NB_List *lp, Py_ssize_t index, const char *item); NUMBA_EXPORT_FUNC(int) numba_list_getitem(NB_List *lp, Py_ssize_t index, char *out); NUMBA_EXPORT_FUNC(int) numba_list_append(NB_List *lp, const char *item); // FIXME: should this be public? NUMBA_EXPORT_FUNC(int) numba_list_resize(NB_List *lp, Py_ssize_t newsize); NUMBA_EXPORT_FUNC(int) numba_list_delitem(NB_List *lp, Py_ssize_t index); NUMBA_EXPORT_FUNC(int) numba_list_delete_slice(NB_List *lp, Py_ssize_t start, Py_ssize_t stop, Py_ssize_t step); NUMBA_EXPORT_FUNC(size_t) numba_list_iter_sizeof(void); NUMBA_EXPORT_FUNC(void) numba_list_iter(NB_ListIter *it, NB_List *l); NUMBA_EXPORT_FUNC(int) numba_list_iter_next(NB_ListIter *it, const char **item_ptr); NUMBA_EXPORT_FUNC(int) numba_test_list(void); #endif numba-0.55.1/numba/cext/utils.c000664 000000 000000 00000000311 14174536160 016247 0ustar00rootroot000000 000000 #include "cext.h" /* Align size *sz* to pointer width */ Py_ssize_t aligned_size(Py_ssize_t sz) { Py_ssize_t alignment = sizeof(void*); return sz + (alignment - sz % alignment) % alignment; } numba-0.55.1/numba/cloudpickle/000775 000000 000000 00000000000 14174536160 016273 5ustar00rootroot000000 000000 numba-0.55.1/numba/cloudpickle/__init__.py000664 000000 000000 00000001010 14174536160 020404 0ustar00rootroot000000 000000 from __future__ import absolute_import # NOTE: The following imports are adapted for use as a vendored subpackage. # from https://github.com/cloudpipe/cloudpickle/blob/d3279a0689b769d5315fc6ff00cd0f5897844526/cloudpickle/__init__.py from .cloudpickle import * # noqa from .cloudpickle_fast import CloudPickler, dumps, dump # noqa # Conform to the convention used by python serialization libraries, which # expose their Pickler subclass at top-level under the "Pickler" name. Pickler = CloudPickler __version__ = '1.6.0' numba-0.55.1/numba/cloudpickle/cloudpickle.py000664 000000 000000 00000073610 14174536160 021152 0ustar00rootroot000000 000000 """ This class is defined to override standard pickle functionality. The goals of it follow: -Serialize lambdas and nested functions to compiled byte code -Deal with main module correctly -Deal with other non-serializable objects It does not include an unpickler, as standard python unpickling suffices. This module was extracted from the `cloud` package, developed by `PiCloud, Inc. `_. Copyright (c) 2012, Regents of the University of California. Copyright (c) 2009 `PiCloud, Inc. `_. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the University of California, Berkeley nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ from __future__ import print_function import builtins import dis import opcode import platform import sys import types import weakref import uuid import threading import typing import warnings from .compat import pickle from typing import Generic, Union, Tuple, Callable from pickle import _getattribute from importlib._bootstrap import _find_spec try: # pragma: no branch import typing_extensions as _typing_extensions from typing_extensions import Literal, Final except ImportError: _typing_extensions = Literal = Final = None if sys.version_info >= (3, 5, 3): from typing import ClassVar else: # pragma: no cover ClassVar = None if sys.version_info >= (3, 8): from types import CellType else: def f(): a = 1 def g(): return a return g CellType = type(f().__closure__[0]) # cloudpickle is meant for inter-process communication: we expect all # communicating processes to run the same Python version, hence we favor # communication speed over compatibility: DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL # Track the provenance of reconstructed dynamic classes to make it possible to # reconstruct instances from the matching singleton class definition when # appropriate and preserve the usual "isinstance" semantics of Python objects.
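# Editor's illustration (not part of the original source): because the same
# tracker id travels with every pickle of a given dynamic class, two payloads
# produced from one class deserialize to a single class object, e.g.
#
#     payload_a, payload_b = dumps(C), dumps(C)  # C defined in a function body
#     C_a, C_b = loads(payload_a), loads(payload_b)
#     assert C_a is C_b  # isinstance checks keep working across payloads
#
# where dumps/loads are cloudpickle's own entry points.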
_DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary() _DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary() _DYNAMIC_CLASS_TRACKER_LOCK = threading.Lock() _DYNAMIC_CLASS_TRACKER_REUSING = weakref.WeakSet() PYPY = platform.python_implementation() == "PyPy" builtin_code_type = None if PYPY: # builtin-code objects only exist in pypy builtin_code_type = type(float.__new__.__code__) _extract_code_globals_cache = weakref.WeakKeyDictionary() def _get_or_create_tracker_id(class_def): with _DYNAMIC_CLASS_TRACKER_LOCK: class_tracker_id = _DYNAMIC_CLASS_TRACKER_BY_CLASS.get(class_def) if class_tracker_id is None: class_tracker_id = uuid.uuid4().hex _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id _DYNAMIC_CLASS_TRACKER_BY_ID[class_tracker_id] = class_def return class_tracker_id def _lookup_class_or_track(class_tracker_id, class_def): if class_tracker_id is not None: with _DYNAMIC_CLASS_TRACKER_LOCK: orig_class_def = class_def class_def = _DYNAMIC_CLASS_TRACKER_BY_ID.setdefault( class_tracker_id, class_def) _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id # Check if we are reusing a previous class_def if orig_class_def is not class_def: # Remember that the class_def is being reused _DYNAMIC_CLASS_TRACKER_REUSING.add(class_def) return class_def def _whichmodule(obj, name): """Find the module an object belongs to. This function differs from ``pickle.whichmodule`` in two ways: - it does not mangle the cases where obj's module is __main__ and obj was not found in any module. - Errors arising during module introspection are ignored, as those errors are considered unwanted side effects. """ if sys.version_info[:2] < (3, 7) and isinstance(obj, typing.TypeVar): # pragma: no branch # noqa # Workaround bug in old Python versions: prior to Python 3.7, # T.__module__ would always be set to "typing" even when the TypeVar T # would be defined in a different module. # # For such older Python versions, we ignore the __module__ attribute of # TypeVar instances and instead exhaustively look up those instances in # all currently imported modules. module_name = None else: module_name = getattr(obj, '__module__', None) if module_name is not None: return module_name # Protect the iteration by using a copy of sys.modules against dynamic # modules that trigger imports of other modules upon calls to getattr, and # against other threads importing at the same time. for module_name, module in sys.modules.copy().items(): # Some modules such as coverage can inject non-module objects inside # sys.modules if ( module_name == '__main__' or module is None or not isinstance(module, types.ModuleType) ): continue try: if _getattribute(module, name)[0] is obj: return module_name except Exception: pass return None def _is_importable(obj, name=None): """Dispatcher utility to test the importability of various constructs.""" if isinstance(obj, types.FunctionType): return _lookup_module_and_qualname(obj, name=name) is not None elif issubclass(type(obj), type): return _lookup_module_and_qualname(obj, name=name) is not None elif isinstance(obj, types.ModuleType): # We assume that sys.modules is primarily used as a cache mechanism for # the Python import machinery. Checking if a module has been added to # sys.modules is therefore a cheap and simple heuristic to tell us whether # we can assume that a given module could be imported by name in # another Python process.
return obj.__name__ in sys.modules else: raise TypeError( "cannot check importability of {} instances".format( type(obj).__name__) ) def _lookup_module_and_qualname(obj, name=None): if name is None: name = getattr(obj, '__qualname__', None) if name is None: # pragma: no cover # This used to be needed for Python 2.7 support but is probably not # needed anymore. However we keep the __name__ introspection in case # users of cloudpickle rely on this old behavior for unknown reasons. name = getattr(obj, '__name__', None) module_name = _whichmodule(obj, name) if module_name is None: # In this case, obj.__module__ is None AND obj was not found in any # imported module. obj is thus treated as dynamic. return None if module_name == "__main__": return None # Note: if module_name is in sys.modules, the corresponding module is # assumed importable at unpickling time. See #357 module = sys.modules.get(module_name, None) if module is None: # The main reason why obj's module would not be imported is that this # module has been dynamically created, using for example # types.ModuleType. The other possibility is that module was removed # from sys.modules after obj was created/imported. But this case is not # supported, as the standard pickle does not support it either. return None try: obj2, parent = _getattribute(module, name) except AttributeError: # obj was not found inside the module it points to return None if obj2 is not obj: return None return module, name def _extract_code_globals(co): """ Find all global names read or written to by code block co """ out_names = _extract_code_globals_cache.get(co) if out_names is None: names = co.co_names out_names = {names[oparg] for _, oparg in _walk_global_ops(co)} # Declaring a function inside another one using the "def ..." # syntax generates a constant code object corresponding to that # of the nested function. As the nested function may itself need # global variables, we need to introspect its code, extract its # globals, (look for code objects in its co_consts attribute...) and # add the result to code_globals if co.co_consts: for const in co.co_consts: if isinstance(const, types.CodeType): out_names |= _extract_code_globals(const) _extract_code_globals_cache[co] = out_names return out_names def _find_imported_submodules(code, top_level_dependencies): """ Find currently imported submodules used by a function. Submodules used by a function need to be detected and referenced for the function to work correctly at depickling time. Because submodules can be referenced as attributes of their parent package (``package.submodule``), we need a special introspection technique that does not rely on GLOBAL-related opcodes to find references of them in a code object. Example: ``` import concurrent.futures import cloudpickle def func(): x = concurrent.futures.ThreadPoolExecutor if __name__ == '__main__': cloudpickle.dumps(func) ``` The globals extracted by cloudpickle in the function's state include the concurrent package, but not its submodule (here, concurrent.futures), which is the module used by func. _find_imported_submodules will detect the usage of concurrent.futures.
Saving this module alongside func will ensure that calling func once depickled does not fail due to concurrent.futures not being imported """ subimports = [] # check if any known dependency is an imported package for x in top_level_dependencies: if (isinstance(x, types.ModuleType) and hasattr(x, '__package__') and x.__package__): # check if the package has any currently loaded sub-imports prefix = x.__name__ + '.' # A concurrent thread could mutate sys.modules, # make sure we iterate over a copy to avoid exceptions for name in list(sys.modules): # Older versions of pytest will add a "None" module to # sys.modules. if name is not None and name.startswith(prefix): # check whether the function can address the sub-module tokens = set(name[len(prefix):].split('.')) if not tokens - set(code.co_names): subimports.append(sys.modules[name]) return subimports def cell_set(cell, value): """Set the value of a closure cell. The point of this function is to set the cell_contents attribute of a cell after its creation. This operation is necessary in case the cell contains a reference to the function the cell belongs to, as when calling the function's constructor ``f = types.FunctionType(code, globals, name, argdefs, closure)``, closure will not be able to contain the yet-to-be-created f. In Python3.7, cell_contents is writeable, so setting the contents of a cell can be done simply using >>> cell.cell_contents = value In earlier Python3 versions, the cell_contents attribute of a cell is read only, but this limitation can be worked around by leveraging the Python 3 ``nonlocal`` keyword. In Python2 however, this attribute is read only, and there is no ``nonlocal`` keyword. For this reason, we need to come up with more complicated hacks to set this attribute. The chosen approach is to create a function with a STORE_DEREF opcode, which sets the content of a closure variable. Typically: >>> def inner(value): ... lambda: cell # the lambda makes cell a closure ... cell = value # cell is a closure, so this triggers a STORE_DEREF (Note that in Python2, a STORE_DEREF can never be triggered from an inner function. The function g for example here >>> def f(var): ... def g(): ... var += 1 ... return g will not modify the closure variable ``var`` in place, but instead try to load a local variable var and increment it. As g does not assign the local variable ``var`` any initial value, calling f(1)() will fail at runtime.) Our objective is to set the value of a given cell ``cell``. So we need to somehow reference our ``cell`` object into the ``inner`` function so that this object (and not the dummy cell of the lambda function) gets affected by the STORE_DEREF operation. In inner, ``cell`` is referenced as a cell variable (an enclosing variable that is referenced by the inner function). If we create a new function cell_set with the exact same code as ``inner``, but with ``cell`` marked as a free variable instead, the STORE_DEREF will be applied on its closure - ``cell``, which we can specify explicitly during construction! The new cell_set variable thus actually sets the contents of a specified cell! Note: we do not make use of the ``nonlocal`` keyword to set the contents of a cell in early python3 versions to limit possible syntax errors in case test and checker libraries decide to parse the whole file.
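A short usage sketch (an editor's addition, relying on the
``_make_empty_cell`` helper defined later in this module):

>>> cell = _make_empty_cell()
>>> cell_set(cell, 42)
>>> cell.cell_contents
42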
""" if sys.version_info[:2] >= (3, 7): # pragma: no branch cell.cell_contents = value else: _cell_set = types.FunctionType( _cell_set_template_code, {}, '_cell_set', (), (cell,),) _cell_set(value) def _make_cell_set_template_code(): def _cell_set_factory(value): lambda: cell cell = value co = _cell_set_factory.__code__ _cell_set_template_code = types.CodeType( co.co_argcount, co.co_kwonlyargcount, # Python 3 only argument co.co_nlocals, co.co_stacksize, co.co_flags, co.co_code, co.co_consts, co.co_names, co.co_varnames, co.co_filename, co.co_name, co.co_firstlineno, co.co_lnotab, co.co_cellvars, # co_freevars is initialized with co_cellvars (), # co_cellvars is made empty ) return _cell_set_template_code if sys.version_info[:2] < (3, 7): _cell_set_template_code = _make_cell_set_template_code() # relevant opcodes STORE_GLOBAL = opcode.opmap['STORE_GLOBAL'] DELETE_GLOBAL = opcode.opmap['DELETE_GLOBAL'] LOAD_GLOBAL = opcode.opmap['LOAD_GLOBAL'] GLOBAL_OPS = (STORE_GLOBAL, DELETE_GLOBAL, LOAD_GLOBAL) HAVE_ARGUMENT = dis.HAVE_ARGUMENT EXTENDED_ARG = dis.EXTENDED_ARG _BUILTIN_TYPE_NAMES = {} for k, v in types.__dict__.items(): if type(v) is type: _BUILTIN_TYPE_NAMES[v] = k def _builtin_type(name): if name == "ClassType": # pragma: no cover # Backward compat to load pickle files generated with cloudpickle # < 1.3 even if loading pickle files from older versions is not # officially supported. return type return getattr(types, name) def _walk_global_ops(code): """ Yield (opcode, argument number) tuples for all global-referencing instructions in *code*. """ for instr in dis.get_instructions(code): op = instr.opcode if op in GLOBAL_OPS: yield op, instr.arg def _extract_class_dict(cls): """Retrieve a copy of the dict of a class without the inherited methods""" clsdict = dict(cls.__dict__) # copy dict proxy to a dict if len(cls.__bases__) == 1: inherited_dict = cls.__bases__[0].__dict__ else: inherited_dict = {} for base in reversed(cls.__bases__): inherited_dict.update(base.__dict__) to_remove = [] for name, value in clsdict.items(): try: base_value = inherited_dict[name] if value is base_value: to_remove.append(name) except KeyError: pass for name in to_remove: clsdict.pop(name) return clsdict if sys.version_info[:2] < (3, 7): # pragma: no branch def _is_parametrized_type_hint(obj): # This is very cheap but might generate false positives. 
# general typing Constructs is_typing = getattr(obj, '__origin__', None) is not None # typing_extensions.Literal is_litteral = getattr(obj, '__values__', None) is not None # typing_extensions.Final is_final = getattr(obj, '__type__', None) is not None # typing.Union/Tuple for old Python 3.5 is_union = getattr(obj, '__union_params__', None) is not None is_tuple = getattr(obj, '__tuple_params__', None) is not None is_callable = ( getattr(obj, '__result__', None) is not None and getattr(obj, '__args__', None) is not None ) return any((is_typing, is_litteral, is_final, is_union, is_tuple, is_callable)) def _create_parametrized_type_hint(origin, args): return origin[args] else: _is_parametrized_type_hint = None _create_parametrized_type_hint = None def parametrized_type_hint_getinitargs(obj): # The distorted type check sematic for typing construct becomes: # ``type(obj) is type(TypeHint)``, which means "obj is a # parametrized TypeHint" if type(obj) is type(Literal): # pragma: no branch initargs = (Literal, obj.__values__) elif type(obj) is type(Final): # pragma: no branch initargs = (Final, obj.__type__) elif type(obj) is type(ClassVar): initargs = (ClassVar, obj.__type__) elif type(obj) is type(Generic): parameters = obj.__parameters__ if len(obj.__parameters__) > 0: # in early Python 3.5, __parameters__ was sometimes # preferred to __args__ initargs = (obj.__origin__, parameters) else: initargs = (obj.__origin__, obj.__args__) elif type(obj) is type(Union): if sys.version_info < (3, 5, 3): # pragma: no cover initargs = (Union, obj.__union_params__) else: initargs = (Union, obj.__args__) elif type(obj) is type(Tuple): if sys.version_info < (3, 5, 3): # pragma: no cover initargs = (Tuple, obj.__tuple_params__) else: initargs = (Tuple, obj.__args__) elif type(obj) is type(Callable): if sys.version_info < (3, 5, 3): # pragma: no cover args = obj.__args__ result = obj.__result__ if args != Ellipsis: if isinstance(args, tuple): args = list(args) else: args = [args] else: (*args, result) = obj.__args__ if len(args) == 1 and args[0] is Ellipsis: args = Ellipsis else: args = list(args) initargs = (Callable, (args, result)) else: # pragma: no cover raise pickle.PicklingError( "Cloudpickle Error: Unknown type {}".format(type(obj)) ) return initargs # Tornado support def is_tornado_coroutine(func): """ Return whether *func* is a Tornado coroutine function. Running coroutines are not supported. """ if 'tornado.gen' not in sys.modules: return False gen = sys.modules['tornado.gen'] if not hasattr(gen, "is_coroutine_function"): # Tornado version is too old return False return gen.is_coroutine_function(func) def _rebuild_tornado_coroutine(func): from tornado import gen return gen.coroutine(func) # including pickles unloading functions in this namespace load = pickle.load loads = pickle.loads # hack for __import__ not working as desired def subimport(name): __import__(name) return sys.modules[name] def dynamic_subimport(name, vars): mod = types.ModuleType(name) mod.__dict__.update(vars) mod.__dict__['__builtins__'] = builtins.__dict__ return mod def _gen_ellipsis(): return Ellipsis def _gen_not_implemented(): return NotImplemented def _get_cell_contents(cell): try: return cell.cell_contents except ValueError: # sentinel used by ``_fill_function`` which will leave the cell empty return _empty_cell_value def instance(cls): """Create a new instance of a class. Parameters ---------- cls : type The class to create an instance of. Returns ------- instance : cls A new instance of ``cls``. 
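Examples
--------
An illustrative doctest (editor's addition): when used as a decorator,
the decorated name ends up bound to an instance, not the class itself.

>>> @instance
... class _marker(object):
...     pass
>>> type(_marker).__name__
'_marker'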
""" return cls() @instance class _empty_cell_value(object): """sentinel for empty closures """ @classmethod def __reduce__(cls): return cls.__name__ def _fill_function(*args): """Fills in the rest of function data into the skeleton function object The skeleton itself is create by _make_skel_func(). """ if len(args) == 2: func = args[0] state = args[1] elif len(args) == 5: # Backwards compat for cloudpickle v0.4.0, after which the `module` # argument was introduced func = args[0] keys = ['globals', 'defaults', 'dict', 'closure_values'] state = dict(zip(keys, args[1:])) elif len(args) == 6: # Backwards compat for cloudpickle v0.4.1, after which the function # state was passed as a dict to the _fill_function it-self. func = args[0] keys = ['globals', 'defaults', 'dict', 'module', 'closure_values'] state = dict(zip(keys, args[1:])) else: raise ValueError('Unexpected _fill_value arguments: %r' % (args,)) # - At pickling time, any dynamic global variable used by func is # serialized by value (in state['globals']). # - At unpickling time, func's __globals__ attribute is initialized by # first retrieving an empty isolated namespace that will be shared # with other functions pickled from the same original module # by the same CloudPickler instance and then updated with the # content of state['globals'] to populate the shared isolated # namespace with all the global variables that are specifically # referenced for this function. func.__globals__.update(state['globals']) func.__defaults__ = state['defaults'] func.__dict__ = state['dict'] if 'annotations' in state: func.__annotations__ = state['annotations'] if 'doc' in state: func.__doc__ = state['doc'] if 'name' in state: func.__name__ = state['name'] if 'module' in state: func.__module__ = state['module'] if 'qualname' in state: func.__qualname__ = state['qualname'] if 'kwdefaults' in state: func.__kwdefaults__ = state['kwdefaults'] # _cloudpickle_subimports is a set of submodules that must be loaded for # the pickled function to work correctly at unpickling time. Now that these # submodules are depickled (hence imported), they can be removed from the # object's state (the object state only served as a reference holder to # these submodules) if '_cloudpickle_submodules' in state: state.pop('_cloudpickle_submodules') cells = func.__closure__ if cells is not None: for cell, value in zip(cells, state['closure_values']): if value is not _empty_cell_value: cell_set(cell, value) return func def _make_empty_cell(): if False: # trick the compiler into creating an empty cell in our lambda cell = None raise AssertionError('this route should not be executed') return (lambda: cell).__closure__[0] def _make_cell(value=_empty_cell_value): cell = _make_empty_cell() if value is not _empty_cell_value: cell_set(cell, value) return cell def _make_skel_func(code, cell_count, base_globals=None): """ Creates a skeleton function object that contains just the provided code and the correct number of cells in func_closure. All other func attributes (e.g. func_globals) are empty. """ # This function is deprecated and should be removed in cloudpickle 1.7 warnings.warn( "A pickle file created using an old (<=1.4.1) version of cloudpicke " "is currently being loaded. This is not supported by cloudpickle and " "will break in cloudpickle 1.7", category=UserWarning ) # This is backward-compatibility code: for cloudpickle versions between # 0.5.4 and 0.7, base_globals could be a string or None. base_globals # should now always be a dictionary. 
if base_globals is None or isinstance(base_globals, str): base_globals = {} base_globals['__builtins__'] = __builtins__ closure = ( tuple(_make_empty_cell() for _ in range(cell_count)) if cell_count >= 0 else None ) return types.FunctionType(code, base_globals, None, None, closure) def _make_skeleton_class(type_constructor, name, bases, type_kwargs, class_tracker_id, extra): """Build dynamic class with an empty __dict__ to be filled once memoized If class_tracker_id is not None, try to lookup an existing class definition matching that id. If none is found, track a newly reconstructed class definition under that id so that other instances stemming from the same class id will also reuse this class definition. The "extra" variable is meant to be a dict (or None) that can be used for forward compatibility shall the need arise. """ skeleton_class = types.new_class( name, bases, {'metaclass': type_constructor}, lambda ns: ns.update(type_kwargs) ) return _lookup_class_or_track(class_tracker_id, skeleton_class) def _rehydrate_skeleton_class(skeleton_class, class_dict): """Put attributes from `class_dict` back on `skeleton_class`. See CloudPickler.save_dynamic_class for more info. """ registry = None for attrname, attr in class_dict.items(): if attrname == "_abc_impl": registry = attr else: setattr(skeleton_class, attrname, attr) if registry is not None: for subclass in registry: skeleton_class.register(subclass) return skeleton_class def _make_skeleton_enum(bases, name, qualname, members, module, class_tracker_id, extra): """Build dynamic enum with an empty __dict__ to be filled once memoized The creation of the enum class is inspired by the code of EnumMeta._create_. If class_tracker_id is not None, try to lookup an existing enum definition matching that id. If none is found, track a newly reconstructed enum definition under that id so that other instances stemming from the same class id will also reuse this enum definition. The "extra" variable is meant to be a dict (or None) that can be used for forward compatibility shall the need arise. """ # enums always inherit from their base Enum class at the last position in # the list of base classes: enum_base = bases[-1] metacls = enum_base.__class__ classdict = metacls.__prepare__(name, bases) for member_name, member_value in members.items(): classdict[member_name] = member_value enum_class = metacls.__new__(metacls, name, bases, classdict) enum_class.__module__ = module enum_class.__qualname__ = qualname return _lookup_class_or_track(class_tracker_id, enum_class) def _make_typevar(name, bound, constraints, covariant, contravariant, class_tracker_id): tv = typing.TypeVar( name, *constraints, bound=bound, covariant=covariant, contravariant=contravariant ) if class_tracker_id is not None: return _lookup_class_or_track(class_tracker_id, tv) else: # pragma: nocover # Only for Python 3.5.3 compat. return tv def _decompose_typevar(obj): try: class_tracker_id = _get_or_create_tracker_id(obj) except TypeError: # pragma: nocover # TypeVar instances are not weakref-able in Python 3.5.3 class_tracker_id = None return ( obj.__name__, obj.__bound__, obj.__constraints__, obj.__covariant__, obj.__contravariant__, class_tracker_id, ) def _typevar_reduce(obj): # TypeVar instances have no __qualname__ hence we pass the name explicitly. 
module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__) if module_and_name is None: return (_make_typevar, _decompose_typevar(obj)) return (getattr, module_and_name) def _get_bases(typ): if hasattr(typ, '__orig_bases__'): # For generic types (see PEP 560) bases_attr = '__orig_bases__' else: # For regular class objects bases_attr = '__bases__' return getattr(typ, bases_attr) def _make_dict_keys(obj): return dict.fromkeys(obj).keys() def _make_dict_values(obj): return {i: _ for i, _ in enumerate(obj)}.values() def _make_dict_items(obj): return obj.items() numba-0.55.1/numba/cloudpickle/cloudpickle_fast.py000664 000000 000000 00000073701 14174536160 022170 0ustar00rootroot000000 000000 """ New, fast version of the CloudPickler. This new CloudPickler class can now extend the fast C Pickler instead of the previous Python implementation of the Pickler class. Because this functionality is only available for Python versions 3.8+, a lot of backward-compatibility code is also removed. Note that the C Pickler subclassing API is CPython-specific. Therefore, some guards present in cloudpickle.py that were written to handle PyPy specificities are not present in cloudpickle_fast.py """ import _collections_abc import abc import copyreg import io import itertools import logging import sys import struct import types import weakref import typing from enum import Enum from collections import ChainMap from .compat import pickle, Pickler from .cloudpickle import ( _extract_code_globals, _BUILTIN_TYPE_NAMES, DEFAULT_PROTOCOL, _find_imported_submodules, _get_cell_contents, _is_importable, _builtin_type, _get_or_create_tracker_id, _make_skeleton_class, _make_skeleton_enum, _extract_class_dict, dynamic_subimport, subimport, _typevar_reduce, _get_bases, _make_cell, _make_empty_cell, CellType, _is_parametrized_type_hint, PYPY, cell_set, parametrized_type_hint_getinitargs, _create_parametrized_type_hint, builtin_code_type, _make_dict_keys, _make_dict_values, _make_dict_items, _DYNAMIC_CLASS_TRACKER_REUSING, ) if pickle.HIGHEST_PROTOCOL >= 5 and not PYPY: # Shorthands similar to pickle.dump/pickle.dumps def dump(obj, file, protocol=None, buffer_callback=None): """Serialize obj as bytes streamed into file protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to pickle.HIGHEST_PROTOCOL. This setting favors maximum communication speed between processes running the same Python version. Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure compatibility with older versions of Python. """ CloudPickler( file, protocol=protocol, buffer_callback=buffer_callback ).dump(obj) def dumps(obj, protocol=None, buffer_callback=None): """Serialize obj as a string of bytes allocated in memory protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to pickle.HIGHEST_PROTOCOL. This setting favors maximum communication speed between processes running the same Python version. Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure compatibility with older versions of Python. """ with io.BytesIO() as file: cp = CloudPickler( file, protocol=protocol, buffer_callback=buffer_callback ) cp.dump(obj) return file.getvalue() else: # Shorthands similar to pickle.dump/pickle.dumps def dump(obj, file, protocol=None): """Serialize obj as bytes streamed into file protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to pickle.HIGHEST_PROTOCOL. This setting favors maximum communication speed between processes running the same Python version.
Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure compatibility with older versions of Python. """ CloudPickler(file, protocol=protocol).dump(obj) def dumps(obj, protocol=None): """Serialize obj as a string of bytes allocated in memory protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to pickle.HIGHEST_PROTOCOL. This setting favors maximum communication speed between processes running the same Python version. Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure compatibility with older versions of Python. """ with io.BytesIO() as file: cp = CloudPickler(file, protocol=protocol) cp.dump(obj) return file.getvalue() load, loads = pickle.load, pickle.loads # COLLECTION OF OBJECTS __getnewargs__-LIKE METHODS # ------------------------------------------------- def _class_getnewargs(obj): type_kwargs = {} if "__slots__" in obj.__dict__: type_kwargs["__slots__"] = obj.__slots__ __dict__ = obj.__dict__.get('__dict__', None) if isinstance(__dict__, property): type_kwargs['__dict__'] = __dict__ return (type(obj), obj.__name__, _get_bases(obj), type_kwargs, _get_or_create_tracker_id(obj), None) def _enum_getnewargs(obj): members = dict((e.name, e.value) for e in obj) return (obj.__bases__, obj.__name__, obj.__qualname__, members, obj.__module__, _get_or_create_tracker_id(obj), None) # COLLECTION OF OBJECTS RECONSTRUCTORS # ------------------------------------ def _file_reconstructor(retval): return retval # COLLECTION OF OBJECTS STATE GETTERS # ----------------------------------- def _function_getstate(func): # - Put func's dynamic attributes (stored in func.__dict__) in state. These # attributes will be restored at unpickling time using # f.__dict__.update(state) # - Put func's members into slotstate. Such attributes will be restored at # unpickling time by iterating over slotstate and calling setattr(func, # slotname, slotvalue) slotstate = { "__name__": func.__name__, "__qualname__": func.__qualname__, "__annotations__": func.__annotations__, "__kwdefaults__": func.__kwdefaults__, "__defaults__": func.__defaults__, "__module__": func.__module__, "__doc__": func.__doc__, "__closure__": func.__closure__, } f_globals_ref = _extract_code_globals(func.__code__) f_globals = {k: func.__globals__[k] for k in f_globals_ref if k in func.__globals__} closure_values = ( list(map(_get_cell_contents, func.__closure__)) if func.__closure__ is not None else () ) # Extract currently-imported submodules used by func. Storing these modules # in a smoke _cloudpickle_subimports attribute of the object's state will # trigger the side effect of importing these modules at unpickling time # (which is necessary for func to work correctly once depickled) slotstate["_cloudpickle_submodules"] = _find_imported_submodules( func.__code__, itertools.chain(f_globals.values(), closure_values)) slotstate["__globals__"] = f_globals state = func.__dict__ return state, slotstate def _class_getstate(obj): clsdict = _extract_class_dict(obj) clsdict.pop('__weakref__', None) if issubclass(type(obj), abc.ABCMeta): # If obj is an instance of an ABCMeta subclass, dont pickle the # cache/negative caches populated during isinstance/issubclass # checks, but pickle the list of registered subclasses of obj. 
clsdict.pop('_abc_cache', None) clsdict.pop('_abc_negative_cache', None) clsdict.pop('_abc_negative_cache_version', None) registry = clsdict.pop('_abc_registry', None) if registry is None: # in Python3.7+, the abc caches and registered subclasses of a # class are bundled into the single _abc_impl attribute clsdict.pop('_abc_impl', None) (registry, _, _, _) = abc._get_dump(obj) clsdict["_abc_impl"] = [subclass_weakref() for subclass_weakref in registry] else: # In the above if clause, registry is a set of weakrefs -- in # this case, registry is a WeakSet clsdict["_abc_impl"] = [type_ for type_ in registry] if "__slots__" in clsdict: # pickle string length optimization: member descriptors of obj are # created automatically from obj's __slots__ attribute, no need to # save them in obj's state if isinstance(obj.__slots__, str): clsdict.pop(obj.__slots__) else: for k in obj.__slots__: clsdict.pop(k, None) clsdict.pop('__dict__', None) # unpicklable property object return (clsdict, {}) def _enum_getstate(obj): clsdict, slotstate = _class_getstate(obj) members = dict((e.name, e.value) for e in obj) # Cleanup the clsdict that will be passed to _rehydrate_skeleton_class: # Those attributes are already handled by the metaclass. for attrname in ["_generate_next_value_", "_member_names_", "_member_map_", "_member_type_", "_value2member_map_"]: clsdict.pop(attrname, None) for member in members: clsdict.pop(member) # Special handling of Enum subclasses return clsdict, slotstate # COLLECTIONS OF OBJECTS REDUCERS # ------------------------------- # A reducer is a function taking a single argument (obj), and that returns a # tuple with all the necessary data to re-construct obj. Apart from a few # exceptions (list, dict, bytes, int, etc.), a reducer is necessary to # correctly pickle an object. # While many built-in objects (Exceptions objects, instances of the "object" # class, etc), are shipped with their own built-in reducer (invoked using # obj.__reduce__), some do not. The following methods were created to "fill # these holes". 
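# For orientation (editor's note): the simplest reducer in this section is
# the memoryview one below, which reads
#
#     def _memoryview_reduce(obj):
#         return bytes, (obj.tobytes(),)
#
# i.e. at unpickling time an equivalent object is rebuilt by calling
# ``bytes`` on the pickled byte payload. Every reducer here returns such a
# (callable, args) tuple, optionally extended with state information.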
def _code_reduce(obj): """codeobject reducer""" if hasattr(obj, "co_posonlyargcount"): # pragma: no branch args = ( obj.co_argcount, obj.co_posonlyargcount, obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize, obj.co_flags, obj.co_code, obj.co_consts, obj.co_names, obj.co_varnames, obj.co_filename, obj.co_name, obj.co_firstlineno, obj.co_lnotab, obj.co_freevars, obj.co_cellvars ) else: args = ( obj.co_argcount, obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize, obj.co_flags, obj.co_code, obj.co_consts, obj.co_names, obj.co_varnames, obj.co_filename, obj.co_name, obj.co_firstlineno, obj.co_lnotab, obj.co_freevars, obj.co_cellvars ) return types.CodeType, args def _cell_reduce(obj): """Cell (containing values of a function's free variables) reducer""" try: obj.cell_contents except ValueError: # cell is empty return _make_empty_cell, () else: return _make_cell, (obj.cell_contents, ) def _classmethod_reduce(obj): orig_func = obj.__func__ return type(obj), (orig_func,) def _file_reduce(obj): """Save a file""" import io if not hasattr(obj, "name") or not hasattr(obj, "mode"): raise pickle.PicklingError( "Cannot pickle files that do not map to an actual file" ) if obj is sys.stdout: return getattr, (sys, "stdout") if obj is sys.stderr: return getattr, (sys, "stderr") if obj is sys.stdin: raise pickle.PicklingError("Cannot pickle standard input") if obj.closed: raise pickle.PicklingError("Cannot pickle closed files") if hasattr(obj, "isatty") and obj.isatty(): raise pickle.PicklingError( "Cannot pickle files that map to tty objects" ) if "r" not in obj.mode and "+" not in obj.mode: raise pickle.PicklingError( "Cannot pickle files that are not opened for reading: %s" % obj.mode ) name = obj.name retval = io.StringIO() try: # Read the whole file curloc = obj.tell() obj.seek(0) contents = obj.read() obj.seek(curloc) except IOError as e: raise pickle.PicklingError( "Cannot pickle file %s as it cannot be read" % name ) from e retval.write(contents) retval.seek(curloc) retval.name = name return _file_reconstructor, (retval,) def _getset_descriptor_reduce(obj): return getattr, (obj.__objclass__, obj.__name__) def _mappingproxy_reduce(obj): return types.MappingProxyType, (dict(obj),) def _memoryview_reduce(obj): return bytes, (obj.tobytes(),) def _module_reduce(obj): if _is_importable(obj): return subimport, (obj.__name__,) else: obj.__dict__.pop('__builtins__', None) return dynamic_subimport, (obj.__name__, vars(obj)) def _method_reduce(obj): return (types.MethodType, (obj.__func__, obj.__self__)) def _logger_reduce(obj): return logging.getLogger, (obj.name,) def _root_logger_reduce(obj): return logging.getLogger, () def _property_reduce(obj): return property, (obj.fget, obj.fset, obj.fdel, obj.__doc__) def _weakset_reduce(obj): return weakref.WeakSet, (list(obj),) def _dynamic_class_reduce(obj): """ Save a class that can't be stored as module global. This method is used to serialize classes that are defined inside functions, or that otherwise can't be serialized as attribute lookups from global modules. 
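A typical case (editor's illustration): a class created inside a function
body, e.g.

    def make():
        class C:
            pass
        return C

cannot be pickled by reference to a module attribute and is therefore
serialized by value through this reducer.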
""" if Enum is not None and issubclass(obj, Enum): return ( _make_skeleton_enum, _enum_getnewargs(obj), _enum_getstate(obj), None, None, _class_setstate ) else: return ( _make_skeleton_class, _class_getnewargs(obj), _class_getstate(obj), None, None, _class_setstate ) def _class_reduce(obj): """Select the reducer depending on the dynamic nature of the class obj""" if obj is type(None): # noqa return type, (None,) elif obj is type(Ellipsis): return type, (Ellipsis,) elif obj is type(NotImplemented): return type, (NotImplemented,) elif obj in _BUILTIN_TYPE_NAMES: return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],) elif not _is_importable(obj): return _dynamic_class_reduce(obj) return NotImplemented def _dict_keys_reduce(obj): # Safer not to ship the full dict as sending the rest might # be unintended and could potentially cause leaking of # sensitive information return _make_dict_keys, (list(obj), ) def _dict_values_reduce(obj): # Safer not to ship the full dict as sending the rest might # be unintended and could potentially cause leaking of # sensitive information return _make_dict_values, (list(obj), ) def _dict_items_reduce(obj): return _make_dict_items, (dict(obj), ) # COLLECTIONS OF OBJECTS STATE SETTERS # ------------------------------------ # state setters are called at unpickling time, once the object is created and # it has to be updated to how it was at unpickling time. def _function_setstate(obj, state): """Update the state of a dynaamic function. As __closure__ and __globals__ are readonly attributes of a function, we cannot rely on the native setstate routine of pickle.load_build, that calls setattr on items of the slotstate. Instead, we have to modify them inplace. """ state, slotstate = state obj.__dict__.update(state) obj_globals = slotstate.pop("__globals__") obj_closure = slotstate.pop("__closure__") # _cloudpickle_subimports is a set of submodules that must be loaded for # the pickled function to work correctly at unpickling time. Now that these # submodules are depickled (hence imported), they can be removed from the # object's state (the object state only served as a reference holder to # these submodules) slotstate.pop("_cloudpickle_submodules") obj.__globals__.update(obj_globals) obj.__globals__["__builtins__"] = __builtins__ if obj_closure is not None: for i, cell in enumerate(obj_closure): try: value = cell.cell_contents except ValueError: # cell is empty continue cell_set(obj.__closure__[i], value) for k, v in slotstate.items(): setattr(obj, k, v) def _class_setstate(obj, state): # Check if class is being reused and needs bypass setstate logic. 
if obj in _DYNAMIC_CLASS_TRACKER_REUSING: return obj state, slotstate = state registry = None for attrname, attr in state.items(): if attrname == "_abc_impl": registry = attr else: setattr(obj, attrname, attr) if registry is not None: for subclass in registry: obj.register(subclass) return obj class CloudPickler(Pickler): # set of reducers defined and used by cloudpickle (private) _dispatch_table = {} _dispatch_table[classmethod] = _classmethod_reduce _dispatch_table[io.TextIOWrapper] = _file_reduce _dispatch_table[logging.Logger] = _logger_reduce _dispatch_table[logging.RootLogger] = _root_logger_reduce _dispatch_table[memoryview] = _memoryview_reduce _dispatch_table[property] = _property_reduce _dispatch_table[staticmethod] = _classmethod_reduce _dispatch_table[CellType] = _cell_reduce _dispatch_table[types.CodeType] = _code_reduce _dispatch_table[types.GetSetDescriptorType] = _getset_descriptor_reduce _dispatch_table[types.ModuleType] = _module_reduce _dispatch_table[types.MethodType] = _method_reduce _dispatch_table[types.MappingProxyType] = _mappingproxy_reduce _dispatch_table[weakref.WeakSet] = _weakset_reduce _dispatch_table[typing.TypeVar] = _typevar_reduce _dispatch_table[_collections_abc.dict_keys] = _dict_keys_reduce _dispatch_table[_collections_abc.dict_values] = _dict_values_reduce _dispatch_table[_collections_abc.dict_items] = _dict_items_reduce dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table) # function reducers are defined as instance methods of CloudPickler # objects, as they rely on a CloudPickler attribute (globals_ref) def _dynamic_function_reduce(self, func): """Reduce a function that is not pickleable via attribute lookup.""" newargs = self._function_getnewargs(func) state = _function_getstate(func) return (types.FunctionType, newargs, state, None, None, _function_setstate) def _function_reduce(self, obj): """Reducer for function objects. If obj is a top-level attribute of a file-backed module, this reducer returns NotImplemented, making the CloudPickler fall back to traditional _pickle.Pickler routines to save obj. Otherwise, it reduces obj using a custom cloudpickle reducer designed specifically to handle dynamic functions. As opposed to cloudpickle.py, there is no special handling for builtin pypy functions because cloudpickle_fast is CPython-specific. """ if _is_importable(obj): return NotImplemented else: return self._dynamic_function_reduce(obj) def _function_getnewargs(self, func): code = func.__code__ # base_globals represents the future global namespace of func at # unpickling time. Looking it up and storing it in # CloudPickler.globals_ref allows functions sharing the same globals # at pickling time to also share them once unpickled, on one condition: # since globals_ref is an attribute of a CloudPickler instance, and # a new CloudPickler is created each time pickle.dump or # pickle.dumps is called, functions also need to be saved within the # same invocation of cloudpickle.dump/cloudpickle.dumps (for example: # cloudpickle.dumps([f1, f2])). There is no such limitation when using # CloudPickler.dump, as long as the multiple invocations are bound to # the same CloudPickler. base_globals = self.globals_ref.setdefault(id(func.__globals__), {}) if base_globals == {}: # Add module attributes used to resolve relative import # instructions inside func.
for k in ["__package__", "__name__", "__path__", "__file__"]: if k in func.__globals__: base_globals[k] = func.__globals__[k] # Do not bind the free variables before the function is created to # avoid infinite recursion. if func.__closure__ is None: closure = None else: closure = tuple( _make_empty_cell() for _ in range(len(code.co_freevars))) return code, base_globals, None, None, closure def dump(self, obj): try: return Pickler.dump(self, obj) except RuntimeError as e: if "recursion" in e.args[0]: msg = ( "Could not pickle object as excessively deep recursion " "required." ) raise pickle.PicklingError(msg) from e else: raise if pickle.HIGHEST_PROTOCOL >= 5: # `CloudPickler.dispatch` is only left for backward compatibility - note # that when using protocol 5, `CloudPickler.dispatch` is not an # extension of `Pickler.dispatch` dictionary, because CloudPickler # subclasses the C-implemented Pickler, which does not expose a # `dispatch` attribute. Earlier versions of the protocol 5 CloudPickler # used `CloudPickler.dispatch` as a class-level attribute storing all # reducers implemented by cloudpickle, but the attribute name was not a # great choice given the meaning of `CloudPickler.dispatch` when # `CloudPickler` extends the pure-python pickler. dispatch = dispatch_table # Implementation of the reducer_override callback, in order to # efficiently serialize dynamic functions and classes by subclassing # the C-implemented Pickler. # TODO: decorrelate reducer_override (which is tied to CPython's # implementation - would it make sense to backport it to pypy?) and # pickle's protocol 5 (which is implementation agnostic). Currently, the # availability of both notions coincides on CPython's pickle and the # pickle5 backport, but it may not be the case anymore when pypy # implements protocol 5. def __init__(self, file, protocol=None, buffer_callback=None): if protocol is None: protocol = DEFAULT_PROTOCOL Pickler.__init__( self, file, protocol=protocol, buffer_callback=buffer_callback ) # map functions __globals__ attribute ids, to ensure that functions # sharing the same global namespace at pickling time also share # their global namespace at unpickling time. self.globals_ref = {} self.proto = int(protocol) def reducer_override(self, obj): """Type-agnostic reducing callback for functions and classes. For performance reasons, subclasses of the C _pickle.Pickler class cannot register custom reducers for functions and classes in the dispatch_table. Reducers for such types must instead be implemented in the special reducer_override method. Note that this method will be called for any object except a few builtin-types (int, lists, dicts etc.), which differs from reducers in the Pickler's dispatch_table, each of them being invoked for objects of a specific type only. This property comes in handy for classes: although most classes are instances of the ``type`` metaclass, some of them can be instances of other custom metaclasses (such as enum.EnumMeta for example). In particular, the metaclass will likely not be known in advance, and thus cannot be special-cased using an entry in the dispatch_table. reducer_override, among other things, allows us to register a reducer that will be called for any class, independently of its type. Notes: * reducer_override has priority over dispatch_table-registered reducers. * reducer_override can be used to fix other limitations of cloudpickle for other types that suffered from type-specific reducers, such as Exceptions.
See https://github.com/cloudpipe/cloudpickle/issues/248 """ if sys.version_info[:2] < (3, 7) and _is_parametrized_type_hint(obj): # noqa # pragma: no branch return ( _create_parametrized_type_hint, parametrized_type_hint_getinitargs(obj) ) t = type(obj) try: is_anyclass = issubclass(t, type) except TypeError: # t is not a class (old Boost; see SF #502085) is_anyclass = False if is_anyclass: return _class_reduce(obj) elif isinstance(obj, types.FunctionType): return self._function_reduce(obj) else: # fallback to save_global, including the Pickler's # dispatch_table return NotImplemented else: # When reducer_override is not available, hack the pure-Python # Pickler's types.FunctionType and type savers. Note: the type saver # must override Pickler.save_global, because pickle.py contains a # hard-coded call to save_global when pickling meta-classes. dispatch = Pickler.dispatch.copy() def __init__(self, file, protocol=None): if protocol is None: protocol = DEFAULT_PROTOCOL Pickler.__init__(self, file, protocol=protocol) # map functions __globals__ attribute ids, to ensure that functions # sharing the same global namespace at pickling time also share # their global namespace at unpickling time. self.globals_ref = {} assert hasattr(self, 'proto') def _save_reduce_pickle5(self, func, args, state=None, listitems=None, dictitems=None, state_setter=None, obj=None): save = self.save write = self.write self.save_reduce( func, args, state=None, listitems=listitems, dictitems=dictitems, obj=obj ) # backport of the Python 3.8 state_setter pickle operations save(state_setter) save(obj) # simple BINGET opcode as obj is already memoized. save(state) write(pickle.TUPLE2) # Trigger a state_setter(obj, state) function call. write(pickle.REDUCE) # The purpose of state_setter is to carry out an # in-place modification of obj. We do not care about what the # method might return, so its output is eventually removed from # the stack. write(pickle.POP) def save_global(self, obj, name=None, pack=struct.pack): """ Save a "global". The name of this method is somewhat misleading: all types get dispatched here. """ if obj is type(None): # noqa return self.save_reduce(type, (None,), obj=obj) elif obj is type(Ellipsis): return self.save_reduce(type, (Ellipsis,), obj=obj) elif obj is type(NotImplemented): return self.save_reduce(type, (NotImplemented,), obj=obj) elif obj in _BUILTIN_TYPE_NAMES: return self.save_reduce( _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj) if sys.version_info[:2] < (3, 7) and _is_parametrized_type_hint(obj): # noqa # pragma: no branch # Parametrized typing constructs in Python < 3.7 are not # compatible with type checks and ``isinstance`` semantics. For # this reason, it is easier to detect them using a # duck-typing-based check (``_is_parametrized_type_hint``) than # to populate the Pickler's dispatch with type-specific savers. self.save_reduce( _create_parametrized_type_hint, parametrized_type_hint_getinitargs(obj), obj=obj ) elif name is not None: Pickler.save_global(self, obj, name=name) elif not _is_importable(obj, name=name): self._save_reduce_pickle5(*_dynamic_class_reduce(obj), obj=obj) else: Pickler.save_global(self, obj, name=name) dispatch[type] = save_global def save_function(self, obj, name=None): """ Registered with the dispatch to handle all function types. Determines what kind of function obj is (e.g. lambda, defined at interactive prompt, etc) and handles the pickling appropriately.
""" if _is_importable(obj, name=name): return Pickler.save_global(self, obj, name=name) elif PYPY and isinstance(obj.__code__, builtin_code_type): return self.save_pypy_builtin_func(obj) else: return self._save_reduce_pickle5( *self._dynamic_function_reduce(obj), obj=obj ) def save_pypy_builtin_func(self, obj): """Save pypy equivalent of builtin functions. PyPy does not have the concept of builtin-functions. Instead, builtin-functions are simple function instances, but with a builtin-code attribute. Most of the time, builtin functions should be pickled by attribute. But PyPy has flaky support for __qualname__, so some builtin functions such as float.__new__ will be classified as dynamic. For this reason only, we created this special routine. Because builtin-functions are not expected to have closure or globals, there is no additional hack (compared the one already implemented in pickle) to protect ourselves from reference cycles. A simple (reconstructor, newargs, obj.__dict__) tuple is save_reduced. Note also that PyPy improved their support for __qualname__ in v3.6, so this routing should be removed when cloudpickle supports only PyPy 3.6 and later. """ rv = (types.FunctionType, (obj.__code__, {}, obj.__name__, obj.__defaults__, obj.__closure__), obj.__dict__) self.save_reduce(*rv, obj=obj) dispatch[types.FunctionType] = save_function numba-0.55.1/numba/cloudpickle/compat.py000664 000000 000000 00000000542 14174536160 020131 0ustar00rootroot000000 000000 import sys if sys.version_info < (3, 8): try: import pickle5 as pickle # noqa: F401 from pickle5 import Pickler # noqa: F401 except ImportError: import pickle # noqa: F401 from pickle import _Pickler as Pickler # noqa: F401 else: import pickle # noqa: F401 from _pickle import Pickler # noqa: F401 numba-0.55.1/numba/core/000775 000000 000000 00000000000 14174536160 014725 5ustar00rootroot000000 000000 numba-0.55.1/numba/core/__init__.py000664 000000 000000 00000000000 14174536160 017024 0ustar00rootroot000000 000000 numba-0.55.1/numba/core/analysis.py000664 000000 000000 00000064410 14174536160 017127 0ustar00rootroot000000 000000 """ Utils for IR analysis """ import operator from functools import reduce from collections import namedtuple, defaultdict from .controlflow import CFGraph from numba.core import types, errors, ir, consts from numba.misc import special # # Analysis related to variable lifetime # _use_defs_result = namedtuple('use_defs_result', 'usemap,defmap') # other packages that define new nodes add calls for finding defs # format: {type:function} ir_extension_usedefs = {} def compute_use_defs(blocks): """ Find variable use/def per block. 
""" var_use_map = {} # { block offset -> set of vars } var_def_map = {} # { block offset -> set of vars } for offset, ir_block in blocks.items(): var_use_map[offset] = use_set = set() var_def_map[offset] = def_set = set() for stmt in ir_block.body: if type(stmt) in ir_extension_usedefs: func = ir_extension_usedefs[type(stmt)] func(stmt, use_set, def_set) continue if isinstance(stmt, ir.Assign): if isinstance(stmt.value, ir.Inst): rhs_set = set(var.name for var in stmt.value.list_vars()) elif isinstance(stmt.value, ir.Var): rhs_set = set([stmt.value.name]) elif isinstance(stmt.value, (ir.Arg, ir.Const, ir.Global, ir.FreeVar)): rhs_set = () else: raise AssertionError('unreachable', type(stmt.value)) # If lhs not in rhs of the assignment if stmt.target.name not in rhs_set: def_set.add(stmt.target.name) for var in stmt.list_vars(): # do not include locally defined vars to use-map if var.name not in def_set: use_set.add(var.name) return _use_defs_result(usemap=var_use_map, defmap=var_def_map) def compute_live_map(cfg, blocks, var_use_map, var_def_map): """ Find variables that must be alive at the ENTRY of each block. We use a simple fix-point algorithm that iterates until the set of live variables is unchanged for each block. """ def fix_point_progress(dct): """Helper function to determine if a fix-point has been reached. """ return tuple(len(v) for v in dct.values()) def fix_point(fn, dct): """Helper function to run fix-point algorithm. """ old_point = None new_point = fix_point_progress(dct) while old_point != new_point: fn(dct) old_point = new_point new_point = fix_point_progress(dct) def def_reach(dct): """Find all variable definition reachable at the entry of a block """ for offset in var_def_map: used_or_defined = var_def_map[offset] | var_use_map[offset] dct[offset] |= used_or_defined # Propagate to outgoing nodes for out_blk, _ in cfg.successors(offset): dct[out_blk] |= dct[offset] def liveness(dct): """Find live variables. Push var usage backward. """ for offset in dct: # Live vars here live_vars = dct[offset] for inc_blk, _data in cfg.predecessors(offset): # Reachable at the predecessor reachable = live_vars & def_reach_map[inc_blk] # But not defined in the predecessor dct[inc_blk] |= reachable - var_def_map[inc_blk] live_map = {} for offset in blocks.keys(): live_map[offset] = set(var_use_map[offset]) def_reach_map = defaultdict(set) fix_point(def_reach, def_reach_map) fix_point(liveness, live_map) return live_map _dead_maps_result = namedtuple('dead_maps_result', 'internal,escaping,combined') def compute_dead_maps(cfg, blocks, live_map, var_def_map): """ Compute the end-of-live information for variables. `live_map` contains a mapping of block offset to all the living variables at the ENTRY of the block. 
""" # The following three dictionaries will be # { block offset -> set of variables to delete } # all vars that should be deleted at the start of the successors escaping_dead_map = defaultdict(set) # all vars that should be deleted within this block internal_dead_map = defaultdict(set) # all vars that should be deleted after the function exit exit_dead_map = defaultdict(set) for offset, ir_block in blocks.items(): # live vars WITHIN the block will include all the locally # defined variables cur_live_set = live_map[offset] | var_def_map[offset] # vars alive in the outgoing blocks outgoing_live_map = dict((out_blk, live_map[out_blk]) for out_blk, _data in cfg.successors(offset)) # vars to keep alive for the terminator terminator_liveset = set(v.name for v in ir_block.terminator.list_vars()) # vars to keep alive in the successors combined_liveset = reduce(operator.or_, outgoing_live_map.values(), set()) # include variables used in terminator combined_liveset |= terminator_liveset # vars that are dead within the block because they are not # propagated to any outgoing blocks internal_set = cur_live_set - combined_liveset internal_dead_map[offset] = internal_set # vars that escape this block escaping_live_set = cur_live_set - internal_set for out_blk, new_live_set in outgoing_live_map.items(): # successor should delete the unused escaped vars new_live_set = new_live_set | var_def_map[out_blk] escaping_dead_map[out_blk] |= escaping_live_set - new_live_set # if no outgoing blocks if not outgoing_live_map: # insert var used by terminator exit_dead_map[offset] = terminator_liveset # Verify that the dead maps cover all live variables all_vars = reduce(operator.or_, live_map.values(), set()) internal_dead_vars = reduce(operator.or_, internal_dead_map.values(), set()) escaping_dead_vars = reduce(operator.or_, escaping_dead_map.values(), set()) exit_dead_vars = reduce(operator.or_, exit_dead_map.values(), set()) dead_vars = (internal_dead_vars | escaping_dead_vars | exit_dead_vars) missing_vars = all_vars - dead_vars if missing_vars: # There are no exit points if not cfg.exit_points(): # We won't be able to verify this pass else: msg = 'liveness info missing for vars: {0}'.format(missing_vars) raise RuntimeError(msg) combined = dict((k, internal_dead_map[k] | escaping_dead_map[k]) for k in blocks) return _dead_maps_result(internal=internal_dead_map, escaping=escaping_dead_map, combined=combined) def compute_live_variables(cfg, blocks, var_def_map, var_dead_map): """ Compute the live variables at the beginning of each block and at each yield point. The ``var_def_map`` and ``var_dead_map`` indicates the variable defined and deleted at each block, respectively. """ # live var at the entry per block block_entry_vars = defaultdict(set) def fix_point_progress(): return tuple(map(len, block_entry_vars.values())) old_point = None new_point = fix_point_progress() # Propagate defined variables and still live the successors. # (note the entry block automatically gets an empty set) # Note: This is finding the actual available variables at the entry # of each block. The algorithm in compute_live_map() is finding # the variable that must be available at the entry of each block. # This is top-down in the dataflow. The other one is bottom-up. while old_point != new_point: # We iterate until the result stabilizes. This is necessary # because of loops in the graphself. 
for offset in blocks: # vars available + variable defined avail = block_entry_vars[offset] | var_def_map[offset] # subtract variables deleted avail -= var_dead_map[offset] # add ``avail`` to each successors for succ, _data in cfg.successors(offset): block_entry_vars[succ] |= avail old_point = new_point new_point = fix_point_progress() return block_entry_vars # # Analysis related to controlflow # def compute_cfg_from_blocks(blocks): cfg = CFGraph() for k in blocks: cfg.add_node(k) for k, b in blocks.items(): term = b.terminator for target in term.get_targets(): cfg.add_edge(k, target) cfg.set_entry_point(min(blocks)) cfg.process() return cfg def find_top_level_loops(cfg): """ A generator that yields toplevel loops given a control-flow-graph """ blocks_in_loop = set() # get loop bodies for loop in cfg.loops().values(): insiders = set(loop.body) | set(loop.entries) | set(loop.exits) insiders.discard(loop.header) blocks_in_loop |= insiders # find loop that is not part of other loops for loop in cfg.loops().values(): if loop.header not in blocks_in_loop: yield _fix_loop_exit(cfg, loop) def _fix_loop_exit(cfg, loop): """ Fixes loop.exits for Py3.8 bytecode CFG changes. This is to handle `break` inside loops. """ # Computes the common postdoms of exit nodes postdoms = cfg.post_dominators() exits = reduce( operator.and_, [postdoms[b] for b in loop.exits], loop.exits, ) if exits: # Put the non-common-exits as body nodes body = loop.body | loop.exits - exits return loop._replace(exits=exits, body=body) else: return loop # Used to describe a nullified condition in dead branch pruning nullified = namedtuple('nullified', 'condition, taken_br, rewrite_stmt') # Functions to manipulate IR def dead_branch_prune(func_ir, called_args): """ Removes dead branches based on constant inference from function args. This directly mutates the IR. 
func_ir is the IR called_args are the actual arguments with which the function is called """ from numba.core.ir_utils import (get_definition, guard, find_const, GuardException) DEBUG = 0 def find_branches(func_ir): # find *all* branches branches = [] for blk in func_ir.blocks.values(): branch_or_jump = blk.body[-1] if isinstance(branch_or_jump, ir.Branch): branch = branch_or_jump pred = guard(get_definition, func_ir, branch.cond.name) if pred is not None and pred.op == "call": function = guard(get_definition, func_ir, pred.func) if (function is not None and isinstance(function, ir.Global) and function.value is bool): condition = guard(get_definition, func_ir, pred.args[0]) if condition is not None: branches.append((branch, condition, blk)) return branches def do_prune(take_truebr, blk): keep = branch.truebr if take_truebr else branch.falsebr # replace the branch with a direct jump jmp = ir.Jump(keep, loc=branch.loc) blk.body[-1] = jmp return 1 if keep == branch.truebr else 0 def prune_by_type(branch, condition, blk, *conds): # this prunes a given branch and fixes up the IR # at least one needs to be a NoneType lhs_cond, rhs_cond = conds lhs_none = isinstance(lhs_cond, types.NoneType) rhs_none = isinstance(rhs_cond, types.NoneType) if lhs_none or rhs_none: try: take_truebr = condition.fn(lhs_cond, rhs_cond) except Exception: return False, None if DEBUG > 0: kill = branch.falsebr if take_truebr else branch.truebr print("Pruning %s" % kill, branch, lhs_cond, rhs_cond, condition.fn) taken = do_prune(take_truebr, blk) return True, taken return False, None def prune_by_value(branch, condition, blk, *conds): lhs_cond, rhs_cond = conds try: take_truebr = condition.fn(lhs_cond, rhs_cond) except Exception: return False, None if DEBUG > 0: kill = branch.falsebr if take_truebr else branch.truebr print("Pruning %s" % kill, branch, lhs_cond, rhs_cond, condition.fn) taken = do_prune(take_truebr, blk) return True, taken def prune_by_predicate(branch, pred, blk): try: # Just to prevent accidents, whilst already guarded, ensure this # is an ir.Const if not isinstance(pred, (ir.Const, ir.FreeVar, ir.Global)): raise TypeError('Expected constant Numba IR node') take_truebr = bool(pred.value) except TypeError: return False, None if DEBUG > 0: kill = branch.falsebr if take_truebr else branch.truebr print("Pruning %s" % kill, branch, pred) taken = do_prune(take_truebr, blk) return True, taken class Unknown(object): pass def resolve_input_arg_const(input_arg_idx): """ Resolves an input arg to a constant (if possible) """ input_arg_ty = called_args[input_arg_idx] # comparing to None? if isinstance(input_arg_ty, types.NoneType): return input_arg_ty # is it a kwarg default if isinstance(input_arg_ty, types.Omitted): val = input_arg_ty.value if isinstance(val, types.NoneType): return val elif val is None: return types.NoneType('none') # literal type, return the type itself so comparisons like `x == None` # still work as e.g. 
x = types.int64 will never be None/NoneType so # the branch can still be pruned return getattr(input_arg_ty, 'literal_type', Unknown()) if DEBUG > 1: print("before".center(80, '-')) print(func_ir.dump()) phi2lbl = dict() phi2asgn = dict() for lbl, blk in func_ir.blocks.items(): for stmt in blk.body: if isinstance(stmt, ir.Assign): if isinstance(stmt.value, ir.Expr) and stmt.value.op == 'phi': phi2lbl[stmt.value] = lbl phi2asgn[stmt.value] = stmt # This looks for branches where: # at least one arg of the condition is in input args and const # at least one arg of the condition is a const # if the condition is met it will replace the branch with a jump branch_info = find_branches(func_ir) # stores conditions that have no impact post prune nullified_conditions = [] for branch, condition, blk in branch_info: const_conds = [] if isinstance(condition, ir.Expr) and condition.op == 'binop': prune = prune_by_value for arg in [condition.lhs, condition.rhs]: resolved_const = Unknown() arg_def = guard(get_definition, func_ir, arg) if isinstance(arg_def, ir.Arg): # it's an e.g. literal argument to the function resolved_const = resolve_input_arg_const(arg_def.index) prune = prune_by_type else: # it's some const argument to the function, cannot use guard # here as the const itself may be None try: resolved_const = find_const(func_ir, arg) if resolved_const is None: resolved_const = types.NoneType('none') except GuardException: pass if not isinstance(resolved_const, Unknown): const_conds.append(resolved_const) # lhs/rhs are consts if len(const_conds) == 2: # prune the branch, switch the branch for an unconditional jump prune_stat, taken = prune(branch, condition, blk, *const_conds) if prune_stat: # add the condition to the list of nullified conditions nullified_conditions.append(nullified(condition, taken, True)) else: # see if this is a branch on a constant value predicate resolved_const = Unknown() try: pred_call = get_definition(func_ir, branch.cond) resolved_const = find_const(func_ir, pred_call.args[0]) if resolved_const is None: resolved_const = types.NoneType('none') except GuardException: pass if not isinstance(resolved_const, Unknown): prune_stat, taken = prune_by_predicate(branch, condition, blk) if prune_stat: # add the condition to the list of nullified conditions nullified_conditions.append(nullified(condition, taken, False)) # 'ERE BE DRAGONS... # It is the evaluation of the condition expression that often trips up type # inference, so ideally it would be removed as it is effectively rendered # dead by the unconditional jump if a branch was pruned. However, there may # be references to the condition that exist in multiple places (e.g. dels) # and we cannot run DCE here as typing has not taken place to give enough # information to run DCE safely. Upshot of all this is the condition gets # rewritten below into a benign const that typing will be happy with and DCE # can remove it and its reference post typing when it is safe to do so # (if desired). It is required that the const is assigned a value that # indicates the branch taken as its mutated value would be read in the case # of object mode fall back in place of the condition itself. For # completeness the func_ir._definitions and ._consts are also updated to # make the IR state self-consistent.
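# As an illustrative example (not from the numba sources): given
#
#     def foo(x):
#         if x is None:
#             return 0
#         return x + 1
#
# compiled with `x` typed as a literal integer (e.g. types.IntegerLiteral(5)),
# the `x is None` comparison has exactly one NoneType operand, so
# prune_by_type evaluates it to a constant False, do_prune swaps the branch
# for a jump to the false target, and the `return 0` block becomes
# unreachable and is deleted further down.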
deadcond = [x.condition for x in nullified_conditions] for _, cond, blk in branch_info: if cond in deadcond: for x in blk.body: if isinstance(x, ir.Assign) and x.value is cond: # rewrite the condition as a true/false bit nullified_info = nullified_conditions[deadcond.index(cond)] # only do a rewrite of conditions, predicates need to retain # their value as they may be used later. if nullified_info.rewrite_stmt: branch_bit = nullified_info.taken_br x.value = ir.Const(branch_bit, loc=x.loc) # update the specific definition to the new const defns = func_ir._definitions[x.target.name] repl_idx = defns.index(cond) defns[repl_idx] = x.value # Check post dominators of dead nodes in the original CFG for use of # vars that are being removed in the dead blocks which might be referred to # by phi nodes. # # Multiple things to fix up: # # 1. Cases like: # # A A # |\ | # | B --> B # |/ | # C C # # i.e. the branch is dead but the block is still alive. In this case CFG # simplification will fuse A-B-C and any phi in C can be updated as a # direct assignment from the last assigned version in the dominators of the # fused block. # # 2. Cases like: # # A A # / \ | # B C --> B # \ / | # D D # # i.e. the block C is dead. In this case the phis in D need updating to # reflect the collapse of the phi condition. This should result in a direct # assignment of the surviving version in B to the LHS of the phi in D. new_cfg = compute_cfg_from_blocks(func_ir.blocks) dead_blocks = new_cfg.dead_nodes() # for all phis that are still in live blocks. for phi, lbl in phi2lbl.items(): if lbl in dead_blocks: continue new_incoming = [x[0] for x in new_cfg.predecessors(lbl)] if set(new_incoming) != set(phi.incoming_blocks): # Something has changed in the CFG... if len(new_incoming) == 1: # There's now just one incoming. Replace the PHI node by a # direct assignment idx = phi.incoming_blocks.index(new_incoming[0]) phi2asgn[phi].value = phi.incoming_values[idx] else: # There's still more than one incoming, so look through the # incoming edges and remove the dead ones ic_val_tmp = [] ic_blk_tmp = [] for ic_val, ic_blk in zip(phi.incoming_values, phi.incoming_blocks): if ic_blk in dead_blocks: continue else: ic_val_tmp.append(ic_val) ic_blk_tmp.append(ic_blk) phi.incoming_values.clear() phi.incoming_values.extend(ic_val_tmp) phi.incoming_blocks.clear() phi.incoming_blocks.extend(ic_blk_tmp) # Remove dead blocks, this is safe as it relies on the CFG only. for dead in dead_blocks: del func_ir.blocks[dead] # if conditions were nullified then consts were rewritten, update if nullified_conditions: func_ir._consts = consts.ConstantInference(func_ir) if DEBUG > 1: print("after".center(80, '-')) print(func_ir.dump()) def rewrite_semantic_constants(func_ir, called_args): """ This rewrites values known to be constant by their semantics as ir.Const nodes; this gives branch pruning the best chance possible of killing branches. An example might be rewriting len(tuple) as the literal length.
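For example (illustrative): with ``def foo(t): return len(t)`` compiled for a 3-element tuple argument, the ``len(t)`` call is rewritten to ``ir.Const(3)``; similarly, ``a.ndim`` for a 2d array argument becomes ``ir.Const(2)``.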
func_ir is the IR called_args are the actual arguments with which the function is called """ DEBUG = 0 if DEBUG > 1: print(("rewrite_semantic_constants: " + func_ir.func_id.func_name).center(80, '-')) print("before".center(80, '*')) func_ir.dump() def rewrite_statement(func_ir, stmt, new_val): """ Rewrites the stmt as a ir.Const new_val and fixes up the entries in func_ir._definitions """ stmt.value = ir.Const(new_val, stmt.loc) defns = func_ir._definitions[stmt.target.name] repl_idx = defns.index(val) defns[repl_idx] = stmt.value def rewrite_array_ndim(val, func_ir, called_args): # rewrite Array.ndim as const(ndim) if getattr(val, 'op', None) == 'getattr': if val.attr == 'ndim': arg_def = guard(get_definition, func_ir, val.value) if isinstance(arg_def, ir.Arg): argty = called_args[arg_def.index] if isinstance(argty, types.Array): rewrite_statement(func_ir, stmt, argty.ndim) def rewrite_tuple_len(val, func_ir, called_args): # rewrite len(tuple) as const(len(tuple)) if getattr(val, 'op', None) == 'call': func = guard(get_definition, func_ir, val.func) if (func is not None and isinstance(func, ir.Global) and getattr(func, 'value', None) is len): (arg,) = val.args arg_def = guard(get_definition, func_ir, arg) if isinstance(arg_def, ir.Arg): argty = called_args[arg_def.index] if isinstance(argty, types.BaseTuple): rewrite_statement(func_ir, stmt, argty.count) from numba.core.ir_utils import get_definition, guard for blk in func_ir.blocks.values(): for stmt in blk.body: if isinstance(stmt, ir.Assign): val = stmt.value if isinstance(val, ir.Expr): rewrite_array_ndim(val, func_ir, called_args) rewrite_tuple_len(val, func_ir, called_args) if DEBUG > 1: print("after".center(80, '*')) func_ir.dump() print('-' * 80) def find_literally_calls(func_ir, argtypes): """An analysis to find `numba.literally` call inside the given IR. When an unsatisfied literal typing request is found, a `ForceLiteralArg` exception is raised. Parameters ---------- func_ir : numba.ir.FunctionIR argtypes : Sequence[numba.types.Type] The argument types. """ from numba.core import ir_utils marked_args = set() first_loc = {} # Scan for literally calls for blk in func_ir.blocks.values(): for assign in blk.find_exprs(op='call'): var = ir_utils.guard(ir_utils.get_definition, func_ir, assign.func) if isinstance(var, (ir.Global, ir.FreeVar)): fnobj = var.value else: fnobj = ir_utils.guard(ir_utils.resolve_func_from_module, func_ir, var) if fnobj is special.literally: # Found [arg] = assign.args defarg = func_ir.get_definition(arg) if isinstance(defarg, ir.Arg): argindex = defarg.index marked_args.add(argindex) first_loc.setdefault(argindex, assign.loc) # Signal the dispatcher to force literal typing for pos in marked_args: query_arg = argtypes[pos] do_raise = (isinstance(query_arg, types.InitialValue) and query_arg.initial_value is None) if do_raise: loc = first_loc[pos] raise errors.ForceLiteralArg(marked_args, loc=loc) if not isinstance(query_arg, (types.Literal, types.InitialValue)): loc = first_loc[pos] raise errors.ForceLiteralArg(marked_args, loc=loc) numba-0.55.1/numba/core/annotations/000775 000000 000000 00000000000 14174536160 017262 5ustar00rootroot000000 000000 numba-0.55.1/numba/core/annotations/__init__.py000664 000000 000000 00000000000 14174536160 021361 0ustar00rootroot000000 000000 numba-0.55.1/numba/core/annotations/pretty_annotate.py000664 000000 000000 00000022504 14174536160 023057 0ustar00rootroot000000 000000 """ This module implements code highlighting of numba function annotations. 
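The entry point is the ``Annotate`` class below, which renders the annotation as ANSI text at a terminal prompt (via ``__repr__``) and as HTML in Jupyter/IPython (via ``_repr_html_``).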
""" from warnings import warn warn("The pretty_annotate functionality is experimental and might change API", FutureWarning) def hllines(code, style): try: from pygments import highlight from pygments.lexers import PythonLexer from pygments.formatters import HtmlFormatter except ImportError: raise ImportError("please install the 'pygments' package") pylex = PythonLexer() "Given a code string, return a list of html-highlighted lines" hf = HtmlFormatter(noclasses=True, style=style, nowrap=True) res = highlight(code, pylex, hf) return res.splitlines() def htlines(code, style): try: from pygments import highlight from pygments.lexers import PythonLexer # TerminalFormatter does not support themes, Terminal256 should, # but seem to not work. from pygments.formatters import TerminalFormatter except ImportError: raise ImportError("please install the 'pygments' package") pylex = PythonLexer() "Given a code string, return a list of ANSI-highlighted lines" hf = TerminalFormatter(style=style) res = highlight(code, pylex, hf) return res.splitlines() def get_ansi_template(): try: from jinja2 import Template except ImportError: raise ImportError("please install the 'jinja2' package") return Template(""" {%- for func_key in func_data.keys() -%} Function name: \x1b[34m{{func_data[func_key]['funcname']}}\x1b[39;49;00m {%- if func_data[func_key]['filename'] -%} {{'\n'}}In file: \x1b[34m{{func_data[func_key]['filename'] -}}\x1b[39;49;00m {%- endif -%} {{'\n'}}With signature: \x1b[34m{{func_key[1]}}\x1b[39;49;00m {{- "\n" -}} {%- for num, line, hl, hc in func_data[func_key]['pygments_lines'] -%} {{-'\n'}}{{ num}}: {{hc-}} {%- if func_data[func_key]['ir_lines'][num] -%} {%- for ir_line, ir_line_type in func_data[func_key]['ir_lines'][num] %} {{-'\n'}}--{{- ' '*func_data[func_key]['python_indent'][num]}} {{- ' '*(func_data[func_key]['ir_indent'][num][loop.index0]+4) }}{{ir_line }}\x1b[41m{{ir_line_type-}}\x1b[39;49;00m {%- endfor -%} {%- endif -%} {%- endfor -%} {%- endfor -%} """) return ansi_template def get_html_template(): try: from jinja2 import Template except ImportError: raise ImportError("please install the 'jinja2' package") return Template(""" {% for func_key in func_data.keys() %}
{%- for num, line, hl, hc in func_data[func_key]['pygments_lines'] -%} {%- if func_data[func_key]['ir_lines'][num] %} {% else -%} {%- endif -%} {%- endfor -%}
{{num}}: {{' '*func_data[func_key]['python_indent'][num]}}{{hl}} {%- for ir_line, ir_line_type in func_data[func_key]['ir_lines'][num] %} {%- endfor -%}
  {{- ' '*func_data[func_key]['python_indent'][num]}} {{ ' '*func_data[func_key]['ir_indent'][num][loop.index0]}}{{ir_line|e -}} {{ir_line_type}}
{{num}}: {{' '*func_data[func_key]['python_indent'][num]}}{{hl}}
{% endfor %} """) def reform_code(annotation): """ Extract the code from the Numba annotation datastructure. Pygments can only highlight full multi-line strings, the Numba annotation is list of single lines, with indentation removed. """ ident_dict = annotation['python_indent'] s= '' for n,l in annotation['python_lines']: s = s+' '*ident_dict[n]+l+'\n' return s class Annotate: """ Construct syntax highlighted annotation for a given jitted function: Example: >>> import numba >>> from numba.pretty_annotate import Annotate >>> @numba.jit ... def test(q): ... res = 0 ... for i in range(q): ... res += i ... return res ... >>> test(10) 45 >>> Annotate(test) The last line will return an HTML and/or ANSI representation that will be displayed accordingly in Jupyter/IPython. Function annotations persist across compilation for newly encountered type signatures and as a result annotations are shown for all signatures by default. Annotations for a specific signature can be shown by using the ``signature`` parameter. >>> @numba.jit ... def add(x, y): ... return x + y ... >>> add(1, 2) 3 >>> add(1.3, 5.7) 7.0 >>> add.signatures [(int64, int64), (float64, float64)] >>> Annotate(add, signature=add.signatures[1]) # annotation for (float64, float64) """ def __init__(self, function, signature=None, **kwargs): style = kwargs.get('style', 'default') if not function.signatures: raise ValueError('function need to be jitted for at least one signature') ann = function.get_annotation_info(signature=signature) self.ann = ann for k,v in ann.items(): res = hllines(reform_code(v), style) rest = htlines(reform_code(v), style) v['pygments_lines'] = [(a,b,c, d) for (a,b),c, d in zip(v['python_lines'], res, rest)] def _repr_html_(self): return get_html_template().render(func_data=self.ann) def __repr__(self): return get_ansi_template().render(func_data=self.ann) numba-0.55.1/numba/core/annotations/template.html000664 000000 000000 00000006610 14174536160 021766 0ustar00rootroot000000 000000 {% for func_key in func_data.keys() %} {% set loop1 = loop %}
{%- for num, line in func_data[func_key]['python_lines'] -%} {%- if func_data[func_key]['ir_lines'][num] %} {% else -%} {%- endif -%} {%- endfor -%}
{{num}}: {{func_data[func_key]['python_indent'][num]}}{{line|e}} {%- for ir_line, ir_line_type in func_data[func_key]['ir_lines'][num] %} {%- endfor -%}
  {{- func_data[func_key]['python_indent'][num]}} {{func_data[func_key]['ir_indent'][num][loop.index0]}}{{ir_line|e -}} {{ir_line_type}}
{{num}}: {{func_data[func_key]['python_indent'][num]}}{{line|e}}



{% endfor %} numba-0.55.1/numba/core/annotations/type_annotations.py000664 000000 000000 00000025660 14174536160 023243 0ustar00rootroot000000 000000 from collections import defaultdict, OrderedDict from collections.abc import Mapping from contextlib import closing import copy import inspect import os import re import sys import textwrap from io import StringIO import numba.core.dispatcher from numba.core import ir class SourceLines(Mapping): def __init__(self, func): try: lines, startno = inspect.getsourcelines(func) except OSError: self.lines = () self.startno = 0 else: self.lines = textwrap.dedent(''.join(lines)).splitlines() self.startno = startno def __getitem__(self, lineno): try: return self.lines[lineno - self.startno].rstrip() except IndexError: return '' def __iter__(self): return iter((self.startno + i) for i in range(len(self.lines))) def __len__(self): return len(self.lines) @property def avail(self): return bool(self.lines) class TypeAnnotation(object): # func_data dict stores annotation data for all functions that are # compiled. We store the data in the TypeAnnotation class since a new # TypeAnnotation instance is created for each function that is compiled. # For every function that is compiled, we add the type annotation data to # this dict and write the html annotation file to disk (rewrite the html # file for every function since we don't know if this is the last function # to be compiled). func_data = OrderedDict() def __init__(self, func_ir, typemap, calltypes, lifted, lifted_from, args, return_type, html_output=None): self.func_id = func_ir.func_id self.blocks = func_ir.blocks self.typemap = typemap self.calltypes = calltypes self.filename = func_ir.loc.filename self.linenum = str(func_ir.loc.line) self.signature = str(args) + ' -> ' + str(return_type) # lifted loop information self.lifted = lifted self.num_lifted_loops = len(lifted) # If this is a lifted loop function that is being compiled, lifted_from # points to annotation data from function that this loop lifted function # was lifted from. This is used to stick lifted loop annotations back # into original function. self.lifted_from = lifted_from def prepare_annotations(self): # Prepare annotations groupedinst = defaultdict(list) found_lifted_loop = False #for blkid, blk in self.blocks.items(): for blkid in sorted(self.blocks.keys()): blk = self.blocks[blkid] groupedinst[blk.loc.line].append("label %s" % blkid) for inst in blk.body: lineno = inst.loc.line if isinstance(inst, ir.Assign): if found_lifted_loop: atype = 'XXX Lifted Loop XXX' found_lifted_loop = False elif (isinstance(inst.value, ir.Expr) and inst.value.op == 'call'): atype = self.calltypes[inst.value] elif (isinstance(inst.value, ir.Const) and isinstance(inst.value.value, numba.core.dispatcher.LiftedLoop)): atype = 'XXX Lifted Loop XXX' found_lifted_loop = True else: # TODO: fix parfor lowering so that typemap is valid. 
atype = self.typemap.get(inst.target.name, "") aline = "%s = %s :: %s" % (inst.target, inst.value, atype) elif isinstance(inst, ir.SetItem): atype = self.calltypes[inst] aline = "%s :: %s" % (inst, atype) else: aline = "%s" % inst groupedinst[lineno].append(" %s" % aline) return groupedinst def annotate(self): source = SourceLines(self.func_id.func) # if not source.avail: # return "Source code unavailable" groupedinst = self.prepare_annotations() # Format annotations io = StringIO() with closing(io): if source.avail: print("# File: %s" % self.filename, file=io) for num in source: srcline = source[num] ind = _getindent(srcline) print("%s# --- LINE %d --- " % (ind, num), file=io) for inst in groupedinst[num]: print('%s# %s' % (ind, inst), file=io) print(file=io) print(srcline, file=io) print(file=io) if self.lifted: print("# The function contains lifted loops", file=io) for loop in self.lifted: print("# Loop at line %d" % loop.get_source_location(), file=io) print("# Has %d overloads" % len(loop.overloads), file=io) for cres in loop.overloads.values(): print(cres.type_annotation, file=io) else: print("# Source code unavailable", file=io) for num in groupedinst: for inst in groupedinst[num]: print('%s' % (inst,), file=io) print(file=io) return io.getvalue() def html_annotate(self, outfile): # ensure that annotation information is assembled self.annotate_raw() # make a deep copy ahead of the pending mutations func_data = copy.deepcopy(self.func_data) key = 'python_indent' for this_func in func_data.values(): if key in this_func: idents = {} for line, amount in this_func[key].items(): idents[line] = ' ' * amount this_func[key] = idents key = 'ir_indent' for this_func in func_data.values(): if key in this_func: idents = {} for line, ir_id in this_func[key].items(): idents[line] = [' ' * amount for amount in ir_id] this_func[key] = idents try: from jinja2 import Template except ImportError: raise ImportError("please install the 'jinja2' package") root = os.path.join(os.path.dirname(__file__)) template_filename = os.path.join(root, 'template.html') with open(template_filename, 'r') as template: html = template.read() template = Template(html) rendered = template.render(func_data=func_data) outfile.write(rendered) def annotate_raw(self): """ This returns "raw" annotation information i.e. it has no output format specific markup included. """ python_source = SourceLines(self.func_id.func) ir_lines = self.prepare_annotations() line_nums = [num for num in python_source] lifted_lines = [l.get_source_location() for l in self.lifted] def add_ir_line(func_data, line): line_str = line.strip() line_type = '' if line_str.endswith('pyobject'): line_str = line_str.replace('pyobject', '') line_type = 'pyobject' func_data['ir_lines'][num].append((line_str, line_type)) indent_len = len(_getindent(line)) func_data['ir_indent'][num].append(indent_len) func_key = (self.func_id.filename + ':' + str(self.func_id.firstlineno + 1), self.signature) if self.lifted_from is not None and self.lifted_from[1]['num_lifted_loops'] > 0: # This is a lifted loop function that is being compiled. Get the # numba ir for lines in loop function to use for annotating # original python function that the loop was lifted from. 
func_data = self.lifted_from[1] for num in line_nums: if num not in ir_lines.keys(): continue func_data['ir_lines'][num] = [] func_data['ir_indent'][num] = [] for line in ir_lines[num]: add_ir_line(func_data, line) if line.strip().endswith('pyobject'): func_data['python_tags'][num] = 'object_tag' # If any pyobject line is found, make sure original python # line that was marked as a lifted loop start line is tagged # as an object line instead. Lifted loop start lines should # only be marked as lifted loop lines if the lifted loop # was successfully compiled in nopython mode. func_data['python_tags'][self.lifted_from[0]] = 'object_tag' # We're done with this lifted loop, so decrement lifted loop counter. # When lifted loop counter hits zero, that means we're ready to write # out annotations to html file. self.lifted_from[1]['num_lifted_loops'] -= 1 elif func_key not in TypeAnnotation.func_data.keys(): TypeAnnotation.func_data[func_key] = {} func_data = TypeAnnotation.func_data[func_key] for i, loop in enumerate(self.lifted): # Make sure that when we process each lifted loop function later, # we'll know where it originally came from. loop.lifted_from = (lifted_lines[i], func_data) func_data['num_lifted_loops'] = self.num_lifted_loops func_data['filename'] = self.filename func_data['funcname'] = self.func_id.func_name func_data['python_lines'] = [] func_data['python_indent'] = {} func_data['python_tags'] = {} func_data['ir_lines'] = {} func_data['ir_indent'] = {} for num in line_nums: func_data['python_lines'].append((num, python_source[num].strip())) indent_len = len(_getindent(python_source[num])) func_data['python_indent'][num] = indent_len func_data['python_tags'][num] = '' func_data['ir_lines'][num] = [] func_data['ir_indent'][num] = [] for line in ir_lines[num]: add_ir_line(func_data, line) if num in lifted_lines: func_data['python_tags'][num] = 'lifted_tag' elif line.strip().endswith('pyobject'): func_data['python_tags'][num] = 'object_tag' return self.func_data def __str__(self): return self.annotate() re_longest_white_prefix = re.compile(r'^\s*') def _getindent(text): m = re_longest_white_prefix.match(text) if not m: return '' else: return ' ' * len(m.group(0)) numba-0.55.1/numba/core/base.py000664 000000 000000 00000131254 14174536160 016217 0ustar00rootroot000000 000000 from collections import namedtuple, defaultdict import copy import os import sys import warnings from itertools import permutations, takewhile from contextlib import contextmanager import numpy as np from llvmlite import ir as llvmir import llvmlite.llvmpy.core as lc from llvmlite.llvmpy.core import Type, Constant, LLVMException import llvmlite.binding as ll from numba.core import types, utils, typing, datamodel, debuginfo, funcdesc, config, cgutils, imputils from numba.core import event, errors, targetconfig from numba import _dynfunc, _helperlib from numba.core.compiler_lock import global_compiler_lock from numba.core.pythonapi import PythonAPI from numba.core.imputils import (user_function, user_generator, builtin_registry, impl_ret_borrowed, RegistryLoader) from numba.cpython import builtins GENERIC_POINTER = Type.pointer(Type.int(8)) PYOBJECT = GENERIC_POINTER void_ptr = GENERIC_POINTER class OverloadSelector(object): """ An object matching an actual signature against a registry of formal signatures and choosing the best candidate, if any. 
In the current implementation: - a "signature" is a tuple of type classes or type instances - the "best candidate" is the most specific match """ def __init__(self): # A list of (formal args tuple, value) self.versions = [] self._cache = {} def find(self, sig): out = self._cache.get(sig) if out is None: out = self._find(sig) self._cache[sig] = out return out def _find(self, sig): candidates = self._select_compatible(sig) if candidates: return candidates[self._best_signature(candidates)] else: raise errors.NumbaNotImplementedError(f'{self}, {sig}') def _select_compatible(self, sig): """ Select all compatible signatures and their implementation. """ out = {} for ver_sig, impl in self.versions: if self._match_arglist(ver_sig, sig): out[ver_sig] = impl return out def _best_signature(self, candidates): """ Returns the best signature out of the candidates """ ordered, genericity = self._sort_signatures(candidates) # check for ambiguous signatures if len(ordered) > 1: firstscore = genericity[ordered[0]] same = list(takewhile(lambda x: genericity[x] == firstscore, ordered)) if len(same) > 1: msg = ["{n} ambiguous signatures".format(n=len(same))] for sig in same: msg += ["{0} => {1}".format(sig, candidates[sig])] raise errors.NumbaTypeError('\n'.join(msg)) return ordered[0] def _sort_signatures(self, candidates): """ Sort signatures in ascending level of genericity. Returns a 2-tuple: * ordered list of signatures * dictionary containing genericity scores """ # score by genericity genericity = defaultdict(int) for this, other in permutations(candidates.keys(), r=2): matched = self._match_arglist(formal_args=this, actual_args=other) if matched: # genericity score +1 for every another compatible signature genericity[this] += 1 # order candidates in ascending level of genericity ordered = sorted(candidates.keys(), key=lambda x: genericity[x]) return ordered, genericity def _match_arglist(self, formal_args, actual_args): """ Returns True if the signature is "matching". A formal signature is "matching" if the actual signature matches exactly or if the formal signature is a compatible generic signature. """ # normalize VarArg if formal_args and isinstance(formal_args[-1], types.VarArg): ndiff = len(actual_args) - len(formal_args) + 1 formal_args = formal_args[:-1] + (formal_args[-1].dtype,) * ndiff if len(formal_args) != len(actual_args): return False for formal, actual in zip(formal_args, actual_args): if not self._match(formal, actual): return False return True def _match(self, formal, actual): if formal == actual: # formal argument matches actual arguments return True elif types.Any == formal: # formal argument is any return True elif isinstance(formal, type) and issubclass(formal, types.Type): if isinstance(actual, type) and issubclass(actual, formal): # formal arg is a type class and actual arg is a subclass return True elif isinstance(actual, formal): # formal arg is a type class of which actual arg is an instance return True def append(self, value, sig): """ Add a formal signature and its associated value. """ assert isinstance(sig, tuple), (value, sig) self.versions.append((sig, value)) self._cache.clear() @utils.runonce def _load_global_helpers(): """ Execute once to install special symbols into the LLVM symbol table. 
""" # This is Py_None's real C name ll.add_symbol("_Py_NoneStruct", id(None)) # Add Numba C helper functions for c_helpers in (_helperlib.c_helpers, _dynfunc.c_helpers): for py_name, c_address in c_helpers.items(): c_name = "numba_" + py_name ll.add_symbol(c_name, c_address) # Add Numpy C helpers (npy_XXX) for c_name, c_address in _helperlib.npymath_exports.items(): ll.add_symbol(c_name, c_address) # Add all built-in exception classes for obj in utils.builtins.__dict__.values(): if isinstance(obj, type) and issubclass(obj, BaseException): ll.add_symbol("PyExc_%s" % (obj.__name__), id(obj)) class BaseContext(object): """ Notes on Structure ------------------ Most objects are lowered as plain-old-data structure in the generated llvm. They are passed around by reference (a pointer to the structure). Only POD structure can live across function boundaries by copying the data. """ # True if the target requires strict alignment # Causes exception to be raised if the record members are not aligned. strict_alignment = False # Force powi implementation as math.pow call implement_powi_as_math_call = False implement_pow_as_math_call = False # Emit Debug info enable_debuginfo = False DIBuilder = debuginfo.DIBuilder # Bound checking @property def enable_boundscheck(self): if config.BOUNDSCHECK is not None: return config.BOUNDSCHECK return self._boundscheck @enable_boundscheck.setter def enable_boundscheck(self, value): self._boundscheck = value # NRT enable_nrt = False # Auto parallelization auto_parallel = False # PYCC aot_mode = False # Error model for various operations (only FP exceptions currently) error_model = None # Whether dynamic globals (CPU runtime addresses) is allowed allow_dynamic_globals = False # Fast math flags fastmath = False # python execution environment environment = None # the function descriptor fndesc = None def __init__(self, typing_context, target): _load_global_helpers() self.address_size = utils.MACHINE_BITS self.typing_context = typing_context from numba.core.target_extension import target_registry self.target_name = target self.target = target_registry[target] # A mapping of installed registries to their loaders self._registries = {} # Declarations loaded from registries and other sources self._defns = defaultdict(OverloadSelector) self._getattrs = defaultdict(OverloadSelector) self._setattrs = defaultdict(OverloadSelector) self._casts = OverloadSelector() self._get_constants = OverloadSelector() # Other declarations self._generators = {} self.special_ops = {} self.cached_internal_func = {} self._pid = None self._codelib_stack = [] self._boundscheck = False self.data_model_manager = datamodel.default_manager # Initialize self.init() def init(self): """ For subclasses to add initializer """ def refresh(self): """ Refresh context with new declarations from known registries. Useful for third-party extensions. """ # load target specific registries self.load_additional_registries() # Populate the builtin registry, this has to happen after loading # additional registries as some of the "additional" registries write # their implementations into the builtin_registry and would be missed if # this ran first. self.install_registry(builtin_registry) # Also refresh typing context, since @overload declarations can # affect it. self.typing_context.refresh() def load_additional_registries(self): """ Load target-specific registries. Can be overridden by subclasses. """ def mangler(self, name, types, *, abi_tags=()): """ Perform name mangling. 
""" return funcdesc.default_mangler(name, types, abi_tags=abi_tags) def get_env_name(self, fndesc): """Get the environment name given a FunctionDescriptor. Use this instead of the ``fndesc.env_name`` so that the target-context can provide necessary mangling of the symbol to meet ABI requirements. """ return fndesc.env_name def declare_env_global(self, module, envname): """Declare the Environment pointer as a global of the module. The pointer is initialized to NULL. It must be filled by the runtime with the actual address of the Env before the associated function can be executed. Parameters ---------- module : The LLVM Module envname : str The name of the global variable. """ if envname not in module.globals: gv = llvmir.GlobalVariable(module, cgutils.voidptr_t, name=envname) gv.linkage = 'common' gv.initializer = cgutils.get_null_value(gv.type.pointee) return module.globals[envname] def get_arg_packer(self, fe_args): return datamodel.ArgPacker(self.data_model_manager, fe_args) def get_data_packer(self, fe_types): return datamodel.DataPacker(self.data_model_manager, fe_types) @property def target_data(self): raise NotImplementedError @utils.cached_property def nonconst_module_attrs(self): """ All module attrs are constant for targets using BaseContext. """ return tuple() @utils.cached_property def nrt(self): from numba.core.runtime.context import NRTContext return NRTContext(self, self.enable_nrt) def subtarget(self, **kws): obj = copy.copy(self) # shallow copy for k, v in kws.items(): if not hasattr(obj, k): raise NameError("unknown option {0!r}".format(k)) setattr(obj, k, v) if obj.codegen() is not self.codegen(): # We can't share functions across different codegens obj.cached_internal_func = {} return obj def install_registry(self, registry): """ Install a *registry* (a imputils.Registry instance) of function and attribute implementations. """ try: loader = self._registries[registry] except KeyError: loader = RegistryLoader(registry) self._registries[registry] = loader self.insert_func_defn(loader.new_registrations('functions')) self._insert_getattr_defn(loader.new_registrations('getattrs')) self._insert_setattr_defn(loader.new_registrations('setattrs')) self._insert_cast_defn(loader.new_registrations('casts')) self._insert_get_constant_defn(loader.new_registrations('constants')) def insert_func_defn(self, defns): for impl, func, sig in defns: self._defns[func].append(impl, sig) def _insert_getattr_defn(self, defns): for impl, attr, sig in defns: self._getattrs[attr].append(impl, sig) def _insert_setattr_defn(self, defns): for impl, attr, sig in defns: self._setattrs[attr].append(impl, sig) def _insert_cast_defn(self, defns): for impl, sig in defns: self._casts.append(impl, sig) def _insert_get_constant_defn(self, defns): for impl, sig in defns: self._get_constants.append(impl, sig) def insert_user_function(self, func, fndesc, libs=()): impl = user_function(fndesc, libs) self._defns[func].append(impl, impl.signature) def add_user_function(self, func, fndesc, libs=()): warnings.warn("Use insert_user_function instead", errors.NumbaDeprecationWarning) if func not in self._defns: msg = "{func} is not a registered user function" raise KeyError(msg.format(func=func)) impl = user_function(fndesc, libs) self._defns[func].append(impl, impl.signature) def insert_generator(self, genty, gendesc, libs=()): assert isinstance(genty, types.Generator) impl = user_generator(gendesc, libs) self._generators[genty] = gendesc, impl def remove_user_function(self, func): """ Remove user function *func*. 
KeyError is raised if the function isn't known to us. """ del self._defns[func] def get_external_function_type(self, fndesc): argtypes = [self.get_argument_type(aty) for aty in fndesc.argtypes] # don't wrap in pointer restype = self.get_argument_type(fndesc.restype) fnty = Type.function(restype, argtypes) return fnty def declare_function(self, module, fndesc): fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes) fn = cgutils.get_or_insert_function(module, fnty, fndesc.mangled_name) self.call_conv.decorate_function(fn, fndesc.args, fndesc.argtypes, noalias=fndesc.noalias) if fndesc.inline: fn.attributes.add('alwaysinline') return fn def declare_external_function(self, module, fndesc): fnty = self.get_external_function_type(fndesc) fn = cgutils.get_or_insert_function(module, fnty, fndesc.mangled_name) assert fn.is_declaration for ak, av in zip(fndesc.args, fn.args): av.name = "arg.%s" % ak return fn def insert_const_string(self, mod, string): """ Insert constant *string* (a str object) into module *mod*. """ stringtype = GENERIC_POINTER name = ".const.%s" % string text = cgutils.make_bytearray(string.encode("utf-8") + b"\x00") gv = self.insert_unique_const(mod, name, text) return Constant.bitcast(gv, stringtype) def insert_const_bytes(self, mod, bytes, name=None): """ Insert constant *byte* (a `bytes` object) into module *mod*. """ stringtype = GENERIC_POINTER name = ".bytes.%s" % (name or hash(bytes)) text = cgutils.make_bytearray(bytes) gv = self.insert_unique_const(mod, name, text) return Constant.bitcast(gv, stringtype) def insert_unique_const(self, mod, name, val): """ Insert a unique internal constant named *name*, with LLVM value *val*, into module *mod*. """ try: gv = mod.get_global(name) except KeyError: return cgutils.global_constant(mod, name, val) else: return gv def get_argument_type(self, ty): return self.data_model_manager[ty].get_argument_type() def get_return_type(self, ty): return self.data_model_manager[ty].get_return_type() def get_data_type(self, ty): """ Get a LLVM data representation of the Numba type *ty* that is safe for storage. Record data are stored as byte array. The return value is a llvmlite.ir.Type object, or None if the type is an opaque pointer (???). """ return self.data_model_manager[ty].get_data_type() def get_value_type(self, ty): return self.data_model_manager[ty].get_value_type() def pack_value(self, builder, ty, value, ptr, align=None): """ Pack value into the array storage at *ptr*. If *align* is given, it is the guaranteed alignment for *ptr* (by default, the standard ABI alignment). """ dataval = self.data_model_manager[ty].as_data(builder, value) builder.store(dataval, ptr, align=align) def unpack_value(self, builder, ty, ptr, align=None): """ Unpack value from the array storage at *ptr*. If *align* is given, it is the guaranteed alignment for *ptr* (by default, the standard ABI alignment). """ dm = self.data_model_manager[ty] return dm.load_from_data_pointer(builder, ptr, align) def get_constant_generic(self, builder, ty, val): """ Return a LLVM constant representing value *val* of Numba type *ty*. """ try: impl = self._get_constants.find((ty,)) return impl(self, builder, ty, val) except NotImplementedError: raise NotImplementedError("Cannot lower constant of type '%s'" % (ty,)) def get_constant(self, ty, val): """ Same as get_constant_generic(), but without specifying *builder*. Works only for simple types. 
""" # HACK: pass builder=None to preserve get_constant() API return self.get_constant_generic(None, ty, val) def get_constant_undef(self, ty): lty = self.get_value_type(ty) return Constant.undef(lty) def get_constant_null(self, ty): lty = self.get_value_type(ty) return Constant.null(lty) def get_function(self, fn, sig, _firstcall=True): """ Return the implementation of function *fn* for signature *sig*. The return value is a callable with the signature (builder, args). """ assert sig is not None sig = sig.as_function() if isinstance(fn, types.Callable): key = fn.get_impl_key(sig) overloads = self._defns[key] else: key = fn overloads = self._defns[key] try: return _wrap_impl(overloads.find(sig.args), self, sig) except errors.NumbaNotImplementedError: pass if isinstance(fn, types.Type): # It's a type instance => try to find a definition for the type class try: return self.get_function(type(fn), sig) except NotImplementedError: # Raise exception for the type instance, for a better error message pass # Automatically refresh the context to load new registries if we are # calling the first time. if _firstcall: self.refresh() return self.get_function(fn, sig, _firstcall=False) raise NotImplementedError("No definition for lowering %s%s" % (key, sig)) def get_generator_desc(self, genty): """ """ return self._generators[genty][0] def get_generator_impl(self, genty): """ """ res = self._generators[genty][1] self.add_linking_libs(getattr(res, 'libs', ())) return res def get_bound_function(self, builder, obj, ty): assert self.get_value_type(ty) == obj.type return obj def get_getattr(self, typ, attr): """ Get the getattr() implementation for the given type and attribute name. The return value is a callable with the signature (context, builder, typ, val, attr). """ const_attr = (typ, attr) not in self.nonconst_module_attrs is_module = isinstance(typ, types.Module) if is_module and const_attr: # Implement getattr for module-level globals that we treat as # constants. # XXX We shouldn't have to retype this attrty = self.typing_context.resolve_module_constants(typ, attr) if attrty is None or isinstance(attrty, types.Dummy): # No implementation required for dummies (functions, modules...), # which are dealt with later return None else: pyval = getattr(typ.pymod, attr) def imp(context, builder, typ, val, attr): llval = self.get_constant_generic(builder, attrty, pyval) return impl_ret_borrowed(context, builder, attrty, llval) return imp # Lookup specific getattr implementation for this type and attribute overloads = self._getattrs[attr] try: return overloads.find((typ,)) except errors.NumbaNotImplementedError: pass # Lookup generic getattr implementation for this type overloads = self._getattrs[None] try: return overloads.find((typ,)) except errors.NumbaNotImplementedError: pass raise NotImplementedError("No definition for lowering %s.%s" % (typ, attr)) def get_setattr(self, attr, sig): """ Get the setattr() implementation for the given attribute name and signature. The return value is a callable with the signature (builder, args). 
""" assert len(sig.args) == 2 typ = sig.args[0] valty = sig.args[1] def wrap_setattr(impl): def wrapped(builder, args): return impl(self, builder, sig, args, attr) return wrapped # Lookup specific setattr implementation for this type and attribute overloads = self._setattrs[attr] try: return wrap_setattr(overloads.find((typ, valty))) except errors.NumbaNotImplementedError: pass # Lookup generic setattr implementation for this type overloads = self._setattrs[None] try: return wrap_setattr(overloads.find((typ, valty))) except errors.NumbaNotImplementedError: pass raise NotImplementedError("No definition for lowering %s.%s = %s" % (typ, attr, valty)) def get_argument_value(self, builder, ty, val): """ Argument representation to local value representation """ return self.data_model_manager[ty].from_argument(builder, val) def get_returned_value(self, builder, ty, val): """ Return value representation to local value representation """ return self.data_model_manager[ty].from_return(builder, val) def get_return_value(self, builder, ty, val): """ Local value representation to return type representation """ return self.data_model_manager[ty].as_return(builder, val) def get_value_as_argument(self, builder, ty, val): """Prepare local value representation as argument type representation """ return self.data_model_manager[ty].as_argument(builder, val) def get_value_as_data(self, builder, ty, val): return self.data_model_manager[ty].as_data(builder, val) def get_data_as_value(self, builder, ty, val): return self.data_model_manager[ty].from_data(builder, val) def pair_first(self, builder, val, ty): """ Extract the first element of a heterogeneous pair. """ pair = self.make_helper(builder, ty, val) return pair.first def pair_second(self, builder, val, ty): """ Extract the second element of a heterogeneous pair. """ pair = self.make_helper(builder, ty, val) return pair.second def cast(self, builder, val, fromty, toty): """ Cast a value of type *fromty* to type *toty*. This implements implicit conversions as can happen due to the granularity of the Numba type system, or lax Python semantics. """ if fromty == toty or toty == types.Any: return val try: impl = self._casts.find((fromty, toty)) return impl(self, builder, fromty, toty, val) except errors.NumbaNotImplementedError: raise errors.NumbaNotImplementedError( "Cannot cast %s to %s: %s" % (fromty, toty, val)) def generic_compare(self, builder, key, argtypes, args): """ Compare the given LLVM values of the given Numba types using the comparison *key* (e.g. '=='). The values are first cast to a common safe conversion type. """ at, bt = argtypes av, bv = args ty = self.typing_context.unify_types(at, bt) assert ty is not None cav = self.cast(builder, av, at, ty) cbv = self.cast(builder, bv, bt, ty) fnty = self.typing_context.resolve_value_type(key) # the sig is homogeneous in the unified casted type cmpsig = fnty.get_call_type(self.typing_context, (ty, ty), {}) cmpfunc = self.get_function(fnty, cmpsig) self.add_linking_libs(getattr(cmpfunc, 'libs', ())) return cmpfunc(builder, (cav, cbv)) def make_optional_none(self, builder, valtype): optval = self.make_helper(builder, types.Optional(valtype)) optval.valid = cgutils.false_bit return optval._getvalue() def make_optional_value(self, builder, valtype, value): optval = self.make_helper(builder, types.Optional(valtype)) optval.valid = cgutils.true_bit optval.data = value return optval._getvalue() def is_true(self, builder, typ, val): """ Return the truth value of a value of the given Numba type. 
""" fnty = self.typing_context.resolve_value_type(bool) sig = fnty.get_call_type(self.typing_context, (typ,), {}) impl = self.get_function(fnty, sig) return impl(builder, (val,)) def get_c_value(self, builder, typ, name, dllimport=False): """ Get a global value through its C-accessible *name*, with the given LLVM type. If *dllimport* is true, the symbol will be marked as imported from a DLL (necessary for AOT compilation under Windows). """ module = builder.function.module try: gv = module.globals[name] except KeyError: gv = cgutils.add_global_variable(module, typ, name) if dllimport and self.aot_mode and sys.platform == 'win32': gv.storage_class = "dllimport" return gv def call_external_function(self, builder, callee, argtys, args): args = [self.get_value_as_argument(builder, ty, arg) for ty, arg in zip(argtys, args)] retval = builder.call(callee, args) return retval def get_function_pointer_type(self, typ): return self.data_model_manager[typ].get_data_type() def call_function_pointer(self, builder, funcptr, args, cconv=None): return builder.call(funcptr, args, cconv=cconv) def print_string(self, builder, text): mod = builder.module cstring = GENERIC_POINTER fnty = Type.function(Type.int(), [cstring]) puts = cgutils.get_or_insert_function(mod, fnty, "puts") return builder.call(puts, [text]) def debug_print(self, builder, text): mod = builder.module cstr = self.insert_const_string(mod, str(text)) self.print_string(builder, cstr) def printf(self, builder, format_string, *args): mod = builder.module if isinstance(format_string, str): cstr = self.insert_const_string(mod, format_string) else: cstr = format_string fnty = Type.function(Type.int(), (GENERIC_POINTER,), var_arg=True) fn = cgutils.get_or_insert_function(mod, fnty, "printf") return builder.call(fn, (cstr,) + tuple(args)) def get_struct_type(self, struct): """ Get the LLVM struct type for the given Structure class *struct*. """ fields = [self.get_value_type(v) for _, v in struct._fields] return Type.struct(fields) def get_dummy_value(self): return Constant.null(self.get_dummy_type()) def get_dummy_type(self): return GENERIC_POINTER def _compile_subroutine_no_cache(self, builder, impl, sig, locals={}, flags=None): """ Invoke the compiler to compile a function to be used inside a nopython function, but without generating code to call that function. Note this context's flags are not inherited. """ # Compile from numba.core import compiler with global_compiler_lock: codegen = self.codegen() library = codegen.create_library(impl.__name__) if flags is None: cstk = targetconfig.ConfigStack() flags = compiler.Flags() if cstk: tls_flags = cstk.top() if tls_flags.is_set("nrt") and tls_flags.nrt: flags.nrt = True flags.no_compile = True flags.no_cpython_wrapper = True flags.no_cfunc_wrapper = True cres = compiler.compile_internal(self.typing_context, self, library, impl, sig.args, sig.return_type, flags, locals=locals) # Allow inlining the function inside callers. self.active_code_library.add_linking_library(cres.library) return cres def compile_subroutine(self, builder, impl, sig, locals={}, flags=None, caching=True): """ Compile the function *impl* for the given *sig* (in nopython mode). Return an instance of CompileResult. If *caching* evaluates True, the function keeps the compiled function for reuse in *.cached_internal_func*. """ cache_key = (impl.__code__, sig, type(self.error_model)) if not caching: cached = None else: if impl.__closure__: # XXX This obviously won't work if a cell's value is # unhashable. 
cache_key += tuple(c.cell_contents for c in impl.__closure__) cached = self.cached_internal_func.get(cache_key) if cached is None: cres = self._compile_subroutine_no_cache(builder, impl, sig, locals=locals, flags=flags) self.cached_internal_func[cache_key] = cres cres = self.cached_internal_func[cache_key] # Allow inlining the function inside callers. self.active_code_library.add_linking_library(cres.library) return cres def compile_internal(self, builder, impl, sig, args, locals={}): """ Like compile_subroutine(), but also call the function with the given *args*. """ cres = self.compile_subroutine(builder, impl, sig, locals) return self.call_internal(builder, cres.fndesc, sig, args) def call_internal(self, builder, fndesc, sig, args): """ Given the function descriptor of an internally compiled function, emit a call to that function with the given arguments. """ status, res = self.call_internal_no_propagate(builder, fndesc, sig, args) with cgutils.if_unlikely(builder, status.is_error): self.call_conv.return_status_propagate(builder, status) res = imputils.fix_returning_optional(self, builder, sig, status, res) return res def call_internal_no_propagate(self, builder, fndesc, sig, args): """Similar to `.call_internal()` but does not handle or propagate the return status automatically. """ # Add call to the generated function llvm_mod = builder.module fn = self.declare_function(llvm_mod, fndesc) status, res = self.call_conv.call_function(builder, fn, sig.return_type, sig.args, args) return status, res def call_unresolved(self, builder, name, sig, args): """ Insert a function call to an unresolved symbol with the given *name*. Note: this is used for recursive calls. In the mutual recursion case:: @njit def foo(): ... # calls bar() @njit def bar(): ... # calls foo() foo() When foo() is called, the compilation of bar() is fully completed (codegen'ed and loaded) before foo() is. Since MCJIT's eager compilation doesn't allow loading modules with declare-only functions (which is needed for foo() in bar()), call_unresolved injects a global variable that the "linker" can update even after the module is loaded by MCJIT. The linker would allocate space for the global variable before the bar() module is loaded. When the foo() module is later defined, it will update bar()'s reference to foo(). The legacy lazy JIT and the new ORC JIT would allow a declare-only function to be used in a module as long as it is defined by the time of its first use. """ # Insert an unresolved reference to the function being called.
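# Conceptually (an illustrative sketch -- the slot name below is made up;
# see codegen.insert_unresolved_ref for the real mechanism): instead of a
# direct `call @foo`, the emitted code calls through a patchable pointer,
#
#   %fnptr = load @"numba.unresolved.foo"   ; filled in by the "linker"
#   %res   = call %fnptr(...)
#
# so a module can be finalized while its callee is still only a declaration.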
codegen = self.codegen() fnty = self.call_conv.get_function_type(sig.return_type, sig.args) fn = codegen.insert_unresolved_ref(builder, fnty, name) # Normal call sequence status, res = self.call_conv.call_function(builder, fn, sig.return_type, sig.args, args) with cgutils.if_unlikely(builder, status.is_error): self.call_conv.return_status_propagate(builder, status) res = imputils.fix_returning_optional(self, builder, sig, status, res) return res def get_executable(self, func, fndesc): raise NotImplementedError def get_python_api(self, builder): return PythonAPI(self, builder) def sentry_record_alignment(self, rectyp, attr): """ Assumes offset starts from a properly aligned location """ if self.strict_alignment: offset = rectyp.offset(attr) elemty = rectyp.typeof(attr) if isinstance(elemty, types.NestedArray): # For a NestedArray we need to consider the data type of # elements of the array for alignment, not the array structure # itself elemty = elemty.dtype align = self.get_abi_alignment(self.get_data_type(elemty)) if offset % align: msg = "{rec}.{attr} of type {type} is not aligned".format( rec=rectyp, attr=attr, type=elemty) raise TypeError(msg) def get_helper_class(self, typ, kind='value'): """ Get a helper class for the given *typ*. """ # XXX handle all types: complex, array, etc. # XXX should it be a method on the model instead? this would allow a default kind... return cgutils.create_struct_proxy(typ, kind) def _make_helper(self, builder, typ, value=None, ref=None, kind='value'): cls = self.get_helper_class(typ, kind) return cls(self, builder, value=value, ref=ref) def make_helper(self, builder, typ, value=None, ref=None): """ Get a helper object to access the *typ*'s members, for the given value or reference. """ return self._make_helper(builder, typ, value, ref, kind='value') def make_data_helper(self, builder, typ, ref=None): """ As make_helper(), but considers the value as stored in memory, rather than a live value. """ return self._make_helper(builder, typ, ref=ref, kind='data') def make_array(self, typ): from numba.np import arrayobj return arrayobj.make_array(typ) def populate_array(self, arr, **kwargs): """ Populate array structure. """ from numba.np import arrayobj return arrayobj.populate_array(arr, **kwargs) def make_complex(self, builder, typ, value=None): """ Get a helper object to access the given complex numbers' members. """ assert isinstance(typ, types.Complex), typ return self.make_helper(builder, typ, value) def make_tuple(self, builder, typ, values): """ Create a tuple of the given *typ* containing the *values*. """ tup = self.get_constant_undef(typ) for i, val in enumerate(values): tup = builder.insert_value(tup, val, i) return tup def make_constant_array(self, builder, typ, ary): """ Create an array structure reifying the given constant array. A low-level contiguous array constant is created in the LLVM IR. """ datatype = self.get_data_type(typ.dtype) # don't freeze ary of non-contig or bigger than 1MB size_limit = 10**6 if (self.allow_dynamic_globals and (typ.layout not in 'FC' or ary.nbytes > size_limit)): # get pointer from the ary dataptr = ary.ctypes.data data = self.add_dynamic_addr(builder, dataptr, info=str(type(dataptr))) rt_addr = self.add_dynamic_addr(builder, id(ary), info=str(type(ary))) else: # Handle data: reify the flattened array in "C" or "F" order as a # global array of bytes. 
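            # Illustrative note on the branch above: the dynamic-address path
            # is only taken when `allow_dynamic_globals` is set *and* the
            # array is non-contiguous or larger than the 1MB `size_limit`.
            # A small contiguous constant, e.g.
            #
            #     np.arange(6).reshape(2, 3)   # 'C' layout, 48 bytes on int64
            #
            # always falls through to this freeze path and is embedded in the
            # LLVM module as a byte array.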
flat = ary.flatten(order=typ.layout) # Note: we use `bytearray(flat.data)` instead of `bytearray(flat)` to # workaround issue #1850 which is due to numpy issue #3147 consts = Constant.array(Type.int(8), bytearray(flat.data)) data = cgutils.global_constant(builder, ".const.array.data", consts) # Ensure correct data alignment (issue #1933) data.align = self.get_abi_alignment(datatype) # No reference to parent ndarray rt_addr = None # Handle shape llintp = self.get_value_type(types.intp) shapevals = [self.get_constant(types.intp, s) for s in ary.shape] cshape = Constant.array(llintp, shapevals) # Handle strides stridevals = [self.get_constant(types.intp, s) for s in ary.strides] cstrides = Constant.array(llintp, stridevals) # Create array structure cary = self.make_array(typ)(self, builder) intp_itemsize = self.get_constant(types.intp, ary.dtype.itemsize) self.populate_array(cary, data=builder.bitcast(data, cary.data.type), shape=cshape, strides=cstrides, itemsize=intp_itemsize, parent=rt_addr, meminfo=None) return cary._getvalue() def add_dynamic_addr(self, builder, intaddr, info): """ Returns dynamic address as a void pointer `i8*`. Internally, a global variable is added to inform the lowerer about the usage of dynamic addresses. Caching will be disabled. """ assert self.allow_dynamic_globals, "dyn globals disabled in this target" assert isinstance(intaddr, int), 'dyn addr not of int type' mod = builder.module llvoidptr = self.get_value_type(types.voidptr) addr = self.get_constant(types.uintp, intaddr).inttoptr(llvoidptr) # Use a unique name by embedding the address value symname = 'numba.dynamic.globals.{:x}'.format(intaddr) gv = cgutils.add_global_variable(mod, llvoidptr, symname) # Use linkonce linkage to allow merging with other GV of the same name. # And, avoid optimization from assuming its value. gv.linkage = 'linkonce' gv.initializer = addr return builder.load(gv) def get_abi_sizeof(self, ty): """ Get the ABI size of LLVM type *ty*. """ assert isinstance(ty, llvmir.Type), "Expected LLVM type" return ty.get_abi_size(self.target_data) def get_abi_alignment(self, ty): """ Get the ABI alignment of LLVM type *ty*. """ assert isinstance(ty, llvmir.Type), "Expected LLVM type" return ty.get_abi_alignment(self.target_data) def get_preferred_array_alignment(context, ty): """ Get preferred array alignment for Numba type *ty*. """ # AVX prefers 32-byte alignment return 32 def post_lowering(self, mod, library): """Run target specific post-lowering transformation here. """ def create_module(self, name): """Create a LLVM module The default implementation in BaseContext always raises a ``NotImplementedError`` exception. Subclasses should implement this method. """ raise NotImplementedError @property def active_code_library(self): """Get the active code library """ return self._codelib_stack[-1] @contextmanager def push_code_library(self, lib): """Push the active code library for the context """ self._codelib_stack.append(lib) try: yield finally: self._codelib_stack.pop() def add_linking_libs(self, libs): """Add iterable of linking libraries to the *active_code_library*. """ colib = self.active_code_library for lib in libs: colib.add_linking_library(lib) def get_ufunc_info(self, ufunc_key): """Get the ufunc implementation for a given ufunc object. The default implementation in BaseContext always raises a ``NotImplementedError`` exception. Subclasses may raise ``KeyError`` to signal that the given ``ufunc_key`` is not available. 
        Parameters
        ----------
        ufunc_key : NumPy ufunc

        Returns
        -------
        res : dict[str, callable]
            A mapping of a NumPy ufunc type signature to a lower-level
            implementation.
        """
        raise NotImplementedError(f"{self} does not support ufunc")


class _wrap_impl(object):
    """
    A wrapper object to call an implementation function with some predefined
    (context, signature) arguments.
    The wrapper also forwards attribute queries, which is important.
    """

    def __init__(self, imp, context, sig):
        self._callable = _wrap_missing_loc(imp)
        self._imp = self._callable()
        self._context = context
        self._sig = sig

    def __call__(self, builder, args, loc=None):
        res = self._imp(self._context, builder, self._sig, args, loc=loc)
        self._context.add_linking_libs(getattr(self, 'libs', ()))
        return res

    def __getattr__(self, item):
        return getattr(self._imp, item)

    def __repr__(self):
        return "<wrapped %s>" % repr(self._callable)


def _has_loc(fn):
    """Does function *fn* take a ``loc`` argument?
    """
    sig = utils.pysignature(fn)
    return 'loc' in sig.parameters


class _wrap_missing_loc(object):

    def __init__(self, fn):
        self.func = fn  # store this to help with debug

    def __call__(self):
        """Wrap function for missing ``loc`` keyword argument.
        Otherwise, return the original *fn*.
        """
        fn = self.func
        if not _has_loc(fn):
            def wrapper(*args, **kwargs):
                kwargs.pop('loc')  # drop unused loc
                return fn(*args, **kwargs)

            # Copy the following attributes from the wrapped.
            # Following similar implementation as functools.wraps but
            # ignore attributes if not available (i.e. fix py2.7)
            attrs = '__name__', 'libs'
            for attr in attrs:
                try:
                    val = getattr(fn, attr)
                except AttributeError:
                    pass
                else:
                    setattr(wrapper, attr, val)

            return wrapper
        else:
            return fn

    def __repr__(self):
        return "<wrapped %s>" % self.func


@utils.runonce
def _initialize_llvm_lock_event():
    """Initialize event triggers for the LLVM lock.
    """
    def enter_fn():
        event.start_event("numba:llvm_lock")

    def exit_fn():
        event.end_event("numba:llvm_lock")

    ll.ffi.register_lock_callback(enter_fn, exit_fn)


_initialize_llvm_lock_event()
numba-0.55.1/numba/core/boxing.py000664 000000 000000 00000113720 14174536160 016571 0ustar00rootroot000000 000000 """
Boxing and unboxing of native Numba values to / from CPython objects.
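
A hedged usage sketch (the names are the real decorator and handler defined
just below in this module)::

    @box(types.Boolean)
    def box_bool(typ, val, c):
        # native i1 -> Python bool, built via the boxing context *c*
        return c.pyapi.bool_from_bool(val)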
""" from llvmlite import ir from numba.core import types, cgutils from numba.core.pythonapi import box, unbox, reflect, NativeValue from numba.core.errors import NumbaNotImplementedError from numba.cpython import setobj, listobj from numba.np import numpy_support # # Scalar types # @box(types.Boolean) def box_bool(typ, val, c): return c.pyapi.bool_from_bool(val) @unbox(types.Boolean) def unbox_boolean(typ, obj, c): istrue = c.pyapi.object_istrue(obj) zero = ir.Constant(istrue.type, 0) val = c.builder.icmp_signed('!=', istrue, zero) return NativeValue(val, is_error=c.pyapi.c_api_error()) @box(types.IntegerLiteral) @box(types.BooleanLiteral) def box_literal_integer(typ, val, c): val = c.context.cast(c.builder, val, typ, typ.literal_type) return c.box(typ.literal_type, val) @box(types.Integer) def box_integer(typ, val, c): if typ.signed: ival = c.builder.sext(val, c.pyapi.longlong) return c.pyapi.long_from_longlong(ival) else: ullval = c.builder.zext(val, c.pyapi.ulonglong) return c.pyapi.long_from_ulonglong(ullval) @unbox(types.Integer) def unbox_integer(typ, obj, c): ll_type = c.context.get_argument_type(typ) val = cgutils.alloca_once(c.builder, ll_type) longobj = c.pyapi.number_long(obj) with c.pyapi.if_object_ok(longobj): if typ.signed: llval = c.pyapi.long_as_longlong(longobj) else: llval = c.pyapi.long_as_ulonglong(longobj) c.pyapi.decref(longobj) c.builder.store(c.builder.trunc(llval, ll_type), val) return NativeValue(c.builder.load(val), is_error=c.pyapi.c_api_error()) @box(types.Float) def box_float(typ, val, c): if typ == types.float32: dbval = c.builder.fpext(val, c.pyapi.double) else: assert typ == types.float64 dbval = val return c.pyapi.float_from_double(dbval) @unbox(types.Float) def unbox_float(typ, obj, c): fobj = c.pyapi.number_float(obj) dbval = c.pyapi.float_as_double(fobj) c.pyapi.decref(fobj) if typ == types.float32: val = c.builder.fptrunc(dbval, c.context.get_argument_type(typ)) else: assert typ == types.float64 val = dbval return NativeValue(val, is_error=c.pyapi.c_api_error()) @box(types.Complex) def box_complex(typ, val, c): cval = c.context.make_complex(c.builder, typ, value=val) if typ == types.complex64: freal = c.builder.fpext(cval.real, c.pyapi.double) fimag = c.builder.fpext(cval.imag, c.pyapi.double) else: assert typ == types.complex128 freal, fimag = cval.real, cval.imag return c.pyapi.complex_from_doubles(freal, fimag) @unbox(types.Complex) def unbox_complex(typ, obj, c): # First unbox to complex128, since that's what CPython gives us c128 = c.context.make_complex(c.builder, types.complex128) ok = c.pyapi.complex_adaptor(obj, c128._getpointer()) failed = cgutils.is_false(c.builder, ok) with cgutils.if_unlikely(c.builder, failed): c.pyapi.err_set_string("PyExc_TypeError", "conversion to %s failed" % (typ,)) if typ == types.complex64: # Downcast to complex64 if necessary cplx = c.context.make_complex(c.builder, typ) cplx.real = c.context.cast(c.builder, c128.real, types.float64, types.float32) cplx.imag = c.context.cast(c.builder, c128.imag, types.float64, types.float32) else: assert typ == types.complex128 cplx = c128 return NativeValue(cplx._getvalue(), is_error=failed) @box(types.NoneType) def box_none(typ, val, c): return c.pyapi.make_none() @unbox(types.NoneType) @unbox(types.EllipsisType) def unbox_none(typ, val, c): return NativeValue(c.context.get_dummy_value()) @box(types.NPDatetime) def box_npdatetime(typ, val, c): return c.pyapi.create_np_datetime(val, typ.unit_code) @unbox(types.NPDatetime) def unbox_npdatetime(typ, obj, c): val = 
c.pyapi.extract_np_datetime(obj) return NativeValue(val, is_error=c.pyapi.c_api_error()) @box(types.NPTimedelta) def box_nptimedelta(typ, val, c): return c.pyapi.create_np_timedelta(val, typ.unit_code) @unbox(types.NPTimedelta) def unbox_nptimedelta(typ, obj, c): val = c.pyapi.extract_np_timedelta(obj) return NativeValue(val, is_error=c.pyapi.c_api_error()) @box(types.RawPointer) def box_raw_pointer(typ, val, c): """ Convert a raw pointer to a Python int. """ ll_intp = c.context.get_value_type(types.uintp) addr = c.builder.ptrtoint(val, ll_intp) return c.box(types.uintp, addr) @box(types.EnumMember) def box_enum(typ, val, c): """ Fetch an enum member given its native value. """ valobj = c.box(typ.dtype, val) # Call the enum class with the value object cls_obj = c.pyapi.unserialize(c.pyapi.serialize_object(typ.instance_class)) return c.pyapi.call_function_objargs(cls_obj, (valobj,)) @unbox(types.EnumMember) def unbox_enum(typ, obj, c): """ Convert an enum member's value to its native value. """ valobj = c.pyapi.object_getattr_string(obj, "value") return c.unbox(typ.dtype, valobj) # # Composite types # @box(types.Record) def box_record(typ, val, c): # Note we will create a copy of the record # This is the only safe way. size = ir.Constant(ir.IntType(32), val.type.pointee.count) ptr = c.builder.bitcast(val, ir.PointerType(ir.IntType(8))) return c.pyapi.recreate_record(ptr, size, typ.dtype, c.env_manager) @unbox(types.Record) def unbox_record(typ, obj, c): buf = c.pyapi.alloca_buffer() ptr = c.pyapi.extract_record_data(obj, buf) is_error = cgutils.is_null(c.builder, ptr) ltyp = c.context.get_value_type(typ) val = c.builder.bitcast(ptr, ltyp) def cleanup(): c.pyapi.release_buffer(buf) return NativeValue(val, cleanup=cleanup, is_error=is_error) @box(types.UnicodeCharSeq) def box_unicodecharseq(typ, val, c): # XXX could kind be determined from strptr? unicode_kind = { 1: c.pyapi.py_unicode_1byte_kind, 2: c.pyapi.py_unicode_2byte_kind, 4: c.pyapi.py_unicode_4byte_kind}[numpy_support.sizeof_unicode_char] kind = c.context.get_constant(types.int32, unicode_kind) rawptr = cgutils.alloca_once_value(c.builder, value=val) strptr = c.builder.bitcast(rawptr, c.pyapi.cstring) fullsize = c.context.get_constant(types.intp, typ.count) zero = fullsize.type(0) one = fullsize.type(1) step = fullsize.type(numpy_support.sizeof_unicode_char) count = cgutils.alloca_once_value(c.builder, zero) with cgutils.loop_nest(c.builder, [fullsize], fullsize.type) as [idx]: # Get char at idx ch = c.builder.load(c.builder.gep(strptr, [c.builder.mul(idx, step)])) # If the char is a non-null-byte, store the next index as count with c.builder.if_then(cgutils.is_not_null(c.builder, ch)): c.builder.store(c.builder.add(idx, one), count) strlen = c.builder.load(count) return c.pyapi.string_from_kind_and_data(kind, strptr, strlen) @unbox(types.UnicodeCharSeq) def unbox_unicodecharseq(typ, obj, c): lty = c.context.get_value_type(typ) ok, buffer, size, kind, is_ascii, hashv = \ c.pyapi.string_as_string_size_and_kind(obj) # If conversion is ok, copy the buffer to the output storage. 
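    # Illustrative example of the truncation below: unboxing u"hello" into a
    # UnicodeCharSeq whose typ.count == 4 clamps `size` to the storage size
    # and stores only u"hell"; no error is raised for oversized input.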
with cgutils.if_likely(c.builder, ok): # Check if the returned string size fits in the charseq storage_size = ir.Constant(size.type, typ.count) size_fits = c.builder.icmp_unsigned("<=", size, storage_size) # Allow truncation of string size = c.builder.select(size_fits, size, storage_size) # Initialize output to zero bytes null_string = ir.Constant(lty, None) outspace = cgutils.alloca_once_value(c.builder, null_string) # We don't need to set the NULL-terminator because the storage # is already zero-filled. cgutils.memcpy(c.builder, c.builder.bitcast(outspace, buffer.type), buffer, size) ret = c.builder.load(outspace) return NativeValue(ret, is_error=c.builder.not_(ok)) @box(types.Bytes) def box_bytes(typ, val, c): obj = c.context.make_helper(c.builder, typ, val) ret = c.pyapi.bytes_from_string_and_size(obj.data, obj.nitems) c.context.nrt.decref(c.builder, typ, val) return ret @box(types.CharSeq) def box_charseq(typ, val, c): rawptr = cgutils.alloca_once_value(c.builder, value=val) strptr = c.builder.bitcast(rawptr, c.pyapi.cstring) fullsize = c.context.get_constant(types.intp, typ.count) zero = fullsize.type(0) one = fullsize.type(1) count = cgutils.alloca_once_value(c.builder, zero) # Find the length of the string, mimicking Numpy's behaviour: # search for the last non-null byte in the underlying storage # (e.g. b'A\0\0B\0\0\0' will return the logical string b'A\0\0B') with cgutils.loop_nest(c.builder, [fullsize], fullsize.type) as [idx]: # Get char at idx ch = c.builder.load(c.builder.gep(strptr, [idx])) # If the char is a non-null-byte, store the next index as count with c.builder.if_then(cgutils.is_not_null(c.builder, ch)): c.builder.store(c.builder.add(idx, one), count) strlen = c.builder.load(count) return c.pyapi.bytes_from_string_and_size(strptr, strlen) @unbox(types.CharSeq) def unbox_charseq(typ, obj, c): lty = c.context.get_value_type(typ) ok, buffer, size = c.pyapi.string_as_string_and_size(obj) # If conversion is ok, copy the buffer to the output storage. with cgutils.if_likely(c.builder, ok): # Check if the returned string size fits in the charseq storage_size = ir.Constant(size.type, typ.count) size_fits = c.builder.icmp_unsigned("<=", size, storage_size) # Allow truncation of string size = c.builder.select(size_fits, size, storage_size) # Initialize output to zero bytes null_string = ir.Constant(lty, None) outspace = cgutils.alloca_once_value(c.builder, null_string) # We don't need to set the NULL-terminator because the storage # is already zero-filled. cgutils.memcpy(c.builder, c.builder.bitcast(outspace, buffer.type), buffer, size) ret = c.builder.load(outspace) return NativeValue(ret, is_error=c.builder.not_(ok)) @box(types.Optional) def box_optional(typ, val, c): optval = c.context.make_helper(c.builder, typ, val) ret = cgutils.alloca_once_value(c.builder, c.pyapi.borrow_none()) with c.builder.if_else(optval.valid) as (then, otherwise): with then: validres = c.box(typ.type, optval.data) c.builder.store(validres, ret) with otherwise: c.builder.store(c.pyapi.make_none(), ret) return c.builder.load(ret) @unbox(types.Optional) def unbox_optional(typ, obj, c): """ Convert object *obj* to a native optional structure. 
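
    A hedged sketch of the result for ``Optional(int64)``::

        None          -> {valid: false, data: <undef>}
        a Python int  -> {valid: true,  data: <unboxed int64>}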
""" noneval = c.context.make_optional_none(c.builder, typ.type) is_not_none = c.builder.icmp_signed('!=', obj, c.pyapi.borrow_none()) retptr = cgutils.alloca_once(c.builder, noneval.type) errptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit) with c.builder.if_else(is_not_none) as (then, orelse): with then: native = c.unbox(typ.type, obj) just = c.context.make_optional_value(c.builder, typ.type, native.value) c.builder.store(just, retptr) c.builder.store(native.is_error, errptr) with orelse: c.builder.store(noneval, retptr) if native.cleanup is not None: def cleanup(): with c.builder.if_then(is_not_none): native.cleanup() else: cleanup = None ret = c.builder.load(retptr) return NativeValue(ret, is_error=c.builder.load(errptr), cleanup=cleanup) @unbox(types.SliceType) def unbox_slice(typ, obj, c): """ Convert object *obj* to a native slice structure. """ from numba.cpython import slicing ok, start, stop, step = c.pyapi.slice_as_ints(obj) sli = c.context.make_helper(c.builder, typ) sli.start = start sli.stop = stop sli.step = step return NativeValue(sli._getvalue(), is_error=c.builder.not_(ok)) @unbox(types.StringLiteral) def unbox_string_literal(typ, obj, c): # A string literal is a dummy value return NativeValue(c.context.get_dummy_value()) # # Collections # # NOTE: boxing functions are supposed to steal any NRT references in # the given native value. @box(types.Array) def box_array(typ, val, c): nativearycls = c.context.make_array(typ) nativeary = nativearycls(c.context, c.builder, value=val) if c.context.enable_nrt: np_dtype = numpy_support.as_dtype(typ.dtype) dtypeptr = c.env_manager.read_const(c.env_manager.add_const(np_dtype)) newary = c.pyapi.nrt_adapt_ndarray_to_python(typ, val, dtypeptr) # Steals NRT ref c.context.nrt.decref(c.builder, typ, val) return newary else: parent = nativeary.parent c.pyapi.incref(parent) return parent @unbox(types.Buffer) def unbox_buffer(typ, obj, c): """ Convert a Py_buffer-providing object to a native array structure. """ buf = c.pyapi.alloca_buffer() res = c.pyapi.get_buffer(obj, buf) is_error = cgutils.is_not_null(c.builder, res) nativearycls = c.context.make_array(typ) nativeary = nativearycls(c.context, c.builder) aryptr = nativeary._getpointer() with cgutils.if_likely(c.builder, c.builder.not_(is_error)): ptr = c.builder.bitcast(aryptr, c.pyapi.voidptr) if c.context.enable_nrt: c.pyapi.nrt_adapt_buffer_from_python(buf, ptr) else: c.pyapi.numba_buffer_adaptor(buf, ptr) def cleanup(): c.pyapi.release_buffer(buf) return NativeValue(c.builder.load(aryptr), is_error=is_error, cleanup=cleanup) @unbox(types.Array) def unbox_array(typ, obj, c): """ Convert a Numpy array object to a native array structure. """ # This is necessary because unbox_buffer() does not work on some # dtypes, e.g. datetime64 and timedelta64. # TODO check matching dtype. # currently, mismatching dtype will still work and causes # potential memory corruption nativearycls = c.context.make_array(typ) nativeary = nativearycls(c.context, c.builder) aryptr = nativeary._getpointer() ptr = c.builder.bitcast(aryptr, c.pyapi.voidptr) if c.context.enable_nrt: errcode = c.pyapi.nrt_adapt_ndarray_from_python(obj, ptr) else: errcode = c.pyapi.numba_array_adaptor(obj, ptr) # TODO: here we have minimal typechecking by the itemsize. 
# need to do better try: expected_itemsize = numpy_support.as_dtype(typ.dtype).itemsize except NumbaNotImplementedError: # Don't check types that can't be `as_dtype()`-ed itemsize_mismatch = cgutils.false_bit else: expected_itemsize = nativeary.itemsize.type(expected_itemsize) itemsize_mismatch = c.builder.icmp_unsigned( '!=', nativeary.itemsize, expected_itemsize, ) failed = c.builder.or_( cgutils.is_not_null(c.builder, errcode), itemsize_mismatch, ) # Handle error with c.builder.if_then(failed, likely=False): c.pyapi.err_set_string("PyExc_TypeError", "can't unbox array from PyObject into " "native value. The object maybe of a " "different type") return NativeValue(c.builder.load(aryptr), is_error=failed) @box(types.Tuple) @box(types.UniTuple) def box_tuple(typ, val, c): """ Convert native array or structure *val* to a tuple object. """ tuple_val = c.pyapi.tuple_new(typ.count) for i, dtype in enumerate(typ): item = c.builder.extract_value(val, i) obj = c.box(dtype, item) c.pyapi.tuple_setitem(tuple_val, i, obj) return tuple_val @box(types.NamedTuple) @box(types.NamedUniTuple) def box_namedtuple(typ, val, c): """ Convert native array or structure *val* to a namedtuple object. """ cls_obj = c.pyapi.unserialize(c.pyapi.serialize_object(typ.instance_class)) tuple_obj = box_tuple(typ, val, c) obj = c.pyapi.call(cls_obj, tuple_obj) c.pyapi.decref(cls_obj) c.pyapi.decref(tuple_obj) return obj @unbox(types.BaseTuple) def unbox_tuple(typ, obj, c): """ Convert tuple *obj* to a native array (if homogeneous) or structure. """ n = len(typ) values = [] cleanups = [] lty = c.context.get_value_type(typ) is_error_ptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit) value_ptr = cgutils.alloca_once(c.builder, lty) # Issue #1638: need to check the tuple size actual_size = c.pyapi.tuple_size(obj) size_matches = c.builder.icmp_unsigned('==', actual_size, ir.Constant(actual_size.type, n)) with c.builder.if_then(c.builder.not_(size_matches), likely=False): c.pyapi.err_format( "PyExc_ValueError", "size mismatch for tuple, expected %d element(s) but got %%zd" % (n,), actual_size) c.builder.store(cgutils.true_bit, is_error_ptr) # We unbox the items even if not `size_matches`, to avoid issues with # the generated IR (instruction doesn't dominate all uses) for i, eltype in enumerate(typ): elem = c.pyapi.tuple_getitem(obj, i) native = c.unbox(eltype, elem) values.append(native.value) with c.builder.if_then(native.is_error, likely=False): c.builder.store(cgutils.true_bit, is_error_ptr) if native.cleanup is not None: cleanups.append(native.cleanup) value = c.context.make_tuple(c.builder, typ, values) c.builder.store(value, value_ptr) if cleanups: with c.builder.if_then(size_matches, likely=True): def cleanup(): for func in reversed(cleanups): func() else: cleanup = None return NativeValue(c.builder.load(value_ptr), cleanup=cleanup, is_error=c.builder.load(is_error_ptr)) @box(types.List) def box_list(typ, val, c): """ Convert native list *val* to a list object. 
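
    Illustrative sketch of the two branches below::

        # list still owns its Python parent -> return the original object
        #     (with an incref)
        # no parent (e.g. list created inside nopython code) -> build a new
        #     Python list, boxing each element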
""" list = listobj.ListInstance(c.context, c.builder, typ, val) obj = list.parent res = cgutils.alloca_once_value(c.builder, obj) with c.builder.if_else(cgutils.is_not_null(c.builder, obj)) as (has_parent, otherwise): with has_parent: # List is actually reflected => return the original object # (note not all list instances whose *type* is reflected are # actually reflected; see numba.tests.test_lists for an example) c.pyapi.incref(obj) with otherwise: # Build a new Python list nitems = list.size obj = c.pyapi.list_new(nitems) with c.builder.if_then(cgutils.is_not_null(c.builder, obj), likely=True): with cgutils.for_range(c.builder, nitems) as loop: item = list.getitem(loop.index) list.incref_value(item) itemobj = c.box(typ.dtype, item) c.pyapi.list_setitem(obj, loop.index, itemobj) c.builder.store(obj, res) # Steal NRT ref c.context.nrt.decref(c.builder, typ, val) return c.builder.load(res) class _NumbaTypeHelper(object): """A helper for acquiring `numba.typeof` for type checking. Usage ----- # `c` is the boxing context. with _NumbaTypeHelper(c) as nth: # This contextmanager maintains the lifetime of the `numba.typeof` # function. the_numba_type = nth.typeof(some_object) # Do work on the type object do_checks(the_numba_type) # Cleanup c.pyapi.decref(the_numba_type) # At this point *nth* should not be used. """ def __init__(self, c): self.c = c def __enter__(self): c = self.c numba_name = c.context.insert_const_string(c.builder.module, 'numba') numba_mod = c.pyapi.import_module_noblock(numba_name) typeof_fn = c.pyapi.object_getattr_string(numba_mod, 'typeof') self.typeof_fn = typeof_fn c.pyapi.decref(numba_mod) return self def __exit__(self, *args, **kwargs): c = self.c c.pyapi.decref(self.typeof_fn) def typeof(self, obj): res = self.c.pyapi.call_function_objargs(self.typeof_fn, [obj]) return res def _python_list_to_native(typ, obj, c, size, listptr, errorptr): """ Construct a new native list from a Python list. """ def check_element_type(nth, itemobj, expected_typobj): typobj = nth.typeof(itemobj) # Check if *typobj* is NULL with c.builder.if_then( cgutils.is_null(c.builder, typobj), likely=False, ): c.builder.store(cgutils.true_bit, errorptr) loop.do_break() # Mandate that objects all have the same exact type type_mismatch = c.builder.icmp_signed('!=', typobj, expected_typobj) with c.builder.if_then(type_mismatch, likely=False): c.builder.store(cgutils.true_bit, errorptr) c.pyapi.err_format( "PyExc_TypeError", "can't unbox heterogeneous list: %S != %S", expected_typobj, typobj, ) c.pyapi.decref(typobj) loop.do_break() c.pyapi.decref(typobj) # Allocate a new native list ok, list = listobj.ListInstance.allocate_ex(c.context, c.builder, typ, size) with c.builder.if_else(ok, likely=True) as (if_ok, if_not_ok): with if_ok: list.size = size zero = ir.Constant(size.type, 0) with c.builder.if_then(c.builder.icmp_signed('>', size, zero), likely=True): # Traverse Python list and unbox objects into native list with _NumbaTypeHelper(c) as nth: # Note: *expected_typobj* can't be NULL expected_typobj = nth.typeof(c.pyapi.list_getitem(obj, zero)) with cgutils.for_range(c.builder, size) as loop: itemobj = c.pyapi.list_getitem(obj, loop.index) check_element_type(nth, itemobj, expected_typobj) # XXX we don't call native cleanup for each # list element, since that would require keeping # of which unboxings have been successful. 
native = c.unbox(typ.dtype, itemobj) with c.builder.if_then(native.is_error, likely=False): c.builder.store(cgutils.true_bit, errorptr) loop.do_break() # The reference is borrowed so incref=False list.setitem(loop.index, native.value, incref=False) c.pyapi.decref(expected_typobj) if typ.reflected: list.parent = obj # Stuff meminfo pointer into the Python object for # later reuse. with c.builder.if_then(c.builder.not_(c.builder.load(errorptr)), likely=False): c.pyapi.object_set_private_data(obj, list.meminfo) list.set_dirty(False) c.builder.store(list.value, listptr) with if_not_ok: c.builder.store(cgutils.true_bit, errorptr) # If an error occurred, drop the whole native list with c.builder.if_then(c.builder.load(errorptr)): c.context.nrt.decref(c.builder, typ, list.value) @unbox(types.List) def unbox_list(typ, obj, c): """ Convert list *obj* to a native list. If list was previously unboxed, we reuse the existing native list to ensure consistency. """ size = c.pyapi.list_size(obj) errorptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit) listptr = cgutils.alloca_once(c.builder, c.context.get_value_type(typ)) # See if the list was previously unboxed, if so, re-use the meminfo. ptr = c.pyapi.object_get_private_data(obj) with c.builder.if_else(cgutils.is_not_null(c.builder, ptr)) \ as (has_meminfo, otherwise): with has_meminfo: # List was previously unboxed => reuse meminfo list = listobj.ListInstance.from_meminfo(c.context, c.builder, typ, ptr) list.size = size if typ.reflected: list.parent = obj c.builder.store(list.value, listptr) with otherwise: _python_list_to_native(typ, obj, c, size, listptr, errorptr) def cleanup(): # Clean up the associated pointer, as the meminfo is now invalid. c.pyapi.object_reset_private_data(obj) return NativeValue(c.builder.load(listptr), is_error=c.builder.load(errorptr), cleanup=cleanup) @reflect(types.List) def reflect_list(typ, val, c): """ Reflect the native list's contents into the Python object. """ if not typ.reflected: return if typ.dtype.reflected: msg = "cannot reflect element of reflected container: {}\n".format(typ) raise TypeError(msg) list = listobj.ListInstance(c.context, c.builder, typ, val) with c.builder.if_then(list.dirty, likely=False): obj = list.parent size = c.pyapi.list_size(obj) new_size = list.size diff = c.builder.sub(new_size, size) diff_gt_0 = c.builder.icmp_signed('>=', diff, ir.Constant(diff.type, 0)) with c.builder.if_else(diff_gt_0) as (if_grow, if_shrink): # XXX no error checking below with if_grow: # First overwrite existing items with cgutils.for_range(c.builder, size) as loop: item = list.getitem(loop.index) list.incref_value(item) itemobj = c.box(typ.dtype, item) c.pyapi.list_setitem(obj, loop.index, itemobj) # Then add missing items with cgutils.for_range(c.builder, diff) as loop: idx = c.builder.add(size, loop.index) item = list.getitem(idx) list.incref_value(item) itemobj = c.box(typ.dtype, item) c.pyapi.list_append(obj, itemobj) c.pyapi.decref(itemobj) with if_shrink: # First delete list tail c.pyapi.list_setslice(obj, new_size, size, None) # Then overwrite remaining items with cgutils.for_range(c.builder, new_size) as loop: item = list.getitem(loop.index) list.incref_value(item) itemobj = c.box(typ.dtype, item) c.pyapi.list_setitem(obj, loop.index, itemobj) # Mark the list clean, in case it is reflected twice list.set_dirty(False) def _python_set_to_native(typ, obj, c, size, setptr, errorptr): """ Construct a new native set from a Python set. 
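
    Illustrative examples of the homogeneity rule enforced below::

        {1, 2, 3}    # ok: every element has the same exact type
        {1, 2.5}     # TypeError: can't unbox heterogeneous set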
""" # Allocate a new native set ok, inst = setobj.SetInstance.allocate_ex(c.context, c.builder, typ, size) with c.builder.if_else(ok, likely=True) as (if_ok, if_not_ok): with if_ok: # Traverse Python set and unbox objects into native set typobjptr = cgutils.alloca_once_value(c.builder, ir.Constant(c.pyapi.pyobj, None)) with c.pyapi.set_iterate(obj) as loop: itemobj = loop.value # Mandate that objects all have the same exact type typobj = c.pyapi.get_type(itemobj) expected_typobj = c.builder.load(typobjptr) with c.builder.if_else( cgutils.is_null(c.builder, expected_typobj), likely=False) as (if_first, if_not_first): with if_first: # First iteration => store item type c.builder.store(typobj, typobjptr) with if_not_first: # Otherwise, check item type type_mismatch = c.builder.icmp_signed('!=', typobj, expected_typobj) with c.builder.if_then(type_mismatch, likely=False): c.builder.store(cgutils.true_bit, errorptr) c.pyapi.err_set_string("PyExc_TypeError", "can't unbox heterogeneous set") loop.do_break() # XXX we don't call native cleanup for each set element, # since that would require keeping track # of which unboxings have been successful. native = c.unbox(typ.dtype, itemobj) with c.builder.if_then(native.is_error, likely=False): c.builder.store(cgutils.true_bit, errorptr) inst.add_pyapi(c.pyapi, native.value, do_resize=False) if typ.reflected: inst.parent = obj # Associate meminfo pointer with the Python object for later reuse. with c.builder.if_then(c.builder.not_(c.builder.load(errorptr)), likely=False): c.pyapi.object_set_private_data(obj, inst.meminfo) inst.set_dirty(False) c.builder.store(inst.value, setptr) with if_not_ok: c.builder.store(cgutils.true_bit, errorptr) # If an error occurred, drop the whole native set with c.builder.if_then(c.builder.load(errorptr)): c.context.nrt.decref(c.builder, typ, inst.value) @unbox(types.Set) def unbox_set(typ, obj, c): """ Convert set *obj* to a native set. If set was previously unboxed, we reuse the existing native set to ensure consistency. """ size = c.pyapi.set_size(obj) errorptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit) setptr = cgutils.alloca_once(c.builder, c.context.get_value_type(typ)) # See if the set was previously unboxed, if so, re-use the meminfo. ptr = c.pyapi.object_get_private_data(obj) with c.builder.if_else(cgutils.is_not_null(c.builder, ptr)) \ as (has_meminfo, otherwise): with has_meminfo: # Set was previously unboxed => reuse meminfo inst = setobj.SetInstance.from_meminfo(c.context, c.builder, typ, ptr) if typ.reflected: inst.parent = obj c.builder.store(inst.value, setptr) with otherwise: _python_set_to_native(typ, obj, c, size, setptr, errorptr) def cleanup(): # Clean up the associated pointer, as the meminfo is now invalid. c.pyapi.object_reset_private_data(obj) return NativeValue(c.builder.load(setptr), is_error=c.builder.load(errorptr), cleanup=cleanup) def _native_set_to_python_list(typ, payload, c): """ Create a Python list from a native set's items. """ nitems = payload.used listobj = c.pyapi.list_new(nitems) ok = cgutils.is_not_null(c.builder, listobj) with c.builder.if_then(ok, likely=True): index = cgutils.alloca_once_value(c.builder, ir.Constant(nitems.type, 0)) with payload._iterate() as loop: i = c.builder.load(index) item = loop.entry.key itemobj = c.box(typ.dtype, item) c.pyapi.list_setitem(listobj, i, itemobj) i = c.builder.add(i, ir.Constant(i.type, 1)) c.builder.store(i, index) return ok, listobj @box(types.Set) def box_set(typ, val, c): """ Convert native set *val* to a set object. 
""" inst = setobj.SetInstance(c.context, c.builder, typ, val) obj = inst.parent res = cgutils.alloca_once_value(c.builder, obj) with c.builder.if_else(cgutils.is_not_null(c.builder, obj)) as (has_parent, otherwise): with has_parent: # Set is actually reflected => return the original object # (note not all set instances whose *type* is reflected are # actually reflected; see numba.tests.test_sets for an example) c.pyapi.incref(obj) with otherwise: # Build a new Python list and then create a set from that payload = inst.payload ok, listobj = _native_set_to_python_list(typ, payload, c) with c.builder.if_then(ok, likely=True): obj = c.pyapi.set_new(listobj) c.pyapi.decref(listobj) c.builder.store(obj, res) # Steal NRT ref c.context.nrt.decref(c.builder, typ, val) return c.builder.load(res) @reflect(types.Set) def reflect_set(typ, val, c): """ Reflect the native set's contents into the Python object. """ if not typ.reflected: return inst = setobj.SetInstance(c.context, c.builder, typ, val) payload = inst.payload with c.builder.if_then(payload.dirty, likely=False): obj = inst.parent # XXX errors are not dealt with below c.pyapi.set_clear(obj) # Build a new Python list and then update the set with that ok, listobj = _native_set_to_python_list(typ, payload, c) with c.builder.if_then(ok, likely=True): c.pyapi.set_update(obj, listobj) c.pyapi.decref(listobj) # Mark the set clean, in case it is reflected twice inst.set_dirty(False) # # Other types # @box(types.Generator) def box_generator(typ, val, c): return c.pyapi.from_native_generator(val, typ, c.env_manager.env_ptr) @unbox(types.Generator) def unbox_generator(typ, obj, c): return c.pyapi.to_native_generator(obj, typ) @box(types.DType) def box_dtype(typ, val, c): np_dtype = numpy_support.as_dtype(typ.dtype) return c.pyapi.unserialize(c.pyapi.serialize_object(np_dtype)) @unbox(types.DType) def unbox_dtype(typ, val, c): return NativeValue(c.context.get_dummy_value()) @box(types.NumberClass) def box_number_class(typ, val, c): np_dtype = numpy_support.as_dtype(typ.dtype) return c.pyapi.unserialize(c.pyapi.serialize_object(np_dtype)) @unbox(types.NumberClass) def unbox_number_class(typ, val, c): return NativeValue(c.context.get_dummy_value()) @box(types.PyObject) @box(types.Object) def box_pyobject(typ, val, c): return val @unbox(types.PyObject) @unbox(types.Object) def unbox_pyobject(typ, obj, c): return NativeValue(obj) @unbox(types.ExternalFunctionPointer) def unbox_funcptr(typ, obj, c): if typ.get_pointer is None: raise NotImplementedError(typ) # Call get_pointer() on the object to get the raw pointer value ptrty = c.context.get_function_pointer_type(typ) ret = cgutils.alloca_once_value(c.builder, ir.Constant(ptrty, None), name='fnptr') ser = c.pyapi.serialize_object(typ.get_pointer) get_pointer = c.pyapi.unserialize(ser) with cgutils.if_likely(c.builder, cgutils.is_not_null(c.builder, get_pointer)): intobj = c.pyapi.call_function_objargs(get_pointer, (obj,)) c.pyapi.decref(get_pointer) with cgutils.if_likely(c.builder, cgutils.is_not_null(c.builder, intobj)): ptr = c.pyapi.long_as_voidptr(intobj) c.pyapi.decref(intobj) c.builder.store(c.builder.bitcast(ptr, ptrty), ret) return NativeValue(c.builder.load(ret), is_error=c.pyapi.c_api_error()) @box(types.DeferredType) def box_deferred(typ, val, c): out = c.pyapi.from_native_value(typ.get(), c.builder.extract_value(val, [0]), env_manager=c.env_manager) return out @unbox(types.DeferredType) def unbox_deferred(typ, obj, c): native_value = c.pyapi.to_native_value(typ.get(), obj) model = 
c.context.data_model_manager[typ]
    res = model.set(c.builder, model.make_uninitialized(),
                    native_value.value)
    return NativeValue(res, is_error=native_value.is_error,
                       cleanup=native_value.cleanup)


@unbox(types.Dispatcher)
def unbox_dispatcher(typ, obj, c):
    # In native code, Dispatcher types can be cast to FunctionType.
    return NativeValue(obj)


@box(types.Dispatcher)
def box_pyobject(typ, val, c):
    c.pyapi.incref(val)
    return val


def unbox_unsupported(typ, obj, c):
    c.pyapi.err_set_string("PyExc_TypeError",
                           "can't unbox {!r} type".format(typ))
    res = c.context.get_constant_null(typ)
    return NativeValue(res, is_error=cgutils.true_bit)


def box_unsupported(typ, val, c):
    msg = "cannot convert native %s to Python object" % (typ,)
    c.pyapi.err_set_string("PyExc_TypeError", msg)
    res = c.pyapi.get_null_object()
    return res


@box(types.Literal)
def box_literal(typ, val, c):
    # Const type contains the python object of the constant value,
    # which we can directly return.
    retval = typ.literal_value
    # Serialize the value into the IR
    return c.pyapi.unserialize(c.pyapi.serialize_object(retval))


@box(types.MemInfoPointer)
def box_meminfo_pointer(typ, val, c):
    return c.pyapi.nrt_meminfo_as_pyobject(val)


@unbox(types.MemInfoPointer)
def unbox_meminfo_pointer(typ, obj, c):
    res = c.pyapi.nrt_meminfo_from_pyobject(obj)
    errored = cgutils.is_null(c.builder, res)
    return NativeValue(res, is_error=errored)


@unbox(types.TypeRef)
def unbox_typeref(typ, val, c):
    return NativeValue(c.context.get_dummy_value(),
                       is_error=cgutils.false_bit)


@box(types.LiteralStrKeyDict)
def box_LiteralStrKeyDict(typ, val, c):
    return box_unsupported(typ, val, c)
numba-0.55.1/numba/core/bytecode.py000664 000000 000000 00000025560 14174536160 017105 0ustar00rootroot000000 000000 from collections import namedtuple, OrderedDict
import dis
import inspect
import itertools
from types import CodeType, ModuleType

from numba.core import errors, utils, serialize
from numba.core.utils import PYVERSION


opcode_info = namedtuple('opcode_info', ['argsize'])

# The following offset is used as a hack to inject a NOP at the start of the
# bytecode. So that functions starting with `while True` will not have block-0
# as a jump target. The Lowerer puts argument initialization at block-0.
_FIXED_OFFSET = 2


def get_function_object(obj):
    """
    Objects that wrap a function should provide a "__numba__" magic
    attribute that contains the name of an attribute that in turn holds
    the actual python function object.
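
    A hypothetical example of the protocol::

        class Wrapper:
            __numba__ = "py_func"      # names the attribute below

            def __init__(self, fn):
                self.py_func = fn

        # get_function_object(Wrapper(f)) returns f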
""" attr = getattr(obj, "__numba__", None) if attr: return getattr(obj, attr) return obj def get_code_object(obj): "Shamelessly borrowed from llpython" return getattr(obj, '__code__', getattr(obj, 'func_code', None)) def _as_opcodes(seq): lst = [] for s in seq: c = dis.opmap.get(s) if c is not None: lst.append(c) return lst JREL_OPS = frozenset(dis.hasjrel) JABS_OPS = frozenset(dis.hasjabs) JUMP_OPS = JREL_OPS | JABS_OPS TERM_OPS = frozenset(_as_opcodes(['RETURN_VALUE', 'RAISE_VARARGS'])) EXTENDED_ARG = dis.EXTENDED_ARG HAVE_ARGUMENT = dis.HAVE_ARGUMENT class ByteCodeInst(object): ''' Attributes ---------- - offset: byte offset of opcode - opcode: opcode integer value - arg: instruction arg - lineno: -1 means unknown ''' __slots__ = 'offset', 'next', 'opcode', 'opname', 'arg', 'lineno' def __init__(self, offset, opcode, arg, nextoffset): self.offset = offset self.next = nextoffset self.opcode = opcode self.opname = dis.opname[opcode] self.arg = arg self.lineno = -1 # unknown line number @property def is_jump(self): return self.opcode in JUMP_OPS @property def is_terminator(self): return self.opcode in TERM_OPS def get_jump_target(self): # With Python 3.10 the addressing of "bytecode" instructions has # changed from using bytes to using 16-bit words instead. As a # consequence the code to determine where a jump will lead had to be # adapted. # See also: # https://bugs.python.org/issue26647 # https://bugs.python.org/issue27129 # https://github.com/python/cpython/pull/25069 assert self.is_jump if PYVERSION >= (3, 10): if self.opcode in JREL_OPS: return self.next + self.arg * 2 else: assert self.opcode in JABS_OPS return self.arg * 2 - 2 else: if self.opcode in JREL_OPS: return self.next + self.arg else: assert self.opcode in JABS_OPS return self.arg def __repr__(self): return '%s(arg=%s, lineno=%d)' % (self.opname, self.arg, self.lineno) @property def block_effect(self): """Effect of the block stack Returns +1 (push), 0 (none) or -1 (pop) """ if self.opname.startswith('SETUP_'): return 1 elif self.opname == 'POP_BLOCK': return -1 else: return 0 CODE_LEN = 1 ARG_LEN = 1 NO_ARG_LEN = 1 OPCODE_NOP = dis.opname.index('NOP') # Adapted from Lib/dis.py def _unpack_opargs(code): """ Returns a 4-int-tuple of (bytecode offset, opcode, argument, offset of next bytecode). """ extended_arg = 0 n = len(code) offset = i = 0 while i < n: op = code[i] i += CODE_LEN if op >= HAVE_ARGUMENT: arg = code[i] | extended_arg for j in range(ARG_LEN): arg |= code[i + j] << (8 * j) i += ARG_LEN if op == EXTENDED_ARG: extended_arg = arg << 8 * ARG_LEN continue else: arg = None i += NO_ARG_LEN extended_arg = 0 yield (offset, op, arg, i) offset = i # Mark inst offset at first extended def _patched_opargs(bc_stream): """Patch the bytecode stream. - Adds a NOP bytecode at the start to avoid jump target being at the entry. """ # Injected NOP yield (0, OPCODE_NOP, None, _FIXED_OFFSET) # Adjust bytecode offset for the rest of the stream for offset, opcode, arg, nextoffset in bc_stream: # If the opcode has an absolute jump target, adjust it. 
if opcode in JABS_OPS: arg += _FIXED_OFFSET yield offset + _FIXED_OFFSET, opcode, arg, nextoffset + _FIXED_OFFSET class ByteCodeIter(object): def __init__(self, code): self.code = code self.iter = iter(_patched_opargs(_unpack_opargs(self.code.co_code))) def __iter__(self): return self def _fetch_opcode(self): return next(self.iter) def next(self): offset, opcode, arg, nextoffset = self._fetch_opcode() return offset, ByteCodeInst(offset=offset, opcode=opcode, arg=arg, nextoffset=nextoffset) __next__ = next def read_arg(self, size): buf = 0 for i in range(size): _offset, byte = next(self.iter) buf |= byte << (8 * i) return buf class ByteCode(object): """ The decoded bytecode of a function, and related information. """ __slots__ = ('func_id', 'co_names', 'co_varnames', 'co_consts', 'co_cellvars', 'co_freevars', 'table', 'labels') def __init__(self, func_id): code = func_id.code labels = set(x + _FIXED_OFFSET for x in dis.findlabels(code.co_code)) labels.add(0) # A map of {offset: ByteCodeInst} table = OrderedDict(ByteCodeIter(code)) self._compute_lineno(table, code) self.func_id = func_id self.co_names = code.co_names self.co_varnames = code.co_varnames self.co_consts = code.co_consts self.co_cellvars = code.co_cellvars self.co_freevars = code.co_freevars self.table = table self.labels = sorted(labels) @classmethod def _compute_lineno(cls, table, code): """ Compute the line numbers for all bytecode instructions. """ for offset, lineno in dis.findlinestarts(code): adj_offset = offset + _FIXED_OFFSET if adj_offset in table: table[adj_offset].lineno = lineno # Assign unfilled lineno # Start with first bytecode's lineno known = table[_FIXED_OFFSET].lineno for inst in table.values(): if inst.lineno >= 0: known = inst.lineno else: inst.lineno = known return table def __iter__(self): return iter(self.table.values()) def __getitem__(self, offset): return self.table[offset] def __contains__(self, offset): return offset in self.table def dump(self): def label_marker(i): if i[1].offset in self.labels: return '>' else: return ' ' return '\n'.join('%s %10s\t%s' % ((label_marker(i),) + i) for i in self.table.items()) @classmethod def _compute_used_globals(cls, func, table, co_consts, co_names): """ Compute the globals used by the function with the given bytecode table. """ d = {} globs = func.__globals__ builtins = globs.get('__builtins__', utils.builtins) if isinstance(builtins, ModuleType): builtins = builtins.__dict__ # Look for LOAD_GLOBALs in the bytecode for inst in table.values(): if inst.opname == 'LOAD_GLOBAL': name = co_names[inst.arg] if name not in d: try: value = globs[name] except KeyError: value = builtins[name] d[name] = value # Add globals used by any nested code object for co in co_consts: if isinstance(co, CodeType): subtable = OrderedDict(ByteCodeIter(co)) d.update(cls._compute_used_globals(func, subtable, co.co_consts, co.co_names)) return d def get_used_globals(self): """ Get a {name: value} map of the globals used by this code object and any nested code objects. """ return self._compute_used_globals(self.func_id.func, self.table, self.co_consts, self.co_names) class FunctionIdentity(serialize.ReduceMixin): """ A function's identity and metadata. Note this typically represents a function whose bytecode is being compiled, not necessarily the top-level user function (the two might be distinct, e.g. in the `@generated_jit` case). """ _unique_ids = itertools.count(1) @classmethod def from_function(cls, pyfunc): """ Create the FunctionIdentity of the given function. 
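
        Illustrative: repeated calls for the same ``def`` yield distinct
        identities, distinguished only by the counter in ``unique_name``::

            # e.g. "outer.<locals>.inner$4", then "outer.<locals>.inner$5"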
""" func = get_function_object(pyfunc) code = get_code_object(func) pysig = utils.pysignature(func) if not code: raise errors.ByteCodeSupportError( "%s does not provide its bytecode" % func) try: func_qualname = func.__qualname__ except AttributeError: func_qualname = func.__name__ self = cls() self.func = func self.func_qualname = func_qualname self.func_name = func_qualname.split('.')[-1] self.code = code self.module = inspect.getmodule(func) self.modname = (utils._dynamic_modname if self.module is None else self.module.__name__) self.is_generator = inspect.isgeneratorfunction(func) self.pysig = pysig self.filename = code.co_filename self.firstlineno = code.co_firstlineno self.arg_count = len(pysig.parameters) self.arg_names = list(pysig.parameters) # Even the same function definition can be compiled into # several different function objects with distinct closure # variables, so we make sure to disambiguate using an unique id. uid = next(cls._unique_ids) self.unique_name = '{}${}'.format(self.func_qualname, uid) return self def derive(self): """Copy the object and increment the unique counter. """ return self.from_function(self.func) def _reduce_states(self): """ NOTE: part of ReduceMixin protocol """ return dict(pyfunc=self.func) @classmethod def _rebuild(cls, pyfunc): """ NOTE: part of ReduceMixin protocol """ return cls.from_function(pyfunc) numba-0.55.1/numba/core/byteflow.py000664 000000 000000 00000136746 14174536160 017153 0ustar00rootroot000000 000000 """ Implement python 3.8+ bytecode analysis """ from pprint import pformat import logging from collections import namedtuple, defaultdict, deque from functools import total_ordering from numba.core.utils import UniqueDict, PYVERSION from numba.core.controlflow import NEW_BLOCKERS, CFGraph from numba.core.ir import Loc from numba.core.errors import UnsupportedError _logger = logging.getLogger(__name__) _EXCEPT_STACK_OFFSET = 6 _FINALLY_POP = _EXCEPT_STACK_OFFSET if PYVERSION >= (3, 8) else 1 _NO_RAISE_OPS = frozenset({ 'LOAD_CONST', }) @total_ordering class BlockKind(object): """Kinds of block to make related code safer than just `str`. """ _members = frozenset({ 'LOOP', 'TRY', 'EXCEPT', 'FINALLY', 'WITH', 'WITH_FINALLY', }) def __init__(self, value): assert value in self._members self._value = value def __hash__(self): return hash((type(self), self._value)) def __lt__(self, other): if isinstance(other, BlockKind): return self._value < other._value else: raise TypeError('cannot compare to {!r}'.format(type(other))) def __eq__(self, other): if isinstance(other, BlockKind): return self._value == other._value else: raise TypeError('cannot compare to {!r}'.format(type(other))) def __repr__(self): return "BlockKind({})".format(self._value) class _lazy_pformat(object): def __init__(self, *args, **kwargs): self.args = args self.kwargs = kwargs def __str__(self): return pformat(*self.args, **self.kwargs) class Flow(object): """Data+Control Flow analysis. Simulate execution to recover dataflow and controlflow information. """ def __init__(self, bytecode): _logger.debug("bytecode dump:\n%s", bytecode.dump()) self._bytecode = bytecode self.block_infos = UniqueDict() def run(self): """Run a trace over the bytecode over all reachable path. The trace starts at bytecode offset 0 and gathers stack and control- flow information by partially interpreting each bytecode. Each ``State`` instance in the trace corresponds to a basic-block. The State instances forks when a jump instruction is encountered. 
A newly forked state is then added to the list of pending states. The trace ends when there are no more pending states. """ firststate = State(bytecode=self._bytecode, pc=0, nstack=0, blockstack=()) runner = TraceRunner(debug_filename=self._bytecode.func_id.filename) runner.pending.append(firststate) # Enforce unique-ness on initial PC to avoid re-entering the PC with # a different stack-depth. We don't know if such a case is ever # possible, but no such case has been encountered in our tests. first_encounter = UniqueDict() # Loop over each pending state at a initial PC. # Each state is tracing a basic block while runner.pending: _logger.debug("pending: %s", runner.pending) state = runner.pending.popleft() if state not in runner.finished: _logger.debug("stack: %s", state._stack) first_encounter[state.pc_initial] = state # Loop over the state until it is terminated. while True: runner.dispatch(state) # Terminated? if state.has_terminated(): break elif (state.has_active_try() and state.get_inst().opname not in _NO_RAISE_OPS): # Is in a *try* block state.fork(pc=state.get_inst().next) tryblk = state.get_top_block('TRY') state.pop_block_and_above(tryblk) nstack = state.stack_depth kwargs = {} if nstack > tryblk['entry_stack']: kwargs['npop'] = nstack - tryblk['entry_stack'] handler = tryblk['handler'] kwargs['npush'] = { BlockKind('EXCEPT'): _EXCEPT_STACK_OFFSET, BlockKind('FINALLY'): _FINALLY_POP }[handler['kind']] kwargs['extra_block'] = handler state.fork(pc=tryblk['end'], **kwargs) break else: state.advance_pc() # Must the new PC be a new block? if self._is_implicit_new_block(state): # check if this is a with...as, abort if so self._guard_with_as(state) # else split state.split_new_block() break _logger.debug("end state. edges=%s", state.outgoing_edges) runner.finished.add(state) out_states = state.get_outgoing_states() runner.pending.extend(out_states) # Complete controlflow self._build_cfg(runner.finished) # Prune redundant PHI-nodes self._prune_phis(runner) # Post process for state in sorted(runner.finished, key=lambda x: x.pc_initial): self.block_infos[state.pc_initial] = si = adapt_state_infos(state) _logger.debug("block_infos %s:\n%s", state, si) def _build_cfg(self, all_states): graph = CFGraph() for state in all_states: b = state.pc_initial graph.add_node(b) for state in all_states: for edge in state.outgoing_edges: graph.add_edge(state.pc_initial, edge.pc, 0) graph.set_entry_point(0) graph.process() self.cfgraph = graph def _prune_phis(self, runner): # Find phis that are unused in the local block _logger.debug("Prune PHIs".center(60, '-')) # Compute dataflow for used phis and propagate # 1. Get used-phis for each block # Map block to used_phis def get_used_phis_per_state(): used_phis = defaultdict(set) phi_set = set() for state in runner.finished: used = set(state._used_regs) phis = set(state._phis) used_phis[state] |= phis & used phi_set |= phis return used_phis, phi_set # Find use-defs def find_use_defs(): defmap = {} phismap = defaultdict(set) for state in runner.finished: for phi, rhs in state._outgoing_phis.items(): if rhs not in phi_set: # Is a definition defmap[phi] = state phismap[phi].add((rhs, state)) _logger.debug("defmap: %s", _lazy_pformat(defmap)) _logger.debug("phismap: %s", _lazy_pformat(phismap)) return defmap, phismap def propagate_phi_map(phismap): """An iterative dataflow algorithm to find the definition (the source) of each PHI node. 
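
            A hedged sketch with hypothetical names::

                # before: phismap = {'$phi1': {('$phi0', s1)},
                #                    '$phi0': {('x',    s0)}}
                # '$phi0' is itself a PHI, so its def-sites are folded into
                # '$phi1' and the phi-to-phi pair is blacklisted:
                # after:  phismap = {'$phi1': {('x', s0)},
                #                    '$phi0': {('x', s0)}}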
""" blacklist = defaultdict(set) while True: changing = False for phi, defsites in sorted(list(phismap.items())): for rhs, state in sorted(list(defsites)): if rhs in phi_set: defsites |= phismap[rhs] blacklist[phi].add((rhs, state)) to_remove = blacklist[phi] if to_remove & defsites: defsites -= to_remove changing = True _logger.debug("changing phismap: %s", _lazy_pformat(phismap)) if not changing: break def apply_changes(used_phis, phismap): keep = {} for state, used_set in used_phis.items(): for phi in used_set: keep[phi] = phismap[phi] _logger.debug("keep phismap: %s", _lazy_pformat(keep)) new_out = defaultdict(dict) for phi in keep: for rhs, state in keep[phi]: new_out[state][phi] = rhs _logger.debug("new_out: %s", _lazy_pformat(new_out)) for state in runner.finished: state._outgoing_phis.clear() state._outgoing_phis.update(new_out[state]) used_phis, phi_set = get_used_phis_per_state() _logger.debug("Used_phis: %s", _lazy_pformat(used_phis)) defmap, phismap = find_use_defs() propagate_phi_map(phismap) apply_changes(used_phis, phismap) _logger.debug("DONE Prune PHIs".center(60, '-')) def _is_implicit_new_block(self, state): inst = state.get_inst() if inst.offset in self._bytecode.labels: return True elif inst.opname in NEW_BLOCKERS: return True else: return False def _guard_with_as(self, state): """Checks if the next instruction after a SETUP_WITH is something other than a POP_TOP, if it is something else it'll be some sort of store which is not supported (this corresponds to `with CTXMGR as VAR(S)`).""" current_inst = state.get_inst() if current_inst.opname == "SETUP_WITH": next_op = self._bytecode[current_inst.next].opname if next_op != "POP_TOP": msg = ("The 'with (context manager) as " "(variable):' construct is not " "supported.") raise UnsupportedError(msg) class TraceRunner(object): """Trace runner contains the states for the trace and the opcode dispatch. """ def __init__(self, debug_filename): self.debug_filename = debug_filename self.pending = deque() self.finished = set() def get_debug_loc(self, lineno): return Loc(self.debug_filename, lineno) def dispatch(self, state): inst = state.get_inst() _logger.debug("dispatch pc=%s, inst=%s", state._pc, inst) _logger.debug("stack %s", state._stack) fn = getattr(self, "op_{}".format(inst.opname), None) if fn is not None: fn(state, inst) else: msg = "Use of unsupported opcode (%s) found" % inst.opname raise UnsupportedError(msg, loc=self.get_debug_loc(inst.lineno)) def op_NOP(self, state, inst): state.append(inst) def op_FORMAT_VALUE(self, state, inst): """ FORMAT_VALUE(flags): flags argument specifies format spec which is not supported yet. Currently, we just call str() on the value. Pops a value from stack and pushes results back. Required for supporting f-strings. https://docs.python.org/3/library/dis.html#opcode-FORMAT_VALUE """ if inst.arg != 0: msg = "format spec in f-strings not supported yet" raise UnsupportedError(msg, loc=self.get_debug_loc(inst.lineno)) value = state.pop() strvar = state.make_temp() res = state.make_temp() state.append(inst, value=value, res=res, strvar=strvar) state.push(res) def op_BUILD_STRING(self, state, inst): """ BUILD_STRING(count): Concatenates count strings from the stack and pushes the resulting string onto the stack. Required for supporting f-strings. 
https://docs.python.org/3/library/dis.html#opcode-BUILD_STRING """ count = inst.arg strings = list(reversed([state.pop() for _ in range(count)])) # corner case: f"" if count == 0: tmps = [state.make_temp()] else: tmps = [state.make_temp() for _ in range(count - 1)] state.append(inst, strings=strings, tmps=tmps) state.push(tmps[-1]) def op_POP_TOP(self, state, inst): state.pop() def op_LOAD_GLOBAL(self, state, inst): res = state.make_temp() state.append(inst, res=res) state.push(res) def op_LOAD_DEREF(self, state, inst): res = state.make_temp() state.append(inst, res=res) state.push(res) def op_LOAD_CONST(self, state, inst): res = state.make_temp("const") state.push(res) state.append(inst, res=res) def op_LOAD_ATTR(self, state, inst): item = state.pop() res = state.make_temp() state.append(inst, item=item, res=res) state.push(res) def op_LOAD_FAST(self, state, inst): name = state.get_varname(inst) res = state.make_temp(name) state.append(inst, res=res) state.push(res) def op_DELETE_FAST(self, state, inst): state.append(inst) def op_DELETE_ATTR(self, state, inst): target = state.pop() state.append(inst, target=target) def op_STORE_ATTR(self, state, inst): target = state.pop() value = state.pop() state.append(inst, target=target, value=value) def op_STORE_DEREF(self, state, inst): value = state.pop() state.append(inst, value=value) def op_STORE_FAST(self, state, inst): value = state.pop() state.append(inst, value=value) def op_SLICE_1(self, state, inst): """ TOS = TOS1[TOS:] """ tos = state.pop() tos1 = state.pop() res = state.make_temp() slicevar = state.make_temp() indexvar = state.make_temp() nonevar = state.make_temp() state.append( inst, base=tos1, start=tos, res=res, slicevar=slicevar, indexvar=indexvar, nonevar=nonevar, ) state.push(res) def op_SLICE_2(self, state, inst): """ TOS = TOS1[:TOS] """ tos = state.pop() tos1 = state.pop() res = state.make_temp() slicevar = state.make_temp() indexvar = state.make_temp() nonevar = state.make_temp() state.append( inst, base=tos1, stop=tos, res=res, slicevar=slicevar, indexvar=indexvar, nonevar=nonevar, ) state.push(res) def op_SLICE_3(self, state, inst): """ TOS = TOS2[TOS1:TOS] """ tos = state.pop() tos1 = state.pop() tos2 = state.pop() res = state.make_temp() slicevar = state.make_temp() indexvar = state.make_temp() state.append( inst, base=tos2, start=tos1, stop=tos, res=res, slicevar=slicevar, indexvar=indexvar, ) state.push(res) def op_STORE_SLICE_0(self, state, inst): """ TOS[:] = TOS1 """ tos = state.pop() value = state.pop() slicevar = state.make_temp() indexvar = state.make_temp() nonevar = state.make_temp() state.append( inst, base=tos, value=value, slicevar=slicevar, indexvar=indexvar, nonevar=nonevar, ) def op_STORE_SLICE_1(self, state, inst): """ TOS1[TOS:] = TOS2 """ tos = state.pop() tos1 = state.pop() value = state.pop() slicevar = state.make_temp() indexvar = state.make_temp() nonevar = state.make_temp() state.append( inst, base=tos1, start=tos, slicevar=slicevar, value=value, indexvar=indexvar, nonevar=nonevar, ) def op_STORE_SLICE_2(self, state, inst): """ TOS1[:TOS] = TOS2 """ tos = state.pop() tos1 = state.pop() value = state.pop() slicevar = state.make_temp() indexvar = state.make_temp() nonevar = state.make_temp() state.append( inst, base=tos1, stop=tos, value=value, slicevar=slicevar, indexvar=indexvar, nonevar=nonevar, ) def op_STORE_SLICE_3(self, state, inst): """ TOS2[TOS1:TOS] = TOS3 """ tos = state.pop() tos1 = state.pop() tos2 = state.pop() value = state.pop() slicevar = state.make_temp() indexvar = 
state.make_temp() state.append( inst, base=tos2, start=tos1, stop=tos, value=value, slicevar=slicevar, indexvar=indexvar, ) def op_DELETE_SLICE_0(self, state, inst): """ del TOS[:] """ tos = state.pop() slicevar = state.make_temp() indexvar = state.make_temp() nonevar = state.make_temp() state.append( inst, base=tos, slicevar=slicevar, indexvar=indexvar, nonevar=nonevar, ) def op_DELETE_SLICE_1(self, state, inst): """ del TOS1[TOS:] """ tos = state.pop() tos1 = state.pop() slicevar = state.make_temp() indexvar = state.make_temp() nonevar = state.make_temp() state.append( inst, base=tos1, start=tos, slicevar=slicevar, indexvar=indexvar, nonevar=nonevar, ) def op_DELETE_SLICE_2(self, state, inst): """ del TOS1[:TOS] """ tos = state.pop() tos1 = state.pop() slicevar = state.make_temp() indexvar = state.make_temp() nonevar = state.make_temp() state.append( inst, base=tos1, stop=tos, slicevar=slicevar, indexvar=indexvar, nonevar=nonevar, ) def op_DELETE_SLICE_3(self, state, inst): """ del TOS2[TOS1:TOS] """ tos = state.pop() tos1 = state.pop() tos2 = state.pop() slicevar = state.make_temp() indexvar = state.make_temp() state.append( inst, base=tos2, start=tos1, stop=tos, slicevar=slicevar, indexvar=indexvar ) def op_BUILD_SLICE(self, state, inst): """ slice(TOS1, TOS) or slice(TOS2, TOS1, TOS) """ argc = inst.arg if argc == 2: tos = state.pop() tos1 = state.pop() start = tos1 stop = tos step = None elif argc == 3: tos = state.pop() tos1 = state.pop() tos2 = state.pop() start = tos2 stop = tos1 step = tos else: raise Exception("unreachable") slicevar = state.make_temp() res = state.make_temp() state.append( inst, start=start, stop=stop, step=step, res=res, slicevar=slicevar ) state.push(res) def _op_POP_JUMP_IF(self, state, inst): pred = state.pop() state.append(inst, pred=pred) target_inst = inst.get_jump_target() next_inst = inst.next # if the next inst and the jump target are the same location, issue one # fork else issue a fork for the next and the target. 
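# For example, a (hypothetical) expression like `x = 1 if p else 2` # compiles to a POP_JUMP_IF_FALSE whose fall-through and jump target # start the two arms; each successor becomes a pending State and is # traced as its own basic block.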
state.fork(pc=next_inst) if target_inst != next_inst: state.fork(pc=target_inst) op_POP_JUMP_IF_TRUE = _op_POP_JUMP_IF op_POP_JUMP_IF_FALSE = _op_POP_JUMP_IF def _op_JUMP_IF_OR_POP(self, state, inst): pred = state.get_tos() state.append(inst, pred=pred) state.fork(pc=inst.next, npop=1) state.fork(pc=inst.get_jump_target()) op_JUMP_IF_FALSE_OR_POP = _op_JUMP_IF_OR_POP op_JUMP_IF_TRUE_OR_POP = _op_JUMP_IF_OR_POP def op_JUMP_FORWARD(self, state, inst): state.append(inst) state.fork(pc=inst.get_jump_target()) def op_JUMP_ABSOLUTE(self, state, inst): state.append(inst) state.fork(pc=inst.get_jump_target()) def op_BREAK_LOOP(self, state, inst): # NOTE: bytecode removed since py3.8 end = state.get_top_block('LOOP')['end'] state.append(inst, end=end) state.pop_block() state.fork(pc=end) def op_RETURN_VALUE(self, state, inst): state.append(inst, retval=state.pop(), castval=state.make_temp()) state.terminate() def op_YIELD_VALUE(self, state, inst): val = state.pop() res = state.make_temp() state.append(inst, value=val, res=res) state.push(res) def op_RAISE_VARARGS(self, state, inst): in_exc_block = any([ state.get_top_block("EXCEPT") is not None, state.get_top_block("FINALLY") is not None ]) if inst.arg == 0: exc = None if in_exc_block: raise UnsupportedError( "The re-raising of an exception is not yet supported.", loc=self.get_debug_loc(inst.lineno), ) elif inst.arg == 1: exc = state.pop() else: raise ValueError("Multiple argument raise is not supported.") state.append(inst, exc=exc) state.terminate() def op_BEGIN_FINALLY(self, state, inst): temps = [] for i in range(_EXCEPT_STACK_OFFSET): tmp = state.make_temp() temps.append(tmp) state.push(tmp) state.append(inst, temps=temps) def op_END_FINALLY(self, state, inst): blk = state.pop_block() state.reset_stack(blk['entry_stack']) def op_POP_FINALLY(self, state, inst): # we don't emulate the exact stack behavior if inst.arg != 0: msg = ('Unsupported use of a bytecode related to try..finally' ' or a with-context') raise UnsupportedError(msg, loc=self.get_debug_loc(inst.lineno)) def op_CALL_FINALLY(self, state, inst): pass def op_WITH_CLEANUP_START(self, state, inst): # we don't emulate the exact stack behavior state.append(inst) def op_WITH_CLEANUP_FINISH(self, state, inst): # we don't emulate the exact stack behavior state.append(inst) def op_SETUP_LOOP(self, state, inst): # NOTE: bytecode removed since py3.8 state.push_block( state.make_block( kind='LOOP', end=inst.get_jump_target(), ) ) def op_SETUP_WITH(self, state, inst): cm = state.pop() # the context-manager yielded = state.make_temp() exitfn = state.make_temp(prefix='setup_with_exitfn') state.append(inst, contextmanager=cm, exitfn=exitfn) # py39 doesn't have with-finally if PYVERSION < (3, 9): state.push_block( state.make_block( kind='WITH_FINALLY', end=inst.get_jump_target(), ) ) state.push(exitfn) state.push(yielded) state.push_block( state.make_block( kind='WITH', end=inst.get_jump_target(), ) ) # Forces a new block state.fork(pc=inst.next) def _setup_try(self, kind, state, next, end): handler_block = state.make_block( kind=kind, end=None, reset_stack=False, ) # Forces a new block # Fork to the body of the finally state.fork( pc=next, extra_block=state.make_block( kind='TRY', end=end, reset_stack=False, handler=handler_block, ) ) def op_SETUP_EXCEPT(self, state, inst): # Opcode removed since py3.8 state.append(inst) self._setup_try( 'EXCEPT', state, next=inst.next, end=inst.get_jump_target(), ) def op_SETUP_FINALLY(self, state, inst): state.append(inst) self._setup_try( 'FINALLY', state, 
next=inst.next, end=inst.get_jump_target(), ) def op_POP_EXCEPT(self, state, inst): blk = state.pop_block() if blk['kind'] not in {BlockKind('EXCEPT'), BlockKind('FINALLY')}: raise UnsupportedError( "POP_EXCEPT got an unexpected block: {}".format(blk['kind']), loc=self.get_debug_loc(inst.lineno), ) state.pop() state.pop() state.pop() # Forces a new block state.fork(pc=inst.next) def op_POP_BLOCK(self, state, inst): blk = state.pop_block() if blk['kind'] == BlockKind('TRY'): state.append(inst, kind='try') elif blk['kind'] == BlockKind('WITH'): state.append(inst, kind='with') state.fork(pc=inst.next) def op_BINARY_SUBSCR(self, state, inst): index = state.pop() target = state.pop() res = state.make_temp() state.append(inst, index=index, target=target, res=res) state.push(res) def op_STORE_SUBSCR(self, state, inst): index = state.pop() target = state.pop() value = state.pop() state.append(inst, target=target, index=index, value=value) def op_DELETE_SUBSCR(self, state, inst): index = state.pop() target = state.pop() state.append(inst, target=target, index=index) def op_CALL_FUNCTION(self, state, inst): narg = inst.arg args = list(reversed([state.pop() for _ in range(narg)])) func = state.pop() res = state.make_temp() state.append(inst, func=func, args=args, res=res) state.push(res) def op_CALL_FUNCTION_KW(self, state, inst): narg = inst.arg names = state.pop() # tuple of names args = list(reversed([state.pop() for _ in range(narg)])) func = state.pop() res = state.make_temp() state.append(inst, func=func, args=args, names=names, res=res) state.push(res) def op_CALL_FUNCTION_EX(self, state, inst): if inst.arg & 1: errmsg = "CALL_FUNCTION_EX with **kwargs not supported" raise UnsupportedError(errmsg) vararg = state.pop() func = state.pop() res = state.make_temp() state.append(inst, func=func, vararg=vararg, res=res) state.push(res) def _dup_topx(self, state, inst, count): orig = [state.pop() for _ in range(count)] orig.reverse() # We need to actually create new temporaries if we want the # IR optimization pass to work correctly (see issue #580) duped = [state.make_temp() for _ in range(count)] state.append(inst, orig=orig, duped=duped) for val in orig: state.push(val) for val in duped: state.push(val) def op_DUP_TOPX(self, state, inst): count = inst.arg assert 1 <= count <= 5, "Invalid DUP_TOPX count" self._dup_topx(state, inst, count) def op_DUP_TOP(self, state, inst): self._dup_topx(state, inst, count=1) def op_DUP_TOP_TWO(self, state, inst): self._dup_topx(state, inst, count=2) def op_ROT_TWO(self, state, inst): first = state.pop() second = state.pop() state.push(first) state.push(second) def op_ROT_THREE(self, state, inst): first = state.pop() second = state.pop() third = state.pop() state.push(first) state.push(third) state.push(second) def op_ROT_FOUR(self, state, inst): first = state.pop() second = state.pop() third = state.pop() forth = state.pop() state.push(first) state.push(forth) state.push(third) state.push(second) def op_UNPACK_SEQUENCE(self, state, inst): count = inst.arg iterable = state.pop() stores = [state.make_temp() for _ in range(count)] tupleobj = state.make_temp() state.append(inst, iterable=iterable, stores=stores, tupleobj=tupleobj) for st in reversed(stores): state.push(st) def op_BUILD_TUPLE(self, state, inst): count = inst.arg items = list(reversed([state.pop() for _ in range(count)])) tup = state.make_temp() state.append(inst, items=items, res=tup) state.push(tup) def _build_tuple_unpack(self, state, inst): # Builds tuple from other tuples on the stack tuples = 
list(reversed([state.pop() for _ in range(inst.arg)])) temps = [state.make_temp() for _ in range(len(tuples) - 1)] # if the unpack is assign-like, e.g. x = (*y,), it needs handling # differently. is_assign = len(tuples) == 1 if is_assign: temps = [state.make_temp(),] state.append(inst, tuples=tuples, temps=temps, is_assign=is_assign) # The result is in the last temp var state.push(temps[-1]) def op_BUILD_TUPLE_UNPACK_WITH_CALL(self, state, inst): # just unpack the input tuple, call inst will be handled afterwards self._build_tuple_unpack(state, inst) def op_BUILD_TUPLE_UNPACK(self, state, inst): self._build_tuple_unpack(state, inst) def op_LIST_TO_TUPLE(self, state, inst): # "Pops a list from the stack and pushes a tuple containing the same # values." tos = state.pop() res = state.make_temp() # new tuple var state.append(inst, const_list=tos, res=res) state.push(res) def op_BUILD_CONST_KEY_MAP(self, state, inst): keys = state.pop() vals = list(reversed([state.pop() for _ in range(inst.arg)])) keytmps = [state.make_temp() for _ in range(inst.arg)] res = state.make_temp() state.append(inst, keys=keys, keytmps=keytmps, values=vals, res=res) state.push(res) def op_BUILD_LIST(self, state, inst): count = inst.arg items = list(reversed([state.pop() for _ in range(count)])) lst = state.make_temp() state.append(inst, items=items, res=lst) state.push(lst) def op_LIST_APPEND(self, state, inst): value = state.pop() index = inst.arg target = state.peek(index) appendvar = state.make_temp() res = state.make_temp() state.append(inst, target=target, value=value, appendvar=appendvar, res=res) def op_LIST_EXTEND(self, state, inst): value = state.pop() index = inst.arg target = state.peek(index) extendvar = state.make_temp() res = state.make_temp() state.append(inst, target=target, value=value, extendvar=extendvar, res=res) def op_BUILD_MAP(self, state, inst): dct = state.make_temp() count = inst.arg items = [] # In 3.5+, BUILD_MAP takes pairs from the stack for i in range(count): v, k = state.pop(), state.pop() items.append((k, v)) state.append(inst, items=items[::-1], size=count, res=dct) state.push(dct) def op_MAP_ADD(self, state, inst): # NOTE: https://docs.python.org/3/library/dis.html#opcode-MAP_ADD # Python >= 3.8: TOS and TOS1 are value and key respectively # Python < 3.8: TOS and TOS1 are key and value respectively TOS = state.pop() TOS1 = state.pop() key, value = (TOS, TOS1) if PYVERSION < (3, 8) else (TOS1, TOS) index = inst.arg target = state.peek(index) setitemvar = state.make_temp() res = state.make_temp() state.append(inst, target=target, key=key, value=value, setitemvar=setitemvar, res=res) def op_BUILD_SET(self, state, inst): count = inst.arg # Note: related python bug http://bugs.python.org/issue26020 items = list(reversed([state.pop() for _ in range(count)])) res = state.make_temp() state.append(inst, items=items, res=res) state.push(res) def op_SET_UPDATE(self, state, inst): value = state.pop() index = inst.arg target = state.peek(index) updatevar = state.make_temp() res = state.make_temp() state.append(inst, target=target, value=value, updatevar=updatevar, res=res) def op_GET_ITER(self, state, inst): value = state.pop() res = state.make_temp() state.append(inst, value=value, res=res) state.push(res) def op_FOR_ITER(self, state, inst): iterator = state.get_tos() pair = state.make_temp() indval = state.make_temp() pred = state.make_temp() state.append(inst, iterator=iterator, pair=pair, indval=indval, pred=pred) state.push(indval) end = inst.get_jump_target() state.fork(pc=end, npop=2) 
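# The fork above models loop exit: the jump to the end label pops both # the iterator and the just-pushed induction value (npop=2). The fork # below falls through into the loop body with the induction value on TOS.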
state.fork(pc=inst.next) def op_GEN_START(self, state, inst): """Pops TOS. If TOS was not None, raises an exception. The kind operand corresponds to the type of generator or coroutine and determines the error message. The legal kinds are 0 for generator, 1 for coroutine, and 2 for async generator. New in version 3.10. """ # no-op in Numba pass def _unaryop(self, state, inst): val = state.pop() res = state.make_temp() state.append(inst, value=val, res=res) state.push(res) op_UNARY_NEGATIVE = _unaryop op_UNARY_POSITIVE = _unaryop op_UNARY_NOT = _unaryop op_UNARY_INVERT = _unaryop def _binaryop(self, state, inst): rhs = state.pop() lhs = state.pop() res = state.make_temp() state.append(inst, lhs=lhs, rhs=rhs, res=res) state.push(res) op_COMPARE_OP = _binaryop op_IS_OP = _binaryop op_CONTAINS_OP = _binaryop op_INPLACE_ADD = _binaryop op_INPLACE_SUBTRACT = _binaryop op_INPLACE_MULTIPLY = _binaryop op_INPLACE_DIVIDE = _binaryop op_INPLACE_TRUE_DIVIDE = _binaryop op_INPLACE_FLOOR_DIVIDE = _binaryop op_INPLACE_MODULO = _binaryop op_INPLACE_POWER = _binaryop op_INPLACE_MATRIX_MULTIPLY = _binaryop op_INPLACE_LSHIFT = _binaryop op_INPLACE_RSHIFT = _binaryop op_INPLACE_AND = _binaryop op_INPLACE_OR = _binaryop op_INPLACE_XOR = _binaryop op_BINARY_ADD = _binaryop op_BINARY_SUBTRACT = _binaryop op_BINARY_MULTIPLY = _binaryop op_BINARY_DIVIDE = _binaryop op_BINARY_TRUE_DIVIDE = _binaryop op_BINARY_FLOOR_DIVIDE = _binaryop op_BINARY_MODULO = _binaryop op_BINARY_POWER = _binaryop op_BINARY_MATRIX_MULTIPLY = _binaryop op_BINARY_LSHIFT = _binaryop op_BINARY_RSHIFT = _binaryop op_BINARY_AND = _binaryop op_BINARY_OR = _binaryop op_BINARY_XOR = _binaryop def op_MAKE_FUNCTION(self, state, inst, MAKE_CLOSURE=False): name = state.pop() code = state.pop() closure = annotations = kwdefaults = defaults = None if PYVERSION < (3, 6): num_posdefaults = inst.arg & 0xFF num_kwdefaults = (inst.arg >> 8) & 0xFF num_annotations = (inst.arg >> 16) & 0x7FFF if MAKE_CLOSURE: closure = state.pop() if num_annotations > 0: annotations = state.pop() if num_kwdefaults > 0: kwdefaults = [] for i in range(num_kwdefaults): v = state.pop() k = state.pop() kwdefaults.append((k, v)) kwdefaults = tuple(kwdefaults) if num_posdefaults: defaults = [] for i in range(num_posdefaults): defaults.append(state.pop()) defaults = tuple(defaults) else: if inst.arg & 0x8: closure = state.pop() if inst.arg & 0x4: annotations = state.pop() if inst.arg & 0x2: kwdefaults = state.pop() if inst.arg & 0x1: defaults = state.pop() res = state.make_temp() state.append( inst, name=name, code=code, closure=closure, annotations=annotations, kwdefaults=kwdefaults, defaults=defaults, res=res, ) state.push(res) def op_MAKE_CLOSURE(self, state, inst): self.op_MAKE_FUNCTION(state, inst, MAKE_CLOSURE=True) def op_LOAD_CLOSURE(self, state, inst): res = state.make_temp() state.append(inst, res=res) state.push(res) def op_LOAD_ASSERTION_ERROR(self, state, inst): res = state.make_temp("assertion_error") state.append(inst, res=res) state.push(res) def op_JUMP_IF_NOT_EXC_MATCH(self, state, inst): # Tests whether the second value on the stack is an exception matching # TOS, and jumps if it is not. Pops two values from the stack. 
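# Both successors are traced: the fall-through (exception matched) and # the jump target (no match); the match predicate lives in a fresh temp.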
pred = state.make_temp("predicate") tos = state.pop() tos1 = state.pop() state.append(inst, pred=pred, tos=tos, tos1=tos1) state.fork(pc=inst.next) state.fork(pc=inst.get_jump_target()) def op_RERAISE(self, state, inst): # This isn't handled, but the state is set up anyway exc = state.pop() state.append(inst, exc=exc) state.terminate() # NOTE: Please see notes in `interpreter.py` surrounding the implementation # of LOAD_METHOD and CALL_METHOD. def op_LOAD_METHOD(self, state, inst): self.op_LOAD_ATTR(state, inst) def op_CALL_METHOD(self, state, inst): self.op_CALL_FUNCTION(state, inst) @total_ordering class State(object): """State of the trace """ def __init__(self, bytecode, pc, nstack, blockstack): """ Parameters ---------- bytecode : numba.bytecode.ByteCode function bytecode pc : int program counter nstack : int stackdepth at entry blockstack : Sequence[Dict] A sequence of dictionary denoting entries on the blockstack. """ self._bytecode = bytecode self._pc_initial = pc self._pc = pc self._nstack_initial = nstack self._stack = [] self._blockstack_initial = tuple(blockstack) self._blockstack = list(blockstack) self._temp_registers = [] self._insts = [] self._outedges = [] self._terminated = False self._phis = {} self._outgoing_phis = UniqueDict() self._used_regs = set() for i in range(nstack): phi = self.make_temp("phi") self._phis[phi] = i self.push(phi) def __repr__(self): return "State(pc_initial={} nstack_initial={})".format( self._pc_initial, self._nstack_initial ) def get_identity(self): return (self._pc_initial, self._nstack_initial) def __hash__(self): return hash(self.get_identity()) def __lt__(self, other): return self.get_identity() < other.get_identity() def __eq__(self, other): return self.get_identity() == other.get_identity() @property def pc_initial(self): """The starting bytecode offset of this State. The PC given to the constructor. """ return self._pc_initial @property def instructions(self): """The list of instructions information as a 2-tuple of ``(pc : int, register_map : Dict)`` """ return self._insts @property def outgoing_edges(self): """The list of outgoing edges. Returns ------- edges : List[State] """ return self._outedges @property def outgoing_phis(self): """The dictionary of outgoing phi nodes. The keys are the name of the PHI nodes. The values are the outgoing states. """ return self._outgoing_phis @property def blockstack_initial(self): """A copy of the initial state of the blockstack """ return self._blockstack_initial @property def stack_depth(self): """The current size of the stack Returns ------- res : int """ return len(self._stack) def find_initial_try_block(self): """Find the initial *try* block. 
""" for blk in reversed(self._blockstack_initial): if blk['kind'] == BlockKind('TRY'): return blk def has_terminated(self): return self._terminated def get_inst(self): return self._bytecode[self._pc] def advance_pc(self): inst = self.get_inst() self._pc = inst.next def make_temp(self, prefix=""): if not prefix: name = "${prefix}{offset}{opname}.{tempct}".format( prefix=prefix, offset=self._pc, opname=self.get_inst().opname.lower(), tempct=len(self._temp_registers), ) else: name = "${prefix}{offset}.{tempct}".format( prefix=prefix, offset=self._pc, tempct=len(self._temp_registers), ) self._temp_registers.append(name) return name def append(self, inst, **kwargs): """Append new inst""" self._insts.append((inst.offset, kwargs)) self._used_regs |= set(_flatten_inst_regs(kwargs.values())) def get_tos(self): return self.peek(1) def peek(self, k): """Return the k'th element on the stack """ return self._stack[-k] def push(self, item): """Push to stack""" self._stack.append(item) def pop(self): """Pop the stack""" return self._stack.pop() def push_block(self, synblk): """Push a block to blockstack """ assert 'stack_depth' in synblk self._blockstack.append(synblk) def reset_stack(self, depth): """Reset the stack to the given stack depth. Returning the popped items. """ self._stack, popped = self._stack[:depth], self._stack[depth:] return popped def make_block(self, kind, end, reset_stack=True, handler=None): """Make a new block """ d = { 'kind': BlockKind(kind), 'end': end, 'entry_stack': len(self._stack), } if reset_stack: d['stack_depth'] = len(self._stack) else: d['stack_depth'] = None d['handler'] = handler return d def pop_block(self): """Pop a block and unwind the stack """ b = self._blockstack.pop() self.reset_stack(b['stack_depth']) return b def pop_block_and_above(self, blk): """Find *blk* in the blockstack and remove it and all blocks above it from the stack. 
""" idx = self._blockstack.index(blk) assert 0 <= idx < len(self._blockstack) self._blockstack = self._blockstack[:idx] def get_top_block(self, kind): """Find the first block that matches *kind* """ kind = BlockKind(kind) for bs in reversed(self._blockstack): if bs['kind'] == kind: return bs def has_active_try(self): """Returns a boolean indicating if the top-block is a *try* block """ return self.get_top_block('TRY') is not None def get_varname(self, inst): """Get referenced variable name from the oparg """ return self._bytecode.co_varnames[inst.arg] def terminate(self): """Mark block as terminated """ self._terminated = True def fork(self, pc, npop=0, npush=0, extra_block=None): """Fork the state """ # Handle changes on the stack stack = list(self._stack) if npop: assert 0 <= npop <= len(self._stack) nstack = len(self._stack) - npop stack = stack[:nstack] if npush: assert 0 <= npush for i in range(npush): stack.append(self.make_temp()) # Handle changes on the blockstack blockstack = list(self._blockstack) if extra_block: blockstack.append(extra_block) self._outedges.append(Edge( pc=pc, stack=tuple(stack), npush=npush, blockstack=tuple(blockstack), )) self.terminate() def split_new_block(self): """Split the state """ self.fork(pc=self._pc) def get_outgoing_states(self): """Get states for each outgoing edges """ # Should only call once assert not self._outgoing_phis ret = [] for edge in self._outedges: state = State(bytecode=self._bytecode, pc=edge.pc, nstack=len(edge.stack), blockstack=edge.blockstack) ret.append(state) # Map outgoing_phis for phi, i in state._phis.items(): self._outgoing_phis[phi] = edge.stack[i] return ret def get_outgoing_edgepushed(self): """ Returns ------- Dict[int, int] where keys are the PC values are the edge-pushed stack values """ return {edge.pc: tuple(edge.stack[-edge.npush:]) for edge in self._outedges} Edge = namedtuple("Edge", ["pc", "stack", "blockstack", "npush"]) class AdaptDFA(object): """Adapt Flow to the old DFA class expected by Interpreter """ def __init__(self, flow): self._flow = flow @property def infos(self): return self._flow.block_infos AdaptBlockInfo = namedtuple( "AdaptBlockInfo", ["insts", "outgoing_phis", "blockstack", "active_try_block", "outgoing_edgepushed"], ) def adapt_state_infos(state): return AdaptBlockInfo( insts=tuple(state.instructions), outgoing_phis=state.outgoing_phis, blockstack=state.blockstack_initial, active_try_block=state.find_initial_try_block(), outgoing_edgepushed=state.get_outgoing_edgepushed(), ) def _flatten_inst_regs(iterable): """Flatten an iterable of registers used in an instruction """ for item in iterable: if isinstance(item, str): yield item elif isinstance(item, (tuple, list)): for x in _flatten_inst_regs(item): yield x class AdaptCFA(object): """Adapt Flow to the old CFA class expected by Interpreter """ def __init__(self, flow): self._flow = flow self._blocks = {} for offset, blockinfo in flow.block_infos.items(): self._blocks[offset] = AdaptCFBlock(blockinfo, offset) backbone = self._flow.cfgraph.backbone() graph = flow.cfgraph # Find backbone backbone = graph.backbone() # Filter out in loop blocks (Assuming no other cyclic control blocks) # This is to unavoid variables defined in loops being considered as # function scope. 
inloopblocks = set() for b in self.blocks.keys(): if graph.in_loops(b): inloopblocks.add(b) self._backbone = backbone - inloopblocks @property def graph(self): return self._flow.cfgraph @property def backbone(self): return self._backbone @property def blocks(self): return self._blocks def iterliveblocks(self): for b in sorted(self.blocks): yield self.blocks[b] def dump(self): self._flow.cfgraph.dump() class AdaptCFBlock(object): def __init__(self, blockinfo, offset): self.offset = offset self.body = tuple(i for i, _ in blockinfo.insts) numba-0.55.1/numba/core/caching.py000664 000000 000000 00000056121 14174536160 016700 0ustar00rootroot000000 000000 """ Caching mechanism for compiled functions. """ from abc import ABCMeta, abstractmethod, abstractproperty import contextlib import errno import hashlib import inspect import itertools import os import pickle import sys import tempfile import warnings from numba.misc.appdirs import AppDirs import numba from numba.core.errors import NumbaWarning from numba.core.base import BaseContext from numba.core.codegen import CodeLibrary from numba.core.compiler import CompileResult from numba.core import config, compiler from numba.core.serialize import dumps def _get_codegen(obj): """ Returns the Codegen associated with the given object. """ if isinstance(obj, BaseContext): return obj.codegen() elif isinstance(obj, CodeLibrary): return obj.codegen elif isinstance(obj, CompileResult): return obj.target_context.codegen() else: raise TypeError(type(obj)) def _cache_log(msg, *args): if config.DEBUG_CACHE: msg = msg % args print(msg) class _Cache(metaclass=ABCMeta): @abstractproperty def cache_path(self): """ The base filesystem path of this cache (for example its root folder). """ @abstractmethod def load_overload(self, sig, target_context): """ Load an overload for the given signature using the target context. The saved object must be returned if successful, None if not found in the cache. """ @abstractmethod def save_overload(self, sig, data): """ Save the overload for the given signature. """ @abstractmethod def enable(self): """ Enable the cache. """ @abstractmethod def disable(self): """ Disable the cache. """ @abstractmethod def flush(self): """ Flush the cache. """ class NullCache(_Cache): @property def cache_path(self): return None def load_overload(self, sig, target_context): pass def save_overload(self, sig, cres): pass def enable(self): pass def disable(self): pass def flush(self): pass class _CacheLocator(metaclass=ABCMeta): """ A filesystem locator for caching a given function. """ def ensure_cache_path(self): path = self.get_cache_path() os.makedirs(path, exist_ok=True) # Ensure the directory is writable by trying to write a temporary file tempfile.TemporaryFile(dir=path).close() @abstractmethod def get_cache_path(self): """ Return the directory the function is cached in. """ @abstractmethod def get_source_stamp(self): """ Get a timestamp representing the source code's freshness. Can return any picklable Python object. """ @abstractmethod def get_disambiguator(self): """ Get a string disambiguator for this locator's function. It should allow disambiguating different but similarly-named functions. """ @classmethod def from_function(cls, py_func, py_file): """ Create a locator instance for the given function located in the given file. """ raise NotImplementedError @classmethod def get_suitable_cache_subpath(cls, py_file): """Given the Python file path, compute a suitable path inside the cache directory. 
This will reduce a file path that is too long, which can be a problem on some operating systems (e.g. Windows 7). """ path = os.path.abspath(py_file) subpath = os.path.dirname(path) parentdir = os.path.split(subpath)[-1] # Use SHA1 to reduce path length. # Note: Windows doesn't like long paths. hashed = hashlib.sha1(subpath.encode()).hexdigest() # Retain the parent directory name for easier debugging return '_'.join([parentdir, hashed]) class _SourceFileBackedLocatorMixin(object): """ A cache locator mixin for functions which are backed by a well-known Python source file. """ def get_source_stamp(self): if getattr(sys, 'frozen', False): st = os.stat(sys.executable) else: st = os.stat(self._py_file) # We use both timestamp and size as some filesystems only have second # granularity. return st.st_mtime, st.st_size def get_disambiguator(self): return str(self._lineno) @classmethod def from_function(cls, py_func, py_file): if not os.path.exists(py_file): # Perhaps a placeholder (e.g. "<string>") return self = cls(py_func, py_file) try: self.ensure_cache_path() except OSError: # Cannot ensure the cache directory exists or is writable return return self class _UserProvidedCacheLocator(_SourceFileBackedLocatorMixin, _CacheLocator): """ A locator that always points to the user-provided directory in `numba.config.CACHE_DIR` """ def __init__(self, py_func, py_file): self._py_file = py_file self._lineno = py_func.__code__.co_firstlineno cache_subpath = self.get_suitable_cache_subpath(py_file) self._cache_path = os.path.join(config.CACHE_DIR, cache_subpath) def get_cache_path(self): return self._cache_path @classmethod def from_function(cls, py_func, py_file): if not config.CACHE_DIR: return parent = super(_UserProvidedCacheLocator, cls) return parent.from_function(py_func, py_file) class _InTreeCacheLocator(_SourceFileBackedLocatorMixin, _CacheLocator): """ A locator for functions backed by a regular Python module with a writable __pycache__ directory. """ def __init__(self, py_func, py_file): self._py_file = py_file self._lineno = py_func.__code__.co_firstlineno self._cache_path = os.path.join(os.path.dirname(self._py_file), '__pycache__') def get_cache_path(self): return self._cache_path class _UserWideCacheLocator(_SourceFileBackedLocatorMixin, _CacheLocator): """ A locator for functions backed by a regular Python module or a frozen executable, cached into a user-wide cache directory. """ def __init__(self, py_func, py_file): self._py_file = py_file self._lineno = py_func.__code__.co_firstlineno appdirs = AppDirs(appname="numba", appauthor=False) cache_dir = appdirs.user_cache_dir cache_subpath = self.get_suitable_cache_subpath(py_file) self._cache_path = os.path.join(cache_dir, cache_subpath) def get_cache_path(self): return self._cache_path @classmethod def from_function(cls, py_func, py_file): if not (os.path.exists(py_file) or getattr(sys, 'frozen', False)): # Perhaps a placeholder (e.g. "<string>") # don't bail out early if frozen, since a frozen app uses a temp placeholder return self = cls(py_func, py_file) try: self.ensure_cache_path() except OSError: # Cannot ensure the cache directory exists or is writable return return self class _IPythonCacheLocator(_CacheLocator): """ A locator for functions entered at the IPython prompt (notebook or other). """ def __init__(self, py_func, py_file): self._py_file = py_file # Note IPython enhances the linecache module to be able to # inspect the source code of functions defined at the interactive prompt.
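# The raw cell source is kept as bytes: unlike a file-backed function # there is no mtime to compare against, so get_source_stamp() below # hashes these bytes instead.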
source = inspect.getsource(py_func) if isinstance(source, bytes): self._bytes_source = source else: self._bytes_source = source.encode('utf-8') def get_cache_path(self): # We could also use jupyter_core.paths.jupyter_runtime_dir() # In both cases this is a user-wide directory, so we need to # be careful when disambiguating if we don't want too many # conflicts (see below). try: from IPython.paths import get_ipython_cache_dir except ImportError: # older IPython version from IPython.utils.path import get_ipython_cache_dir return os.path.join(get_ipython_cache_dir(), 'numba_cache') def get_source_stamp(self): return hashlib.sha256(self._bytes_source).hexdigest() def get_disambiguator(self): # Heuristic: we don't want too many variants being saved, but # we don't want similarly named functions (e.g. "f") to compete # for the cache, so we hash the first two lines of the function # source (usually this will be the @jit decorator + the function # signature). firstlines = b''.join(self._bytes_source.splitlines(True)[:2]) return hashlib.sha256(firstlines).hexdigest()[:10] @classmethod def from_function(cls, py_func, py_file): if not ( py_file.startswith("<ipython-") or os.path.basename(os.path.dirname(py_file)).startswith("ipykernel_") ): return self = cls(py_func, py_file) try: self.ensure_cache_path() except OSError: # Cannot ensure the cache directory exists return return self class _CacheImpl(metaclass=ABCMeta): """ Provides the core machinery for caching: implement how to serialize and deserialize the data in the cache, control the filename of the cache, and provide the cache locator. """ _locator_classes = [_UserProvidedCacheLocator, _InTreeCacheLocator, _UserWideCacheLocator, _IPythonCacheLocator] def __init__(self, py_func): self._lineno = py_func.__code__.co_firstlineno # Get qualname try: qualname = py_func.__qualname__ except AttributeError: qualname = py_func.__name__ # Find a locator source_path = inspect.getfile(py_func) for cls in self._locator_classes: locator = cls.from_function(py_func, source_path) if locator is not None: break else: raise RuntimeError("cannot cache function %r: no locator available " "for file %r" % (qualname, source_path)) self._locator = locator # Use the file's base name as the module name to avoid conflict between # foo/__init__.py and foo/foo.py filename = inspect.getfile(py_func) modname = os.path.splitext(os.path.basename(filename))[0] fullname = "%s.%s" % (modname, qualname) abiflags = getattr(sys, 'abiflags', '') self._filename_base = self.get_filename_base(fullname, abiflags) def get_filename_base(self, fullname, abiflags): # '<' and '>' can appear in the qualname (e.g. '<locals>') but # are forbidden in Windows filenames fixed_fullname = fullname.replace('<', '').replace('>', '') fmt = '%s-%s.py%d%d%s' return fmt % (fixed_fullname, self.locator.get_disambiguator(), sys.version_info[0], sys.version_info[1], abiflags) @property def filename_base(self): return self._filename_base @property def locator(self): return self._locator @abstractmethod def reduce(self, data): "Returns the serialized form of the data" pass @abstractmethod def rebuild(self, target_context, reduced_data): "Returns the de-serialized form of the *reduced_data*" pass @abstractmethod def check_cachable(self, data): "Returns True if the given data is cachable; otherwise, returns False." pass class CompileResultCacheImpl(_CacheImpl): """ Implements the logic to cache CompileResult objects. """ def reduce(self, cres): """ Returns a serialized CompileResult """ return cres._reduce() def rebuild(self, target_context, payload): """ Returns the unserialized CompileResult """ return compiler.CompileResult._rebuild(target_context, *payload) def check_cachable(self, cres): """ Check cachability of the given compile result. """ cannot_cache = None if any(not x.can_cache for x in cres.lifted): cannot_cache = "as it uses lifted code" elif cres.library.has_dynamic_globals: cannot_cache = ("as it uses dynamic globals " "(such as ctypes pointers and large global arrays)") if cannot_cache: msg = ('Cannot cache compiled function "%s" %s' % (cres.fndesc.qualname.split('.')[-1], cannot_cache)) warnings.warn_explicit(msg, NumbaWarning, self._locator._py_file, self._lineno) return False return True class CodeLibraryCacheImpl(_CacheImpl): """ Implements the logic to cache CodeLibrary objects. """ _filename_prefix = None # must be overridden def reduce(self, codelib): """ Returns a serialized CodeLibrary """ return codelib.serialize_using_object_code() def rebuild(self, target_context, payload): """ Returns the unserialized CodeLibrary """ return target_context.codegen().unserialize_library(payload) def check_cachable(self, codelib): """ Check cachability of the given CodeLibrary. """
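# CodeLibrary objects holding dynamic globals (e.g. ctypes pointers) # embed process-specific addresses, so persisting them would be unsound.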
""" return not codelib.has_dynamic_globals def get_filename_base(self, fullname, abiflags): parent = super(CodeLibraryCacheImpl, self) res = parent.get_filename_base(fullname, abiflags) return '-'.join([self._filename_prefix, res]) class IndexDataCacheFile(object): """ Implements the logic for the index file and data file used by a cache. """ def __init__(self, cache_path, filename_base, source_stamp): self._cache_path = cache_path self._index_name = '%s.nbi' % (filename_base,) self._index_path = os.path.join(self._cache_path, self._index_name) self._data_name_pattern = '%s.{number:d}.nbc' % (filename_base,) self._source_stamp = source_stamp self._version = numba.__version__ def flush(self): self._save_index({}) def save(self, key, data): """ Save a new cache entry with *key* and *data*. """ overloads = self._load_index() try: # If key already exists, we will overwrite the file data_name = overloads[key] except KeyError: # Find an available name for the data file existing = set(overloads.values()) for i in itertools.count(1): data_name = self._data_name(i) if data_name not in existing: break overloads[key] = data_name self._save_index(overloads) self._save_data(data_name, data) def load(self, key): """ Load a cache entry with *key*. """ overloads = self._load_index() data_name = overloads.get(key) if data_name is None: return try: return self._load_data(data_name) except OSError: # File could have been removed while the index still refers it. return def _load_index(self): """ Load the cache index and return it as a dictionary (possibly empty if cache is empty or obsolete). """ try: with open(self._index_path, "rb") as f: version = pickle.load(f) data = f.read() except FileNotFoundError: # Index doesn't exist yet? return {} if version != self._version: # This is another version. Avoid trying to unpickling the # rest of the stream, as that may fail. return {} stamp, overloads = pickle.loads(data) _cache_log("[cache] index loaded from %r", self._index_path) if stamp != self._source_stamp: # Cache is not fresh. Stale data files will be eventually # overwritten, since they are numbered in incrementing order. return {} else: return overloads def _save_index(self, overloads): data = self._source_stamp, overloads data = self._dump(data) with self._open_for_write(self._index_path) as f: pickle.dump(self._version, f, protocol=-1) f.write(data) _cache_log("[cache] index saved to %r", self._index_path) def _load_data(self, name): path = self._data_path(name) with open(path, "rb") as f: data = f.read() tup = pickle.loads(data) _cache_log("[cache] data loaded from %r", path) return tup def _save_data(self, name, data): data = self._dump(data) path = self._data_path(name) with self._open_for_write(path) as f: f.write(data) _cache_log("[cache] data saved to %r", path) def _data_name(self, number): return self._data_name_pattern.format(number=number) def _data_path(self, name): return os.path.join(self._cache_path, name) def _dump(self, obj): return pickle.dumps(obj, protocol=-1) @contextlib.contextmanager def _open_for_write(self, filepath): """ Open *filepath* for writing in a race condition-free way (hopefully). """ tmpname = '%s.tmp.%d' % (filepath, os.getpid()) try: with open(tmpname, "wb") as f: yield f os.replace(tmpname, filepath) except Exception: # In case of error, remove dangling tmp file try: os.unlink(tmpname) except OSError: pass raise class Cache(_Cache): """ A per-function compilation cache. The cache saves data in separate data files and maintains information in an index file. 
There is one index file per function and Python version ("function_name-<lineno>.pyXY.nbi") which contains a mapping of signatures and architectures to data files. It is prefixed by a versioning key and a timestamp of the Python source file containing the function. There is one data file ("function_name-<lineno>.pyXY.<number>.nbc") per function, function signature, target architecture and Python version. Separate index and data files per Python version avoid pickle compatibility problems. Note: This contains the driver logic only. The core logic is provided by a subclass of ``_CacheImpl`` specified as *_impl_class* in the subclass. """ # The following class variables must be overridden by subclass. _impl_class = None def __init__(self, py_func): self._name = repr(py_func) self._py_func = py_func self._impl = self._impl_class(py_func) self._cache_path = self._impl.locator.get_cache_path() # This may be a bit strict but avoids us maintaining a magic number source_stamp = self._impl.locator.get_source_stamp() filename_base = self._impl.filename_base self._cache_file = IndexDataCacheFile(cache_path=self._cache_path, filename_base=filename_base, source_stamp=source_stamp) self.enable() def __repr__(self): return "<%s py_func=%r>" % (self.__class__.__name__, self._name) @property def cache_path(self): return self._cache_path def enable(self): self._enabled = True def disable(self): self._enabled = False def flush(self): self._cache_file.flush() def load_overload(self, sig, target_context): """ Load and recreate the cached object for the given signature, using the *target_context*. """ # Refresh the context to ensure it is initialized target_context.refresh() with self._guard_against_spurious_io_errors(): return self._load_overload(sig, target_context) # None returned if the `with` block swallows an exception def _load_overload(self, sig, target_context): if not self._enabled: return key = self._index_key(sig, _get_codegen(target_context)) data = self._cache_file.load(key) if data is not None: data = self._impl.rebuild(target_context, data) return data def save_overload(self, sig, data): """ Save the data for the given signature in the cache. """ with self._guard_against_spurious_io_errors(): self._save_overload(sig, data) def _save_overload(self, sig, data): if not self._enabled: return if not self._impl.check_cachable(data): return self._impl.locator.ensure_cache_path() key = self._index_key(sig, _get_codegen(data)) data = self._impl.reduce(data) self._cache_file.save(key, data) @contextlib.contextmanager def _guard_against_spurious_io_errors(self): if os.name == 'nt': # Guard against permission errors due to accessing the file # from several processes (see #2028) try: yield except OSError as e: if e.errno != errno.EACCES: raise else: # No such conditions under non-Windows OSes yield def _index_key(self, sig, codegen): """ Compute the index key for the given signature and codegen. It includes a description of the OS, target architecture and hashes of the bytecode for the function and, if the function has a __closure__, a hash of the cell_contents. """ codebytes = self._py_func.__code__.co_code if self._py_func.__closure__ is not None: cvars = tuple([x.cell_contents for x in self._py_func.__closure__]) cvarbytes = dumps(cvars) else: cvarbytes = b'' hasher = lambda x: hashlib.sha256(x).hexdigest() return (sig, codegen.magic_tuple(), (hasher(codebytes), hasher(cvarbytes),)) class FunctionCache(Cache): """ Implements Cache that saves and loads CompileResult objects.
""" _impl_class = CompileResultCacheImpl # Remember used cache filename prefixes. _lib_cache_prefixes = set(['']) def make_library_cache(prefix): """ Create a Cache class for additional compilation features to cache their result for reuse. The cache is saved in filename pattern like in ``FunctionCache`` but with additional *prefix* as specified. """ # avoid cache prefix reuse assert prefix not in _lib_cache_prefixes _lib_cache_prefixes.add(prefix) class CustomCodeLibraryCacheImpl(CodeLibraryCacheImpl): _filename_prefix = prefix class LibraryCache(Cache): """ Implements Cache that saves and loads CodeLibrary objects for additional feature for the specified python function. """ _impl_class = CustomCodeLibraryCacheImpl return LibraryCache numba-0.55.1/numba/core/callconv.py000664 000000 000000 00000055224 14174536160 017110 0ustar00rootroot000000 000000 """ Calling conventions for Numba-compiled functions. """ from collections import namedtuple from collections.abc import Iterable import itertools from llvmlite import ir from numba.core import types, cgutils from numba.core.base import PYOBJECT, GENERIC_POINTER TryStatus = namedtuple('TryStatus', ['in_try', 'excinfo']) Status = namedtuple("Status", ("code", # If the function returned ok (a value or None) "is_ok", # If the function returned None "is_none", # If the function errored out (== not is_ok) "is_error", # If the generator exited with StopIteration "is_stop_iteration", # If the function errored with an already set exception "is_python_exc", # If the function errored with a user exception "is_user_exc", # The pointer to the exception info structure (for user exceptions) "excinfoptr", )) int32_t = ir.IntType(32) errcode_t = int32_t def _const_int(code): return ir.Constant(errcode_t, code) RETCODE_OK = _const_int(0) RETCODE_EXC = _const_int(-1) RETCODE_NONE = _const_int(-2) # StopIteration RETCODE_STOPIT = _const_int(-3) FIRST_USEREXC = 1 RETCODE_USEREXC = _const_int(FIRST_USEREXC) class BaseCallConv(object): def __init__(self, context): self.context = context def return_optional_value(self, builder, retty, valty, value): if valty == types.none: # Value is none self.return_native_none(builder) elif retty == valty: # Value is an optional, need a runtime switch optval = self.context.make_helper(builder, retty, value=value) validbit = cgutils.as_bool_bit(builder, optval.valid) with builder.if_then(validbit): retval = self.context.get_return_value(builder, retty.type, optval.data) self.return_value(builder, retval) self.return_native_none(builder) elif not isinstance(valty, types.Optional): # Value is not an optional, need a cast if valty != retty.type: value = self.context.cast(builder, value, fromty=valty, toty=retty.type) retval = self.context.get_return_value(builder, retty.type, value) self.return_value(builder, retval) else: raise NotImplementedError("returning {0} for {1}".format(valty, retty)) def return_native_none(self, builder): self._return_errcode_raw(builder, RETCODE_NONE) def return_exc(self, builder): self._return_errcode_raw(builder, RETCODE_EXC, mark_exc=True) def return_stop_iteration(self, builder): self._return_errcode_raw(builder, RETCODE_STOPIT) def get_return_type(self, ty): """ Get the actual type of the return argument for Numba type *ty*. """ restype = self.context.data_model_manager[ty].get_return_type() return restype.as_pointer() def init_call_helper(self, builder): """ Initialize and return a call helper object for the given builder. 
""" ch = self._make_call_helper(builder) builder.__call_helper = ch return ch def _get_call_helper(self, builder): return builder.__call_helper def raise_error(self, builder, api, status): """ Given a non-ok *status*, raise the corresponding Python exception. """ bbend = builder.function.append_basic_block() with builder.if_then(status.is_user_exc): # Unserialize user exception. # Make sure another error may not interfere. api.err_clear() exc = api.unserialize(status.excinfoptr) with cgutils.if_likely(builder, cgutils.is_not_null(builder, exc)): api.raise_object(exc) # steals ref builder.branch(bbend) with builder.if_then(status.is_stop_iteration): api.err_set_none("PyExc_StopIteration") builder.branch(bbend) with builder.if_then(status.is_python_exc): # Error already raised => nothing to do builder.branch(bbend) api.err_set_string("PyExc_SystemError", "unknown error when calling native function") builder.branch(bbend) builder.position_at_end(bbend) def decode_arguments(self, builder, argtypes, func): """ Get the decoded (unpacked) Python arguments with *argtypes* from LLVM function *func*. A tuple of LLVM values is returned. """ raw_args = self.get_arguments(func) arginfo = self._get_arg_packer(argtypes) return arginfo.from_arguments(builder, raw_args) def _get_arg_packer(self, argtypes): """ Get an argument packer for the given argument types. """ return self.context.get_arg_packer(argtypes) class MinimalCallConv(BaseCallConv): """ A minimal calling convention, suitable for e.g. GPU targets. The implemented function signature is: retcode_t (*, ... ) The return code will be one of the RETCODE_* constants or a function-specific user exception id (>= RETCODE_USEREXC). Caller is responsible for allocating a slot for the return value (passed as a pointer in the first argument). """ def _make_call_helper(self, builder): return _MinimalCallHelper() def return_value(self, builder, retval): retptr = builder.function.args[0] assert retval.type == retptr.type.pointee, \ (str(retval.type), str(retptr.type.pointee)) builder.store(retval, retptr) self._return_errcode_raw(builder, RETCODE_OK) def return_user_exc(self, builder, exc, exc_args=None, loc=None, func_name=None): if exc is not None and not issubclass(exc, BaseException): raise TypeError("exc should be None or exception class, got %r" % (exc,)) if exc_args is not None and not isinstance(exc_args, tuple): raise TypeError("exc_args should be None or tuple, got %r" % (exc_args,)) # Build excinfo struct if loc is not None: fname = loc._raw_function_name() if fname is None: # could be exec() or REPL, try func_name fname = func_name locinfo = (fname, loc.filename, loc.line) if None in locinfo: locinfo = None else: locinfo = None call_helper = self._get_call_helper(builder) exc_id = call_helper._add_exception(exc, exc_args, locinfo) self._return_errcode_raw(builder, _const_int(exc_id), mark_exc=True) def return_status_propagate(self, builder, status): self._return_errcode_raw(builder, status.code) def _return_errcode_raw(self, builder, code, mark_exc=False): if isinstance(code, int): code = _const_int(code) builder.ret(code) def _get_return_status(self, builder, code): """ Given a return *code*, get a Status instance. 
""" norm = builder.icmp_signed('==', code, RETCODE_OK) none = builder.icmp_signed('==', code, RETCODE_NONE) ok = builder.or_(norm, none) err = builder.not_(ok) exc = builder.icmp_signed('==', code, RETCODE_EXC) is_stop_iteration = builder.icmp_signed('==', code, RETCODE_STOPIT) is_user_exc = builder.icmp_signed('>=', code, RETCODE_USEREXC) status = Status(code=code, is_ok=ok, is_error=err, is_python_exc=exc, is_none=none, is_user_exc=is_user_exc, is_stop_iteration=is_stop_iteration, excinfoptr=None) return status def get_function_type(self, restype, argtypes): """ Get the implemented Function type for *restype* and *argtypes*. """ arginfo = self._get_arg_packer(argtypes) argtypes = list(arginfo.argument_types) resptr = self.get_return_type(restype) fnty = ir.FunctionType(errcode_t, [resptr] + argtypes) return fnty def decorate_function(self, fn, args, fe_argtypes, noalias=False): """ Set names and attributes of function arguments. """ assert not noalias arginfo = self._get_arg_packer(fe_argtypes) arginfo.assign_names(self.get_arguments(fn), ['arg.' + a for a in args]) fn.args[0].name = ".ret" return fn def get_arguments(self, func): """ Get the Python-level arguments of LLVM *func*. """ return func.args[1:] def call_function(self, builder, callee, resty, argtys, args): """ Call the Numba-compiled *callee*. """ retty = callee.args[0].type.pointee retvaltmp = cgutils.alloca_once(builder, retty) # initialize return value builder.store(cgutils.get_null_value(retty), retvaltmp) arginfo = self._get_arg_packer(argtys) args = arginfo.as_arguments(builder, args) realargs = [retvaltmp] + list(args) code = builder.call(callee, realargs) status = self._get_return_status(builder, code) retval = builder.load(retvaltmp) out = self.context.get_returned_value(builder, resty, retval) return status, out class _MinimalCallHelper(object): """ A call helper object for the "minimal" calling convention. User exceptions are represented as integer codes and stored in a mapping for retrieval from the caller. """ def __init__(self): self.exceptions = {} def _add_exception(self, exc, exc_args, locinfo): """ Parameters ---------- exc : exception type exc_args : None or tuple exception args locinfo : tuple location information """ exc_id = len(self.exceptions) + FIRST_USEREXC self.exceptions[exc_id] = exc, exc_args, locinfo return exc_id def get_exception(self, exc_id): try: return self.exceptions[exc_id] except KeyError: msg = "unknown error %d in native function" % exc_id return SystemError, (msg,) # The structure type constructed by PythonAPI.serialize_uncached() # i.e a {i8* pickle_buf, i32 pickle_bufsz, i8* hash_buf} excinfo_t = ir.LiteralStructType([GENERIC_POINTER, int32_t, GENERIC_POINTER]) excinfo_ptr_t = ir.PointerType(excinfo_t) class CPUCallConv(BaseCallConv): """ The calling convention for CPU targets. The implemented function signature is: retcode_t (*, excinfo **, ... ) The return code will be one of the RETCODE_* constants. If RETCODE_USEREXC, the exception info pointer will be filled with a pointer to a constant struct describing the raised exception. Caller is responsible for allocating slots for the return value and the exception info pointer (passed as first and second arguments, respectively). 
""" _status_ids = itertools.count(1) def _make_call_helper(self, builder): return None def return_value(self, builder, retval): retptr = self._get_return_argument(builder.function) assert retval.type == retptr.type.pointee, \ (str(retval.type), str(retptr.type.pointee)) builder.store(retval, retptr) self._return_errcode_raw(builder, RETCODE_OK) def set_static_user_exc(self, builder, exc, exc_args=None, loc=None, func_name=None): if exc is not None and not issubclass(exc, BaseException): raise TypeError("exc should be None or exception class, got %r" % (exc,)) if exc_args is not None and not isinstance(exc_args, tuple): raise TypeError("exc_args should be None or tuple, got %r" % (exc_args,)) # None is indicative of no args, set the exc_args to an empty tuple # as PyObject_CallObject(exc, exc_args) requires the second argument to # be a tuple (or nullptr, but doing this makes it consistent) if exc_args is None: exc_args = tuple() pyapi = self.context.get_python_api(builder) # Build excinfo struct if loc is not None: fname = loc._raw_function_name() if fname is None: # could be exec() or REPL, try func_name fname = func_name locinfo = (fname, loc.filename, loc.line) if None in locinfo: locinfo = None else: locinfo = None exc = (exc, exc_args, locinfo) struct_gv = pyapi.serialize_object(exc) excptr = self._get_excinfo_argument(builder.function) builder.store(struct_gv, excptr) def return_user_exc(self, builder, exc, exc_args=None, loc=None, func_name=None): try_info = getattr(builder, '_in_try_block', False) self.set_static_user_exc(builder, exc, exc_args=exc_args, loc=loc, func_name=func_name) trystatus = self.check_try_status(builder) if try_info: # This is a hack for old-style impl. # We will branch directly to the exception handler. builder.branch(try_info['target']) else: # Return from the current function self._return_errcode_raw(builder, RETCODE_USEREXC, mark_exc=True) def _get_try_state(self, builder): try: return builder.__eh_try_state except AttributeError: ptr = cgutils.alloca_once( builder, cgutils.intp_t, name='try_state', zfill=True, ) builder.__eh_try_state = ptr return ptr def check_try_status(self, builder): try_state_ptr = self._get_try_state(builder) try_depth = builder.load(try_state_ptr) # try_depth > 0 in_try = builder.icmp_unsigned('>', try_depth, try_depth.type(0)) excinfoptr = self._get_excinfo_argument(builder.function) excinfo = builder.load(excinfoptr) return TryStatus(in_try=in_try, excinfo=excinfo) def set_try_status(self, builder): try_state_ptr = self._get_try_state(builder) # Increment try depth old = builder.load(try_state_ptr) new = builder.add(old, old.type(1)) builder.store(new, try_state_ptr) def unset_try_status(self, builder): try_state_ptr = self._get_try_state(builder) # Decrement try depth old = builder.load(try_state_ptr) new = builder.sub(old, old.type(1)) builder.store(new, try_state_ptr) # Needs to reset the exception state so that the exception handler # will run normally. 
excinfoptr = self._get_excinfo_argument(builder.function) null = cgutils.get_null_value(excinfoptr.type.pointee) builder.store(null, excinfoptr) def return_status_propagate(self, builder, status): trystatus = self.check_try_status(builder) excptr = self._get_excinfo_argument(builder.function) builder.store(status.excinfoptr, excptr) with builder.if_then(builder.not_(trystatus.in_try)): self._return_errcode_raw(builder, status.code, mark_exc=True) def _return_errcode_raw(self, builder, code, mark_exc=False): ret = builder.ret(code) if mark_exc: md = builder.module.add_metadata([ir.IntType(1)(1)]) ret.set_metadata("ret_is_raise", md) def _get_return_status(self, builder, code, excinfoptr): """ Given a return *code* and *excinfoptr*, get a Status instance. """ norm = builder.icmp_signed('==', code, RETCODE_OK) none = builder.icmp_signed('==', code, RETCODE_NONE) exc = builder.icmp_signed('==', code, RETCODE_EXC) is_stop_iteration = builder.icmp_signed('==', code, RETCODE_STOPIT) ok = builder.or_(norm, none) err = builder.not_(ok) is_user_exc = builder.icmp_signed('>=', code, RETCODE_USEREXC) excinfoptr = builder.select(is_user_exc, excinfoptr, ir.Constant(excinfo_ptr_t, ir.Undefined)) status = Status(code=code, is_ok=ok, is_error=err, is_python_exc=exc, is_none=none, is_user_exc=is_user_exc, is_stop_iteration=is_stop_iteration, excinfoptr=excinfoptr) return status def get_function_type(self, restype, argtypes): """ Get the implemented Function type for *restype* and *argtypes*. """ arginfo = self._get_arg_packer(argtypes) argtypes = list(arginfo.argument_types) resptr = self.get_return_type(restype) fnty = ir.FunctionType(errcode_t, [resptr, ir.PointerType(excinfo_ptr_t)] + argtypes) return fnty def decorate_function(self, fn, args, fe_argtypes, noalias=False): """ Set names of function arguments, and add useful attributes to them. """ arginfo = self._get_arg_packer(fe_argtypes) arginfo.assign_names(self.get_arguments(fn), ['arg.' + a for a in args]) retarg = self._get_return_argument(fn) retarg.name = "retptr" retarg.add_attribute("nocapture") retarg.add_attribute("noalias") excarg = self._get_excinfo_argument(fn) excarg.name = "excinfo" excarg.add_attribute("nocapture") excarg.add_attribute("noalias") if noalias: args = self.get_arguments(fn) for a in args: if isinstance(a.type, ir.PointerType): a.add_attribute("nocapture") a.add_attribute("noalias") # Add metadata to mark functions that may need NRT # thus disabling aggressive refct pruning in removerefctpass.py def type_may_always_need_nrt(ty): # Returns True if it's a non-Array type that is contains MemInfo if not isinstance(ty, types.Array): dmm = self.context.data_model_manager if dmm[ty].contains_nrt_meminfo(): return True return False args_may_always_need_nrt = any( map(type_may_always_need_nrt, fe_argtypes) ) if args_may_always_need_nrt: nmd = fn.module.add_named_metadata( 'numba_args_may_always_need_nrt', ) nmd.add(fn.module.add_metadata([fn])) return fn def get_arguments(self, func): """ Get the Python-level arguments of LLVM *func*. """ return func.args[2:] def _get_return_argument(self, func): return func.args[0] def _get_excinfo_argument(self, func): return func.args[1] def call_function(self, builder, callee, resty, argtys, args, attrs=None): """ Call the Numba-compiled *callee*. Parameters: ----------- attrs: LLVM style string or iterable of individual attributes, default is None which specifies no attributes. 
Examples: LLVM style string: "noinline fast" Equivalent iterable: ("noinline", "fast") """ # XXX better fix for callees that are not function values # (pointers to function; thus have no `.args` attribute) retty = self._get_return_argument(callee.function_type).pointee retvaltmp = cgutils.alloca_once(builder, retty) # initialize return value to zeros builder.store(cgutils.get_null_value(retty), retvaltmp) excinfoptr = cgutils.alloca_once(builder, ir.PointerType(excinfo_t), name="excinfo") arginfo = self._get_arg_packer(argtys) args = list(arginfo.as_arguments(builder, args)) realargs = [retvaltmp, excinfoptr] + args # deal with attrs, it's fine to specify a load in a string like # "noinline fast" as per LLVM or equally as an iterable of individual # attributes. if attrs is None: _attrs = () elif isinstance(attrs, Iterable) and not isinstance(attrs, str): _attrs = tuple(attrs) else: raise TypeError("attrs must be an iterable of strings or None") code = builder.call(callee, realargs, attrs=_attrs) status = self._get_return_status(builder, code, builder.load(excinfoptr)) retval = builder.load(retvaltmp) out = self.context.get_returned_value(builder, resty, retval) return status, out class ErrorModel(object): def __init__(self, call_conv): self.call_conv = call_conv def fp_zero_division(self, builder, exc_args=None, loc=None): if self.raise_on_fp_zero_division: self.call_conv.return_user_exc(builder, ZeroDivisionError, exc_args, loc) return True else: return False class PythonErrorModel(ErrorModel): """ The Python error model. Any invalid FP input raises an exception. """ raise_on_fp_zero_division = True class NumpyErrorModel(ErrorModel): """ In the Numpy error model, floating-point errors don't raise an exception. The FPU exception state is inspected by Numpy at the end of a ufunc's execution and a warning is raised if appropriate. Note there's no easy way to set the FPU exception state from LLVM. Instructions known to set an FP exception can be optimized away: https://llvm.org/bugs/show_bug.cgi?id=6050 http://lists.llvm.org/pipermail/llvm-dev/2014-September/076918.html http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20140929/237997.html """ raise_on_fp_zero_division = False error_models = { 'python': PythonErrorModel, 'numpy': NumpyErrorModel, } def create_error_model(model_name, context): """ Create an error model instance for the given target context. """ return error_models[model_name](context.call_conv) numba-0.55.1/numba/core/callwrapper.py000664 000000 000000 00000020367 14174536160 017623 0ustar00rootroot000000 000000 from llvmlite.llvmpy.core import Type, Builder, Constant import llvmlite.llvmpy.core as lc from llvmlite import ir from numba.core import types, config, cgutils class _ArgManager(object): """ A utility class to handle argument unboxing and cleanup """ def __init__(self, context, builder, api, env_manager, endblk, nargs): self.context = context self.builder = builder self.api = api self.env_manager = env_manager self.arg_count = 0 # how many function arguments have been processed self.cleanups = [] self.nextblk = endblk def add_arg(self, obj, ty): """ Unbox argument and emit code that handles any error during unboxing. Args are cleaned up in reverse order of the parameter list, and cleanup begins as soon as unboxing of any argument fails. E.g. 
failure on arg2 will result in control flow going through: arg2.err -> arg1.err -> arg0.err -> arg.end (returns) """ # Unbox argument native = self.api.to_native_value(ty, obj) # If an error occurred, go to the cleanup block for # the previous argument with cgutils.if_unlikely(self.builder, native.is_error): self.builder.branch(self.nextblk) # Define the cleanup function for the argument def cleanup_arg(): # Native value reflection self.api.reflect_native_value(ty, native.value, self.env_manager) # Native value cleanup if native.cleanup is not None: native.cleanup() # NRT cleanup # (happens after the native value cleanup as the latter # may need the native value) if self.context.enable_nrt: self.context.nrt.decref(self.builder, ty, native.value) self.cleanups.append(cleanup_arg) # Write the on-error cleanup block for this argument cleanupblk = self.builder.append_basic_block( "arg%d.err" % self.arg_count) with self.builder.goto_block(cleanupblk): cleanup_arg() # Go to next cleanup block self.builder.branch(self.nextblk) self.nextblk = cleanupblk self.arg_count += 1 return native.value def emit_cleanup(self): """ Emit the cleanup code after returning from the wrapped function. """ for dtor in self.cleanups: dtor() class _GilManager(object): """ A utility class to handle releasing the GIL and then re-acquiring it again. """ def __init__(self, builder, api, argman): self.builder = builder self.api = api self.argman = argman self.thread_state = api.save_thread() def emit_cleanup(self): self.api.restore_thread(self.thread_state) self.argman.emit_cleanup() class PyCallWrapper(object): def __init__(self, context, module, func, fndesc, env, call_helper, release_gil): self.context = context self.module = module self.func = func self.fndesc = fndesc self.env = env self.release_gil = release_gil def build(self): wrapname = self.fndesc.llvm_cpython_wrapper_name # This is the signature of PyCFunctionWithKeywords # (see CPython's methodobject.h) pyobj = self.context.get_argument_type(types.pyobject) wrapty = Type.function(pyobj, [pyobj, pyobj, pyobj]) wrapper = ir.Function(self.module, wrapty, name=wrapname) builder = Builder(wrapper.append_basic_block('entry')) # - `closure` will receive the `self` pointer stored in the # PyCFunction object (see _dynfunc.c) # - `args` and `kws` will receive the tuple and dict objects # of positional and keyword arguments, respectively. closure, args, kws = wrapper.args closure.name = 'py_closure' args.name = 'py_args' kws.name = 'py_kws' api = self.context.get_python_api(builder) self.build_wrapper(api, builder, closure, args, kws) return wrapper, api def build_wrapper(self, api, builder, closure, args, kws): nargs = len(self.fndesc.argtypes) objs = [api.alloca_obj() for _ in range(nargs)] parseok = api.unpack_tuple(args, self.fndesc.qualname, nargs, nargs, *objs) pred = builder.icmp(lc.ICMP_EQ, parseok, Constant.null(parseok.type)) with cgutils.if_unlikely(builder, pred): builder.ret(api.get_null_object()) # Block that returns after erroneous argument unboxing/cleanup endblk = builder.append_basic_block("arg.end") with builder.goto_block(endblk): builder.ret(api.get_null_object()) # Get the Environment object env_manager = self.get_env(api, builder) cleanup_manager = _ArgManager(self.context, builder, api, env_manager, endblk, nargs) # Compute the arguments to the compiled Numba function. 
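# (Illustration: for a wrapped f(a, b) where b's frontend type is # types.Omitted, `objs` holds two freshly unboxed PyObject* slots but # `innerargs` becomes [native_a, None]; the None placeholder below marks # the omitted argument, whose dummy Python object is ignored.)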
innerargs = [] for obj, ty in zip(objs, self.fndesc.argtypes): if isinstance(ty, types.Omitted): # It's an omitted value => ignore dummy Python object innerargs.append(None) else: val = cleanup_manager.add_arg(builder.load(obj), ty) innerargs.append(val) if self.release_gil: cleanup_manager = _GilManager(builder, api, cleanup_manager) # We elect to not inline the top level user function into the call # wrapper, this incurs an overhead of a function call, however, it # increases optimisation stability in that the optimised user function # is what will actually be run and it is this function that all the # inspection tools "see". Further, this makes optimisation "stable" in # that calling the user function from e.g. C or from this wrapper will # result in the same code executing, were inlining permitted this may # not be the case as the inline could trigger additional optimisation # as the function goes into the wrapper, this resulting in the executing # instruction stream being different from that of the instruction stream # present in the user function. status, retval = self.context.call_conv.call_function( builder, self.func, self.fndesc.restype, self.fndesc.argtypes, innerargs, attrs=('noinline',)) # Do clean up self.debug_print(builder, "# callwrapper: emit_cleanup") cleanup_manager.emit_cleanup() self.debug_print(builder, "# callwrapper: emit_cleanup end") # Determine return status with builder.if_then(status.is_ok, likely=True): # Ok => return boxed Python value with builder.if_then(status.is_none): api.return_none() retty = self._simplified_return_type() obj = api.from_native_return(retty, retval, env_manager) builder.ret(obj) # Error out self.context.call_conv.raise_error(builder, api, status) builder.ret(api.get_null_object()) def get_env(self, api, builder): """Get the Environment object which is declared as a global in the module of the wrapped function. """ envname = self.context.get_env_name(self.fndesc) gvptr = self.context.declare_env_global(builder.module, envname) envptr = builder.load(gvptr) env_body = self.context.get_env_body(builder, envptr) api.emit_environment_sentry(envptr, return_pyobject=True, debug_msg=self.fndesc.env_name) env_manager = api.get_env_manager(self.env, env_body, envptr) return env_manager def _simplified_return_type(self): """ The NPM callconv has already converted simplified optional types. We can simply use the value type from it. """ restype = self.fndesc.restype # Optional type if isinstance(restype, types.Optional): return restype.type else: return restype def debug_print(self, builder, msg): if config.DEBUG_JIT: self.context.debug_print(builder, "DEBUGJIT: {0}".format(msg)) numba-0.55.1/numba/core/ccallback.py000664 000000 000000 00000010305 14174536160 017175 0ustar00rootroot000000 000000 """ Implementation of compiled C callbacks (@cfunc). """ import ctypes from numba.core import utils, compiler, registry from numba.core.caching import NullCache, FunctionCache from numba.core.dispatcher import _FunctionCompiler from numba.core.typing import signature from numba.core.typing.ctypes_utils import to_ctypes from numba.core.compiler_lock import global_compiler_lock class _CFuncCompiler(_FunctionCompiler): def _customize_flags(self, flags): flags.no_cpython_wrapper = True flags.no_cfunc_wrapper = False # Disable compilation of the IR module, because we first want to # add the cfunc wrapper. 
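# A hedged usage sketch of the decorator this compiler serves (names are # illustrative, not from this file): # # from numba import cfunc # # @cfunc("float64(float64, float64)") # def add(a, b): # return a + b # # add.ctypes(1.0, 2.0) # call via the ctypes wrapper, -> 3.0 # add.address # raw entry point for C callers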
flags.no_compile = True # Object mode is not currently supported in C callbacks # (no reliable way to get the environment) flags.enable_pyobject = False if flags.force_pyobject: raise NotImplementedError("object mode not allowed in C callbacks") return flags class CFunc(object): """ A compiled C callback, as created by the @cfunc decorator. """ _targetdescr = registry.cpu_target def __init__(self, pyfunc, sig, locals, options, pipeline_class=compiler.Compiler): args, return_type = sig if return_type is None: raise TypeError("C callback needs an explicit return type") self.__name__ = pyfunc.__name__ self.__qualname__ = getattr(pyfunc, '__qualname__', self.__name__) self.__wrapped__ = pyfunc self._pyfunc = pyfunc self._sig = signature(return_type, *args) self._compiler = _CFuncCompiler(pyfunc, self._targetdescr, options, locals, pipeline_class=pipeline_class) self._wrapper_name = None self._wrapper_address = None self._cache = NullCache() self._cache_hits = 0 def enable_caching(self): self._cache = FunctionCache(self._pyfunc) @global_compiler_lock def compile(self): # Try to load from cache cres = self._cache.load_overload(self._sig, self._targetdescr.target_context) if cres is None: cres = self._compile_uncached() self._cache.save_overload(self._sig, cres) else: self._cache_hits += 1 self._library = cres.library self._wrapper_name = cres.fndesc.llvm_cfunc_wrapper_name self._wrapper_address = self._library.get_pointer_to_function( self._wrapper_name) def _compile_uncached(self): sig = self._sig # Compile native function as well as cfunc wrapper return self._compiler.compile(sig.args, sig.return_type) @property def native_name(self): """ The process-wide symbol the C callback is exposed as. """ # Note, from our point of view, the C callback is the wrapper around # the native function. return self._wrapper_name @property def address(self): """ The address of the C callback. """ return self._wrapper_address @utils.cached_property def cffi(self): """ A cffi function pointer representing the C callback. """ import cffi ffi = cffi.FFI() # cffi compares types by name, so using precise types would risk # spurious mismatches (such as "int32_t" vs. "int"). return ffi.cast("void *", self.address) @utils.cached_property def ctypes(self): """ A ctypes function object representing the C callback. """ ctypes_args = [to_ctypes(ty) for ty in self._sig.args] ctypes_restype = to_ctypes(self._sig.return_type) functype = ctypes.CFUNCTYPE(ctypes_restype, *ctypes_args) return functype(self.address) def inspect_llvm(self): """ Return the LLVM IR of the C callback definition. """ return self._library.get_llvm_str() @property def cache_hits(self): return self._cache_hits def __repr__(self): return "<Numba C callback %r>" % (self.__qualname__,) def __call__(self, *args, **kwargs): return self._pyfunc(*args, **kwargs) numba-0.55.1/numba/core/cgutils.py000664 000000 000000 00000111755 14174536160 016763 0ustar00rootroot000000 000000 """ Generic helpers for LLVM code generation. 
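Most helpers below take an llvmlite ``ir.IRBuilder`` positioned inside a function and emit instructions at its current insertion point.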
""" import collections from contextlib import contextmanager import functools from llvmlite import ir from numba.core import utils, types, config, debuginfo import numba.core.datamodel bool_t = ir.IntType(1) int8_t = ir.IntType(8) int32_t = ir.IntType(32) intp_t = ir.IntType(utils.MACHINE_BITS) voidptr_t = int8_t.as_pointer() true_bit = bool_t(1) false_bit = bool_t(0) true_byte = int8_t(1) false_byte = int8_t(0) def as_bool_bit(builder, value): return builder.icmp_unsigned('!=', value, value.type(0)) def make_anonymous_struct(builder, values, struct_type=None): """ Create an anonymous struct containing the given LLVM *values*. """ if struct_type is None: struct_type = ir.LiteralStructType([v.type for v in values]) struct_val = struct_type(ir.Undefined) for i, v in enumerate(values): struct_val = builder.insert_value(struct_val, v, i) return struct_val def make_bytearray(buf): """ Make a byte array constant from *buf*. """ b = bytearray(buf) n = len(b) return ir.Constant(ir.ArrayType(ir.IntType(8), n), b) _struct_proxy_cache = {} def create_struct_proxy(fe_type, kind='value'): """ Returns a specialized StructProxy subclass for the given fe_type. """ cache_key = (fe_type, kind) res = _struct_proxy_cache.get(cache_key) if res is None: base = {'value': ValueStructProxy, 'data': DataStructProxy, }[kind] clsname = base.__name__ + '_' + str(fe_type) bases = (base,) clsmembers = dict(_fe_type=fe_type) res = type(clsname, bases, clsmembers) _struct_proxy_cache[cache_key] = res return res def copy_struct(dst, src, repl={}): """ Copy structure from *src* to *dst* with replacement from *repl*. """ repl = repl.copy() # copy data from src or use those in repl for k in src._datamodel._fields: v = repl.pop(k, getattr(src, k)) setattr(dst, k, v) # use remaining key-values in repl for k, v in repl.items(): setattr(dst, k, v) return dst class _StructProxy(object): """ Creates a `Structure` like interface that is constructed with information from DataModel instance. FE type must have a data model that is a subclass of StructModel. """ # The following class members must be overridden by subclass _fe_type = None def __init__(self, context, builder, value=None, ref=None): self._context = context self._datamodel = self._context.data_model_manager[self._fe_type] if not isinstance(self._datamodel, numba.core.datamodel.StructModel): raise TypeError( "Not a structure model: {0}".format(self._datamodel)) self._builder = builder self._be_type = self._get_be_type(self._datamodel) assert not is_pointer(self._be_type) outer_ref, ref = self._make_refs(ref) if ref.type.pointee != self._be_type: raise AssertionError("bad ref type: expected %s, got %s" % (self._be_type.as_pointer(), ref.type)) if value is not None: if value.type != outer_ref.type.pointee: raise AssertionError("bad value type: expected %s, got %s" % (outer_ref.type.pointee, value.type)) self._builder.store(value, outer_ref) self._value = ref self._outer_ref = outer_ref def _make_refs(self, ref): """ Return an (outer ref, value ref) pair. By default, these are the same pointers, but a derived class may override this. 
""" if ref is None: ref = alloca_once(self._builder, self._be_type, zfill=True) return ref, ref def _get_be_type(self, datamodel): raise NotImplementedError def _cast_member_to_value(self, index, val): raise NotImplementedError def _cast_member_from_value(self, index, val): raise NotImplementedError def _get_ptr_by_index(self, index): return gep_inbounds(self._builder, self._value, 0, index) def _get_ptr_by_name(self, attrname): index = self._datamodel.get_field_position(attrname) return self._get_ptr_by_index(index) def __getattr__(self, field): """ Load the LLVM value of the named *field*. """ if not field.startswith('_'): return self[self._datamodel.get_field_position(field)] else: raise AttributeError(field) def __setattr__(self, field, value): """ Store the LLVM *value* into the named *field*. """ if field.startswith('_'): return super(_StructProxy, self).__setattr__(field, value) self[self._datamodel.get_field_position(field)] = value def __getitem__(self, index): """ Load the LLVM value of the field at *index*. """ member_val = self._builder.load(self._get_ptr_by_index(index)) return self._cast_member_to_value(index, member_val) def __setitem__(self, index, value): """ Store the LLVM *value* into the field at *index*. """ ptr = self._get_ptr_by_index(index) value = self._cast_member_from_value(index, value) if value.type != ptr.type.pointee: if (is_pointer(value.type) and is_pointer(ptr.type.pointee) and value.type.pointee == ptr.type.pointee.pointee): # Differ by address-space only # Auto coerce it value = self._context.addrspacecast(self._builder, value, ptr.type.pointee.addrspace) else: raise TypeError("Invalid store of {value.type} to " "{ptr.type.pointee} in " "{self._datamodel} " "(trying to write member #{index})" .format(value=value, ptr=ptr, self=self, index=index)) self._builder.store(value, ptr) def __len__(self): """ Return the number of fields. """ return self._datamodel.field_count def _getpointer(self): """ Return the LLVM pointer to the underlying structure. """ return self._outer_ref def _getvalue(self): """ Load and return the value of the underlying LLVM structure. """ return self._builder.load(self._outer_ref) def _setvalue(self, value): """ Store the value in this structure. """ assert not is_pointer(value.type) assert value.type == self._be_type, (value.type, self._be_type) self._builder.store(value, self._value) class ValueStructProxy(_StructProxy): """ Create a StructProxy suitable for accessing regular values (e.g. LLVM values or alloca slots). """ def _get_be_type(self, datamodel): return datamodel.get_value_type() def _cast_member_to_value(self, index, val): return val def _cast_member_from_value(self, index, val): return val class DataStructProxy(_StructProxy): """ Create a StructProxy suitable for accessing data persisted in memory. """ def _get_be_type(self, datamodel): return datamodel.get_data_type() def _cast_member_to_value(self, index, val): model = self._datamodel.get_model(index) return model.from_data(self._builder, val) def _cast_member_from_value(self, index, val): model = self._datamodel.get_model(index) return model.as_data(self._builder, val) class Structure(object): """ A high-level object wrapping a alloca'ed LLVM structure, including named fields and attribute access. """ # XXX Should this warrant several separate constructors? 
def __init__(self, context, builder, value=None, ref=None, cast_ref=False): self._type = context.get_struct_type(self) self._context = context self._builder = builder if ref is None: self._value = alloca_once(builder, self._type, zfill=True) if value is not None: assert not is_pointer(value.type) assert value.type == self._type, (value.type, self._type) builder.store(value, self._value) else: assert value is None assert is_pointer(ref.type) if self._type != ref.type.pointee: if cast_ref: ref = builder.bitcast(ref, self._type.as_pointer()) else: raise TypeError( "mismatching pointer type: got %s, expected %s" % (ref.type.pointee, self._type)) self._value = ref self._namemap = {} self._fdmap = [] self._typemap = [] base = int32_t(0) for i, (k, tp) in enumerate(self._fields): self._namemap[k] = i self._fdmap.append((base, int32_t(i))) self._typemap.append(tp) def _get_ptr_by_index(self, index): ptr = self._builder.gep(self._value, self._fdmap[index], inbounds=True) return ptr def _get_ptr_by_name(self, attrname): return self._get_ptr_by_index(self._namemap[attrname]) def __getattr__(self, field): """ Load the LLVM value of the named *field*. """ if not field.startswith('_'): return self[self._namemap[field]] else: raise AttributeError(field) def __setattr__(self, field, value): """ Store the LLVM *value* into the named *field*. """ if field.startswith('_'): return super(Structure, self).__setattr__(field, value) self[self._namemap[field]] = value def __getitem__(self, index): """ Load the LLVM value of the field at *index*. """ return self._builder.load(self._get_ptr_by_index(index)) def __setitem__(self, index, value): """ Store the LLVM *value* into the field at *index*. """ ptr = self._get_ptr_by_index(index) if ptr.type.pointee != value.type: fmt = "Type mismatch: __setitem__(%d, ...) expected %r but got %r" raise AssertionError(fmt % (index, str(ptr.type.pointee), str(value.type))) self._builder.store(value, ptr) def __len__(self): """ Return the number of fields. """ return len(self._namemap) def _getpointer(self): """ Return the LLVM pointer to the underlying structure. """ return self._value def _getvalue(self): """ Load and return the value of the underlying LLVM structure. """ return self._builder.load(self._value) def _setvalue(self, value): """Store the value in this structure""" assert not is_pointer(value.type) assert value.type == self._type, (value.type, self._type) self._builder.store(value, self._value) # __iter__ is derived by Python from __len__ and __getitem__ def alloca_once(builder, ty, size=None, name='', zfill=False): """Allocate stack memory at the entry block of the current function pointed to by ``builder``, with the LLVM type ``ty``. The optional ``size`` arg sets the number of elements to allocate. The default is 1. The optional ``name`` arg sets the symbol name inside the LLVM IR, for debugging. If ``zfill`` is set, fill the memory with zeros at the current use-site location. Note that the memory is always zero-filled after the ``alloca`` at init-site (the entry block). """ if isinstance(size, int): size = ir.Constant(intp_t, size) # Suspend debug metadata emission, otherwise it links up python source # lines with allocas in the entry block as well as their actual location # and it makes the debug info "jump about". with debuginfo.suspend_emission(builder): with builder.goto_entry_block(): ptr = builder.alloca(ty, size=size, name=name) # Always zero-fill at init-site. This is safe. 
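# (The entry block dominates all other blocks in the function, so the # zero-fill store emitted below executes before any later use of the # slot, whichever path control flow takes.)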
builder.store(ptr.type.pointee(None), ptr) # Also zero-fill at the use-site if zfill: builder.store(ptr.type.pointee(None), ptr) return ptr def sizeof(builder, ptr_type): """Compute sizeof using GEP """ null = ptr_type(None) offset = null.gep([int32_t(1)]) return builder.ptrtoint(offset, intp_t) def alloca_once_value(builder, value, name='', zfill=False): """ Like alloca_once(), but passing a *value* instead of a type. The type is inferred and the allocated slot is also initialized with the given value. """ storage = alloca_once(builder, value.type, zfill=zfill) builder.store(value, storage) return storage def insert_pure_function(module, fnty, name): """ Insert a pure function (in the functional programming sense) in the given module. """ fn = get_or_insert_function(module, fnty, name) fn.attributes.add("readonly") fn.attributes.add("nounwind") return fn def get_or_insert_function(module, fnty, name): """ Get the function named *name* with type *fnty* from *module*, or insert it if it doesn't exist. """ fn = module.globals.get(name, None) if fn is None: fn = ir.Function(module, fnty, name) return fn def get_or_insert_named_metadata(module, name): try: return module.get_named_metadata(name) except KeyError: return module.add_named_metadata(name) def add_global_variable(module, ty, name, addrspace=0): unique_name = module.get_unique_name(name) return ir.GlobalVariable(module, ty, unique_name, addrspace) def terminate(builder, bbend): bb = builder.basic_block if bb.terminator is None: builder.branch(bbend) def get_null_value(ltype): return ltype(None) def is_null(builder, val): null = get_null_value(val.type) return builder.icmp_unsigned('==', null, val) def is_not_null(builder, val): null = get_null_value(val.type) return builder.icmp_unsigned('!=', null, val) def if_unlikely(builder, pred): return builder.if_then(pred, likely=False) def if_likely(builder, pred): return builder.if_then(pred, likely=True) def ifnot(builder, pred): return builder.if_then(builder.not_(pred)) def increment_index(builder, val): """ Increment an index *val*. """ one = val.type(1) # We pass the "nsw" flag in the hope that LLVM understands the index # never changes sign. Unfortunately this doesn't always work # (e.g. ndindex()). return builder.add(val, one, flags=['nsw']) Loop = collections.namedtuple('Loop', ('index', 'do_break')) @contextmanager def for_range(builder, count, start=None, intp=None): """ Generate LLVM IR for a for-loop in [start, count). *start* is equal to 0 by default. 
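A sketch of typical use (``builder``, an intp ``n`` and a pointer ``data`` assumed in scope):

    with for_range(builder, n) as loop:
        ptr = builder.gep(data, [loop.index])
        builder.store(intp_t(0), ptr)
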
Yields a Loop namedtuple with the following members: - `index` is the loop index's value - `do_break` is a no-argument callable to break out of the loop """ if intp is None: intp = count.type if start is None: start = intp(0) stop = count bbcond = builder.append_basic_block("for.cond") bbbody = builder.append_basic_block("for.body") bbend = builder.append_basic_block("for.end") def do_break(): builder.branch(bbend) bbstart = builder.basic_block builder.branch(bbcond) with builder.goto_block(bbcond): index = builder.phi(intp, name="loop.index") pred = builder.icmp_signed('<', index, stop) builder.cbranch(pred, bbbody, bbend) with builder.goto_block(bbbody): yield Loop(index, do_break) # Update bbbody as a new basic block may have been activated bbbody = builder.basic_block incr = increment_index(builder, index) terminate(builder, bbcond) index.add_incoming(start, bbstart) index.add_incoming(incr, bbbody) builder.position_at_end(bbend) @contextmanager def for_range_slice(builder, start, stop, step, intp=None, inc=True): """ Generate LLVM IR for a for-loop based on a slice. Yields a (index, count) tuple where `index` is the slice index's value inside the loop, and `count` the iteration count. Parameters ------------- builder : object Builder object start : int The beginning value of the slice stop : int The end value of the slice step : int The step value of the slice intp : The data type inc : boolean, optional Signals whether the step is positive (True) or negative (False). Returns ----------- None """ if intp is None: intp = start.type bbcond = builder.append_basic_block("for.cond") bbbody = builder.append_basic_block("for.body") bbend = builder.append_basic_block("for.end") bbstart = builder.basic_block builder.branch(bbcond) with builder.goto_block(bbcond): index = builder.phi(intp, name="loop.index") count = builder.phi(intp, name="loop.count") if (inc): pred = builder.icmp_signed('<', index, stop) else: pred = builder.icmp_signed('>', index, stop) builder.cbranch(pred, bbbody, bbend) with builder.goto_block(bbbody): yield index, count bbbody = builder.basic_block incr = builder.add(index, step) next_count = increment_index(builder, count) terminate(builder, bbcond) index.add_incoming(start, bbstart) index.add_incoming(incr, bbbody) count.add_incoming(ir.Constant(intp, 0), bbstart) count.add_incoming(next_count, bbbody) builder.position_at_end(bbend) @contextmanager def for_range_slice_generic(builder, start, stop, step): """ A helper wrapper for for_range_slice(). This is a context manager which yields two for_range_slice()-alike context managers, the first for the positive step case, the second for the negative step case. Use: with for_range_slice_generic(...) as (pos_range, neg_range): with pos_range as (idx, count): ... with neg_range as (idx, count): ... """ intp = start.type is_pos_step = builder.icmp_signed('>=', step, ir.Constant(intp, 0)) pos_for_range = for_range_slice(builder, start, stop, step, intp, inc=True) neg_for_range = for_range_slice(builder, start, stop, step, intp, inc=False) @contextmanager def cm_cond(cond, inner_cm): with cond: with inner_cm as value: yield value with builder.if_else(is_pos_step, likely=True) as (then, otherwise): yield cm_cond(then, pos_for_range), cm_cond(otherwise, neg_for_range) @contextmanager def loop_nest(builder, shape, intp, order='C'): """ Generate a loop nest walking a N-dimensional array. Yields a tuple of N indices for use in the inner loop body, iterating over the *shape* space. 
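A sketch of typical use (*shape* being a sequence of intp values):

    with loop_nest(builder, shape, intp_t) as indices:
        # *indices* holds one intp index per dimension
        ...
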
If *order* is 'C' (the default), indices are incremented inside-out (i.e. (0,0), (0,1), (0,2), (1,0) etc.). If *order* is 'F', they are incremented outside-in (i.e. (0,0), (1,0), (2,0), (0,1) etc.). This has performance implications when walking an array as it impacts the spatial locality of memory accesses. """ assert order in 'CF' if not shape: # 0-d array yield () else: if order == 'F': _swap = lambda x: x[::-1] else: _swap = lambda x: x with _loop_nest(builder, _swap(shape), intp) as indices: assert len(indices) == len(shape) yield _swap(indices) @contextmanager def _loop_nest(builder, shape, intp): with for_range(builder, shape[0], intp=intp) as loop: if len(shape) > 1: with _loop_nest(builder, shape[1:], intp) as indices: yield (loop.index,) + indices else: yield (loop.index,) def pack_array(builder, values, ty=None): """ Pack a sequence of values in a LLVM array. *ty* should be given if the array may be empty, in which case the type can't be inferred from the values. """ n = len(values) if ty is None: ty = values[0].type ary = ir.ArrayType(ty, n)(ir.Undefined) for i, v in enumerate(values): ary = builder.insert_value(ary, v, i) return ary def pack_struct(builder, values): """ Pack a sequence of values into a LLVM struct. """ structty = ir.LiteralStructType([v.type for v in values]) st = structty(ir.Undefined) for i, v in enumerate(values): st = builder.insert_value(st, v, i) return st def unpack_tuple(builder, tup, count=None): """ Unpack an array or structure of values, return a Python tuple. """ if count is None: # Assuming *tup* is an aggregate count = len(tup.type.elements) vals = [builder.extract_value(tup, i) for i in range(count)] return vals def get_item_pointer(context, builder, aryty, ary, inds, wraparound=False, boundscheck=False): # Set boundscheck=True for any pointer access that should be # boundschecked. do_boundscheck() will handle enabling or disabling the # actual boundschecking based on the user config. shapes = unpack_tuple(builder, ary.shape, count=aryty.ndim) strides = unpack_tuple(builder, ary.strides, count=aryty.ndim) return get_item_pointer2(context, builder, data=ary.data, shape=shapes, strides=strides, layout=aryty.layout, inds=inds, wraparound=wraparound, boundscheck=boundscheck) def do_boundscheck(context, builder, ind, dimlen, axis=None): def _dbg(): # Remove this when we figure out how to include this information # in the error message. if axis is not None: if isinstance(axis, int): printf(builder, "debug: IndexError: index %d is out of bounds " "for axis {} with size %d\n".format(axis), ind, dimlen) else: printf(builder, "debug: IndexError: index %d is out of bounds " "for axis %d with size %d\n", ind, axis, dimlen) else: printf(builder, "debug: IndexError: index %d is out of bounds for size %d\n", ind, dimlen) msg = "index is out of bounds" out_of_bounds_upper = builder.icmp_signed('>=', ind, dimlen) with if_unlikely(builder, out_of_bounds_upper): if config.FULL_TRACEBACKS: _dbg() context.call_conv.return_user_exc(builder, IndexError, (msg,)) out_of_bounds_lower = builder.icmp_signed('<', ind, ind.type(0)) with if_unlikely(builder, out_of_bounds_lower): if config.FULL_TRACEBACKS: _dbg() context.call_conv.return_user_exc(builder, IndexError, (msg,)) def get_item_pointer2(context, builder, data, shape, strides, layout, inds, wraparound=False, boundscheck=False): # Set boundscheck=True for any pointer access that should be # boundschecked. do_boundscheck() will handle enabling or disabling the # actual boundschecking based on the user config. 
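# (Illustration: for a C-contiguous array of shape (m, n), the code below # reduces index (i, j) to the flat element offset i*n + j and GEPs the # data pointer by it; wraparound first rewrites a negative i to m + i, # and boundscheck traps out-of-range indices one axis at a time.)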
if wraparound: # Wraparound indices = [] for ind, dimlen in zip(inds, shape): negative = builder.icmp_signed('<', ind, ind.type(0)) wrapped = builder.add(dimlen, ind) selected = builder.select(negative, wrapped, ind) indices.append(selected) else: indices = inds if boundscheck: for axis, (ind, dimlen) in enumerate(zip(indices, shape)): do_boundscheck(context, builder, ind, dimlen, axis) if not indices: # Indexing with empty tuple return builder.gep(data, [int32_t(0)]) intp = indices[0].type # Indexing code if layout in 'CF': steps = [] # Compute steps for each dimension if layout == 'C': # C contiguous for i in range(len(shape)): last = intp(1) for j in shape[i + 1:]: last = builder.mul(last, j) steps.append(last) elif layout == 'F': # F contiguous for i in range(len(shape)): last = intp(1) for j in shape[:i]: last = builder.mul(last, j) steps.append(last) else: raise Exception("unreachable") # Compute index loc = intp(0) for i, s in zip(indices, steps): tmp = builder.mul(i, s) loc = builder.add(loc, tmp) ptr = builder.gep(data, [loc]) return ptr else: # Any layout dimoffs = [builder.mul(s, i) for s, i in zip(strides, indices)] offset = functools.reduce(builder.add, dimoffs) return pointer_add(builder, data, offset) def _scalar_pred_against_zero(builder, value, fpred, icond): nullval = value.type(0) if isinstance(value.type, (ir.FloatType, ir.DoubleType)): isnull = fpred(value, nullval) elif isinstance(value.type, ir.IntType): isnull = builder.icmp_signed(icond, value, nullval) else: raise TypeError("unexpected value type %s" % (value.type,)) return isnull def is_scalar_zero(builder, value): """ Return a predicate representing whether *value* is equal to zero. """ return _scalar_pred_against_zero( builder, value, functools.partial(builder.fcmp_ordered, '=='), '==') def is_not_scalar_zero(builder, value): """ Return a predicate representing whether a *value* is not equal to zero. (not exactly "not is_scalar_zero" because of nans) """ return _scalar_pred_against_zero( builder, value, functools.partial(builder.fcmp_unordered, '!='), '!=') def is_scalar_zero_or_nan(builder, value): """ Return a predicate representing whether *value* is equal to either zero or NaN. """ return _scalar_pred_against_zero( builder, value, functools.partial(builder.fcmp_unordered, '=='), '==') is_true = is_not_scalar_zero is_false = is_scalar_zero def is_scalar_neg(builder, value): """ Is *value* negative? Assumes *value* is signed. """ return _scalar_pred_against_zero( builder, value, functools.partial(builder.fcmp_ordered, '<'), '<') def guard_null(context, builder, value, exc_tuple): """ Guard against *value* being null or zero. *exc_tuple* should be an (exception type, arguments...) tuple. """ with builder.if_then(is_scalar_zero(builder, value), likely=False): exc = exc_tuple[0] exc_args = exc_tuple[1:] or None context.call_conv.return_user_exc(builder, exc, exc_args) def guard_memory_error(context, builder, pointer, msg=None): """ Guard against *pointer* being NULL (and raise a MemoryError). """ assert isinstance(pointer.type, ir.PointerType), pointer.type exc_args = (msg,) if msg else () with builder.if_then(is_null(builder, pointer), likely=False): context.call_conv.return_user_exc(builder, MemoryError, exc_args) @contextmanager def if_zero(builder, value, likely=False): """ Execute the given block if the scalar value is zero. """ with builder.if_then(is_scalar_zero(builder, value), likely=likely): yield guard_zero = guard_null def is_pointer(ltyp): """ Whether the LLVM type *ltyp* is a pointer type. 
""" return isinstance(ltyp, ir.PointerType) def get_record_member(builder, record, offset, typ): pval = gep_inbounds(builder, record, 0, offset) assert not is_pointer(pval.type.pointee) return builder.bitcast(pval, typ.as_pointer()) def is_neg_int(builder, val): return builder.icmp_signed('<', val, val.type(0)) def gep_inbounds(builder, ptr, *inds, **kws): """ Same as *gep*, but add the `inbounds` keyword. """ return gep(builder, ptr, *inds, inbounds=True, **kws) def gep(builder, ptr, *inds, **kws): """ Emit a getelementptr instruction for the given pointer and indices. The indices can be LLVM values or Python int constants. """ name = kws.pop('name', '') inbounds = kws.pop('inbounds', False) assert not kws idx = [] for i in inds: if isinstance(i, int): # NOTE: llvm only accepts int32 inside structs, not int64 ind = int32_t(i) else: ind = i idx.append(ind) return builder.gep(ptr, idx, name=name, inbounds=inbounds) def pointer_add(builder, ptr, offset, return_type=None): """ Add an integral *offset* to pointer *ptr*, and return a pointer of *return_type* (or, if omitted, the same type as *ptr*). Note the computation is done in bytes, and ignores the width of the pointed item type. """ intptr = builder.ptrtoint(ptr, intp_t) if isinstance(offset, int): offset = intp_t(offset) intptr = builder.add(intptr, offset) return builder.inttoptr(intptr, return_type or ptr.type) def memset(builder, ptr, size, value): """ Fill *size* bytes starting from *ptr* with *value*. """ fn = builder.module.declare_intrinsic('llvm.memset', (voidptr_t, size.type)) ptr = builder.bitcast(ptr, voidptr_t) if isinstance(value, int): value = int8_t(value) builder.call(fn, [ptr, value, size, bool_t(0)]) def memset_padding(builder, ptr): """ Fill padding bytes of the pointee with zeros. """ # Load existing value val = builder.load(ptr) # Fill pointee with zeros memset(builder, ptr, sizeof(builder, ptr.type), 0) # Store value back builder.store(val, ptr) def global_constant(builder_or_module, name, value, linkage='internal'): """ Get or create a (LLVM module-)global constant with *name* or *value*. """ if isinstance(builder_or_module, ir.Module): module = builder_or_module else: module = builder_or_module.module data = add_global_variable(module, value.type, name) data.linkage = linkage data.global_constant = True data.initializer = value return data def divmod_by_constant(builder, val, divisor): """ Compute the (quotient, remainder) of *val* divided by the constant positive *divisor*. The semantics reflects those of Python integer floor division, rather than C's / LLVM's signed division and modulo. The difference lies with a negative *val*. """ assert divisor > 0 divisor = val.type(divisor) one = val.type(1) quot = alloca_once(builder, val.type) with builder.if_else(is_neg_int(builder, val)) as (if_neg, if_pos): with if_pos: # quot = val / divisor quot_val = builder.sdiv(val, divisor) builder.store(quot_val, quot) with if_neg: # quot = -1 + (val + 1) / divisor val_plus_one = builder.add(val, one) quot_val = builder.sdiv(val_plus_one, divisor) builder.store(builder.sub(quot_val, one), quot) # rem = val - quot * divisor # (should be slightly faster than a separate modulo operation) quot_val = builder.load(quot) rem_val = builder.sub(val, builder.mul(quot_val, divisor)) return quot_val, rem_val def cbranch_or_continue(builder, cond, bbtrue): """ Branch conditionally or continue. Note: a new block is created and builder is moved to the end of the new block. 
""" bbcont = builder.append_basic_block('.continue') builder.cbranch(cond, bbtrue, bbcont) builder.position_at_end(bbcont) return bbcont def memcpy(builder, dst, src, count): """ Emit a memcpy to the builder. Copies each element of dst to src. Unlike the C equivalent, each element can be any LLVM type. Assumes ------- * dst.type == src.type * count is positive """ # Note this does seem to be optimized as a raw memcpy() by LLVM # whenever possible... assert dst.type == src.type with for_range(builder, count, intp=count.type) as loop: out_ptr = builder.gep(dst, [loop.index]) in_ptr = builder.gep(src, [loop.index]) builder.store(builder.load(in_ptr), out_ptr) def _raw_memcpy(builder, func_name, dst, src, count, itemsize, align): size_t = count.type if isinstance(itemsize, int): itemsize = ir.Constant(size_t, itemsize) memcpy = builder.module.declare_intrinsic(func_name, [voidptr_t, voidptr_t, size_t]) is_volatile = false_bit builder.call(memcpy, [builder.bitcast(dst, voidptr_t), builder.bitcast(src, voidptr_t), builder.mul(count, itemsize), is_volatile]) def raw_memcpy(builder, dst, src, count, itemsize, align=1): """ Emit a raw memcpy() call for `count` items of size `itemsize` from `src` to `dest`. """ return _raw_memcpy(builder, 'llvm.memcpy', dst, src, count, itemsize, align) def raw_memmove(builder, dst, src, count, itemsize, align=1): """ Emit a raw memmove() call for `count` items of size `itemsize` from `src` to `dest`. """ return _raw_memcpy(builder, 'llvm.memmove', dst, src, count, itemsize, align) def muladd_with_overflow(builder, a, b, c): """ Compute (a * b + c) and return a (result, overflow bit) pair. The operands must be signed integers. """ p = builder.smul_with_overflow(a, b) prod = builder.extract_value(p, 0) prod_ovf = builder.extract_value(p, 1) s = builder.sadd_with_overflow(prod, c) res = builder.extract_value(s, 0) ovf = builder.or_(prod_ovf, builder.extract_value(s, 1)) return res, ovf def printf(builder, format, *args): """ Calls printf(). Argument `format` is expected to be a Python string. Values to be printed are listed in `args`. Note: There is no checking to ensure there is correct number of values in `args` and there type matches the declaration in the format string. """ assert isinstance(format, str) mod = builder.module # Make global constant for format string cstring = voidptr_t fmt_bytes = make_bytearray((format + '\00').encode('ascii')) global_fmt = global_constant(mod, "printf_format", fmt_bytes) fnty = ir.FunctionType(int32_t, [cstring], var_arg=True) # Insert printf() try: fn = mod.get_global('printf') except KeyError: fn = ir.Function(mod, fnty, name="printf") # Call ptr_fmt = builder.bitcast(global_fmt, cstring) return builder.call(fn, [ptr_fmt] + list(args)) def snprintf(builder, buffer, bufsz, format, *args): """Calls libc snprintf(buffer, bufsz, format, ...args) """ assert isinstance(format, str) mod = builder.module # Make global constant for format string cstring = voidptr_t fmt_bytes = make_bytearray((format + '\00').encode('ascii')) global_fmt = global_constant(mod, "snprintf_format", fmt_bytes) fnty = ir.FunctionType( int32_t, [cstring, intp_t, cstring], var_arg=True, ) # Actual symbol name of snprintf is different on win32. 
symbol = 'snprintf' if config.IS_WIN32: symbol = '_' + symbol # Insert snprintf() try: fn = mod.get_global(symbol) except KeyError: fn = ir.Function(mod, fnty, name=symbol) # Call ptr_fmt = builder.bitcast(global_fmt, cstring) return builder.call(fn, [buffer, bufsz, ptr_fmt] + list(args)) def snprintf_stackbuffer(builder, bufsz, format, *args): """Similar to `snprintf()` but the buffer is stack allocated to size *bufsz*. Returns the buffer pointer as i8*. """ assert isinstance(bufsz, int) spacety = ir.ArrayType(ir.IntType(8), bufsz) space = alloca_once(builder, spacety, zfill=True) buffer = builder.bitcast(space, voidptr_t) snprintf(builder, buffer, intp_t(bufsz), format, *args) return buffer def normalize_ir_text(text): """ Normalize the given string to latin1 compatible encoding that is suitable for use in LLVM IR. """ # Just re-encoding to latin1 is enough return text.encode('utf8').decode('latin1') def hexdump(builder, ptr, nbytes): """Debug print the memory region in *ptr* to *ptr + nbytes* as hex. """ bytes_per_line = 16 nbytes = builder.zext(nbytes, intp_t) printf(builder, "hexdump p=%p n=%zu", ptr, nbytes) byte_t = ir.IntType(8) ptr = builder.bitcast(ptr, byte_t.as_pointer()) # Loop to print the bytes in *ptr* as hex with for_range(builder, nbytes) as idx: div_by = builder.urem(idx.index, intp_t(bytes_per_line)) do_new_line = builder.icmp_unsigned("==", div_by, intp_t(0)) with builder.if_then(do_new_line): printf(builder, "\n") offset = builder.gep(ptr, [idx.index]) val = builder.load(offset) printf(builder, " %02x", val) printf(builder, "\n") def is_nonelike(ty): """ returns if 'ty' is none """ return ( ty is None or isinstance(ty, types.NoneType) or isinstance(ty, types.Omitted) ) numba-0.55.1/numba/core/codegen.py000664 000000 000000 00000153531 14174536160 016713 0ustar00rootroot000000 000000 import warnings import functools import locale import weakref import ctypes import html import textwrap import llvmlite.llvmpy.core as lc import llvmlite.llvmpy.passes as lp import llvmlite.binding as ll import llvmlite.ir as llvmir from abc import abstractmethod, ABCMeta from numba.core import utils, config, cgutils from numba.core.runtime.nrtopt import remove_redundant_nrt_refct from numba.core.runtime import rtsys from numba.core.compiler_lock import require_global_compiler_lock from numba.core.errors import NumbaInvalidConfigWarning from numba.misc.inspection import disassemble_elf_to_cfg from numba.misc.llvm_pass_timings import PassTimingsCollection _x86arch = frozenset(['x86', 'i386', 'i486', 'i586', 'i686', 'i786', 'i886', 'i986']) def _is_x86(triple): arch = triple.split('-')[0] return arch in _x86arch def _parse_refprune_flags(): """Parse refprune flags from the `config`. Invalid values are ignored an warn via a `NumbaInvalidConfigWarning` category. 
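For example, ``NUMBA_LLVM_REFPRUNE_FLAGS="per_bb,fanout"`` would OR together ``RefPruneSubpasses.PER_BB`` and ``RefPruneSubpasses.FANOUT`` (assuming both subpasses exist in the installed llvmlite).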
Returns ------- flags : llvmlite.binding.RefPruneSubpasses """ flags = config.LLVM_REFPRUNE_FLAGS.split(',') if not flags: return 0 val = 0 for item in flags: item = item.strip() try: val |= getattr(ll.RefPruneSubpasses, item.upper()) except AttributeError: warnings.warn(f"invalid refprune flags {item!r}", NumbaInvalidConfigWarning) return val def dump(header, body, lang): if config.HIGHLIGHT_DUMPS: try: import pygments except ImportError: msg = "Please install pygments to see highlighted dumps" raise ValueError(msg) else: from pygments import highlight from pygments.lexers import GasLexer as gas_lexer from pygments.lexers import LlvmLexer as llvm_lexer from pygments.formatters import Terminal256Formatter from numba.misc.dump_style import by_colorscheme lexer_map = {'llvm': llvm_lexer, 'asm': gas_lexer} lexer = lexer_map[lang] def printer(arg): print(highlight(arg, lexer(), Terminal256Formatter(style=by_colorscheme()))) else: printer = print print('=' * 80) print(header.center(80, '-')) printer(body) print('=' * 80) class _CFG(object): """ Wraps the CFG graph for different display method. Instance of the class can be stringified (``__repr__`` is defined) to get the graph in DOT format. The ``.display()`` method plots the graph in PDF. If in IPython notebook, the returned image can be inlined. """ def __init__(self, cres, name, py_func, **kwargs): self.cres = cres self.name = name self.py_func = py_func fn = cres.get_function(name) self.dot = ll.get_function_cfg(fn) self.kwargs = kwargs def pretty_printer(self, filename=None, view=None, render_format=None, highlight=True, interleave=False, strip_ir=False, show_key=True, fontsize=10): """ "Pretty" prints the DOT graph of the CFG. For explanation of the parameters see the docstring for numba.core.dispatcher::inspect_cfg. """ import graphviz as gv import re import json import inspect from llvmlite import binding as ll from numba.typed import List from types import SimpleNamespace from collections import defaultdict _default = False _highlight = SimpleNamespace(incref=_default, decref=_default, returns=_default, raises=_default, meminfo=_default, branches=_default, llvm_intrin_calls=_default, function_calls=_default,) _interleave = SimpleNamespace(python=_default, lineinfo=_default) def parse_config(_config, kwarg): """ Parses the kwarg into a consistent format for use in configuring the Digraph rendering. _config is the configuration instance to update, kwarg is the kwarg on which to base the updates. """ if isinstance(kwarg, bool): for attr in _config.__dict__: setattr(_config, attr, kwarg) elif isinstance(kwarg, dict): for k, v in kwarg.items(): if k not in _config.__dict__: raise ValueError("Unexpected key in kwarg: %s" % k) if isinstance(v, bool): setattr(_config, k, v) else: msg = "Unexpected value for key: %s, got:%s" raise ValueError(msg % (k, v)) elif isinstance(kwarg, set): for item in kwarg: if item not in _config.__dict__: raise ValueError("Unexpected key in kwarg: %s" % item) else: setattr(_config, item, True) else: msg = "Unhandled configuration type for kwarg %s" raise ValueError(msg % type(kwarg)) parse_config(_highlight, highlight) parse_config(_interleave, interleave) # This is the colour scheme. 
The graphviz HTML label renderer only takes # names for colours: https://www.graphviz.org/doc/info/shapes.html#html cs = defaultdict(lambda: 'white') # default bg colour is white cs['marker'] = 'orange' cs['python'] = 'yellow' cs['truebr'] = 'green' cs['falsebr'] = 'red' cs['incref'] = 'cyan' cs['decref'] = 'turquoise' cs['raise'] = 'lightpink' cs['meminfo'] = 'lightseagreen' cs['return'] = 'purple' cs['llvm_intrin_calls'] = 'rosybrown' cs['function_calls'] = 'tomato' # Get the raw dot format information from LLVM and the LLVM IR fn = self.cres.get_function(self.name) #raw_dot = ll.get_function_cfg(fn).replace('\\l...', '') llvm_str = self.cres.get_llvm_str() def get_metadata(llvm_str): """ Gets the metadata entries from the LLVM IR, these look something like '!123 = INFORMATION'. Returns a map of metadata key to metadata value, i.e. from the example {'!123': INFORMATION}""" md = {} metadata_entry = re.compile(r'(^[!][0-9]+)(\s+=\s+.*)') for x in llvm_str.splitlines(): match = metadata_entry.match(x) if match is not None: g = match.groups() if g is not None: assert len(g) == 2 md[g[0]] = g[1] return md md = get_metadata(llvm_str) # setup digraph with initial properties def init_digraph(name, fname, fontsize): # name and fname are arbitrary graph and file names, they appear in # some rendering formats, the fontsize determines the output # fontsize. # truncate massive mangled names as file names as it causes OSError # when trying to render to pdf cmax = 200 if len(fname) > cmax: wstr = (f'CFG output filname "{fname}" exceeds maximum ' f'supported length, it will be truncated.') warnings.warn(wstr, NumbaInvalidConfigWarning) fname = fname[:cmax] f = gv.Digraph(name, filename=fname) f.attr(rankdir='TB') f.attr('node', shape='none', fontsize='%s' % str(fontsize)) return f f = init_digraph(self.name, self.name, fontsize) # A lot of regex is needed to parse the raw dot output. This output # contains a mix of LLVM IR in the labels, and also DOT markup. # DOT syntax, matches a "port" (where the tail of an edge starts) port_match = re.compile('.*{(.*)}.*') # DOT syntax, matches the "port" value from a found "port_match" port_jmp_match = re.compile('.*<(.*)>(.*)') # LLVM syntax, matches a LLVM debug marker metadata_marker = re.compile(r'.*!dbg\s+(![0-9]+).*') # LLVM syntax, matches a location entry location_expr = (r'.*!DILocation\(line:\s+([0-9]+),' r'\s+column:\s+([0-9]),.*') location_entry = re.compile(location_expr) # LLVM syntax, matches LLVMs internal debug value calls dbg_value = re.compile(r'.*call void @llvm.dbg.value.*') # LLVM syntax, matches tokens for highlighting nrt_incref = re.compile(r"@NRT_incref\b") nrt_decref = re.compile(r"@NRT_decref\b") nrt_meminfo = re.compile("@NRT_MemInfo") ll_intrin_calls = re.compile(r".*call.*@llvm\..*") ll_function_call = re.compile(r".*call.*@.*") ll_raise = re.compile(r"ret i32.*\!ret_is_raise.*") ll_return = re.compile("ret i32 [^1],?.*") # wrapper function for line wrapping LLVM lines def wrap(s): return textwrap.wrap(s, width=120, subsequent_indent='... ') # function to fix (sometimes escaped for DOT!) LLVM IR etc that needs to # be HTML escaped def clean(s): # Grab first 300 chars only, 1. this should be enough to identify # the token and it keeps names short. 2. graphviz/dot has a maximum # buffer size near 585?!, with additional transforms it's hard to # know if this would be exceeded. 3. hash of the token string is # written into the rendering to permit exact identification against # e.g. LLVM IR dump if necessary. 
n = 300 if len(s) > n: hs = str(hash(s)) s = '{}...<hash={}>'.format(s[:n], hs) s = html.escape(s) # deals with &, < and > s = s.replace('\\{', "&#123;") s = s.replace('\\}', "&#125;") s = s.replace('\\', "&#92;") s = s.replace('%', "&#37;") s = s.replace('!', "&#33;") return s # These hold the node and edge ids from the raw dot information. They # are used later to wire up a new DiGraph that has the same structure # as the raw dot but with new nodes. node_ids = {} edge_ids = {} # Python source lines, used if python source interleave is requested if _interleave.python: src_code, firstlineno = inspect.getsourcelines(self.py_func) # This is the dot info from LLVM, it's in DOT form and has continuation # lines, strip them and then re-parse into `dot_json` form for use in # producing a formatted output. raw_dot = ll.get_function_cfg(fn).replace('\\l...', '') json_bytes = gv.Source(raw_dot).pipe(format='dot_json') jzon = json.loads(json_bytes.decode('utf-8')) idc = 0 # Walk the "objects" (nodes) in the DOT output for obj in jzon['objects']: # These are used to keep tabs on the current line and column numbers # as per the markers. They are tracked so as to make sure a marker # is only emitted if there's a change in the marker. cur_line, cur_col = -1, -1 label = obj['label'] name = obj['name'] gvid = obj['_gvid'] node_ids[gvid] = name # Label is DOT format, it needs the head and tail removing and then # splitting for walking. label = label[1:-1] lines = label.split('\\l') # Holds the new lines new_lines = [] # Aim is to produce an HTML table a bit like this: # # |------------| # | HEADER | <-- this is the block header # |------------| # | LLVM SRC | <-- # | Marker? | < this is the label/block body # | Python src?| <-- # |------------| # | T | F | <-- this is the "ports", also determines col_span # -------------- # # This is HTML syntax, it's the column span. If there's a switch or a # branch at the bottom of the node this is rendered as multiple # columns in a table. First job is to go and render that and work # out how many columns are needed as that dictates how many columns # the rest of the source lines must span. In DOT syntax the places # that edges join nodes are referred to as "ports". Syntax in DOT # is like `node:port`. col_span = 1 # First see if there is a port entry for this node port_line = '' matched = port_match.match(lines[-1]) sliced_lines = lines if matched is not None: # There is a port ports = matched.groups()[0] ports_tokens = ports.split('|') col_span = len(ports_tokens) # Generate HTML table data cells, one for each port. If the # ports correspond to a branch then they can optionally be # highlighted based on T/F. tdfmt = ('<td BGCOLOR="{}" BORDER="1" ALIGN="center" PORT="{}">{}</td>') tbl_data = [] if _highlight.branches: colors = {'T': cs['truebr'], 'F': cs['falsebr']} else: colors = {} for tok in ports_tokens: target, value = port_jmp_match.match(tok).groups() color = colors.get(value, 'white') tbl_data.append(tdfmt.format(color, target, value)) port_line = ''.join(tbl_data) # Drop the last line from the rest of the parse as it's the port # and has just been dealt with. sliced_lines = lines[:-1] # loop peel the block header, it needs an HTML border fmtheader = ('<tr><td BGCOLOR="{}" BORDER="1" ALIGN="center" COLSPAN="{}">{}</td></tr>') new_lines.append(fmtheader.format(cs['default'], col_span, clean(sliced_lines[0].strip()))) # process the rest of the block, creating the table rows one at a time. fmt = ('<tr><td BGCOLOR="{}" BORDER="0" ALIGN="left" COLSPAN="{}">{}</td></tr>') def metadata_interleave(l, new_lines): """ Search line `l` for metadata associated with python or line info and inject it into `new_lines` if requested. 
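(Sketch of the flow: a line carrying ``!dbg !42`` is looked up in ``md``; if ``!42`` resolves to a ``!DILocation(line: 7, column: 1, ...)`` entry and that location is new, a marker row and, when requested, the matching Python source row are appended to ``new_lines``.)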
""" matched = metadata_marker.match(l) if matched is not None: # there's a metadata marker g = matched.groups() if g is not None: assert len(g) == 1, g marker = g[0] debug_data = md.get(marker, None) if debug_data is not None: # and the metadata marker has a corresponding piece # of metadata ld = location_entry.match(debug_data) if ld is not None: # and the metadata is line info... proceed assert len(ld.groups()) == 2, ld line, col = ld.groups() # only emit a new marker if the line number in # the metadata is "new". if line != cur_line or col != cur_col: if _interleave.lineinfo: mfmt = 'Marker %s, Line %s, column %s' mark_line = mfmt % (marker, line, col) ln = fmt.format(cs['marker'], col_span, clean(mark_line)) new_lines.append(ln) if _interleave.python: # TODO: # +1 for decorator, this probably needs # the same thing doing as for the # error messages where the decorator # is scanned for, its not always +1! lidx = int(line) - (firstlineno + 1) source_line = src_code[lidx + 1] ln = fmt.format(cs['python'], col_span, clean(source_line)) new_lines.append(ln) return line, col for l in sliced_lines[1:]: # Drop LLVM debug call entries if dbg_value.match(l): continue # if requested generate interleaving of markers or python from # metadata if _interleave.lineinfo or _interleave.python: updated_lineinfo = metadata_interleave(l, new_lines) if updated_lineinfo is not None: cur_line, cur_col = updated_lineinfo # Highlight other LLVM features if requested, HTML BGCOLOR # property is set by this. if _highlight.incref and nrt_incref.search(l): colour = cs['incref'] elif _highlight.decref and nrt_decref.search(l): colour = cs['decref'] elif _highlight.meminfo and nrt_meminfo.search(l): colour = cs['meminfo'] elif _highlight.raises and ll_raise.search(l): # search for raise as its more specific than exit colour = cs['raise'] elif _highlight.returns and ll_return.search(l): colour = cs['return'] elif _highlight.llvm_intrin_calls and ll_intrin_calls.search(l): colour = cs['llvm_intrin_calls'] elif _highlight.function_calls and ll_function_call.search(l): colour = cs['function_calls'] else: colour = cs['default'] # Use the default coloring as a flag to force printing if a # special token print was requested AND LLVM ir stripping is # required if colour is not cs['default'] or not strip_ir: for x in wrap(clean(l)): new_lines.append(fmt.format(colour, col_span, x)) # add in the port line at the end of the block if it was present # (this was built right at the top of the parse) if port_line: new_lines.append('{}'.format(port_line)) # If there was data, create a table, else don't! dat = ''.join(new_lines) if dat: tab = (('%s
') % (idc, dat)) label = '<{}>'.format(tab) else: label = '' # finally, add a replacement node for the original with a new marked # up label. f.node(name, label=label) # Parse the edge data if 'edges' in jzon: # might be a single block, no edges for edge in jzon['edges']: gvid = edge['_gvid'] tp = edge.get('tailport', None) edge_ids[gvid] = (edge['head'], edge['tail'], tp) # Write in the edge wiring with respect to the new nodes:ports. for gvid, edge in edge_ids.items(): tail = node_ids[edge[1]] head = node_ids[edge[0]] port = edge[2] if port is not None: tail += ':%s' % port f.edge(tail, head) # Add a key to the graph if requested. if show_key: key_tab = [] for k, v in cs.items(): key_tab.append(('{}').format(v, k)) # The first < and last > are DOT syntax, rest is DOT HTML. f.node("Key", label=('<{}
' 'Key:
>').format(''.join(key_tab))) # Render if required if filename is not None or view is not None: f.render(filename=filename, view=view, format=render_format) # Else pipe out a SVG return f.pipe(format='svg') def display(self, filename=None, format='pdf', view=False): """ Plot the CFG. In IPython notebook, the return image object can be inlined. The *filename* option can be set to a specific path for the rendered output to write to. If *view* option is True, the plot is opened by the system default application for the image format (PDF). *format* can be any valid format string accepted by graphviz, default is 'pdf'. """ rawbyt = self.pretty_printer(filename=filename, view=view, render_format=format, **self.kwargs) return rawbyt.decode('utf-8') def _repr_svg_(self): return self.pretty_printer(**self.kwargs).decode('utf-8') def __repr__(self): return self.dot class CodeLibrary(metaclass=ABCMeta): """ An interface for bundling LLVM code together and compiling it. It is tied to a *codegen* instance (e.g. JITCPUCodegen) that will determine how the LLVM code is transformed and linked together. """ _finalized = False _object_caching_enabled = False _disable_inspection = False def __init__(self, codegen: "CPUCodegen", name: str): self._codegen = codegen self._name = name ptc_name = f"{self.__class__.__name__}({self._name!r})" self._recorded_timings = PassTimingsCollection(ptc_name) # Track names of the dynamic globals self._dynamic_globals = [] @property def has_dynamic_globals(self): self._ensure_finalized() return len(self._dynamic_globals) > 0 @property def recorded_timings(self): return self._recorded_timings @property def codegen(self): """ The codegen object owning this library. """ return self._codegen @property def name(self): return self._name def __repr__(self): return "" % (self.name, id(self)) def _raise_if_finalized(self): if self._finalized: raise RuntimeError("operation impossible on finalized object %r" % (self,)) def _ensure_finalized(self): if not self._finalized: self.finalize() def create_ir_module(self, name): """ Create an LLVM IR module for use by this library. """ self._raise_if_finalized() ir_module = self._codegen._create_empty_module(name) return ir_module @abstractmethod def add_linking_library(self, library): """ Add a library for linking into this library, without losing the original library. """ @abstractmethod def add_ir_module(self, ir_module): """ Add an LLVM IR module's contents to this library. """ @abstractmethod def finalize(self): """ Finalize the library. After this call, nothing can be added anymore. Finalization involves various stages of code optimization and linking. """ @abstractmethod def get_function(self, name): """ Return the function named ``name``. """ @abstractmethod def get_llvm_str(self): """ Get the human-readable form of the LLVM module. """ @abstractmethod def get_asm_str(self): """ Get the human-readable assembly. 
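        A sketch of typical use (illustrative only; ``library`` is a
        placeholder for a finalized instance)::

            print(library.get_asm_str())  # textual native assembly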
""" # # Object cache hooks and serialization # def enable_object_caching(self): self._object_caching_enabled = True self._compiled_object = None self._compiled = False def _get_compiled_object(self): if not self._object_caching_enabled: raise ValueError("object caching not enabled in %s" % (self,)) if self._compiled_object is None: raise RuntimeError("no compiled object yet for %s" % (self,)) return self._compiled_object def _set_compiled_object(self, value): if not self._object_caching_enabled: raise ValueError("object caching not enabled in %s" % (self,)) if self._compiled: raise ValueError("library already compiled: %s" % (self,)) self._compiled_object = value self._disable_inspection = True class CPUCodeLibrary(CodeLibrary): def __init__(self, codegen, name): super().__init__(codegen, name) self._linking_libraries = [] # maintain insertion order self._final_module = ll.parse_assembly( str(self._codegen._create_empty_module(self.name))) self._final_module.name = cgutils.normalize_ir_text(self.name) self._shared_module = None def _optimize_functions(self, ll_module): """ Internal: run function-level optimizations inside *ll_module*. """ # Enforce data layout to enable layout-specific optimizations ll_module.data_layout = self._codegen._data_layout with self._codegen._function_pass_manager(ll_module) as fpm: # Run function-level optimizations to reduce memory usage and improve # module-level optimization. for func in ll_module.functions: k = f"Function passes on {func.name!r}" with self._recorded_timings.record(k): fpm.initialize() fpm.run(func) fpm.finalize() def _optimize_final_module(self): """ Internal: optimize this library's final module. """ cheap_name = "Module passes (cheap optimization for refprune)" with self._recorded_timings.record(cheap_name): # A cheaper optimisation pass is run first to try and get as many # refops into the same function as possible via inlining self._codegen._mpm_cheap.run(self._final_module) # Refop pruning is then run on the heavily inlined function if not config.LLVM_REFPRUNE_PASS: self._final_module = remove_redundant_nrt_refct(self._final_module) full_name = "Module passes (full optimization)" with self._recorded_timings.record(full_name): # The full optimisation suite is then run on the refop pruned IR self._codegen._mpm_full.run(self._final_module) def _get_module_for_linking(self): """ Internal: get a LLVM module suitable for linking multiple times into another library. Exported functions are made "linkonce_odr" to allow for multiple definitions, inlining, and removal of unused exports. See discussion in https://github.com/numba/numba/pull/890 """ self._ensure_finalized() if self._shared_module is not None: return self._shared_module mod = self._final_module to_fix = [] nfuncs = 0 for fn in mod.functions: nfuncs += 1 if not fn.is_declaration and fn.linkage == ll.Linkage.external: to_fix.append(fn.name) if nfuncs == 0: # This is an issue which can occur if loading a module # from an object file and trying to link with it, so detect it # here to make debugging easier. 
raise RuntimeError("library unfit for linking: " "no available functions in %s" % (self,)) if to_fix: mod = mod.clone() for name in to_fix: # NOTE: this will mark the symbol WEAK if serialized # to an ELF file mod.get_function(name).linkage = 'linkonce_odr' self._shared_module = mod return mod def add_linking_library(self, library): library._ensure_finalized() self._linking_libraries.append(library) def add_ir_module(self, ir_module): self._raise_if_finalized() assert isinstance(ir_module, llvmir.Module) ir = cgutils.normalize_ir_text(str(ir_module)) ll_module = ll.parse_assembly(ir) ll_module.name = ir_module.name ll_module.verify() self.add_llvm_module(ll_module) def add_llvm_module(self, ll_module): self._optimize_functions(ll_module) # TODO: we shouldn't need to recreate the LLVM module object if not config.LLVM_REFPRUNE_PASS: ll_module = remove_redundant_nrt_refct(ll_module) self._final_module.link_in(ll_module) def finalize(self): require_global_compiler_lock() # Report any LLVM-related problems to the user self._codegen._check_llvm_bugs() self._raise_if_finalized() if config.DUMP_FUNC_OPT: dump("FUNCTION OPTIMIZED DUMP %s" % self.name, self.get_llvm_str(), 'llvm') # Link libraries for shared code seen = set() for library in self._linking_libraries: if library not in seen: seen.add(library) self._final_module.link_in( library._get_module_for_linking(), preserve=True, ) # Optimize the module after all dependences are linked in above, # to allow for inlining. self._optimize_final_module() self._final_module.verify() self._finalize_final_module() def _finalize_dynamic_globals(self): # Scan for dynamic globals for gv in self._final_module.global_variables: if gv.name.startswith('numba.dynamic.globals'): self._dynamic_globals.append(gv.name) def _verify_declare_only_symbols(self): # Verify that no declare-only function compiled by numba. for fn in self._final_module.functions: # We will only check for symbol name starting with '_ZN5numba' if fn.is_declaration and fn.name.startswith('_ZN5numba'): msg = 'Symbol {} not linked properly' raise AssertionError(msg.format(fn.name)) def _finalize_final_module(self): """ Make the underlying LLVM module ready to use. """ self._finalize_dynamic_globals() self._verify_declare_only_symbols() # Remember this on the module, for the object cache hooks self._final_module.__library = weakref.proxy(self) # It seems add_module() must be done only here and not before # linking in other modules, otherwise get_pointer_to_function() # could fail. cleanup = self._codegen._add_module(self._final_module) if cleanup: weakref.finalize(self, cleanup) self._finalize_specific() self._finalized = True if config.DUMP_OPTIMIZED: dump("OPTIMIZED DUMP %s" % self.name, self.get_llvm_str(), 'llvm') if config.DUMP_ASSEMBLY: dump("ASSEMBLY %s" % self.name, self.get_asm_str(), 'asm') def get_defined_functions(self): """ Get all functions defined in the library. The library must have been finalized. """ mod = self._final_module for fn in mod.functions: if not fn.is_declaration: yield fn def get_function(self, name): return self._final_module.get_function(name) def _sentry_cache_disable_inspection(self): if self._disable_inspection: warnings.warn('Inspection disabled for cached code. 
' 'Invalid result is returned.') def get_llvm_str(self): self._sentry_cache_disable_inspection() return str(self._final_module) def get_asm_str(self): self._sentry_cache_disable_inspection() return str(self._codegen._tm.emit_assembly(self._final_module)) def get_function_cfg(self, name, py_func=None, **kwargs): """ Get control-flow graph of the LLVM function """ self._sentry_cache_disable_inspection() return _CFG(self, name, py_func, **kwargs) def get_disasm_cfg(self, mangled_name): """ Get the CFG of the disassembly of the ELF object at symbol mangled_name. Requires python package: r2pipe Requires radare2 binary on $PATH. Notebook rendering requires python package: graphviz Optionally requires a compiler toolchain (via pycc) to link the ELF to get better disassembly results. """ elf = self._get_compiled_object() return disassemble_elf_to_cfg(elf, mangled_name) @classmethod def _dump_elf(cls, buf): """ Dump the symbol table of an ELF file. Needs pyelftools (https://github.com/eliben/pyelftools) """ from elftools.elf.elffile import ELFFile from elftools.elf import descriptions from io import BytesIO f = ELFFile(BytesIO(buf)) print("ELF file:") for sec in f.iter_sections(): if sec['sh_type'] == 'SHT_SYMTAB': symbols = sorted(sec.iter_symbols(), key=lambda sym: sym.name) print(" symbols:") for sym in symbols: if not sym.name: continue print(" - %r: size=%d, value=0x%x, type=%s, bind=%s" % (sym.name.decode(), sym['st_size'], sym['st_value'], descriptions.describe_symbol_type(sym['st_info']['type']), descriptions.describe_symbol_bind(sym['st_info']['bind']), )) print() @classmethod def _object_compiled_hook(cls, ll_module, buf): """ `ll_module` was compiled into object code `buf`. """ try: self = ll_module.__library except AttributeError: return if self._object_caching_enabled: self._compiled = True self._compiled_object = buf @classmethod def _object_getbuffer_hook(cls, ll_module): """ Return a cached object code for `ll_module`. """ try: self = ll_module.__library except AttributeError: return if self._object_caching_enabled and self._compiled_object: buf = self._compiled_object self._compiled_object = None return buf def serialize_using_bitcode(self): """ Serialize this library using its bitcode as the cached representation. """ self._ensure_finalized() return (self.name, 'bitcode', self._final_module.as_bitcode()) def serialize_using_object_code(self): """ Serialize this library using its object code as the cached representation. We also include its bitcode for further inlining with other libraries. """ self._ensure_finalized() data = (self._get_compiled_object(), self._get_module_for_linking().as_bitcode()) return (self.name, 'object', data) @classmethod def _unserialize(cls, codegen, state): name, kind, data = state self = codegen.create_library(name) assert isinstance(self, cls) if kind == 'bitcode': # No need to re-run optimizations, just make the module ready self._final_module = ll.parse_bitcode(data) self._finalize_final_module() return self elif kind == 'object': object_code, shared_bitcode = data self.enable_object_caching() self._set_compiled_object(object_code) self._shared_module = ll.parse_bitcode(shared_bitcode) self._finalize_final_module() # Load symbols from cache self._codegen._engine._load_defined_symbols(self._shared_module) return self else: raise ValueError("unsupported serialization kind %r" % (kind,)) class AOTCodeLibrary(CPUCodeLibrary): def emit_native_object(self): """ Return this library as a native object (a bytestring) -- for example ELF under Linux. 
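        A typical use (an illustrative sketch; ``lib`` and ``out.o`` are
        placeholders) is to write the bytes straight to an object file for
        later linking::

            with open('out.o', 'wb') as fout:
                fout.write(lib.emit_native_object())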
This function implicitly calls .finalize(). """ self._ensure_finalized() return self._codegen._tm.emit_object(self._final_module) def emit_bitcode(self): """ Return this library as LLVM bitcode (a bytestring). This function implicitly calls .finalize(). """ self._ensure_finalized() return self._final_module.as_bitcode() def _finalize_specific(self): pass class JITCodeLibrary(CPUCodeLibrary): def get_pointer_to_function(self, name): """ Generate native code for function named *name* and return a pointer to the start of the function (as an integer). This function implicitly calls .finalize(). Returns ------- pointer : int - zero (null) if no symbol of *name* is defined by this code library. - non-zero if the symbol is defined. """ self._ensure_finalized() ee = self._codegen._engine if not ee.is_symbol_defined(name): return 0 else: return self._codegen._engine.get_function_address(name) def _finalize_specific(self): self._codegen._scan_and_fix_unresolved_refs(self._final_module) with self._recorded_timings.record("Finalize object"): self._codegen._engine.finalize_object() class RuntimeLinker(object): """ For tracking unresolved symbols generated at runtime due to recursion. """ PREFIX = '.numba.unresolved$' def __init__(self): self._unresolved = utils.UniqueDict() self._defined = set() self._resolved = [] def scan_unresolved_symbols(self, module, engine): """ Scan and track all unresolved external symbols in the module and allocate memory for it. """ prefix = self.PREFIX for gv in module.global_variables: if gv.name.startswith(prefix): sym = gv.name[len(prefix):] # Avoid remapping to existing GV if engine.is_symbol_defined(gv.name): continue # Allocate a memory space for the pointer abortfn = rtsys.library.get_pointer_to_function("nrt_unresolved_abort") ptr = ctypes.c_void_p(abortfn) engine.add_global_mapping(gv, ctypes.addressof(ptr)) self._unresolved[sym] = ptr def scan_defined_symbols(self, module): """ Scan and track all defined symbols. """ for fn in module.functions: if not fn.is_declaration: self._defined.add(fn.name) def resolve(self, engine): """ Fix unresolved symbols if they are defined. """ # An iterator to get all unresolved but available symbols pending = [name for name in self._unresolved if name in self._defined] # Resolve pending symbols for name in pending: # Get runtime address fnptr = engine.get_function_address(name) # Fix all usage ptr = self._unresolved[name] ptr.value = fnptr self._resolved.append((name, ptr)) # keep ptr alive # Delete resolved del self._unresolved[name] def _proxy(old): @functools.wraps(old) def wrapper(self, *args, **kwargs): return old(self._ee, *args, **kwargs) return wrapper class JitEngine(object): """Wraps an ExecutionEngine to provide custom symbol tracking. Since the symbol tracking is incomplete (doesn't consider loaded code object), we are not putting it in llvmlite. """ def __init__(self, ee): self._ee = ee # Track symbol defined via codegen'd Module # but not any cached object. # NOTE: `llvm::ExecutionEngine` will catch duplicated symbols and # we are not going to protect against that. A proper duplicated # symbol detection will need a more logic to check for the linkage # (e.g. like `weak` linkage symbol can override). This # `_defined_symbols` set will be just enough to tell if a symbol # exists and will not cause the `EE` symbol lookup to `exit(1)` # when symbol-not-found. self._defined_symbols = set() def is_symbol_defined(self, name): """Is the symbol defined in this session? 
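        For example (illustrative; ``mod`` and the symbol name are made
        up): after ``add_module(mod)`` has loaded a module defining
        ``my_func``, ``is_symbol_defined('my_func')`` returns True, while
        an unknown name simply returns False rather than causing the
        engine's symbol lookup to abort.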
""" return name in self._defined_symbols def _load_defined_symbols(self, mod): """Extract symbols from the module """ for gsets in (mod.functions, mod.global_variables): self._defined_symbols |= {gv.name for gv in gsets if not gv.is_declaration} def add_module(self, module): """Override ExecutionEngine.add_module to keep info about defined symbols. """ self._load_defined_symbols(module) return self._ee.add_module(module) def add_global_mapping(self, gv, addr): """Override ExecutionEngine.add_global_mapping to keep info about defined symbols. """ self._defined_symbols.add(gv.name) return self._ee.add_global_mapping(gv, addr) # # The remaining methods are re-export of the ExecutionEngine APIs # set_object_cache = _proxy(ll.ExecutionEngine.set_object_cache) finalize_object = _proxy(ll.ExecutionEngine.finalize_object) get_function_address = _proxy(ll.ExecutionEngine.get_function_address) get_global_value_address = _proxy( ll.ExecutionEngine.get_global_value_address ) class Codegen(metaclass=ABCMeta): """ Base Codegen class. It is expected that subclasses set the class attribute ``_library_class``, indicating the CodeLibrary class for the target. Subclasses should also initialize: ``self._data_layout``: the data layout for the target. ``self._target_data``: the binding layer ``TargetData`` for the target. """ @abstractmethod def _create_empty_module(self, name): """ Create a new empty module suitable for the target. """ @abstractmethod def _add_module(self, module): """ Add a module to the execution engine. Ownership of the module is transferred to the engine. """ @property def target_data(self): """ The LLVM "target data" object for this codegen instance. """ return self._target_data def create_library(self, name, **kwargs): """ Create a :class:`CodeLibrary` object for use with this codegen instance. 
""" return self._library_class(self, name, **kwargs) def unserialize_library(self, serialized): return self._library_class._unserialize(self, serialized) class CPUCodegen(Codegen): def __init__(self, module_name): initialize_llvm() self._data_layout = None self._llvm_module = ll.parse_assembly( str(self._create_empty_module(module_name))) self._llvm_module.name = "global_codegen_module" self._rtlinker = RuntimeLinker() self._init(self._llvm_module) def _init(self, llvm_module): assert list(llvm_module.global_variables) == [], "Module isn't empty" target = ll.Target.from_triple(ll.get_process_triple()) tm_options = dict(opt=config.OPT) self._tm_features = self._customize_tm_features() self._customize_tm_options(tm_options) tm = target.create_target_machine(**tm_options) engine = ll.create_mcjit_compiler(llvm_module, tm) if config.ENABLE_PROFILING: engine.enable_jit_events() self._tm = tm self._engine = JitEngine(engine) self._target_data = engine.target_data self._data_layout = str(self._target_data) self._mpm_cheap = self._module_pass_manager(loop_vectorize=False, slp_vectorize=False, opt=0, cost="cheap") self._mpm_full = self._module_pass_manager() self._engine.set_object_cache(self._library_class._object_compiled_hook, self._library_class._object_getbuffer_hook) def _create_empty_module(self, name): ir_module = llvmir.Module(cgutils.normalize_ir_text(name)) ir_module.triple = ll.get_process_triple() if self._data_layout: ir_module.data_layout = self._data_layout return ir_module def _module_pass_manager(self, **kwargs): pm = ll.create_module_pass_manager() self._tm.add_analysis_passes(pm) cost = kwargs.pop("cost", None) with self._pass_manager_builder(**kwargs) as pmb: pmb.populate(pm) # If config.OPT==0 do not include these extra passes to help with # vectorization. if cost is not None and cost == "cheap" and config.OPT != 0: # This knocks loops into rotated form early to reduce the likelihood # of vectorization failing due to unknown PHI nodes. pm.add_loop_rotate_pass() # LLVM 11 added LFTR to the IV Simplification pass, this interacted # badly with the existing use of the InstructionCombiner here and # ended up with PHI nodes that prevented vectorization from # working. The desired vectorization effects can be achieved # with this in LLVM 11 (and also < 11) but at a potentially # slightly higher cost: pm.add_licm_pass() pm.add_cfg_simplification_pass() if config.LLVM_REFPRUNE_PASS: pm.add_refprune_pass(_parse_refprune_flags()) return pm def _function_pass_manager(self, llvm_module, **kwargs): pm = ll.create_function_pass_manager(llvm_module) self._tm.add_analysis_passes(pm) with self._pass_manager_builder(**kwargs) as pmb: pmb.populate(pm) if config.LLVM_REFPRUNE_PASS: pm.add_refprune_pass(_parse_refprune_flags()) return pm def _pass_manager_builder(self, **kwargs): """ Create a PassManagerBuilder. Note: a PassManagerBuilder seems good only for one use, so you should call this method each time you want to populate a module or function pass manager. Otherwise some optimizations will be missed... """ opt_level = kwargs.pop('opt', config.OPT) loop_vectorize = kwargs.pop('loop_vectorize', config.LOOP_VECTORIZE) slp_vectorize = kwargs.pop('slp_vectorize', config.SLP_VECTORIZE) pmb = lp.create_pass_manager_builder(opt=opt_level, loop_vectorize=loop_vectorize, slp_vectorize=slp_vectorize, **kwargs) return pmb def _check_llvm_bugs(self): """ Guard against some well-known LLVM bug(s). 
""" # Check the locale bug at https://github.com/numba/numba/issues/1569 # Note we can't cache the result as locale settings can change # across a process's lifetime. Also, for this same reason, # the check here is a mere heuristic (there may be a race condition # between now and actually compiling IR). ir = """ define double @func() { ret double 1.23e+01 } """ mod = ll.parse_assembly(ir) ir_out = str(mod) if "12.3" in ir_out or "1.23" in ir_out: # Everything ok return if "1.0" in ir_out: loc = locale.getlocale() raise RuntimeError( "LLVM will produce incorrect floating-point code " "in the current locale %s.\nPlease read " "https://numba.readthedocs.io/en/stable/user/faq.html#llvm-locale-bug " "for more information." % (loc,)) raise AssertionError("Unexpected IR:\n%s\n" % (ir_out,)) def magic_tuple(self): """ Return a tuple unambiguously describing the codegen behaviour. """ return (self._llvm_module.triple, self._get_host_cpu_name(), self._tm_features) def _scan_and_fix_unresolved_refs(self, module): self._rtlinker.scan_unresolved_symbols(module, self._engine) self._rtlinker.scan_defined_symbols(module) self._rtlinker.resolve(self._engine) def insert_unresolved_ref(self, builder, fnty, name): voidptr = llvmir.IntType(8).as_pointer() ptrname = self._rtlinker.PREFIX + name llvm_mod = builder.module try: fnptr = llvm_mod.get_global(ptrname) except KeyError: # Not defined? fnptr = llvmir.GlobalVariable(llvm_mod, voidptr, name=ptrname) fnptr.linkage = 'external' return builder.bitcast(builder.load(fnptr), fnty.as_pointer()) def _get_host_cpu_name(self): return (ll.get_host_cpu_name() if config.CPU_NAME is None else config.CPU_NAME) def _get_host_cpu_features(self): if config.CPU_FEATURES is not None: return config.CPU_FEATURES return get_host_cpu_features() class AOTCPUCodegen(CPUCodegen): """ A codegen implementation suitable for Ahead-Of-Time compilation (e.g. generation of object files). """ _library_class = AOTCodeLibrary def __init__(self, module_name, cpu_name=None): # By default, use generic cpu model for the arch self._cpu_name = cpu_name or '' CPUCodegen.__init__(self, module_name) def _customize_tm_options(self, options): cpu_name = self._cpu_name if cpu_name == 'host': cpu_name = self._get_host_cpu_name() options['cpu'] = cpu_name options['reloc'] = 'pic' options['codemodel'] = 'default' options['features'] = self._tm_features def _customize_tm_features(self): # ISA features are selected according to the requested CPU model # in _customize_tm_options() return '' def _add_module(self, module): pass class JITCPUCodegen(CPUCodegen): """ A codegen implementation suitable for Just-In-Time compilation. """ _library_class = JITCodeLibrary def _customize_tm_options(self, options): # As long as we don't want to ship the code to another machine, # we can specialize for this CPU. 
options['cpu'] = self._get_host_cpu_name() # LLVM 7 change: # https://reviews.llvm.org/D47211#inline-425406 # JIT needs static relocation on x86* # native target is already initialized from base class __init__ arch = ll.Target.from_default_triple().name if arch.startswith('x86'): # one of x86 or x86_64 reloc_model = 'static' elif arch.startswith('ppc'): reloc_model = 'pic' else: reloc_model = 'default' options['reloc'] = reloc_model options['codemodel'] = 'jitdefault' # Set feature attributes (such as ISA extensions) # This overrides default feature selection by CPU model above options['features'] = self._tm_features # Deal with optional argument to ll.Target.create_target_machine sig = utils.pysignature(ll.Target.create_target_machine) if 'jit' in sig.parameters: # Mark that this is making a JIT engine options['jit'] = True def _customize_tm_features(self): # For JIT target, we will use LLVM to get the feature map return self._get_host_cpu_features() def _add_module(self, module): self._engine.add_module(module) # XXX: disabling remove module due to MCJIT engine leakage in # removeModule. The removeModule causes consistent access # violation with certain test combinations. # # Early bind the engine method to avoid keeping a reference to self. # return functools.partial(self._engine.remove_module, module) def set_env(self, env_name, env): """Set the environment address. Update the GlobalVariable named *env_name* to the address of *env*. """ gvaddr = self._engine.get_global_value_address(env_name) envptr = (ctypes.c_void_p * 1).from_address(gvaddr) envptr[0] = ctypes.c_void_p(id(env)) def initialize_llvm(): """Safe to use multiple times. """ ll.initialize() ll.initialize_native_target() ll.initialize_native_asmprinter() def get_host_cpu_features(): """Get host CPU features using LLVM. The features may be modified due to user setting. See numba.config.ENABLE_AVX. 
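    The result is the flattened feature string understood by LLVM target
    machines, e.g. (illustrative)::

        '+64bit,+adx,+aes,+avx,+avx2,...,-avx512f'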
""" try: features = ll.get_host_cpu_features() except RuntimeError: return '' else: if not config.ENABLE_AVX: # Disable all features with name starting with 'avx' for k in features: if k.startswith('avx'): features[k] = False # Set feature attributes return features.flatten() numba-0.55.1/numba/core/compiler.py000664 000000 000000 00000063261 14174536160 017121 0ustar00rootroot000000 000000 from collections import namedtuple import copy import warnings from numba.core.tracing import event from numba.core import (utils, errors, typing, interpreter, bytecode, postproc, config, callconv, cpu) from numba.parfors.parfor import ParforDiagnostics from numba.core.errors import CompilerError from numba.core.environment import lookup_environment from numba.core.compiler_machinery import PassManager from numba.core.untyped_passes import (ExtractByteCode, TranslateByteCode, FixupArgs, IRProcessing, DeadBranchPrune, RewriteSemanticConstants, InlineClosureLikes, GenericRewrites, WithLifting, InlineInlinables, FindLiterallyCalls, MakeFunctionToJitFunction, CanonicalizeLoopExit, CanonicalizeLoopEntry, LiteralUnroll, ReconstructSSA, LiteralPropagationSubPipelinePass, ) from numba.core.typed_passes import (NopythonTypeInference, AnnotateTypes, NopythonRewrites, PreParforPass, ParforPass, DumpParforDiagnostics, IRLegalization, NoPythonBackend, InlineOverloads, PreLowerStripPhis, NativeLowering, NoPythonSupportedFeatureValidation, ) from numba.core.object_mode_passes import (ObjectModeFrontEnd, ObjectModeBackEnd) from numba.core.targetconfig import TargetConfig, Option, ConfigStack class Flags(TargetConfig): enable_looplift = Option( type=bool, default=False, doc="Enable loop-lifting", ) enable_pyobject = Option( type=bool, default=False, doc="Enable pyobject mode (in general)", ) enable_pyobject_looplift = Option( type=bool, default=False, doc="Enable pyobject mode inside lifted loops", ) enable_ssa = Option( type=bool, default=True, doc="Enable SSA", ) force_pyobject = Option( type=bool, default=False, doc="Force pyobject mode inside the whole function", ) release_gil = Option( type=bool, default=False, doc="Release GIL inside the native function", ) no_compile = Option( type=bool, default=False, doc="TODO", ) debuginfo = Option( type=bool, default=False, doc="TODO", ) boundscheck = Option( type=bool, default=False, doc="TODO", ) forceinline = Option( type=bool, default=False, doc="TODO", ) no_cpython_wrapper = Option( type=bool, default=False, doc="TODO", ) no_cfunc_wrapper = Option( type=bool, default=False, doc="TODO", ) auto_parallel = Option( type=cpu.ParallelOptions, default=cpu.ParallelOptions(False), doc="""Enable automatic parallel optimization, can be fine-tuned by taking a dictionary of sub-options instead of a boolean, see parfor.py for detail""", ) nrt = Option( type=bool, default=False, doc="TODO", ) no_rewrites = Option( type=bool, default=False, doc="TODO", ) error_model = Option( type=str, default="python", doc="TODO", ) fastmath = Option( type=cpu.FastMathOptions, default=cpu.FastMathOptions(False), doc="TODO", ) noalias = Option( type=bool, default=False, doc="TODO", ) inline = Option( type=cpu.InlineOptions, default=cpu.InlineOptions("never"), doc="TODO", ) # Defines a new target option for tracking the "target backend". # This will be the XYZ in @jit(_target=XYZ). 
target_backend = Option( type=str, default="cpu", # if not set, default to CPU doc="backend" ) DEFAULT_FLAGS = Flags() DEFAULT_FLAGS.nrt = True CR_FIELDS = ["typing_context", "target_context", "entry_point", "typing_error", "type_annotation", "signature", "objectmode", "lifted", "fndesc", "library", "call_helper", "environment", "metadata", # List of functions to call to initialize on unserialization # (i.e cache load). "reload_init", "referenced_envs", ] class CompileResult(namedtuple("_CompileResult", CR_FIELDS)): """ A structure holding results from the compilation of a function. """ __slots__ = () def _reduce(self): """ Reduce a CompileResult to picklable components. """ libdata = self.library.serialize_using_object_code() # Make it (un)picklable efficiently typeann = str(self.type_annotation) fndesc = self.fndesc # Those don't need to be pickled and may fail fndesc.typemap = fndesc.calltypes = None # Include all referenced environments referenced_envs = self._find_referenced_environments() return (libdata, self.fndesc, self.environment, self.signature, self.objectmode, self.lifted, typeann, self.reload_init, tuple(referenced_envs)) def _find_referenced_environments(self): """Returns a list of referenced environments """ mod = self.library._final_module # Find environments referenced_envs = [] for gv in mod.global_variables: gvn = gv.name if gvn.startswith("_ZN08NumbaEnv"): env = lookup_environment(gvn) if env is not None: if env.can_cache(): referenced_envs.append(env) return referenced_envs @classmethod def _rebuild(cls, target_context, libdata, fndesc, env, signature, objectmode, lifted, typeann, reload_init, referenced_envs): if reload_init: # Re-run all for fn in reload_init: fn() library = target_context.codegen().unserialize_library(libdata) cfunc = target_context.get_executable(library, fndesc, env) cr = cls(target_context=target_context, typing_context=target_context.typing_context, library=library, environment=env, entry_point=cfunc, fndesc=fndesc, type_annotation=typeann, signature=signature, objectmode=objectmode, lifted=lifted, typing_error=None, call_helper=None, metadata=None, # Do not store, arbitrary & potentially large! reload_init=reload_init, referenced_envs=referenced_envs, ) # Load Environments for env in referenced_envs: library.codegen.set_env(env.env_name, env) return cr def dump(self, tab=''): print(f'{tab}DUMP {type(self).__name__} {self.entry_point}') self.signature.dump(tab=tab + ' ') print(f'{tab}END DUMP') _LowerResult = namedtuple("_LowerResult", [ "fndesc", "call_helper", "cfunc", "env", ]) def compile_result(**kws): keys = set(kws.keys()) fieldset = set(CR_FIELDS) badnames = keys - fieldset if badnames: raise NameError(*badnames) missing = fieldset - keys for k in missing: kws[k] = None # Avoid keeping alive traceback variables err = kws['typing_error'] if err is not None: kws['typing_error'] = err.with_traceback(None) return CompileResult(**kws) def compile_isolated(func, args, return_type=None, flags=DEFAULT_FLAGS, locals={}): """ Compile the function in an isolated environment (typing and target context). Good for testing. 
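    For example (an illustrative sketch)::

        from numba.core import types

        def foo(x):
            return x + 1

        cres = compile_isolated(foo, (types.intp,))
        assert cres.entry_point(41) == 42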
""" from numba.core.registry import cpu_target typingctx = typing.Context() targetctx = cpu.CPUContext(typingctx, target='cpu') # Register the contexts in case for nested @jit or @overload calls with cpu_target.nested_context(typingctx, targetctx): return compile_extra(typingctx, targetctx, func, args, return_type, flags, locals) def run_frontend(func, inline_closures=False, emit_dels=False): """ Run the compiler frontend over the given Python function, and return the function's canonical Numba IR. If inline_closures is Truthy then closure inlining will be run If emit_dels is Truthy the ir.Del nodes will be emitted appropriately """ # XXX make this a dedicated Pipeline? func_id = bytecode.FunctionIdentity.from_function(func) interp = interpreter.Interpreter(func_id) bc = bytecode.ByteCode(func_id=func_id) func_ir = interp.interpret(bc) if inline_closures: from numba.core.inline_closurecall import InlineClosureCallPass inline_pass = InlineClosureCallPass(func_ir, cpu.ParallelOptions(False), {}, False) inline_pass.run() post_proc = postproc.PostProcessor(func_ir) post_proc.run(emit_dels) return func_ir class _CompileStatus(object): """ Describes the state of compilation. Used like a C record. """ __slots__ = ['fail_reason', 'can_fallback'] def __init__(self, can_fallback): self.fail_reason = None self.can_fallback = can_fallback def __repr__(self): vals = [] for k in self.__slots__: vals.append("{k}={v}".format(k=k, v=getattr(self, k))) return ', '.join(vals) class _EarlyPipelineCompletion(Exception): """ Raised to indicate that a pipeline has completed early """ def __init__(self, result): self.result = result class StateDict(dict): """ A dictionary that has an overloaded getattr and setattr to permit getting and setting key/values through the use of attributes. """ def __getattr__(self, attr): try: return self[attr] except KeyError: raise AttributeError(attr) def __setattr__(self, attr, value): self[attr] = value def _make_subtarget(targetctx, flags): """ Make a new target context from the given target context and flags. """ subtargetoptions = {} if flags.debuginfo: subtargetoptions['enable_debuginfo'] = True if flags.boundscheck: subtargetoptions['enable_boundscheck'] = True if flags.nrt: subtargetoptions['enable_nrt'] = True if flags.auto_parallel: subtargetoptions['auto_parallel'] = flags.auto_parallel if flags.fastmath: subtargetoptions['fastmath'] = flags.fastmath error_model = callconv.create_error_model(flags.error_model, targetctx) subtargetoptions['error_model'] = error_model return targetctx.subtarget(**subtargetoptions) class CompilerBase(object): """ Stores and manages states for the compiler """ def __init__(self, typingctx, targetctx, library, args, return_type, flags, locals): # Make sure the environment is reloaded config.reload_config() typingctx.refresh() targetctx.refresh() self.state = StateDict() self.state.typingctx = typingctx self.state.targetctx = _make_subtarget(targetctx, flags) self.state.library = library self.state.args = args self.state.return_type = return_type self.state.flags = flags self.state.locals = locals # Results of various steps of the compilation pipeline self.state.bc = None self.state.func_id = None self.state.func_ir = None self.state.lifted = None self.state.lifted_from = None self.state.typemap = None self.state.calltypes = None self.state.type_annotation = None # holds arbitrary inter-pipeline stage meta data self.state.metadata = {} self.state.reload_init = [] # hold this for e.g. 
with_lifting, null out on exit self.state.pipeline = self # parfor diagnostics info, add to metadata self.state.parfor_diagnostics = ParforDiagnostics() self.state.metadata['parfor_diagnostics'] = \ self.state.parfor_diagnostics self.state.metadata['parfors'] = {} self.state.status = _CompileStatus( can_fallback=self.state.flags.enable_pyobject ) def compile_extra(self, func): self.state.func_id = bytecode.FunctionIdentity.from_function(func) ExtractByteCode().run_pass(self.state) self.state.lifted = () self.state.lifted_from = None return self._compile_bytecode() def compile_ir(self, func_ir, lifted=(), lifted_from=None): self.state.func_id = func_ir.func_id self.state.lifted = lifted self.state.lifted_from = lifted_from self.state.func_ir = func_ir self.state.nargs = self.state.func_ir.arg_count FixupArgs().run_pass(self.state) return self._compile_ir() def define_pipelines(self): """Child classes override this to customize the pipelines in use. """ raise NotImplementedError() def _compile_core(self): """ Populate and run compiler pipeline """ with ConfigStack().enter(self.state.flags.copy()): pms = self.define_pipelines() for pm in pms: pipeline_name = pm.pipeline_name func_name = "%s.%s" % (self.state.func_id.modname, self.state.func_id.func_qualname) event("Pipeline: %s for %s" % (pipeline_name, func_name)) self.state.metadata['pipeline_times'] = {pipeline_name: pm.exec_times} is_final_pipeline = pm == pms[-1] res = None try: pm.run(self.state) if self.state.cr is not None: break except _EarlyPipelineCompletion as e: res = e.result break except Exception as e: if (utils.use_new_style_errors() and not isinstance(e, errors.NumbaError)): raise e self.state.status.fail_reason = e if is_final_pipeline: raise e else: raise CompilerError("All available pipelines exhausted") # Pipeline is done, remove self reference to release refs to user # code self.state.pipeline = None # organise a return if res is not None: # Early pipeline completion return res else: assert self.state.cr is not None return self.state.cr def _compile_bytecode(self): """ Populate and run pipeline for bytecode input """ assert self.state.func_ir is None return self._compile_core() def _compile_ir(self): """ Populate and run pipeline for IR input """ assert self.state.func_ir is not None return self._compile_core() class Compiler(CompilerBase): """The default compiler """ def define_pipelines(self): # this maintains the objmode fallback behaviour pms = [] if not self.state.flags.force_pyobject: pms.append(DefaultPassBuilder.define_nopython_pipeline(self.state)) if self.state.status.can_fallback or self.state.flags.force_pyobject: pms.append( DefaultPassBuilder.define_objectmode_pipeline(self.state) ) return pms class DefaultPassBuilder(object): """ This is the default pass builder, it contains the "classic" default pipelines as pre-canned PassManager instances: - nopython - objectmode - interpreted - typed - untyped - nopython lowering """ @staticmethod def define_nopython_pipeline(state, name='nopython'): """Returns an nopython mode pipeline based PassManager """ # compose pipeline from untyped, typed and lowering parts dpb = DefaultPassBuilder pm = PassManager(name) untyped_passes = dpb.define_untyped_pipeline(state) pm.passes.extend(untyped_passes.passes) typed_passes = dpb.define_typed_pipeline(state) pm.passes.extend(typed_passes.passes) lowering_passes = dpb.define_nopython_lowering_pipeline(state) pm.passes.extend(lowering_passes.passes) pm.finalize() return pm @staticmethod def 
define_nopython_lowering_pipeline(state, name='nopython_lowering'): pm = PassManager(name) # legalise pm.add_pass(NoPythonSupportedFeatureValidation, "ensure features that are in use are in a valid form") pm.add_pass(IRLegalization, "ensure IR is legal prior to lowering") # Annotate only once legalized pm.add_pass(AnnotateTypes, "annotate types") # lower pm.add_pass(NativeLowering, "native lowering") pm.add_pass(NoPythonBackend, "nopython mode backend") pm.add_pass(DumpParforDiagnostics, "dump parfor diagnostics") pm.finalize() return pm @staticmethod def define_typed_pipeline(state, name="typed"): """Returns the typed part of the nopython pipeline""" pm = PassManager(name) # typing pm.add_pass(NopythonTypeInference, "nopython frontend") # strip phis pm.add_pass(PreLowerStripPhis, "remove phis nodes") # optimisation pm.add_pass(InlineOverloads, "inline overloaded functions") if state.flags.auto_parallel.enabled: pm.add_pass(PreParforPass, "Preprocessing for parfors") if not state.flags.no_rewrites: pm.add_pass(NopythonRewrites, "nopython rewrites") if state.flags.auto_parallel.enabled: pm.add_pass(ParforPass, "convert to parfors") pm.finalize() return pm @staticmethod def define_untyped_pipeline(state, name='untyped'): """Returns an untyped part of the nopython pipeline""" pm = PassManager(name) if state.func_ir is None: pm.add_pass(TranslateByteCode, "analyzing bytecode") pm.add_pass(FixupArgs, "fix up args") pm.add_pass(IRProcessing, "processing IR") pm.add_pass(WithLifting, "Handle with contexts") # inline closures early in case they are using nonlocal's # see issue #6585. pm.add_pass(InlineClosureLikes, "inline calls to locally defined closures") # pre typing if not state.flags.no_rewrites: pm.add_pass(RewriteSemanticConstants, "rewrite semantic constants") pm.add_pass(DeadBranchPrune, "dead branch pruning") pm.add_pass(GenericRewrites, "nopython rewrites") # convert any remaining closures into functions pm.add_pass(MakeFunctionToJitFunction, "convert make_function into JIT functions") # inline functions that have been determined as inlinable and rerun # branch pruning, this needs to be run after closures are inlined as # the IR repr of a closure masks call sites if an inlinable is called # inside a closure pm.add_pass(InlineInlinables, "inline inlinable functions") if not state.flags.no_rewrites: pm.add_pass(DeadBranchPrune, "dead branch pruning") pm.add_pass(FindLiterallyCalls, "find literally calls") pm.add_pass(LiteralUnroll, "handles literal_unroll") if state.flags.enable_ssa: pm.add_pass(ReconstructSSA, "ssa") pm.add_pass(LiteralPropagationSubPipelinePass, "Literal propagation") pm.finalize() return pm @staticmethod def define_objectmode_pipeline(state, name='object'): """Returns an object-mode pipeline based PassManager """ pm = PassManager(name) if state.func_ir is None: pm.add_pass(TranslateByteCode, "analyzing bytecode") pm.add_pass(FixupArgs, "fix up args") else: # Reaches here if it's a fallback from nopython mode. # Strip the phi nodes. 
pm.add_pass(PreLowerStripPhis, "remove phis nodes") pm.add_pass(IRProcessing, "processing IR") if utils.PYVERSION >= (3, 7): # The following passes are needed to adjust for looplifting pm.add_pass(CanonicalizeLoopEntry, "canonicalize loop entry") pm.add_pass(CanonicalizeLoopExit, "canonicalize loop exit") pm.add_pass(ObjectModeFrontEnd, "object mode frontend") pm.add_pass(InlineClosureLikes, "inline calls to locally defined closures") # convert any remaining closures into functions pm.add_pass(MakeFunctionToJitFunction, "convert make_function into JIT functions") pm.add_pass(IRLegalization, "ensure IR is legal prior to lowering") pm.add_pass(AnnotateTypes, "annotate types") pm.add_pass(ObjectModeBackEnd, "object mode backend") pm.finalize() return pm def compile_extra(typingctx, targetctx, func, args, return_type, flags, locals, library=None, pipeline_class=Compiler): """Compiler entry point Parameter --------- typingctx : typing context targetctx : target context func : function the python function to be compiled args : tuple, list argument types return_type : Use ``None`` to indicate void return flags : numba.compiler.Flags compiler flags library : numba.codegen.CodeLibrary Used to store the compiled code. If it is ``None``, a new CodeLibrary is used. pipeline_class : type like numba.compiler.CompilerBase compiler pipeline """ pipeline = pipeline_class(typingctx, targetctx, library, args, return_type, flags, locals) return pipeline.compile_extra(func) def compile_ir(typingctx, targetctx, func_ir, args, return_type, flags, locals, lifted=(), lifted_from=None, is_lifted_loop=False, library=None, pipeline_class=Compiler): """ Compile a function with the given IR. For internal use only. """ # This is a special branch that should only run on IR from a lifted loop if is_lifted_loop: # This code is pessimistic and costly, but it is a not often trodden # path and it will go away once IR is made immutable. The problem is # that the rewrite passes can mutate the IR into a state that makes # it possible for invalid tokens to be transmitted to lowering which # then trickle through into LLVM IR and causes RuntimeErrors as LLVM # cannot compile it. As a result the following approach is taken: # 1. Create some new flags that copy the original ones but switch # off rewrites. # 2. Compile with 1. to get a compile result # 3. Try and compile another compile result but this time with the # original flags (and IR being rewritten). # 4. If 3 was successful, use the result, else use 2. # create flags with no rewrites norw_flags = copy.deepcopy(flags) norw_flags.no_rewrites = True def compile_local(the_ir, the_flags): pipeline = pipeline_class(typingctx, targetctx, library, args, return_type, the_flags, locals) return pipeline.compile_ir(func_ir=the_ir, lifted=lifted, lifted_from=lifted_from) # compile with rewrites off, IR shouldn't be mutated irreparably norw_cres = compile_local(func_ir.copy(), norw_flags) # try and compile with rewrites on if no_rewrites was not set in the # original flags, IR might get broken but we've got a CompileResult # that's usable from above. 
rw_cres = None if not flags.no_rewrites: # Suppress warnings in compilation retry with warnings.catch_warnings(): warnings.simplefilter("ignore", errors.NumbaWarning) try: rw_cres = compile_local(func_ir.copy(), flags) except Exception: pass # if the rewrite variant of compilation worked, use it, else use # the norewrites backup if rw_cres is not None: cres = rw_cres else: cres = norw_cres return cres else: pipeline = pipeline_class(typingctx, targetctx, library, args, return_type, flags, locals) return pipeline.compile_ir(func_ir=func_ir, lifted=lifted, lifted_from=lifted_from) def compile_internal(typingctx, targetctx, library, func, args, return_type, flags, locals): """ For internal use only. """ pipeline = Compiler(typingctx, targetctx, library, args, return_type, flags, locals) return pipeline.compile_extra(func) numba-0.55.1/numba/core/compiler_lock.py000664 000000 000000 00000002761 14174536160 020127 0ustar00rootroot000000 000000 import threading import functools import numba.core.event as ev # Lock for the preventing multiple compiler execution class _CompilerLock(object): def __init__(self): self._lock = threading.RLock() def acquire(self): ev.start_event("numba:compiler_lock") self._lock.acquire() def release(self): self._lock.release() ev.end_event("numba:compiler_lock") def __enter__(self): self.acquire() def __exit__(self, exc_val, exc_type, traceback): self.release() def is_locked(self): is_owned = getattr(self._lock, '_is_owned') if not callable(is_owned): is_owned = self._is_owned return is_owned() def __call__(self, func): @functools.wraps(func) def _acquire_compile_lock(*args, **kwargs): with self: return func(*args, **kwargs) return _acquire_compile_lock def _is_owned(self): # This method is borrowed from threading.Condition. # Return True if lock is owned by current_thread. # This method is called only if _lock doesn't have _is_owned(). if self._lock.acquire(0): self._lock.release() return False else: return True global_compiler_lock = _CompilerLock() def require_global_compiler_lock(): """Sentry that checks the global_compiler_lock is acquired. """ # Use assert to allow turning off this check assert global_compiler_lock.is_locked() numba-0.55.1/numba/core/compiler_machinery.py000664 000000 000000 00000033035 14174536160 021154 0ustar00rootroot000000 000000 import timeit from abc import abstractmethod, ABCMeta from collections import namedtuple, OrderedDict import inspect from numba.core.compiler_lock import global_compiler_lock from numba.core import errors, config, transforms, utils from numba.core.tracing import event from numba.core.postproc import PostProcessor from numba.core.ir_utils import enforce_no_dels, legalize_single_scope # terminal color markup _termcolor = errors.termcolor() class SimpleTimer(object): """ A simple context managed timer """ def __enter__(self): self.ts = timeit.default_timer() return self def __exit__(self, *exc): self.elapsed = timeit.default_timer() - self.ts class CompilerPass(metaclass=ABCMeta): """ The base class for all compiler passes. 
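    A minimal concrete pass looks something like this (an illustrative
    sketch; ``MyPass`` is a placeholder)::

        @register_pass(mutates_CFG=False, analysis_only=False)
        class MyPass(FunctionPass):
            _name = "my_pass"

            def __init__(self):
                FunctionPass.__init__(self)

            def run_pass(self, state):
                # mutate state.func_ir here as needed
                return False  # False => no IR mutation took place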
""" @abstractmethod def __init__(self, *args, **kwargs): self._analysis = None self._pass_id = None @classmethod def name(cls): """ Returns the name of the pass """ return cls._name @property def pass_id(self): """ The ID of the pass """ return self._pass_id @pass_id.setter def pass_id(self, val): """ Sets the ID of the pass """ self._pass_id = val @property def analysis(self): """ Analysis data for the pass """ return self._analysis @analysis.setter def analysis(self, val): """ Set the analysis data for the pass """ self._analysis = val def run_initialization(self, *args, **kwargs): """ Runs the initialization sequence for the pass, will run before `run_pass`. """ return False @abstractmethod def run_pass(self, *args, **kwargs): """ Runs the pass itself. Must return True/False depending on whether statement level modification took place. """ pass def run_finalizer(self, *args, **kwargs): """ Runs the initialization sequence for the pass, will run before `run_pass`. """ return False def get_analysis_usage(self, AU): """ Override to set analysis usage """ pass def get_analysis(self, pass_name): """ Gets the analysis from a given pass """ return self._analysis[pass_name] class SSACompliantMixin(object): """ Mixin to indicate a pass is SSA form compliant. Nothing is asserted about this condition at present. """ pass class FunctionPass(CompilerPass): """ Base class for function passes """ pass class AnalysisPass(CompilerPass): """ Base class for analysis passes (no modification made to state) """ pass class LoweringPass(CompilerPass): """ Base class for lowering passes """ pass class AnalysisUsage(object): """This looks and behaves like LLVM's AnalysisUsage because its like that. """ def __init__(self): self._required = set() self._preserved = set() def get_required_set(self): return self._required def get_preserved_set(self): return self._preserved def add_required(self, pss): self._required.add(pss) def add_preserved(self, pss): self._preserved.add(pss) def __str__(self): return "required: %s\n" % self._required _DEBUG = False def debug_print(*args, **kwargs): if _DEBUG: print(*args, **kwargs) pass_timings = namedtuple('pass_timings', 'init run finalize') class PassManager(object): """ The PassManager is a named instance of a particular compilation pipeline """ # TODO: Eventually enable this, it enforces self consistency after each pass _ENFORCING = False def __init__(self, pipeline_name): """ Create a new pipeline with name "pipeline_name" """ self.passes = [] self.exec_times = OrderedDict() self._finalized = False self._analysis = None self._print_after = None self.pipeline_name = pipeline_name def _validate_pass(self, pass_cls): if (not (isinstance(pass_cls, str) or (inspect.isclass(pass_cls) and issubclass(pass_cls, CompilerPass)))): msg = ("Pass must be referenced by name or be a subclass of a " "CompilerPass. Have %s" % pass_cls) raise TypeError(msg) if isinstance(pass_cls, str): pass_cls = _pass_registry.find_by_name(pass_cls) else: if not _pass_registry.is_registered(pass_cls): raise ValueError("Pass %s is not registered" % pass_cls) def add_pass(self, pss, description=""): """ Append a pass to the PassManager's compilation pipeline """ self._validate_pass(pss) func_desc_tuple = (pss, description) self.passes.append(func_desc_tuple) self._finalized = False def add_pass_after(self, pass_cls, location): """ Add a pass `pass_cls` to the PassManager's compilation pipeline after the pass `location`. 
""" assert self.passes self._validate_pass(pass_cls) self._validate_pass(location) for idx, (x, _) in enumerate(self.passes): if x == location: break else: raise ValueError("Could not find pass %s" % location) self.passes.insert(idx + 1, (pass_cls, str(pass_cls))) # if a pass has been added, it's not finalized self._finalized = False def _debug_init(self): # determine after which passes IR dumps should take place def parse(conf_item): print_passes = [] if conf_item != "none": if conf_item == "all": print_passes = [x.name() for (x, _) in self.passes] else: # we don't validate whether the named passes exist in this # pipeline the compiler may be used reentrantly and # different pipelines may contain different passes splitted = conf_item.split(',') print_passes = [x.strip() for x in splitted] return print_passes ret = (parse(config.DEBUG_PRINT_AFTER), parse(config.DEBUG_PRINT_BEFORE), parse(config.DEBUG_PRINT_WRAP),) return ret def finalize(self): """ Finalize the PassManager, after which no more passes may be added without re-finalization. """ self._analysis = self.dependency_analysis() self._print_after, self._print_before, self._print_wrap = \ self._debug_init() self._finalized = True @property def finalized(self): return self._finalized def _patch_error(self, desc, exc): """ Patches the error to show the stage that it arose in. """ newmsg = "{desc}\n{exc}".format(desc=desc, exc=exc) exc.args = (newmsg,) return exc @global_compiler_lock # this need a lock, likely calls LLVM def _runPass(self, index, pss, internal_state): mutated = False def check(func, compiler_state): mangled = func(compiler_state) if mangled not in (True, False): msg = ("CompilerPass implementations should return True/False. " "CompilerPass with name '%s' did not.") raise ValueError(msg % pss.name()) return mangled def debug_print(pass_name, print_condition, printable_condition): if pass_name in print_condition: fid = internal_state.func_id args = (fid.modname, fid.func_qualname, self.pipeline_name, printable_condition, pass_name) print(("%s.%s: %s: %s %s" % args).center(120, '-')) if internal_state.func_ir is not None: internal_state.func_ir.dump() else: print("func_ir is None") # debug print before this pass? debug_print(pss.name(), self._print_before + self._print_wrap, "BEFORE") # wire in the analysis info so it's accessible pss.analysis = self._analysis with SimpleTimer() as init_time: mutated |= check(pss.run_initialization, internal_state) with SimpleTimer() as pass_time: mutated |= check(pss.run_pass, internal_state) with SimpleTimer() as finalize_time: mutated |= check(pss.run_finalizer, internal_state) # Check that if the pass is an instance of a FunctionPass that it hasn't # emitted ir.Dels. if isinstance(pss, FunctionPass): enforce_no_dels(internal_state.func_ir) if self._ENFORCING: # TODO: Add in self consistency enforcement for # `func_ir._definitions` etc if _pass_registry.get(pss.__class__).mutates_CFG: if mutated: # block level changes, rebuild all PostProcessor(internal_state.func_ir).run() else: # CFG level changes rebuild CFG internal_state.func_ir.blocks = transforms.canonicalize_cfg( internal_state.func_ir.blocks) # Check the func_ir has exactly one Scope instance if not legalize_single_scope(internal_state.func_ir.blocks): raise errors.CompilerError( f"multiple scope in func_ir detected in {pss}", ) # inject runtimes pt = pass_timings(init_time.elapsed, pass_time.elapsed, finalize_time.elapsed) self.exec_times["%s_%s" % (index, pss.name())] = pt # debug print after this pass? 
debug_print(pss.name(), self._print_after + self._print_wrap, "AFTER") def run(self, state): """ Run the defined pipelines on the state. """ from numba.core.compiler import _EarlyPipelineCompletion if not self.finalized: raise RuntimeError("Cannot run non-finalised pipeline") # walk the passes and run them for idx, (pss, pass_desc) in enumerate(self.passes): try: event("-- %s" % pass_desc) pass_inst = _pass_registry.get(pss).pass_inst if isinstance(pass_inst, CompilerPass): self._runPass(idx, pass_inst, state) else: raise BaseException("Legacy pass in use") except _EarlyPipelineCompletion as e: raise e except Exception as e: if (utils.use_new_style_errors() and not isinstance(e, errors.NumbaError)): raise e msg = "Failed in %s mode pipeline (step: %s)" % \ (self.pipeline_name, pass_desc) patched_exception = self._patch_error(msg, e) raise patched_exception def dependency_analysis(self): """ Computes dependency analysis """ deps = dict() for (pss, _) in self.passes: x = _pass_registry.get(pss).pass_inst au = AnalysisUsage() x.get_analysis_usage(au) deps[type(x)] = au requires_map = dict() for k, v in deps.items(): requires_map[k] = v.get_required_set() def resolve_requires(key, rmap): def walk(lkey, rmap): dep_set = rmap[lkey] if lkey in rmap else set() if dep_set: for x in dep_set: dep_set |= (walk(x, rmap)) return dep_set else: return set() ret = set() for k in key: ret |= walk(k, rmap) return ret dep_chain = dict() for k, v in requires_map.items(): dep_chain[k] = set(v) | (resolve_requires(v, requires_map)) return dep_chain pass_info = namedtuple('pass_info', 'pass_inst mutates_CFG analysis_only') class PassRegistry(object): """ Pass registry singleton class. """ _id = 0 _registry = dict() def register(self, mutates_CFG, analysis_only): def make_festive(pass_class): assert not self.is_registered(pass_class) assert not self._does_pass_name_alias(pass_class.name()) pass_class.pass_id = self._id self._id += 1 self._registry[pass_class] = pass_info(pass_class(), mutates_CFG, analysis_only) return pass_class return make_festive def is_registered(self, clazz): return clazz in self._registry.keys() def get(self, clazz): assert self.is_registered(clazz) return self._registry[clazz] def _does_pass_name_alias(self, check): for k, v in self._registry.items(): if v.pass_inst.name == check: return True return False def find_by_name(self, class_name): assert isinstance(class_name, str) for k, v in self._registry.items(): if v.pass_inst.name == class_name: return v else: raise ValueError("No pass with name %s is registered" % class_name) def dump(self): for k, v in self._registry.items(): print("%s: %s" % (k, v)) _pass_registry = PassRegistry() del PassRegistry """ register_pass is used to register a compiler pass class for use with PassManager instances. 
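
A minimal sketch of the expected protocol (the pass below is illustrative
and not part of numba)::

    @register_pass(mutates_CFG=False, analysis_only=True)
    class DumpBlockCount(AnalysisPass):
        _name = "dump_block_count"

        def __init__(self):
            AnalysisPass.__init__(self)

        def run_pass(self, state):
            # purely observational: inspect the IR, report no mutation
            print(len(state.func_ir.blocks))
            return False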
""" register_pass = _pass_registry.register numba-0.55.1/numba/core/config.py000664 000000 000000 00000045316 14174536160 016555 0ustar00rootroot000000 000000 import platform import sys import os import re import warnings # YAML needed to use file based Numba config try: import yaml _HAVE_YAML = True except ImportError: _HAVE_YAML = False import llvmlite.binding as ll IS_WIN32 = sys.platform.startswith('win32') IS_OSX = sys.platform.startswith('darwin') MACHINE_BITS = tuple.__itemsize__ * 8 IS_32BITS = MACHINE_BITS == 32 # Python version in (major, minor) tuple PYVERSION = sys.version_info[:2] # this is the name of the user supplied configuration file _config_fname = '.numba_config.yaml' def _parse_cc(text): """ Parse CUDA compute capability version string. """ if not text: return None else: m = re.match(r'(\d+)\.(\d+)', text) if not m: raise ValueError("Compute capability must be specified as a " "string of \"major.minor\" where major " "and minor are decimals") grp = m.groups() return int(grp[0]), int(grp[1]) def _os_supports_avx(): """ Whether the current OS supports AVX, regardless of the CPU. This is necessary because the user may be running a very old Linux kernel (e.g. CentOS 5) on a recent CPU. """ if (not sys.platform.startswith('linux') or platform.machine() not in ('i386', 'i586', 'i686', 'x86_64')): return True # Executing the CPUID instruction may report AVX available even though # the kernel doesn't support it, so parse /proc/cpuinfo instead. try: f = open('/proc/cpuinfo', 'r') except OSError: # If /proc isn't available, assume yes return True with f: for line in f: head, _, body = line.partition(':') if head.strip() == 'flags' and 'avx' in body.split(): return True else: return False # Choose how to handle captured errors def _validate_captured_errors_style(style_str): rendered_style = str(style_str) if rendered_style not in ('new_style', 'old_style'): msg = ("Invalid style in NUMBA_CAPTURED_ERRORS: " f"{rendered_style}") raise ValueError(msg) else: return rendered_style class _EnvReloader(object): def __init__(self): self.reset() def reset(self): self.old_environ = {} self.update(force=True) def update(self, force=False): new_environ = {} # first check if there's a .numba_config.yaml and use values from that if os.path.exists(_config_fname) and os.path.isfile(_config_fname): if not _HAVE_YAML: msg = ("A Numba config file is found but YAML parsing " "capabilities appear to be missing. " "To use this feature please install `pyyaml`. e.g. " "`conda install pyyaml`.") warnings.warn(msg) else: with open(_config_fname, 'rt') as f: y_conf = yaml.safe_load(f) if y_conf is not None: for k, v in y_conf.items(): new_environ['NUMBA_' + k.upper()] = v # clobber file based config with any locally defined env vars for name, value in os.environ.items(): if name.startswith('NUMBA_'): new_environ[name] = value # We update the config variables if at least one NUMBA environment # variable was modified. This lets the user modify values # directly in the config module without having them when # reload_config() is called by the compiler. 
if force or self.old_environ != new_environ: self.process_environ(new_environ) # Store a copy self.old_environ = dict(new_environ) self.validate() def validate(self): global CUDA_USE_NVIDIA_BINDING if CUDA_USE_NVIDIA_BINDING: # noqa: F821 try: import cuda # noqa: F401 except ImportError as ie: msg = ("CUDA Python bindings requested (the environment " "variable NUMBA_CUDA_USE_NVIDIA_BINDING is set), " f"but they are not importable: {ie.msg}.") warnings.warn(msg) CUDA_USE_NVIDIA_BINDING = False if CUDA_PER_THREAD_DEFAULT_STREAM: # noqa: F821 warnings.warn("PTDS support is handled by CUDA Python when " "using the NVIDIA binding. Please set the " "environment variable " "CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM to 1 " "instead.") def process_environ(self, environ): def _readenv(name, ctor, default): value = environ.get(name) if value is None: return default() if callable(default) else default try: return ctor(value) except Exception: warnings.warn("environ %s defined but failed to parse '%s'" % (name, value), RuntimeWarning) return default def optional_str(x): return str(x) if x is not None else None # developer mode produces full tracebacks, disables help instructions DEVELOPER_MODE = _readenv("NUMBA_DEVELOPER_MODE", int, 0) # disable performance warnings, will switch of the generation of # warnings of the class NumbaPerformanceWarning DISABLE_PERFORMANCE_WARNINGS = _readenv( "NUMBA_DISABLE_PERFORMANCE_WARNINGS", int, 0) # Flag to enable full exception reporting FULL_TRACEBACKS = _readenv( "NUMBA_FULL_TRACEBACKS", int, DEVELOPER_MODE) # Show help text when an error occurs SHOW_HELP = _readenv("NUMBA_SHOW_HELP", int, 0) # The color scheme to use for error messages, default is no color # just bold fonts in use. COLOR_SCHEME = _readenv("NUMBA_COLOR_SCHEME", str, "no_color") # Whether to globally enable bounds checking. The default None means # to use the value of the flag to @njit. 0 or 1 overrides the flag # globally. BOUNDSCHECK = _readenv("NUMBA_BOUNDSCHECK", int, None) # Whether to always warn about potential uninitialized variables # because static controlflow analysis cannot find a definition # in one or more of the incoming paths. ALWAYS_WARN_UNINIT_VAR = _readenv( "NUMBA_ALWAYS_WARN_UNINIT_VAR", int, 0, ) # Whether to warn about kernel launches where the grid size will # under utilize the GPU due to low occupancy. On by default. 
CUDA_LOW_OCCUPANCY_WARNINGS = _readenv( "NUMBA_CUDA_LOW_OCCUPANCY_WARNINGS", int, 1) # Whether to use the official CUDA Python API Bindings CUDA_USE_NVIDIA_BINDING = _readenv( "NUMBA_CUDA_USE_NVIDIA_BINDING", int, 0) # Debug flag to control compiler debug print DEBUG = _readenv("NUMBA_DEBUG", int, 0) # DEBUG print IR after pass names DEBUG_PRINT_AFTER = _readenv("NUMBA_DEBUG_PRINT_AFTER", str, "none") # DEBUG print IR before pass names DEBUG_PRINT_BEFORE = _readenv("NUMBA_DEBUG_PRINT_BEFORE", str, "none") # DEBUG print IR before and after pass names DEBUG_PRINT_WRAP = _readenv("NUMBA_DEBUG_PRINT_WRAP", str, "none") # Highlighting in intermediate dumps HIGHLIGHT_DUMPS = _readenv("NUMBA_HIGHLIGHT_DUMPS", int, 0) # JIT Debug flag to trigger IR instruction print DEBUG_JIT = _readenv("NUMBA_DEBUG_JIT", int, 0) # Enable debugging of front-end operation # (up to and including IR generation) DEBUG_FRONTEND = _readenv("NUMBA_DEBUG_FRONTEND", int, 0) # Enable debug prints in nrtdynmod DEBUG_NRT = _readenv("NUMBA_DEBUG_NRT", int, 0) # How many recently deserialized functions to retain regardless # of external references FUNCTION_CACHE_SIZE = _readenv("NUMBA_FUNCTION_CACHE_SIZE", int, 128) # Maximum tuple size that parfors will unpack and pass to # internal gufunc. PARFOR_MAX_TUPLE_SIZE = _readenv("NUMBA_PARFOR_MAX_TUPLE_SIZE", int, 100) # Enable logging of cache operation DEBUG_CACHE = _readenv("NUMBA_DEBUG_CACHE", int, DEBUG) # Redirect cache directory # Contains path to the directory CACHE_DIR = _readenv("NUMBA_CACHE_DIR", str, "") # Enable tracing support TRACE = _readenv("NUMBA_TRACE", int, 0) # Enable debugging of type inference DEBUG_TYPEINFER = _readenv("NUMBA_DEBUG_TYPEINFER", int, 0) # Configure compilation target to use the specified CPU name # and CPU feature as the host information. # Note: this overrides "host" option for AOT compilation. 
CPU_NAME = _readenv("NUMBA_CPU_NAME", optional_str, None) CPU_FEATURES = _readenv("NUMBA_CPU_FEATURES", optional_str, ("" if str(CPU_NAME).lower() == 'generic' else None)) # Optimization level OPT = _readenv("NUMBA_OPT", int, 3) # Force dump of Python bytecode DUMP_BYTECODE = _readenv("NUMBA_DUMP_BYTECODE", int, DEBUG_FRONTEND) # Force dump of control flow graph DUMP_CFG = _readenv("NUMBA_DUMP_CFG", int, DEBUG_FRONTEND) # Force dump of Numba IR DUMP_IR = _readenv("NUMBA_DUMP_IR", int, DEBUG_FRONTEND) # Force dump of Numba IR in SSA form DUMP_SSA = _readenv("NUMBA_DUMP_SSA", int, DEBUG_FRONTEND or DEBUG_TYPEINFER) # print debug info of analysis and optimization on array operations DEBUG_ARRAY_OPT = _readenv("NUMBA_DEBUG_ARRAY_OPT", int, 0) # insert debug stmts to print information at runtime DEBUG_ARRAY_OPT_RUNTIME = _readenv( "NUMBA_DEBUG_ARRAY_OPT_RUNTIME", int, 0) # print stats about parallel for-loops DEBUG_ARRAY_OPT_STATS = _readenv("NUMBA_DEBUG_ARRAY_OPT_STATS", int, 0) # prints user friendly information about parallel PARALLEL_DIAGNOSTICS = _readenv("NUMBA_PARALLEL_DIAGNOSTICS", int, 0) # print debug info of inline closure pass DEBUG_INLINE_CLOSURE = _readenv("NUMBA_DEBUG_INLINE_CLOSURE", int, 0) # Force dump of LLVM IR DUMP_LLVM = _readenv("NUMBA_DUMP_LLVM", int, DEBUG) # Force dump of Function optimized LLVM IR DUMP_FUNC_OPT = _readenv("NUMBA_DUMP_FUNC_OPT", int, DEBUG) # Force dump of Optimized LLVM IR DUMP_OPTIMIZED = _readenv("NUMBA_DUMP_OPTIMIZED", int, DEBUG) # Force disable loop vectorize # Loop vectorizer is disabled on 32-bit win32 due to a bug (#649) LOOP_VECTORIZE = _readenv("NUMBA_LOOP_VECTORIZE", int, not (IS_WIN32 and IS_32BITS)) # Switch on superword-level parallelism vectorization, default is on. SLP_VECTORIZE = _readenv("NUMBA_SLP_VECTORIZE", int, 1) # Force dump of generated assembly DUMP_ASSEMBLY = _readenv("NUMBA_DUMP_ASSEMBLY", int, DEBUG) # Force dump of type annotation ANNOTATE = _readenv("NUMBA_DUMP_ANNOTATION", int, 0) # Dump IR in such as way as to aid in "diff"ing. DIFF_IR = _readenv("NUMBA_DIFF_IR", int, 0) # Dump type annotation in html format def fmt_html_path(path): if path is None: return path else: return os.path.abspath(path) HTML = _readenv("NUMBA_DUMP_HTML", fmt_html_path, None) # x86-64 specific # Enable AVX on supported platforms where it won't degrade performance. def avx_default(): if not _os_supports_avx(): return False else: # There are various performance issues with AVX and LLVM # on some CPUs (list at # http://llvm.org/bugs/buglist.cgi?quicksearch=avx). # For now we'd rather disable it, since it can pessimize code cpu_name = ll.get_host_cpu_name() return cpu_name not in ('corei7-avx', 'core-avx-i', 'sandybridge', 'ivybridge') ENABLE_AVX = _readenv("NUMBA_ENABLE_AVX", int, avx_default) # if set and SVML is available, it will be disabled # By default, it's disabled on 32-bit platforms. DISABLE_INTEL_SVML = _readenv( "NUMBA_DISABLE_INTEL_SVML", int, IS_32BITS) # Disable jit for debugging DISABLE_JIT = _readenv("NUMBA_DISABLE_JIT", int, 0) # choose parallel backend to use THREADING_LAYER_PRIORITY = _readenv( "NUMBA_THREADING_LAYER_PRIORITY", lambda string: string.split(), ['tbb', 'omp', 'workqueue'], ) THREADING_LAYER = _readenv("NUMBA_THREADING_LAYER", str, 'default') CAPTURED_ERRORS = _readenv("NUMBA_CAPTURED_ERRORS", _validate_captured_errors_style, 'old_style') # CUDA Configs # Whether to warn about kernel launches where a host array # is used as a parameter, forcing a copy to and from the device. # On by default. 
CUDA_WARN_ON_IMPLICIT_COPY = _readenv( "NUMBA_CUDA_WARN_ON_IMPLICIT_COPY", int, 1) # Force CUDA compute capability to a specific version FORCE_CUDA_CC = _readenv("NUMBA_FORCE_CUDA_CC", _parse_cc, None) # The default compute capability to target when compiling to PTX. CUDA_DEFAULT_PTX_CC = _readenv("NUMBA_CUDA_DEFAULT_PTX_CC", _parse_cc, (5, 2)) # Disable CUDA support DISABLE_CUDA = _readenv("NUMBA_DISABLE_CUDA", int, int(MACHINE_BITS == 32)) # Enable CUDA simulator ENABLE_CUDASIM = _readenv("NUMBA_ENABLE_CUDASIM", int, 0) # CUDA logging level # Any level name from the *logging* module. Case insensitive. # Defaults to CRITICAL if not set or invalid. # Note: This setting only applies when logging is not configured. # Any existing logging configuration is preserved. CUDA_LOG_LEVEL = _readenv("NUMBA_CUDA_LOG_LEVEL", str, '') # Include argument values in the CUDA Driver API logs CUDA_LOG_API_ARGS = _readenv("NUMBA_CUDA_LOG_API_ARGS", int, 0) # Maximum number of pending CUDA deallocations (default: 10) CUDA_DEALLOCS_COUNT = _readenv("NUMBA_CUDA_MAX_PENDING_DEALLOCS_COUNT", int, 10) # Maximum ratio of pending CUDA deallocations to capacity (default: 0.2) CUDA_DEALLOCS_RATIO = _readenv("NUMBA_CUDA_MAX_PENDING_DEALLOCS_RATIO", float, 0.2) CUDA_ARRAY_INTERFACE_SYNC = _readenv("NUMBA_CUDA_ARRAY_INTERFACE_SYNC", int, 1) # Path of the directory that the CUDA driver libraries are located CUDA_DRIVER = _readenv("NUMBA_CUDA_DRIVER", str, '') # Buffer size for logs produced by CUDA driver operations (e.g. # linking) CUDA_LOG_SIZE = _readenv("NUMBA_CUDA_LOG_SIZE", int, 1024) # Whether to generate verbose log messages when JIT linking CUDA_VERBOSE_JIT_LOG = _readenv("NUMBA_CUDA_VERBOSE_JIT_LOG", int, 1) # Whether the default stream is the per-thread default stream CUDA_PER_THREAD_DEFAULT_STREAM = _readenv( "NUMBA_CUDA_PER_THREAD_DEFAULT_STREAM", int, 0) # The default number of threads to use. def num_threads_default(): try: sched_getaffinity = os.sched_getaffinity except AttributeError: pass else: return max(1, len(sched_getaffinity(0))) cpu_count = os.cpu_count() if cpu_count is not None: return max(1, cpu_count) return 1 NUMBA_DEFAULT_NUM_THREADS = num_threads_default() # Numba thread pool size (defaults to number of CPUs on the system). _NUMBA_NUM_THREADS = _readenv("NUMBA_NUM_THREADS", int, NUMBA_DEFAULT_NUM_THREADS) if ('NUMBA_NUM_THREADS' in globals() and globals()['NUMBA_NUM_THREADS'] != _NUMBA_NUM_THREADS): from numba.np.ufunc import parallel if parallel._is_initialized: raise RuntimeError("Cannot set NUMBA_NUM_THREADS to a " "different value once the threads have been " "launched (currently have %s, " "trying to set %s)" % (_NUMBA_NUM_THREADS, globals()['NUMBA_NUM_THREADS'])) NUMBA_NUM_THREADS = _NUMBA_NUM_THREADS del _NUMBA_NUM_THREADS # Profiling support # Indicates if a profiler detected. 
Only VTune can be detected for now RUNNING_UNDER_PROFILER = 'VS_PROFILER' in os.environ # Enables jit events in LLVM to support profiling of dynamic code ENABLE_PROFILING = _readenv( "NUMBA_ENABLE_PROFILING", int, int(RUNNING_UNDER_PROFILER)) # Debug Info # The default value for the `debug` flag DEBUGINFO_DEFAULT = _readenv("NUMBA_DEBUGINFO", int, ENABLE_PROFILING) CUDA_DEBUGINFO_DEFAULT = _readenv("NUMBA_CUDA_DEBUGINFO", int, 0) EXTEND_VARIABLE_LIFETIMES = _readenv("NUMBA_EXTEND_VARIABLE_LIFETIMES", int, 0) # gdb binary location GDB_BINARY = _readenv("NUMBA_GDB_BINARY", str, '/usr/bin/gdb') # CUDA Memory management CUDA_MEMORY_MANAGER = _readenv("NUMBA_CUDA_MEMORY_MANAGER", str, 'default') # Experimental refprune pass LLVM_REFPRUNE_PASS = _readenv( "NUMBA_LLVM_REFPRUNE_PASS", int, 1, ) LLVM_REFPRUNE_FLAGS = _readenv( "NUMBA_LLVM_REFPRUNE_FLAGS", str, "all" if LLVM_REFPRUNE_PASS else "", ) # Timing support. # LLVM_PASS_TIMINGS enables LLVM recording of pass timings. LLVM_PASS_TIMINGS = _readenv( "NUMBA_LLVM_PASS_TIMINGS", int, 0, ) # Inject the configuration values into the module globals for name, value in locals().copy().items(): if name.isupper(): globals()[name] = value _env_reloader = _EnvReloader() def reload_config(): """ Reload the configuration from environment variables, if necessary. """ _env_reloader.update() numba-0.55.1/numba/core/consts.py000664 000000 000000 00000010625 14174536160 016614 0ustar00rootroot000000 000000 from types import ModuleType import weakref from numba.core.errors import ConstantInferenceError, NumbaError from numba.core import ir class ConstantInference(object): """ A constant inference engine for a given interpreter. Inference inspects the IR to try and compute a compile-time constant for a variable. This shouldn't be used directly, instead call Interpreter.infer_constant(). """ def __init__(self, func_ir): # Avoid cyclic references as some user-visible objects may be # held alive in the cache self._func_ir = weakref.proxy(func_ir) self._cache = {} def infer_constant(self, name, loc=None): """ Infer a constant value for the given variable *name*. If no value can be inferred, numba.errors.ConstantInferenceError is raised. """ if name not in self._cache: try: self._cache[name] = (True, self._do_infer(name)) except ConstantInferenceError as exc: # Store the exception args only, to avoid keeping # a whole traceback alive. self._cache[name] = (False, (exc.__class__, exc.args)) success, val = self._cache[name] if success: return val else: exc, args = val if issubclass(exc, NumbaError): raise exc(*args, loc=loc) else: raise exc(*args) def _fail(self, val): # The location here is set to None because `val` is the ir.Var name # and not the actual offending use of the var. When this is raised it is # caught in the flow control of `infer_constant` and the class and args # (the message) are captured and then raised again but with the location # set to the expression that caused the constant inference error. 
raise ConstantInferenceError( "Constant inference not possible for: %s" % (val,), loc=None) def _do_infer(self, name): if not isinstance(name, str): raise TypeError("infer_constant() called with non-str %r" % (name,)) try: defn = self._func_ir.get_definition(name) except KeyError: raise ConstantInferenceError( "no single definition for %r" % (name,)) try: const = defn.infer_constant() except ConstantInferenceError: if isinstance(defn, ir.Expr): return self._infer_expr(defn) self._fail(defn) return const def _infer_expr(self, expr): # Infer an expression: handle supported cases if expr.op == 'call': func = self.infer_constant(expr.func.name, loc=expr.loc) return self._infer_call(func, expr) elif expr.op == 'getattr': value = self.infer_constant(expr.value.name, loc=expr.loc) return self._infer_getattr(value, expr) elif expr.op == 'build_list': return [self.infer_constant(i.name, loc=expr.loc) for i in expr.items] elif expr.op == 'build_tuple': return tuple(self.infer_constant(i.name, loc=expr.loc) for i in expr.items) self._fail(expr) def _infer_call(self, func, expr): if expr.kws or expr.vararg: self._fail(expr) # Check supported callables _slice = func in (slice,) _exc = isinstance(func, type) and issubclass(func, BaseException) if _slice or _exc: args = [self.infer_constant(a.name, loc=expr.loc) for a in expr.args] if _slice: return func(*args) elif _exc: # If the exception class is user defined it may implement a ctor # that does not pass the args to the super. Therefore return the # raw class and the args so this can be instantiated at the call # site in the way the user source expects it to be. return func, args else: assert 0, 'Unreachable' self._fail(expr) def _infer_getattr(self, value, expr): if isinstance(value, (ModuleType, type)): # Allow looking up a constant on a class or module try: return getattr(value, expr.attr) except AttributeError: pass self._fail(expr) numba-0.55.1/numba/core/controlflow.py000664 000000 000000 00000073561 14174536160 017663 0ustar00rootroot000000 000000 import collections import functools import sys from numba.core import utils from numba.core.ir import Loc from numba.core.errors import UnsupportedError # List of bytecodes creating a new block in the control flow graph # (in addition to explicit jump labels). NEW_BLOCKERS = frozenset(['SETUP_LOOP', 'FOR_ITER', 'SETUP_WITH']) class CFBlock(object): def __init__(self, offset): self.offset = offset self.body = [] # A map of jumps to outgoing blocks (successors): # { offset of outgoing block -> number of stack pops } self.outgoing_jumps = {} # A map of jumps to incoming blocks (predecessors): # { offset of incoming block -> number of stack pops } self.incoming_jumps = {} self.terminating = False def __repr__(self): args = (self.offset, sorted(self.outgoing_jumps), sorted(self.incoming_jumps)) return "block(offset:%d, outgoing: %s, incoming: %s)" % args def __iter__(self): return iter(self.body) class Loop(collections.namedtuple("Loop", ("entries", "exits", "header", "body"))): """ A control flow loop, as detected by a CFGraph object. """ __slots__ = () # The loop header is enough to detect that two loops are really # the same, assuming they belong to the same graph. 
# (note: in practice, only one loop instance is created per graph # loop, so identity would be fine) def __eq__(self, other): return isinstance(other, Loop) and other.header == self.header def __hash__(self): return hash(self.header) class _DictOfContainers(collections.defaultdict): """A defaultdict with customized equality checks that ignore empty values. Non-empty value is checked by: `bool(value_item) == True`. """ def __eq__(self, other): if isinstance(other, _DictOfContainers): mine = self._non_empty_items() theirs = other._non_empty_items() return mine == theirs return NotImplemented def __ne__(self, other): ret = self.__eq__(other) if ret is NotImplemented: return ret else: return not ret def _non_empty_items(self): return [(k, vs) for k, vs in sorted(self.items()) if vs] class CFGraph(object): """ Generic (almost) implementation of a Control Flow Graph. """ def __init__(self): self._nodes = set() self._preds = _DictOfContainers(set) self._succs = _DictOfContainers(set) self._edge_data = {} self._entry_point = None def add_node(self, node): """ Add *node* to the graph. This is necessary before adding any edges from/to the node. *node* can be any hashable object. """ self._nodes.add(node) def add_edge(self, src, dest, data=None): """ Add an edge from node *src* to node *dest*, with optional per-edge *data*. If such an edge already exists, it is replaced (duplicate edges are not possible). """ if src not in self._nodes: raise ValueError("Cannot add edge as src node %s not in nodes %s" % (src, self._nodes)) if dest not in self._nodes: raise ValueError("Cannot add edge as dest node %s not in nodes %s" % (dest, self._nodes)) self._add_edge(src, dest, data) def successors(self, src): """ Yield (node, data) pairs representing the successors of node *src*. (*data* will be None if no data was specified when adding the edge) """ for dest in self._succs[src]: yield dest, self._edge_data[src, dest] def predecessors(self, dest): """ Yield (node, data) pairs representing the predecessors of node *dest*. (*data* will be None if no data was specified when adding the edge) """ for src in self._preds[dest]: yield src, self._edge_data[src, dest] def set_entry_point(self, node): """ Set the entry point of the graph to *node*. """ assert node in self._nodes self._entry_point = node def process(self): """ Compute essential properties of the control flow graph. The graph must have been fully populated, and its entry point specified. Other graph properties are computed on-demand. """ if self._entry_point is None: raise RuntimeError("no entry point defined!") self._eliminate_dead_blocks() def dominators(self): """ Return a dictionary of {node -> set(nodes)} mapping each node to the nodes dominating it. A node D dominates a node N when any path leading to N must go through D """ return self._doms def post_dominators(self): """ Return a dictionary of {node -> set(nodes)} mapping each node to the nodes post-dominating it. A node P post-dominates a node N when any path starting from N must go through P. """ return self._post_doms def immediate_dominators(self): """ Return a dictionary of {node -> node} mapping each node to its immediate dominator (idom). The idom(B) is the closest strict dominator of V """ return self._idom def dominance_frontier(self): """ Return a dictionary of {node -> set(nodes)} mapping each node to the nodes in its dominance frontier. 
The dominance frontier _df(N) is the set of all nodes that are immediate successors to blocks dominated by N but which aren't strictly dominated by N """ return self._df def dominator_tree(self): """ Return a dictionary of {node -> set(nodes)} mapping each node to the set of nodes it immediately dominates, i.e. domtree(B) is the set of nodes whose immediate dominator is B. """ return self._domtree @utils.cached_property def _exit_points(self): return self._find_exit_points() @utils.cached_property def _doms(self): return self._find_dominators() @utils.cached_property def _back_edges(self): return self._find_back_edges() @utils.cached_property def _topo_order(self): return self._find_topo_order() @utils.cached_property def _descs(self): return self._find_descendents() @utils.cached_property def _loops(self): return self._find_loops() @utils.cached_property def _in_loops(self): return self._find_in_loops() @utils.cached_property def _post_doms(self): return self._find_post_dominators() @utils.cached_property def _idom(self): return self._find_immediate_dominators() @utils.cached_property def _df(self): return self._find_dominance_frontier() @utils.cached_property def _domtree(self): return self._find_dominator_tree() def descendents(self, node): """ Return the set of descendents of the given *node*, in topological order (ignoring back edges). """ return self._descs[node] def entry_point(self): """ Return the entry point node. """ assert self._entry_point is not None return self._entry_point def exit_points(self): """ Return the computed set of exit nodes (may be empty). """ return self._exit_points def backbone(self): """ Return the set of nodes constituting the graph's backbone. (i.e. the nodes that every path starting from the entry point must go through). By construction, it is non-empty: it contains at least the entry point. """ return self._post_doms[self._entry_point] def loops(self): """ Return a dictionary of {node -> loop} mapping each loop header to the loop (a Loop instance) starting with it. """ return self._loops def in_loops(self, node): """ Return the list of Loop objects the *node* belongs to, from innermost to outermost. """ return [self._loops[x] for x in self._in_loops.get(node, ())] def dead_nodes(self): """ Return the set of dead nodes (eliminated from the graph). """ return self._dead_nodes def nodes(self): """ Return the set of live nodes. """ return self._nodes def topo_order(self): """ Return the sequence of nodes in topological order (ignoring back edges). """ return self._topo_order def topo_sort(self, nodes, reverse=False): """ Iterate over the *nodes* in topological order (ignoring back edges). The sort isn't guaranteed to be stable. """ nodes = set(nodes) it = self._topo_order if reverse: it = reversed(it) for n in it: if n in nodes: yield n def dump(self, file=None): """ Dump extensive debug information.
""" import pprint file = file or sys.stdout if 1: print("CFG adjacency lists:", file=file) self._dump_adj_lists(file) print("CFG dominators:", file=file) pprint.pprint(self._doms, stream=file) print("CFG post-dominators:", file=file) pprint.pprint(self._post_doms, stream=file) print("CFG back edges:", sorted(self._back_edges), file=file) print("CFG loops:", file=file) pprint.pprint(self._loops, stream=file) print("CFG node-to-loops:", file=file) pprint.pprint(self._in_loops, stream=file) print("CFG backbone:", file=file) pprint.pprint(self.backbone(), stream=file) def render_dot(self, filename="numba_cfg.dot"): """Render the controlflow graph with GraphViz DOT via the ``graphviz`` python binding. Returns ------- g : graphviz.Digraph Use `g.view()` to open the graph in the default PDF application. """ try: import graphviz as gv except ImportError: raise ImportError( "The feature requires `graphviz` but it is not available. " "Please install with `pip install graphviz`" ) g = gv.Digraph(filename=filename) # Populate the nodes for n in self._nodes: g.node(str(n)) # Populate the edges for n in self._nodes: for edge in self._succs[n]: g.edge(str(n), str(edge)) return g # Internal APIs def _add_edge(self, from_, to, data=None): # This internal version allows adding edges to/from unregistered # (ghost) nodes. self._preds[to].add(from_) self._succs[from_].add(to) self._edge_data[from_, to] = data def _remove_node_edges(self, node): for succ in self._succs.pop(node, ()): self._preds[succ].remove(node) del self._edge_data[node, succ] for pred in self._preds.pop(node, ()): self._succs[pred].remove(node) del self._edge_data[pred, node] def _dfs(self, entries=None): if entries is None: entries = (self._entry_point,) seen = set() stack = list(entries) while stack: node = stack.pop() if node not in seen: yield node seen.add(node) for succ in self._succs[node]: stack.append(succ) def _eliminate_dead_blocks(self): """ Eliminate all blocks not reachable from the entry point, and stash them into self._dead_nodes. """ live = set() for node in self._dfs(): live.add(node) self._dead_nodes = self._nodes - live self._nodes = live # Remove all edges leading from dead nodes for dead in self._dead_nodes: self._remove_node_edges(dead) def _find_exit_points(self): """ Compute the graph's exit points. """ exit_points = set() for n in self._nodes: if not self._succs.get(n): exit_points.add(n) return exit_points def _find_postorder(self): succs = self._succs back_edges = self._back_edges post_order = [] seen = set() post_order = [] # DFS def dfs_rec(node): if node not in seen: seen.add(node) stack.append((post_order.append, node)) for dest in succs[node]: if (node, dest) not in back_edges: stack.append((dfs_rec, dest)) stack = [(dfs_rec, self._entry_point)] while stack: cb, data = stack.pop() cb(data) return post_order def _find_immediate_dominators(self): # The algorithm implemented computes the immediate dominator # for each node in the CFG which is equivalent to build a dominator tree # Based on the implementation from NetworkX # library - nx.immediate_dominators # https://github.com/networkx/networkx/blob/858e7cb183541a78969fed0cbcd02346f5866c02/networkx/algorithms/dominance.py # noqa: E501 # References: # Keith D. Cooper, Timothy J. 
Harvey, and Ken Kennedy # A Simple, Fast Dominance Algorithm # https://www.cs.rice.edu/~keith/EMBED/dom.pdf def intersect(u, v): while u != v: while idx[u] < idx[v]: u = idom[u] while idx[u] > idx[v]: v = idom[v] return u entry = self._entry_point preds_table = self._preds order = self._find_postorder() idx = {e: i for i, e in enumerate(order)} # index of each node idom = {entry : entry} order.pop() order.reverse() changed = True while changed: changed = False for u in order: new_idom = functools.reduce(intersect, (v for v in preds_table[u] if v in idom)) if u not in idom or idom[u] != new_idom: idom[u] = new_idom changed = True return idom def _find_dominator_tree(self): idom = self._idom domtree = _DictOfContainers(set) for u, v in idom.items(): # v dominates u if u not in domtree: domtree[u] = set() if u != v: domtree[v].add(u) return domtree def _find_dominance_frontier(self): idom = self._idom preds_table = self._preds df = {u: set() for u in idom} for u in idom: if len(preds_table[u]) < 2: continue for v in preds_table[u]: while v != idom[u]: df[v].add(u) v = idom[v] return df def _find_dominators_internal(self, post=False): # See theoretical description in # http://en.wikipedia.org/wiki/Dominator_%28graph_theory%29 # The algorithm implemented here uses a todo-list as described # in http://pages.cs.wisc.edu/~fischer/cs701.f08/finding.loops.html if post: entries = set(self._exit_points) preds_table = self._succs succs_table = self._preds else: entries = set([self._entry_point]) preds_table = self._preds succs_table = self._succs if not entries: raise RuntimeError("no entry points: dominator algorithm " "cannot be seeded") doms = {} for e in entries: doms[e] = set([e]) todo = [] for n in self._nodes: if n not in entries: doms[n] = set(self._nodes) todo.append(n) while todo: n = todo.pop() if n in entries: continue new_doms = set([n]) preds = preds_table[n] if preds: new_doms |= functools.reduce(set.intersection, [doms[p] for p in preds]) if new_doms != doms[n]: assert len(new_doms) < len(doms[n]) doms[n] = new_doms todo.extend(succs_table[n]) return doms def _find_dominators(self): return self._find_dominators_internal(post=False) def _find_post_dominators(self): # To handle infinite loops correctly, we need to add a dummy # exit point, and link members of infinite loops to it. dummy_exit = object() self._exit_points.add(dummy_exit) for loop in self._loops.values(): if not loop.exits: for b in loop.body: self._add_edge(b, dummy_exit) pdoms = self._find_dominators_internal(post=True) # Fix the _post_doms table to make no reference to the dummy exit del pdoms[dummy_exit] for doms in pdoms.values(): doms.discard(dummy_exit) self._remove_node_edges(dummy_exit) self._exit_points.remove(dummy_exit) return pdoms # Finding loops and back edges: see # http://pages.cs.wisc.edu/~fischer/cs701.f08/finding.loops.html def _find_back_edges(self, stats=None): """ Find back edges. An edge (src, dest) is a back edge if and only if *dest* dominates *src*. """ # Prepare stats to capture execution information if stats is not None: if not isinstance(stats, dict): raise TypeError(f"*stats* must be a dict; got {type(stats)}") stats.setdefault('iteration_count', 0) # Uses a simple DFS to find back-edges. # The new algorithm is faster than the the previous dominator based # algorithm. 
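        # Invariant used below: when the DFS inspects the edge
        # (tos, cur_node), it is a back edge exactly when cur_node is still
        # on the traversal stack, i.e. an ancestor of tos on the current
        # path.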
back_edges = set() # stack: keeps track of the traversal path stack = [] # succs_state: keep track of unvisited successors of a node succs_state = {} entry_point = self.entry_point() checked = set() def push_state(node): stack.append(node) succs_state[node] = [dest for dest in self._succs[node]] push_state(entry_point) # Keep track for iteration count for debugging iter_ct = 0 while stack: iter_ct += 1 tos = stack[-1] tos_succs = succs_state[tos] # Are there successors not checked? if tos_succs: # Check the next successor cur_node = tos_succs.pop() # Is it in our traversal path? if cur_node in stack: # Yes, it's a backedge back_edges.add((tos, cur_node)) elif cur_node not in checked: # Push push_state(cur_node) else: # Checked all successors. Pop stack.pop() checked.add(tos) if stats is not None: stats['iteration_count'] += iter_ct return back_edges def _find_topo_order(self): succs = self._succs back_edges = self._back_edges post_order = [] seen = set() def _dfs_rec(node): if node not in seen: seen.add(node) for dest in succs[node]: if (node, dest) not in back_edges: _dfs_rec(dest) post_order.append(node) _dfs_rec(self._entry_point) post_order.reverse() return post_order def _find_descendents(self): descs = {} for node in reversed(self._topo_order): descs[node] = node_descs = set() for succ in self._succs[node]: if (node, succ) not in self._back_edges: node_descs.add(succ) node_descs.update(descs[succ]) return descs def _find_loops(self): """ Find the loops defined by the graph's back edges. """ bodies = {} for src, dest in self._back_edges: # The destination of the back edge is the loop header header = dest # Build up the loop body from the back edge's source node, # up to the source header. body = set([header]) queue = [src] while queue: n = queue.pop() if n not in body: body.add(n) queue.extend(self._preds[n]) # There can be several back edges to a given loop header; # if so, merge the resulting body fragments. if header in bodies: bodies[header].update(body) else: bodies[header] = body # Create a Loop object for each header. loops = {} for header, body in bodies.items(): entries = set() exits = set() for n in body: entries.update(self._preds[n] - body) exits.update(self._succs[n] - body) loop = Loop(header=header, body=body, entries=entries, exits=exits) loops[header] = loop return loops def _find_in_loops(self): loops = self._loops # Compute the loops to which each node belongs. in_loops = dict((n, []) for n in self._nodes) # Sort loops from longest to shortest # This ensures that outer loops will come before inner loops for loop in sorted(loops.values(), key=lambda loop: len(loop.body)): for n in loop.body: in_loops[n].append(loop.header) return in_loops def _dump_adj_lists(self, file): adj_lists = dict((src, sorted(list(dests))) for src, dests in self._succs.items()) import pprint pprint.pprint(adj_lists, stream=file) def __eq__(self, other): if not isinstance(other, CFGraph): raise NotImplementedError for x in ['_nodes', '_edge_data', '_entry_point', '_preds', '_succs']: this = getattr(self, x, None) that = getattr(other, x, None) if this != that: return False return True def __ne__(self, other): return not self.__eq__(other) class ControlFlowAnalysis(object): """ Attributes ---------- - bytecode - blocks - blockseq - doms: dict of set Dominators - backbone: set of block offsets The set of block that is common to all possible code path. 
""" def __init__(self, bytecode): self.bytecode = bytecode self.blocks = {} self.liveblocks = {} self.blockseq = [] self.doms = None self.backbone = None # Internal temp states self._force_new_block = True self._curblock = None self._blockstack = [] self._loops = [] self._withs = [] def iterblocks(self): """ Return all blocks in sequence of occurrence """ for i in self.blockseq: yield self.blocks[i] def iterliveblocks(self): """ Return all live blocks in sequence of occurrence """ for i in self.blockseq: if i in self.liveblocks: yield self.blocks[i] def incoming_blocks(self, block): """ Yield (incoming block, number of stack pops) pairs for *block*. """ for i, pops in block.incoming_jumps.items(): if i in self.liveblocks: yield self.blocks[i], pops def dump(self, file=None): self.graph.dump(file=None) def run(self): for inst in self._iter_inst(): fname = "op_%s" % inst.opname fn = getattr(self, fname, None) if fn is not None: fn(inst) elif inst.is_jump: # this catches e.g. try... except l = Loc(self.bytecode.func_id.filename, inst.lineno) if inst.opname in {"SETUP_EXCEPT", "SETUP_FINALLY"}: msg = "'try' block not supported until python3.7 or later" else: msg = "Use of unsupported opcode (%s) found" % inst.opname raise UnsupportedError(msg, loc=l) else: # Non-jump instructions are ignored pass # intentionally # Close all blocks for cur, nxt in zip(self.blockseq, self.blockseq[1:]): blk = self.blocks[cur] if not blk.outgoing_jumps and not blk.terminating: blk.outgoing_jumps[nxt] = 0 graph = CFGraph() for b in self.blocks: graph.add_node(b) for b in self.blocks.values(): for out, pops in b.outgoing_jumps.items(): graph.add_edge(b.offset, out, pops) graph.set_entry_point(min(self.blocks)) graph.process() self.graph = graph # Fill incoming for b in self.blocks.values(): for out, pops in b.outgoing_jumps.items(): self.blocks[out].incoming_jumps[b.offset] = pops # Find liveblocks self.liveblocks = dict((i, self.blocks[i]) for i in self.graph.nodes()) for lastblk in reversed(self.blockseq): if lastblk in self.liveblocks: break else: raise AssertionError("No live block that exits!?") # Find backbone backbone = self.graph.backbone() # Filter out in loop blocks (Assuming no other cyclic control blocks) # This is to unavoid variable defined in loops to be considered as # function scope. inloopblocks = set() for b in self.blocks.keys(): if self.graph.in_loops(b): inloopblocks.add(b) self.backbone = backbone - inloopblocks def jump(self, target, pops=0): """ Register a jump (conditional or not) to *target* offset. *pops* is the number of stack pops implied by the jump (default 0). 
""" self._curblock.outgoing_jumps[target] = pops def _iter_inst(self): for inst in self.bytecode: if self._use_new_block(inst): self._guard_with_as(inst) self._start_new_block(inst) self._curblock.body.append(inst.offset) yield inst def _use_new_block(self, inst): if inst.offset in self.bytecode.labels: res = True elif inst.opname in NEW_BLOCKERS: res = True else: res = self._force_new_block self._force_new_block = False return res def _start_new_block(self, inst): self._curblock = CFBlock(inst.offset) self.blocks[inst.offset] = self._curblock self.blockseq.append(inst.offset) def _guard_with_as(self, current_inst): """Checks if the next instruction after a SETUP_WITH is something other than a POP_TOP, if it is something else it'll be some sort of store which is not supported (this corresponds to `with CTXMGR as VAR(S)`).""" if current_inst.opname == "SETUP_WITH": next_op = self.bytecode[current_inst.next].opname if next_op != "POP_TOP": msg = ("The 'with (context manager) as " "(variable):' construct is not " "supported.") raise UnsupportedError(msg) def op_SETUP_LOOP(self, inst): end = inst.get_jump_target() self._blockstack.append(end) self._loops.append((inst.offset, end)) # TODO: Looplifting requires the loop entry be its own block. # Forcing a new block here is the simplest solution for now. # But, we should consider other less ad-hoc ways. self.jump(inst.next) self._force_new_block = True def op_SETUP_WITH(self, inst): end = inst.get_jump_target() self._blockstack.append(end) self._withs.append((inst.offset, end)) # TODO: WithLifting requires the loop entry be its own block. # Forcing a new block here is the simplest solution for now. # But, we should consider other less ad-hoc ways. self.jump(inst.next) self._force_new_block = True def op_POP_BLOCK(self, inst): self._blockstack.pop() def op_FOR_ITER(self, inst): self.jump(inst.get_jump_target()) self.jump(inst.next) self._force_new_block = True def _op_ABSOLUTE_JUMP_IF(self, inst): self.jump(inst.get_jump_target()) self.jump(inst.next) self._force_new_block = True op_POP_JUMP_IF_FALSE = _op_ABSOLUTE_JUMP_IF op_POP_JUMP_IF_TRUE = _op_ABSOLUTE_JUMP_IF op_JUMP_IF_FALSE = _op_ABSOLUTE_JUMP_IF op_JUMP_IF_TRUE = _op_ABSOLUTE_JUMP_IF def _op_ABSOLUTE_JUMP_OR_POP(self, inst): self.jump(inst.get_jump_target()) self.jump(inst.next, pops=1) self._force_new_block = True op_JUMP_IF_FALSE_OR_POP = _op_ABSOLUTE_JUMP_OR_POP op_JUMP_IF_TRUE_OR_POP = _op_ABSOLUTE_JUMP_OR_POP def op_JUMP_ABSOLUTE(self, inst): self.jump(inst.get_jump_target()) self._force_new_block = True def op_JUMP_FORWARD(self, inst): self.jump(inst.get_jump_target()) self._force_new_block = True def op_RETURN_VALUE(self, inst): self._curblock.terminating = True self._force_new_block = True def op_RAISE_VARARGS(self, inst): self._curblock.terminating = True self._force_new_block = True def op_BREAK_LOOP(self, inst): self.jump(self._blockstack[-1]) self._force_new_block = True numba-0.55.1/numba/core/cpu.py000664 000000 000000 00000031140 14174536160 016065 0ustar00rootroot000000 000000 import sys import platform import llvmlite.binding as ll import llvmlite.llvmpy.core as lc from llvmlite import ir from numba import _dynfunc from numba.core.callwrapper import PyCallWrapper from numba.core.base import BaseContext, PYOBJECT from numba.core import utils, types, config, cgutils, callconv, codegen, externals, fastmathpass, intrinsics from numba.core.utils import cached_property from numba.core.options import TargetOptions, include_default_options from numba.core.runtime import rtsys 
from numba.core.compiler_lock import global_compiler_lock import numba.core.entrypoints from numba.core.cpu_options import (ParallelOptions, FastMathOptions, InlineOptions) from numba.np import ufunc_db # Keep those structures in sync with _dynfunc.c. class ClosureBody(cgutils.Structure): _fields = [('env', types.pyobject)] class EnvBody(cgutils.Structure): _fields = [ ('globals', types.pyobject), ('consts', types.pyobject), ] class CPUContext(BaseContext): """ Changes BaseContext calling convention """ allow_dynamic_globals = True def __init__(self, typingctx, target='cpu'): super().__init__(typingctx, target) # Overrides def create_module(self, name): return self._internal_codegen._create_empty_module(name) @global_compiler_lock def init(self): self.is32bit = (utils.MACHINE_BITS == 32) self._internal_codegen = codegen.JITCPUCodegen("numba.exec") # Add ARM ABI functions from libgcc_s if platform.machine() == 'armv7l': ll.load_library_permanently('libgcc_s.so.1') # Map external C functions. externals.c_math_functions.install(self) # Initialize NRT runtime rtsys.initialize(self) # Add lower_extension attribute self.lower_extensions = {} from numba.parfors.parfor_lowering import _lower_parfor_parallel from numba.parfors.parfor import Parfor # Specify how to lower Parfor nodes using the lower_extensions self.lower_extensions[Parfor] = _lower_parfor_parallel def load_additional_registries(self): # Add implementations that work via import from numba.cpython import (builtins, charseq, enumimpl, hashing, heapq, iterators, listobj, numbers, rangeobj, setobj, slicing, tupleobj, unicode,) from numba.core import optional from numba.misc import gdb_hook, literal from numba.np import linalg, polynomial, arraymath, arrayobj from numba.typed import typeddict, dictimpl from numba.typed import typedlist, listobject from numba.experimental import jitclass, function_type from numba.np import npdatetime # Add target specific implementations from numba.np import npyimpl from numba.cpython import cmathimpl, mathimpl, printimpl, randomimpl from numba.misc import cffiimpl from numba.experimental.jitclass.base import ClassBuilder as \ jitclassimpl self.install_registry(cmathimpl.registry) self.install_registry(cffiimpl.registry) self.install_registry(mathimpl.registry) self.install_registry(npyimpl.registry) self.install_registry(printimpl.registry) self.install_registry(randomimpl.registry) self.install_registry(jitclassimpl.class_impl_registry) # load 3rd party extensions numba.core.entrypoints.init_all() @property def target_data(self): return self._internal_codegen.target_data def with_aot_codegen(self, name, **aot_options): aot_codegen = codegen.AOTCPUCodegen(name, **aot_options) return self.subtarget(_internal_codegen=aot_codegen, aot_mode=True) def codegen(self): return self._internal_codegen @cached_property def call_conv(self): return callconv.CPUCallConv(self) def get_env_body(self, builder, envptr): """ From the given *envptr* (a pointer to a _dynfunc.Environment object), get a EnvBody allowing structured access to environment fields. 
""" body_ptr = cgutils.pointer_add( builder, envptr, _dynfunc._impl_info['offsetof_env_body']) return EnvBody(self, builder, ref=body_ptr, cast_ref=True) def get_env_manager(self, builder): envgv = self.declare_env_global(builder.module, self.get_env_name(self.fndesc)) envarg = builder.load(envgv) pyapi = self.get_python_api(builder) pyapi.emit_environment_sentry( envarg, debug_msg=self.fndesc.env_name, ) env_body = self.get_env_body(builder, envarg) return pyapi.get_env_manager(self.environment, env_body, envarg) def get_generator_state(self, builder, genptr, return_type): """ From the given *genptr* (a pointer to a _dynfunc.Generator object), get a pointer to its state area. """ return cgutils.pointer_add( builder, genptr, _dynfunc._impl_info['offsetof_generator_state'], return_type=return_type) def build_list(self, builder, list_type, items): """ Build a list from the Numba *list_type* and its initial *items*. """ from numba.cpython import listobj return listobj.build_list(self, builder, list_type, items) def build_set(self, builder, set_type, items): """ Build a set from the Numba *set_type* and its initial *items*. """ from numba.cpython import setobj return setobj.build_set(self, builder, set_type, items) def build_map(self, builder, dict_type, item_types, items): from numba.typed import dictobject return dictobject.build_map(self, builder, dict_type, item_types, items) def post_lowering(self, mod, library): if self.fastmath: fastmathpass.rewrite_module(mod, self.fastmath) if self.is32bit: # 32-bit machine needs to replace all 64-bit div/rem to avoid # calls to compiler-rt intrinsics.fix_divmod(mod) library.add_linking_library(rtsys.library) def create_cpython_wrapper(self, library, fndesc, env, call_helper, release_gil=False): wrapper_module = self.create_module("wrapper") fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes) wrapper_callee = ir.Function(wrapper_module, fnty, fndesc.llvm_func_name) builder = PyCallWrapper(self, wrapper_module, wrapper_callee, fndesc, env, call_helper=call_helper, release_gil=release_gil) builder.build() library.add_ir_module(wrapper_module) def create_cfunc_wrapper(self, library, fndesc, env, call_helper): wrapper_module = self.create_module("cfunc_wrapper") fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes) wrapper_callee = ir.Function(wrapper_module, fnty, fndesc.llvm_func_name) ll_argtypes = [self.get_value_type(ty) for ty in fndesc.argtypes] ll_return_type = self.get_value_type(fndesc.restype) wrapty = ir.FunctionType(ll_return_type, ll_argtypes) wrapfn = ir.Function(wrapper_module, wrapty, fndesc.llvm_cfunc_wrapper_name) builder = ir.IRBuilder(wrapfn.append_basic_block('entry')) status, out = self.call_conv.call_function( builder, wrapper_callee, fndesc.restype, fndesc.argtypes, wrapfn.args, attrs=('noinline',)) with builder.if_then(status.is_error, likely=False): # If (and only if) an error occurred, acquire the GIL # and use the interpreter to write out the exception. 
pyapi = self.get_python_api(builder) gil_state = pyapi.gil_ensure() self.call_conv.raise_error(builder, pyapi, status) cstr = self.insert_const_string(builder.module, repr(self)) strobj = pyapi.string_from_string(cstr) pyapi.err_write_unraisable(strobj) pyapi.decref(strobj) pyapi.gil_release(gil_state) builder.ret(out) library.add_ir_module(wrapper_module) def get_executable(self, library, fndesc, env): """ Returns ------- (cfunc, fnptr) - cfunc callable function (Can be None) - fnptr callable function address - env an execution environment (from _dynfunc) """ # Code generation baseptr = library.get_pointer_to_function(fndesc.llvm_func_name) fnptr = library.get_pointer_to_function(fndesc.llvm_cpython_wrapper_name) # Note: we avoid reusing the original docstring to avoid encoding # issues on Python 2, see issue #1908 doc = "compiled wrapper for %r" % (fndesc.qualname,) cfunc = _dynfunc.make_function(fndesc.lookup_module(), fndesc.qualname.split('.')[-1], doc, fnptr, env, # objects to keepalive with the function (library,) ) library.codegen.set_env(self.get_env_name(fndesc), env) return cfunc def calc_array_sizeof(self, ndim): ''' Calculate the size of an array struct on the CPU target ''' aryty = types.Array(types.int32, ndim, 'A') return self.get_abi_sizeof(self.get_value_type(aryty)) # Overrides def get_ufunc_info(self, ufunc_key): return ufunc_db.get_ufunc_info(ufunc_key) # ---------------------------------------------------------------------------- # TargetOptions _options_mixin = include_default_options( "nopython", "forceobj", "looplift", "_nrt", "debug", "boundscheck", "nogil", "no_rewrites", "no_cpython_wrapper", "no_cfunc_wrapper", "parallel", "fastmath", "error_model", "inline", "forceinline", # Add "target_backend" as a accepted option for the CPU in @jit(...) 
"target_backend", ) class CPUTargetOptions(_options_mixin, TargetOptions): def finalize(self, flags, options): if not flags.is_set("enable_pyobject"): flags.enable_pyobject = True if not flags.is_set("enable_looplift"): flags.enable_looplift = True flags.inherit_if_not_set("nrt", default=True) if not flags.is_set("debuginfo"): flags.debuginfo = config.DEBUGINFO_DEFAULT if not flags.is_set("boundscheck"): flags.boundscheck = flags.debuginfo flags.enable_pyobject_looplift = True flags.inherit_if_not_set("fastmath") flags.inherit_if_not_set("error_model", default="python") # Add "target_backend" as a option that inherits from the caller flags.inherit_if_not_set("target_backend") flags.inherit_if_not_set("forceinline") # ---------------------------------------------------------------------------- # Internal def remove_refct_calls(func): """ Remove redundant incref/decref within on a per block basis """ for bb in func.basic_blocks: remove_null_refct_call(bb) remove_refct_pairs(bb) def remove_null_refct_call(bb): """ Remove refct api calls to NULL pointer """ pass ## Skipped for now # for inst in bb.instructions: # if isinstance(inst, lc.CallOrInvokeInstruction): # fname = inst.called_function.name # if fname == "Py_IncRef" or fname == "Py_DecRef": # arg = inst.args[0] # print(type(arg)) # if isinstance(arg, lc.ConstantPointerNull): # inst.erase_from_parent() def remove_refct_pairs(bb): """ Remove incref decref pairs on the same variable """ didsomething = True while didsomething: didsomething = False increfs = {} decrefs = {} # Mark for inst in bb.instructions: if isinstance(inst, lc.CallOrInvokeInstruction): fname = inst.called_function.name if fname == "Py_IncRef": arg = inst.operands[0] increfs[arg] = inst elif fname == "Py_DecRef": arg = inst.operands[0] decrefs[arg] = inst # Sweep for val in increfs.keys(): if val in decrefs: increfs[val].erase_from_parent() decrefs[val].erase_from_parent() didsomething = True numba-0.55.1/numba/core/cpu_options.py000664 000000 000000 00000012610 14174536160 017641 0ustar00rootroot000000 000000 """ Defines CPU Options for use in the CPU target """ from abc import ABCMeta, abstractmethod class AbstractOptionValue(metaclass=ABCMeta): """Abstract base class for custom option values. """ @abstractmethod def encode(self) -> str: """Returns an encoding of the values """ ... def __repr__(self) -> str: return f"{self.__class__.__name__}({self.encode()})" class FastMathOptions(AbstractOptionValue): """ Options for controlling fast math optimization. 
""" def __init__(self, value): # https://releases.llvm.org/7.0.0/docs/LangRef.html#fast-math-flags valid_flags = { 'fast', 'nnan', 'ninf', 'nsz', 'arcp', 'contract', 'afn', 'reassoc', } if isinstance(value, FastMathOptions): self.flags = value.flags.copy() elif value is True: self.flags = {'fast'} elif value is False: self.flags = set() elif isinstance(value, set): invalid = value - valid_flags if invalid: raise ValueError("Unrecognized fastmath flags: %s" % invalid) self.flags = value elif isinstance(value, dict): invalid = set(value.keys()) - valid_flags if invalid: raise ValueError("Unrecognized fastmath flags: %s" % invalid) self.flags = {v for v, enable in value.items() if enable} else: msg = "Expected fastmath option(s) to be either a bool, dict or set" raise ValueError(msg) def __bool__(self): return bool(self.flags) __nonzero__ = __bool__ def encode(self) -> str: return str(self.flags) def __eq__(self, other): if type(other) is type(self): return self.flags == other.flags return NotImplemented class ParallelOptions(AbstractOptionValue): """ Options for controlling auto parallelization. """ __slots__ = ("enabled", "comprehension", "reduction", "inplace_binop", "setitem", "numpy", "stencil", "fusion", "prange") def __init__(self, value): if isinstance(value, bool): self.enabled = value self.comprehension = value self.reduction = value self.inplace_binop = value self.setitem = value self.numpy = value self.stencil = value self.fusion = value self.prange = value elif isinstance(value, dict): self.enabled = True self.comprehension = value.pop('comprehension', True) self.reduction = value.pop('reduction', True) self.inplace_binop = value.pop('inplace_binop', True) self.setitem = value.pop('setitem', True) self.numpy = value.pop('numpy', True) self.stencil = value.pop('stencil', True) self.fusion = value.pop('fusion', True) self.prange = value.pop('prange', True) if value: msg = "Unrecognized parallel options: %s" % value.keys() raise NameError(msg) elif isinstance(value, ParallelOptions): self.enabled = value.enabled self.comprehension = value.comprehension self.reduction = value.reduction self.inplace_binop = value.inplace_binop self.setitem = value.setitem self.numpy = value.numpy self.stencil = value.stencil self.fusion = value.fusion self.prange = value.prange else: msg = "Expect parallel option to be either a bool or a dict" raise ValueError(msg) def _get_values(self): """Get values as dictionary. """ return {k: getattr(self, k) for k in self.__slots__} def __eq__(self, other): if type(other) is type(self): return self._get_values() == other._get_values() return NotImplemented def encode(self) -> str: return ", ".join(f"{k}={v}" for k, v in self._get_values().items()) class InlineOptions(AbstractOptionValue): """ Options for controlling inlining """ def __init__(self, value): ok = False if isinstance(value, str): if value in ('always', 'never'): ok = True else: ok = hasattr(value, '__call__') if ok: self._inline = value else: msg = ("kwarg 'inline' must be one of the strings 'always' or " "'never', or it can be a callable that returns True/False. 
" "Found value %s" % value) raise ValueError(msg) @property def is_never_inline(self): """ True if never inline """ return self._inline == 'never' @property def is_always_inline(self): """ True if always inline """ return self._inline == 'always' @property def has_cost_model(self): """ True if a cost model is provided """ return not (self.is_always_inline or self.is_never_inline) @property def value(self): """ The raw value """ return self._inline def __eq__(self, other): if type(other) is type(self): return self.value == other.value return NotImplemented def encode(self) -> str: return repr(self._inline) numba-0.55.1/numba/core/dataflow.py000664 000000 000000 00000070442 14174536160 017107 0ustar00rootroot000000 000000 import collections from pprint import pprint import sys import warnings from numba.core.errors import UnsupportedError from numba.core.ir import Loc class DataFlowAnalysis(object): """ Perform stack2reg This is necessary to resolve blocks that propagates stack value. This would allow the use of `and` and `or` and python2.6 jumps. """ def __init__(self, cfa): self.cfa = cfa self.bytecode = cfa.bytecode # { block offset -> BlockInfo } self.infos = {} self.edge_process = {} def run(self): for blk in self.cfa.iterliveblocks(): self.infos[blk.offset] = self.run_on_block(blk) def run_on_block(self, blk): incoming_blocks = [] info = BlockInfo(blk, blk.offset, incoming_blocks) edge_callbacks = [] for ib, pops in self.cfa.incoming_blocks(blk): # By nature of Python bytecode, there will be no incoming # variables from subsequent blocks. This is an easy way # of breaking the potential circularity of the problem. if ib.offset >= blk.offset: continue ib = self.infos[ib.offset] incoming_blocks.append(ib) if (ib.offset, blk.offset) in self.edge_process: edge_callbacks.append(self.edge_process[(ib.offset, blk.offset)]) # Compute stack offset at block entry # The stack effect of our predecessors should be known assert ib.stack_offset is not None, ib new_offset = ib.stack_offset + ib.stack_effect - pops if new_offset < 0: raise RuntimeError("computed negative stack offset for %s" % blk) if info.stack_offset is None: info.stack_offset = new_offset elif info.stack_offset != new_offset: warnings.warn("inconsistent stack offset for %s" % blk, RuntimeWarning) # Compute syntax blocks at block entry assert ib.syntax_blocks is not None, ib if info.syntax_blocks is None: info.syntax_blocks = ib.syntax_blocks[:] elif info.syntax_blocks != ib.syntax_blocks: warnings.warn("inconsistent entry syntax blocks for %s" % blk, RuntimeWarning) if info.stack_offset is None: # No incoming blocks => assume it's the entry block info.stack_offset = 0 info.syntax_blocks = [] info.stack_effect = 0 for callback in edge_callbacks: callback(info) for offset in blk: inst = self.bytecode[offset] self.dispatch(info, inst) return info def dump(self): for blk in self.infos.values(): blk.dump() def dispatch(self, info, inst): fname = "op_%s" % inst.opname.replace('+', '_') fn = getattr(self, fname, self.handle_unknown_opcode) fn(info, inst) def handle_unknown_opcode(self, info, inst): raise UnsupportedError( "Use of unknown opcode '{}'".format(inst.opname), loc=Loc(filename=self.bytecode.func_id.filename, line=inst.lineno) ) def dup_topx(self, info, inst, count): orig = [info.pop() for _ in range(count)] orig.reverse() # We need to actually create new temporaries if we want the # IR optimization pass to work correctly (see issue #580) duped = [info.make_temp() for _ in range(count)] info.append(inst, orig=orig, duped=duped) 
for val in orig: info.push(val) for val in duped: info.push(val) def add_syntax_block(self, info, block): """ Add an inner syntax block. """ block.stack_offset = info.stack_offset info.syntax_blocks.append(block) def pop_syntax_block(self, info): """ Pop the innermost syntax block and revert its stack effect. """ block = info.syntax_blocks.pop() assert info.stack_offset >= block.stack_offset while info.stack_offset + info.stack_effect > block.stack_offset: info.pop(discard=True) return block def op_NOP(self, info, inst): pass def op_DUP_TOPX(self, info, inst): count = inst.arg assert 1 <= count <= 5, "Invalid DUP_TOPX count" self.dup_topx(info, inst, count) def op_DUP_TOP(self, info, inst): self.dup_topx(info, inst, count=1) def op_DUP_TOP_TWO(self, info, inst): self.dup_topx(info, inst, count=2) def op_ROT_TWO(self, info, inst): first = info.pop() second = info.pop() info.push(first) info.push(second) def op_ROT_THREE(self, info, inst): first = info.pop() second = info.pop() third = info.pop() info.push(first) info.push(third) info.push(second) def op_ROT_FOUR(self, info, inst): first = info.pop() second = info.pop() third = info.pop() forth = info.pop() info.push(first) info.push(forth) info.push(third) info.push(second) def op_UNPACK_SEQUENCE(self, info, inst): count = inst.arg iterable = info.pop() stores = [info.make_temp() for _ in range(count)] tupleobj = info.make_temp() info.append(inst, iterable=iterable, stores=stores, tupleobj=tupleobj) for st in reversed(stores): info.push(st) def op_FORMAT_VALUE(self, info, inst): """ FORMAT_VALUE(flags): flags argument specifies format spec which is not supported yet. Currently, str() is simply called on the value. Pops a value from stack and pushes results back. Required for supporting f-strings. https://docs.python.org/3/library/dis.html#opcode-FORMAT_VALUE """ if inst.arg != 0: msg = "format spec in f-strings not supported yet" raise UnsupportedError( msg, loc=Loc(filename=self.bytecode.func_id.filename, line=inst.lineno) ) value = info.pop() strvar = info.make_temp() res = info.make_temp() info.append(inst, value=value, res=res, strvar=strvar) info.push(res) def op_BUILD_STRING(self, info, inst): """ BUILD_STRING(count): Concatenates count strings from the stack and pushes the resulting string onto the stack. Required for supporting f-strings. 
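        For example, on CPython 3.x the expression f"{a}{b}" compiles to
        two FORMAT_VALUE ops followed by BUILD_STRING(2).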
https://docs.python.org/3/library/dis.html#opcode-BUILD_STRING """ count = inst.arg strings = list(reversed([info.pop() for _ in range(count)])) # corner case: f"" if count == 0: tmps = [info.make_temp()] else: tmps = [info.make_temp() for _ in range(count - 1)] info.append(inst, strings=strings, tmps=tmps) info.push(tmps[-1]) def op_BUILD_TUPLE(self, info, inst): count = inst.arg items = list(reversed([info.pop() for _ in range(count)])) tup = info.make_temp() info.append(inst, items=items, res=tup) info.push(tup) def op_BUILD_LIST(self, info, inst): count = inst.arg items = list(reversed([info.pop() for _ in range(count)])) lst = info.make_temp() info.append(inst, items=items, res=lst) info.push(lst) def op_LIST_APPEND(self, info, inst): value = info.pop() index = inst.arg target = info.peek(index) appendvar = info.make_temp() res = info.make_temp() info.append(inst, target=target, value=value, appendvar=appendvar, res=res) def op_BUILD_MAP(self, info, inst): dct = info.make_temp() count = inst.arg items = [] # BUILD_MAP takes pairs from the stack for i in range(count): v, k = info.pop(), info.pop() items.append((k, v)) info.append(inst, items=items[::-1], size=count, res=dct) info.push(dct) def op_MAP_ADD(self, info, inst): key = info.pop() value = info.pop() index = inst.arg target = info.peek(index) setitemvar = info.make_temp() res = info.make_temp() info.append(inst, target=target, key=key, value=value, setitemvar=setitemvar, res=res) def op_BUILD_SET(self, info, inst): count = inst.arg # Note: related python bug http://bugs.python.org/issue26020 items = list(reversed([info.pop() for _ in range(count)])) res = info.make_temp() info.append(inst, items=items, res=res) info.push(res) def op_POP_TOP(self, info, inst): info.pop(discard=True) def op_STORE_ATTR(self, info, inst): target = info.pop() value = info.pop() info.append(inst, target=target, value=value) def op_DELETE_ATTR(self, info, inst): target = info.pop() info.append(inst, target=target) def op_STORE_FAST(self, info, inst): value = info.pop() info.append(inst, value=value) def op_STORE_MAP(self, info, inst): key = info.pop() value = info.pop() dct = info.tos info.append(inst, dct=dct, key=key, value=value) def op_STORE_DEREF(self, info, inst): value = info.pop() info.append(inst, value=value) def op_LOAD_FAST(self, info, inst): name = self.bytecode.co_varnames[inst.arg] res = info.make_temp(name) info.append(inst, res=res) info.push(res) def op_LOAD_CONST(self, info, inst): res = info.make_temp('const') info.append(inst, res=res) info.push(res) def op_LOAD_GLOBAL(self, info, inst): res = info.make_temp() info.append(inst, res=res) info.push(res) def op_LOAD_DEREF(self, info, inst): res = info.make_temp() info.append(inst, res=res) info.push(res) def op_LOAD_ATTR(self, info, inst): item = info.pop() res = info.make_temp() info.append(inst, item=item, res=res) info.push(res) def op_BINARY_SUBSCR(self, info, inst): index = info.pop() target = info.pop() res = info.make_temp() info.append(inst, index=index, target=target, res=res) info.push(res) def op_STORE_SUBSCR(self, info, inst): index = info.pop() target = info.pop() value = info.pop() info.append(inst, target=target, index=index, value=value) def op_DELETE_SUBSCR(self, info, inst): index = info.pop() target = info.pop() info.append(inst, target=target, index=index) def op_GET_ITER(self, info, inst): value = info.pop() res = info.make_temp() info.append(inst, value=value, res=res) info.push(res) def op_FOR_ITER(self, info, inst): iterator = info.tos pair = info.make_temp() 
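        # `pair` holds the raw iternext result; `indval` (the yielded value)
        # and `pred` (the "iteration still valid?" flag) are unpacked from it
        # during lowering (see interpreter.py).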
indval = info.make_temp() pred = info.make_temp() info.append(inst, iterator=iterator, pair=pair, indval=indval, pred=pred) info.push(indval) # Setup for stack POP (twice) at loop exit (before processing instruction at jump target) def pop_info(info): info.pop() info.pop() self.edge_process[(info.block.offset, inst.get_jump_target())] = pop_info def op_CALL_FUNCTION(self, info, inst): narg = inst.arg args = list(reversed([info.pop() for _ in range(narg)])) func = info.pop() res = info.make_temp() info.append(inst, func=func, args=args, res=res) info.push(res) def op_CALL_FUNCTION_KW(self, info, inst): narg = inst.arg names = info.pop() # tuple of names args = list(reversed([info.pop() for _ in range(narg)])) func = info.pop() res = info.make_temp() info.append(inst, func=func, args=args, names=names, res=res) info.push(res) def op_CALL_FUNCTION_EX(self, info, inst): if inst.arg & 1: errmsg = 'CALL_FUNCTION_EX with **kwargs not supported' raise NotImplementedError(errmsg) vararg = info.pop() func = info.pop() res = info.make_temp() info.append(inst, func=func, vararg=vararg, res=res) info.push(res) def _build_tuple_unpack(self, info, inst): # Builds tuple from other tuples on the stack tuples = list(reversed([info.pop() for _ in range(inst.arg)])) temps = [info.make_temp() for _ in range(len(tuples) - 1)] # if the unpack is assign-like, e.g. x = (*y,), it needs handling # differently. is_assign = len(tuples) == 1 if is_assign: temps = [info.make_temp(),] info.append(inst, tuples=tuples, temps=temps, is_assign=is_assign) # The result is in the last temp var info.push(temps[-1]) def op_BUILD_TUPLE_UNPACK_WITH_CALL(self, info, inst): # just unpack the input tuple, call inst will be handled afterwards self._build_tuple_unpack(info, inst) def op_BUILD_TUPLE_UNPACK(self, info, inst): self._build_tuple_unpack(info, inst) def op_BUILD_CONST_KEY_MAP(self, info, inst): keys = info.pop() vals = list(reversed([info.pop() for _ in range(inst.arg)])) keytmps = [info.make_temp() for _ in range(inst.arg)] res = info.make_temp() info.append(inst, keys=keys, keytmps=keytmps, values=vals, res=res) info.push(res) def op_PRINT_ITEM(self, info, inst): warnings.warn("Python2 style print partially supported. 
Please use " "Python3 style print.", RuntimeWarning) item = info.pop() printvar = info.make_temp() res = info.make_temp() info.append(inst, item=item, printvar=printvar, res=res) def op_PRINT_NEWLINE(self, info, inst): printvar = info.make_temp() res = info.make_temp() info.append(inst, printvar=printvar, res=res) def _unaryop(self, info, inst): val = info.pop() res = info.make_temp() info.append(inst, value=val, res=res) info.push(res) op_UNARY_NEGATIVE = _unaryop op_UNARY_POSITIVE = _unaryop op_UNARY_NOT = _unaryop op_UNARY_INVERT = _unaryop def _binaryop(self, info, inst): rhs = info.pop() lhs = info.pop() res = info.make_temp() info.append(inst, lhs=lhs, rhs=rhs, res=res) info.push(res) op_COMPARE_OP = _binaryop op_IS_OP = _binaryop op_CONTAINS_OP = _binaryop op_INPLACE_ADD = _binaryop op_INPLACE_SUBTRACT = _binaryop op_INPLACE_MULTIPLY = _binaryop op_INPLACE_DIVIDE = _binaryop op_INPLACE_TRUE_DIVIDE = _binaryop op_INPLACE_FLOOR_DIVIDE = _binaryop op_INPLACE_MODULO = _binaryop op_INPLACE_POWER = _binaryop op_INPLACE_MATRIX_MULTIPLY = _binaryop op_INPLACE_LSHIFT = _binaryop op_INPLACE_RSHIFT = _binaryop op_INPLACE_AND = _binaryop op_INPLACE_OR = _binaryop op_INPLACE_XOR = _binaryop op_BINARY_ADD = _binaryop op_BINARY_SUBTRACT = _binaryop op_BINARY_MULTIPLY = _binaryop op_BINARY_DIVIDE = _binaryop op_BINARY_TRUE_DIVIDE = _binaryop op_BINARY_FLOOR_DIVIDE = _binaryop op_BINARY_MODULO = _binaryop op_BINARY_POWER = _binaryop op_BINARY_MATRIX_MULTIPLY = _binaryop op_BINARY_LSHIFT = _binaryop op_BINARY_RSHIFT = _binaryop op_BINARY_AND = _binaryop op_BINARY_OR = _binaryop op_BINARY_XOR = _binaryop def op_SLICE_0(self, info, inst): """ TOS = TOS[:] """ tos = info.pop() res = info.make_temp() slicevar = info.make_temp() indexvar = info.make_temp() nonevar = info.make_temp() info.append(inst, base=tos, res=res, slicevar=slicevar, indexvar=indexvar, nonevar=nonevar) info.push(res) def op_SLICE_1(self, info, inst): """ TOS = TOS1[TOS:] """ tos = info.pop() tos1 = info.pop() res = info.make_temp() slicevar = info.make_temp() indexvar = info.make_temp() nonevar = info.make_temp() info.append(inst, base=tos1, start=tos, res=res, slicevar=slicevar, indexvar=indexvar, nonevar=nonevar) info.push(res) def op_SLICE_2(self, info, inst): """ TOS = TOS1[:TOS] """ tos = info.pop() tos1 = info.pop() res = info.make_temp() slicevar = info.make_temp() indexvar = info.make_temp() nonevar = info.make_temp() info.append(inst, base=tos1, stop=tos, res=res, slicevar=slicevar, indexvar=indexvar, nonevar=nonevar) info.push(res) def op_SLICE_3(self, info, inst): """ TOS = TOS2[TOS1:TOS] """ tos = info.pop() tos1 = info.pop() tos2 = info.pop() res = info.make_temp() slicevar = info.make_temp() indexvar = info.make_temp() info.append(inst, base=tos2, start=tos1, stop=tos, res=res, slicevar=slicevar, indexvar=indexvar) info.push(res) def op_STORE_SLICE_0(self, info, inst): """ TOS[:] = TOS1 """ tos = info.pop() value = info.pop() slicevar = info.make_temp() indexvar = info.make_temp() nonevar = info.make_temp() info.append(inst, base=tos, value=value, slicevar=slicevar, indexvar=indexvar, nonevar=nonevar) def op_STORE_SLICE_1(self, info, inst): """ TOS1[TOS:] = TOS2 """ tos = info.pop() tos1 = info.pop() value = info.pop() slicevar = info.make_temp() indexvar = info.make_temp() nonevar = info.make_temp() info.append(inst, base=tos1, start=tos, slicevar=slicevar, value=value, indexvar=indexvar, nonevar=nonevar) def op_STORE_SLICE_2(self, info, inst): """ TOS1[:TOS] = TOS2 """ tos = info.pop() tos1 = info.pop() value = 
info.pop() slicevar = info.make_temp() indexvar = info.make_temp() nonevar = info.make_temp() info.append(inst, base=tos1, stop=tos, value=value, slicevar=slicevar, indexvar=indexvar, nonevar=nonevar) def op_STORE_SLICE_3(self, info, inst): """ TOS2[TOS1:TOS] = TOS3 """ tos = info.pop() tos1 = info.pop() tos2 = info.pop() value = info.pop() slicevar = info.make_temp() indexvar = info.make_temp() info.append(inst, base=tos2, start=tos1, stop=tos, value=value, slicevar=slicevar, indexvar=indexvar) def op_DELETE_SLICE_0(self, info, inst): """ del TOS[:] """ tos = info.pop() slicevar = info.make_temp() indexvar = info.make_temp() nonevar = info.make_temp() info.append(inst, base=tos, slicevar=slicevar, indexvar=indexvar, nonevar=nonevar) def op_DELETE_SLICE_1(self, info, inst): """ del TOS1[TOS:] """ tos = info.pop() tos1 = info.pop() slicevar = info.make_temp() indexvar = info.make_temp() nonevar = info.make_temp() info.append(inst, base=tos1, start=tos, slicevar=slicevar, indexvar=indexvar, nonevar=nonevar) def op_DELETE_SLICE_2(self, info, inst): """ del TOS1[:TOS] """ tos = info.pop() tos1 = info.pop() slicevar = info.make_temp() indexvar = info.make_temp() nonevar = info.make_temp() info.append(inst, base=tos1, stop=tos, slicevar=slicevar, indexvar=indexvar, nonevar=nonevar) def op_DELETE_SLICE_3(self, info, inst): """ del TOS2[TOS1:TOS] """ tos = info.pop() tos1 = info.pop() tos2 = info.pop() slicevar = info.make_temp() indexvar = info.make_temp() info.append(inst, base=tos2, start=tos1, stop=tos, slicevar=slicevar, indexvar=indexvar) def op_BUILD_SLICE(self, info, inst): """ slice(TOS1, TOS) or slice(TOS2, TOS1, TOS) """ argc = inst.arg if argc == 2: tos = info.pop() tos1 = info.pop() start = tos1 stop = tos step = None elif argc == 3: tos = info.pop() tos1 = info.pop() tos2 = info.pop() start = tos2 stop = tos1 step = tos else: raise Exception("unreachable") slicevar = info.make_temp() res = info.make_temp() info.append(inst, start=start, stop=stop, step=step, res=res, slicevar=slicevar) info.push(res) def op_POP_JUMP_IF_TRUE(self, info, inst): pred = info.pop() info.append(inst, pred=pred) info.terminator = inst def op_POP_JUMP_IF_FALSE(self, info, inst): pred = info.pop() info.append(inst, pred=pred) info.terminator = inst def op_JUMP_IF_TRUE(self, info, inst): pred = info.tos info.append(inst, pred=pred) info.terminator = inst def op_JUMP_IF_FALSE(self, info, inst): pred = info.tos info.append(inst, pred=pred) info.terminator = inst op_JUMP_IF_FALSE_OR_POP = op_JUMP_IF_FALSE op_JUMP_IF_TRUE_OR_POP = op_JUMP_IF_TRUE def op_JUMP_ABSOLUTE(self, info, inst): info.append(inst) info.terminator = inst def op_JUMP_FORWARD(self, info, inst): info.append(inst) info.terminator = inst def op_BREAK_LOOP(self, info, inst): self.pop_syntax_block(info) info.append(inst) info.terminator = inst def op_RETURN_VALUE(self, info, inst): info.append(inst, retval=info.pop(), castval=info.make_temp()) info.terminator = inst def op_YIELD_VALUE(self, info, inst): val = info.pop() res = info.make_temp() info.append(inst, value=val, res=res) info.push(res) def op_SETUP_LOOP(self, info, inst): self.add_syntax_block(info, LoopBlock()) info.append(inst) def op_SETUP_WITH(self, info, inst): cm = info.pop() # the context-manager self.add_syntax_block(info, WithBlock()) yielded = info.make_temp() info.push(yielded) info.append(inst, contextmanager=cm) def op_WITH_CLEANUP(self, info, inst): """ Note: py2 only opcode """ # TOS is the return value of __exit__() info.pop() info.append(inst) def 
op_WITH_CLEANUP_START(self, info, inst): # TOS is the return value of __exit__() info.pop() info.append(inst) def op_WITH_CLEANUP_FINISH(self, info, inst): info.append(inst) def op_END_FINALLY(self, info, inst): info.append(inst) def op_POP_BLOCK(self, info, inst): block = self.pop_syntax_block(info) info.append(inst) def op_RAISE_VARARGS(self, info, inst): if inst.arg == 0: exc = None elif inst.arg == 1: exc = info.pop() else: raise ValueError("Multiple argument raise is not supported.") info.append(inst, exc=exc) def op_MAKE_FUNCTION(self, info, inst, MAKE_CLOSURE=False): name = info.pop() code = info.pop() closure = annotations = kwdefaults = defaults = None if inst.arg & 0x8: closure = info.pop() if inst.arg & 0x4: annotations = info.pop() if inst.arg & 0x2: kwdefaults = info.pop() if inst.arg & 0x1: defaults = info.pop() res = info.make_temp() info.append(inst, name=name, code=code, closure=closure, annotations=annotations, kwdefaults=kwdefaults, defaults=defaults, res=res) info.push(res) def op_MAKE_CLOSURE(self, info, inst): self.op_MAKE_FUNCTION(info, inst, MAKE_CLOSURE=True) def op_LOAD_CLOSURE(self, info, inst): res = info.make_temp() info.append(inst, res=res) info.push(res) #NOTE: Please see notes in `interpreter.py` surrounding the implementation # of LOAD_METHOD and CALL_METHOD. def op_LOAD_METHOD(self, *args, **kws): self.op_LOAD_ATTR(*args, **kws) def op_CALL_METHOD(self, *args, **kws): self.op_CALL_FUNCTION(*args, **kws) def _ignored(self, info, inst): pass class LoopBlock(object): __slots__ = ('stack_offset',) def __init__(self): self.stack_offset = None class WithBlock(object): __slots__ = ('stack_offset',) def __init__(self): self.stack_offset = None class BlockInfo(object): def __init__(self, block, offset, incoming_blocks): self.block = block self.offset = offset # The list of incoming BlockInfo objects (obtained by control # flow analysis). self.incoming_blocks = incoming_blocks self.stack = [] # Outgoing variables from this block: # { outgoing phi name -> var name } self.outgoing_phis = {} self.insts = [] self.tempct = 0 self._term = None self.stack_offset = None self.stack_effect = 0 self.syntax_blocks = None def __repr__(self): return "<%s at offset %d>" % (self.__class__.__name__, self.offset) def dump(self): print("offset", self.offset, "{") print(" stack: ", end='') pprint(self.stack) pprint(self.insts) print("}") def make_temp(self, prefix=''): self.tempct += 1 name = '$%s%s.%s' % (prefix, self.offset, self.tempct) return name def push(self, val): self.stack_effect += 1 self.stack.append(val) def pop(self, discard=False): """ Pop a variable from the stack, or request it from incoming blocks if the stack is empty. If *discard* is true, the variable isn't meant to be used anymore, which allows reducing the number of temporaries created. """ if not self.stack: self.stack_offset -= 1 if not discard: return self.make_incoming() else: self.stack_effect -= 1 return self.stack.pop() def peek(self, k): """ Return the k'th element back from the top of the stack. peek(1) is the top of the stack. """ num_pops = k top_k = [self.pop() for _ in range(num_pops)] r = top_k[-1] for i in range(num_pops - 1, -1, -1): self.push(top_k[i]) return r def make_incoming(self): """ Create an incoming variable (due to not enough values being available on our stack) and request its assignment from our incoming blocks' own stacks. 
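        For example, a block that pops more values than it pushed creates a
        '$phi<offset>.<n>' temporary here and asks every predecessor, via
        request_outgoing(), to bind its own top-of-stack variable to that
        name.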
""" assert self.incoming_blocks ret = self.make_temp('phi') for ib in self.incoming_blocks: stack_index = self.stack_offset + self.stack_effect ib.request_outgoing(self, ret, stack_index) return ret def request_outgoing(self, outgoing_block, phiname, stack_index): """ Request the assignment of the next available stack variable for block *outgoing_block* with target name *phiname*. """ if phiname in self.outgoing_phis: # If phiname was already requested, ignore this new request # (can happen with a diamond-shaped block flow structure). return if stack_index < self.stack_offset: assert self.incoming_blocks for ib in self.incoming_blocks: ib.request_outgoing(self, phiname, stack_index) else: varname = self.stack[stack_index - self.stack_offset] self.outgoing_phis[phiname] = varname @property def tos(self): r = self.pop() self.push(r) return r def append(self, inst, **kws): self.insts.append((inst.offset, kws)) @property def terminator(self): assert self._term is None return self._term @terminator.setter def terminator(self, inst): self._term = inst @property def active_try_block(self): """Try except not supported. See byteflow.py """ return None numba-0.55.1/numba/core/datamodel/000775 000000 000000 00000000000 14174536160 016657 5ustar00rootroot000000 000000 numba-0.55.1/numba/core/datamodel/__init__.py000664 000000 000000 00000000322 14174536160 020765 0ustar00rootroot000000 000000 from .manager import DataModelManager from .packer import ArgPacker, DataPacker from .registry import register_default, default_manager, register from .models import PrimitiveModel, CompositeModel, StructModel numba-0.55.1/numba/core/datamodel/manager.py000664 000000 000000 00000002465 14174536160 020652 0ustar00rootroot000000 000000 import weakref from numba.core import types class DataModelManager(object): """Manages mapping of FE types to their corresponding data model """ def __init__(self): # { numba type class -> model factory } self._handlers = {} # { numba type instance -> model instance } self._cache = weakref.WeakKeyDictionary() def register(self, fetypecls, handler): """Register the datamodel factory corresponding to a frontend-type class """ assert issubclass(fetypecls, types.Type) self._handlers[fetypecls] = handler def lookup(self, fetype): """Returns the corresponding datamodel given the frontend-type instance """ try: return self._cache[fetype] except KeyError: pass handler = self._handlers[type(fetype)] model = self._cache[fetype] = handler(self, fetype) return model def __getitem__(self, fetype): """Shorthand for lookup() """ return self.lookup(fetype) def copy(self): """ Make a copy of the manager. Use this to inherit from the default data model and specialize it for custom target. """ dmm = DataModelManager() dmm._handlers = self._handlers.copy() return dmm numba-0.55.1/numba/core/datamodel/models.py000664 000000 000000 00000126325 14174536160 020525 0ustar00rootroot000000 000000 from functools import partial from collections import deque from llvmlite import ir from numba.core.datamodel.registry import register_default from numba.core import types, cgutils from numba.np import numpy_support class DataModel(object): """ DataModel describe how a FE type is represented in the LLVM IR at different contexts. Contexts are: - value: representation inside function body. Maybe stored in stack. The representation here are flexible. - data: representation used when storing into containers (e.g. arrays). - argument: representation used for function argument. 
All composite types are flattened into multiple primitive types. - return: representation used for return argument. Throughout the compiler pipeline, an LLVM value is usually passed around in the "value" representation. All "as_"-prefixed functions convert from the "value" representation. All "from_"-prefixed functions convert to the "value" representation. """ def __init__(self, dmm, fe_type): self._dmm = dmm self._fe_type = fe_type @property def fe_type(self): return self._fe_type def get_value_type(self): raise NotImplementedError(self) def get_data_type(self): return self.get_value_type() def get_argument_type(self): """Return an LLVM type or a nested tuple of LLVM types """ return self.get_value_type() def get_return_type(self): return self.get_value_type() def as_data(self, builder, value): raise NotImplementedError(self) def as_argument(self, builder, value): """ Takes one LLVM value. Returns an LLVM value or a nested tuple of LLVM values. """ raise NotImplementedError(self) def as_return(self, builder, value): raise NotImplementedError(self) def from_data(self, builder, value): raise NotImplementedError(self) def from_argument(self, builder, value): """ Takes an LLVM value or a nested tuple of LLVM values. Returns one LLVM value. """ raise NotImplementedError(self) def from_return(self, builder, value): raise NotImplementedError(self) def load_from_data_pointer(self, builder, ptr, align=None): """ Load value from a pointer to data. This is the default implementation, sufficient for most purposes. """ return self.from_data(builder, builder.load(ptr, align=align)) def traverse(self, builder): """ Traverse contained members. Returns an iterable of contained (type, getter) pairs. Each getter is a one-argument function accepting an LLVM value. """ return [] def traverse_models(self): """ Recursively list all models involved in this model. """ return [self._dmm[t] for t in self.traverse_types()] def traverse_types(self): """ Recursively list all frontend types involved in this model. """ types = [self._fe_type] queue = deque([self]) while len(queue) > 0: dm = queue.popleft() for i_dm in dm.inner_models(): if i_dm._fe_type not in types: queue.append(i_dm) types.append(i_dm._fe_type) return types def inner_models(self): """ List all *inner* models. """ return [] def get_nrt_meminfo(self, builder, value): """ Returns the MemInfo object or None if it is not tracked. It is only defined for types.meminfo_pointer """ return None def has_nrt_meminfo(self): return False def contains_nrt_meminfo(self): """ Recursively check whether any contained type needs an NRT meminfo. """ return any(model.has_nrt_meminfo() for model in self.traverse_models()) def _compared_fields(self): return (type(self), self._fe_type) def __hash__(self): return hash(tuple(self._compared_fields())) def __eq__(self, other): if type(self) is type(other): return self._compared_fields() == other._compared_fields() else: return False def __ne__(self, other): return not self.__eq__(other) @register_default(types.Omitted) class OmittedArgDataModel(DataModel): """ A data model for omitted arguments. Only the "argument" representation is defined; other representations raise a NotImplementedError. """ # Omitted arguments use a dummy value type def get_value_type(self): return ir.LiteralStructType([]) # Omitted arguments don't produce any LLVM function argument.
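    # For instance, calling f(2) where f was compiled from `def f(x, y=1)`
    # types `y` as Omitted(1); the ArgPacker in packer.py then drops it from
    # the flattened signature because get_argument_type() returns ().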
def get_argument_type(self): return () def as_argument(self, builder, val): return () def from_argument(self, builder, val): assert val == (), val return None @register_default(types.Boolean) @register_default(types.BooleanLiteral) class BooleanModel(DataModel): _bit_type = ir.IntType(1) _byte_type = ir.IntType(8) def get_value_type(self): return self._bit_type def get_data_type(self): return self._byte_type def get_return_type(self): return self.get_data_type() def get_argument_type(self): return self.get_data_type() def as_data(self, builder, value): return builder.zext(value, self.get_data_type()) def as_argument(self, builder, value): return self.as_data(builder, value) def as_return(self, builder, value): return self.as_data(builder, value) def from_data(self, builder, value): ty = self.get_value_type() resalloca = cgutils.alloca_once(builder, ty) cond = builder.icmp_unsigned('==', value, value.type(0)) with builder.if_else(cond) as (then, otherwise): with then: builder.store(ty(0), resalloca) with otherwise: builder.store(ty(1), resalloca) return builder.load(resalloca) def from_argument(self, builder, value): return self.from_data(builder, value) def from_return(self, builder, value): return self.from_data(builder, value) class PrimitiveModel(DataModel): """A primitive type can be represented natively in the target in all usage contexts. """ def __init__(self, dmm, fe_type, be_type): super(PrimitiveModel, self).__init__(dmm, fe_type) self.be_type = be_type def get_value_type(self): return self.be_type def as_data(self, builder, value): return value def as_argument(self, builder, value): return value def as_return(self, builder, value): return value def from_data(self, builder, value): return value def from_argument(self, builder, value): return value def from_return(self, builder, value): return value class ProxyModel(DataModel): """ Helper class for models which delegate to another model. """ def get_value_type(self): return self._proxied_model.get_value_type() def get_data_type(self): return self._proxied_model.get_data_type() def get_return_type(self): return self._proxied_model.get_return_type() def get_argument_type(self): return self._proxied_model.get_argument_type() def as_data(self, builder, value): return self._proxied_model.as_data(builder, value) def as_argument(self, builder, value): return self._proxied_model.as_argument(builder, value) def as_return(self, builder, value): return self._proxied_model.as_return(builder, value) def from_data(self, builder, value): return self._proxied_model.from_data(builder, value) def from_argument(self, builder, value): return self._proxied_model.from_argument(builder, value) def from_return(self, builder, value): return self._proxied_model.from_return(builder, value) @register_default(types.EnumMember) @register_default(types.IntEnumMember) class EnumModel(ProxyModel): """ Enum members are represented exactly like their values. 
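    For example, a member of `class Color(enum.IntEnum): RED = 1` is stored
    using the data model of its underlying integer type.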
""" def __init__(self, dmm, fe_type): super(EnumModel, self).__init__(dmm, fe_type) self._proxied_model = dmm.lookup(fe_type.dtype) @register_default(types.Opaque) @register_default(types.PyObject) @register_default(types.RawPointer) @register_default(types.NoneType) @register_default(types.StringLiteral) @register_default(types.EllipsisType) @register_default(types.Function) @register_default(types.Type) @register_default(types.Object) @register_default(types.Module) @register_default(types.Phantom) @register_default(types.ContextManager) @register_default(types.Dispatcher) @register_default(types.ObjModeDispatcher) @register_default(types.ExceptionClass) @register_default(types.Dummy) @register_default(types.ExceptionInstance) @register_default(types.ExternalFunction) @register_default(types.EnumClass) @register_default(types.IntEnumClass) @register_default(types.NumberClass) @register_default(types.TypeRef) @register_default(types.NamedTupleClass) @register_default(types.DType) @register_default(types.RecursiveCall) @register_default(types.MakeFunctionLiteral) @register_default(types.Poison) class OpaqueModel(PrimitiveModel): """ Passed as opaque pointers """ _ptr_type = ir.IntType(8).as_pointer() def __init__(self, dmm, fe_type): be_type = self._ptr_type super(OpaqueModel, self).__init__(dmm, fe_type, be_type) @register_default(types.MemInfoPointer) class MemInfoModel(OpaqueModel): def inner_models(self): return [self._dmm.lookup(self._fe_type.dtype)] def has_nrt_meminfo(self): return True def get_nrt_meminfo(self, builder, value): return value @register_default(types.Integer) @register_default(types.IntegerLiteral) class IntegerModel(PrimitiveModel): def __init__(self, dmm, fe_type): be_type = ir.IntType(fe_type.bitwidth) super(IntegerModel, self).__init__(dmm, fe_type, be_type) @register_default(types.Float) class FloatModel(PrimitiveModel): def __init__(self, dmm, fe_type): if fe_type == types.float32: be_type = ir.FloatType() elif fe_type == types.float64: be_type = ir.DoubleType() else: raise NotImplementedError(fe_type) super(FloatModel, self).__init__(dmm, fe_type, be_type) @register_default(types.CPointer) class PointerModel(PrimitiveModel): def __init__(self, dmm, fe_type): self._pointee_model = dmm.lookup(fe_type.dtype) self._pointee_be_type = self._pointee_model.get_data_type() be_type = self._pointee_be_type.as_pointer() super(PointerModel, self).__init__(dmm, fe_type, be_type) @register_default(types.EphemeralPointer) class EphemeralPointerModel(PointerModel): def get_data_type(self): return self._pointee_be_type def as_data(self, builder, value): value = builder.load(value) return self._pointee_model.as_data(builder, value) def from_data(self, builder, value): raise NotImplementedError("use load_from_data_pointer() instead") def load_from_data_pointer(self, builder, ptr, align=None): return builder.bitcast(ptr, self.get_value_type()) @register_default(types.EphemeralArray) class EphemeralArrayModel(PointerModel): def __init__(self, dmm, fe_type): super(EphemeralArrayModel, self).__init__(dmm, fe_type) self._data_type = ir.ArrayType(self._pointee_be_type, self._fe_type.count) def get_data_type(self): return self._data_type def as_data(self, builder, value): values = [builder.load(cgutils.gep_inbounds(builder, value, i)) for i in range(self._fe_type.count)] return cgutils.pack_array(builder, values) def from_data(self, builder, value): raise NotImplementedError("use load_from_data_pointer() instead") def load_from_data_pointer(self, builder, ptr, align=None): return 
builder.bitcast(ptr, self.get_value_type()) @register_default(types.ExternalFunctionPointer) class ExternalFuncPointerModel(PrimitiveModel): def __init__(self, dmm, fe_type): sig = fe_type.sig # Since the function is non-Numba, there is no adaptation # of arguments and return value, hence get_value_type(). retty = dmm.lookup(sig.return_type).get_value_type() args = [dmm.lookup(t).get_value_type() for t in sig.args] be_type = ir.PointerType(ir.FunctionType(retty, args)) super(ExternalFuncPointerModel, self).__init__(dmm, fe_type, be_type) @register_default(types.UniTuple) @register_default(types.NamedUniTuple) @register_default(types.StarArgUniTuple) class UniTupleModel(DataModel): def __init__(self, dmm, fe_type): super(UniTupleModel, self).__init__(dmm, fe_type) self._elem_model = dmm.lookup(fe_type.dtype) self._count = len(fe_type) self._value_type = ir.ArrayType(self._elem_model.get_value_type(), self._count) self._data_type = ir.ArrayType(self._elem_model.get_data_type(), self._count) def get_value_type(self): return self._value_type def get_data_type(self): return self._data_type def get_return_type(self): return self.get_value_type() def get_argument_type(self): return (self._elem_model.get_argument_type(),) * self._count def as_argument(self, builder, value): out = [] for i in range(self._count): v = builder.extract_value(value, [i]) v = self._elem_model.as_argument(builder, v) out.append(v) return out def from_argument(self, builder, value): out = ir.Constant(self.get_value_type(), ir.Undefined) for i, v in enumerate(value): v = self._elem_model.from_argument(builder, v) out = builder.insert_value(out, v, [i]) return out def as_data(self, builder, value): out = ir.Constant(self.get_data_type(), ir.Undefined) for i in range(self._count): val = builder.extract_value(value, [i]) dval = self._elem_model.as_data(builder, val) out = builder.insert_value(out, dval, [i]) return out def from_data(self, builder, value): out = ir.Constant(self.get_value_type(), ir.Undefined) for i in range(self._count): val = builder.extract_value(value, [i]) dval = self._elem_model.from_data(builder, val) out = builder.insert_value(out, dval, [i]) return out def as_return(self, builder, value): return value def from_return(self, builder, value): return value def traverse(self, builder): def getter(i, value): return builder.extract_value(value, i) return [(self._fe_type.dtype, partial(getter, i)) for i in range(self._count)] def inner_models(self): return [self._elem_model] class CompositeModel(DataModel): """Any model that is composed of multiple other models should subclass from this. """ pass class StructModel(CompositeModel): _value_type = None _data_type = None def __init__(self, dmm, fe_type, members): super(StructModel, self).__init__(dmm, fe_type) if members: self._fields, self._members = zip(*members) else: self._fields = self._members = () self._models = tuple([self._dmm.lookup(t) for t in self._members]) def get_member_fe_type(self, name): """ StructModel-specific: get the Numba type of the field named *name*. 
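        For example, on the ArrayModel defined below,
        get_member_fe_type("data") returns types.CPointer(dtype).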
""" pos = self.get_field_position(name) return self._members[pos] def get_value_type(self): if self._value_type is None: self._value_type = ir.LiteralStructType([t.get_value_type() for t in self._models]) return self._value_type def get_data_type(self): if self._data_type is None: self._data_type = ir.LiteralStructType([t.get_data_type() for t in self._models]) return self._data_type def get_argument_type(self): return tuple([t.get_argument_type() for t in self._models]) def get_return_type(self): return self.get_data_type() def _as(self, methname, builder, value): extracted = [] for i, dm in enumerate(self._models): extracted.append(getattr(dm, methname)(builder, self.get(builder, value, i))) return tuple(extracted) def _from(self, methname, builder, value): struct = ir.Constant(self.get_value_type(), ir.Undefined) for i, (dm, val) in enumerate(zip(self._models, value)): v = getattr(dm, methname)(builder, val) struct = self.set(builder, struct, v, i) return struct def as_data(self, builder, value): """ Converts the LLVM struct in `value` into a representation suited for storing into arrays. Note ---- Current implementation rarely changes how types are represented for "value" and "data". This is usually a pointless rebuild of the immutable LLVM struct value. Luckily, LLVM optimization removes all redundancy. Sample usecase: Structures nested with pointers to other structures that can be serialized into a flat representation when storing into array. """ elems = self._as("as_data", builder, value) struct = ir.Constant(self.get_data_type(), ir.Undefined) for i, el in enumerate(elems): struct = builder.insert_value(struct, el, [i]) return struct def from_data(self, builder, value): """ Convert from "data" representation back into "value" representation. Usually invoked when loading from array. See notes in `as_data()` """ vals = [builder.extract_value(value, [i]) for i in range(len(self._members))] return self._from("from_data", builder, vals) def load_from_data_pointer(self, builder, ptr, align=None): values = [] for i, model in enumerate(self._models): elem_ptr = cgutils.gep_inbounds(builder, ptr, 0, i) val = model.load_from_data_pointer(builder, elem_ptr, align) values.append(val) struct = ir.Constant(self.get_value_type(), ir.Undefined) for i, val in enumerate(values): struct = self.set(builder, struct, val, i) return struct def as_argument(self, builder, value): return self._as("as_argument", builder, value) def from_argument(self, builder, value): return self._from("from_argument", builder, value) def as_return(self, builder, value): elems = self._as("as_data", builder, value) struct = ir.Constant(self.get_data_type(), ir.Undefined) for i, el in enumerate(elems): struct = builder.insert_value(struct, el, [i]) return struct def from_return(self, builder, value): vals = [builder.extract_value(value, [i]) for i in range(len(self._members))] return self._from("from_data", builder, vals) def get(self, builder, val, pos): """Get a field at the given position or the fieldname Args ---- builder: LLVM IRBuilder val: value to be inserted pos: int or str field index or field name Returns ------- Extracted value """ if isinstance(pos, str): pos = self.get_field_position(pos) return builder.extract_value(val, [pos], name="extracted." 
+ self._fields[pos]) def set(self, builder, stval, val, pos): """Set a field at the given position or the fieldname Args ---- builder: LLVM IRBuilder stval: LLVM struct value val: value to be inserted pos: int or str field index or field name Returns ------- A new LLVM struct with the value inserted """ if isinstance(pos, str): pos = self.get_field_position(pos) return builder.insert_value(stval, val, [pos], name="inserted." + self._fields[pos]) def get_field_position(self, field): try: return self._fields.index(field) except ValueError: raise KeyError("%s does not have a field named %r" % (self.__class__.__name__, field)) @property def field_count(self): return len(self._fields) def get_type(self, pos): """Get the frontend type (numba type) of a field given the position or the fieldname Args ---- pos: int or str field index or field name """ if isinstance(pos, str): pos = self.get_field_position(pos) return self._members[pos] def get_model(self, pos): """ Get the datamodel of a field given the position or the fieldname. Args ---- pos: int or str field index or field name """ return self._models[pos] def traverse(self, builder): def getter(k, value): if value.type != self.get_value_type(): args = self.get_value_type(), value.type raise TypeError("expecting {0} but got {1}".format(*args)) return self.get(builder, value, k) return [(self.get_type(k), partial(getter, k)) for k in self._fields] def inner_models(self): return self._models @register_default(types.Complex) class ComplexModel(StructModel): _element_type = NotImplemented def __init__(self, dmm, fe_type): members = [ ('real', fe_type.underlying_float), ('imag', fe_type.underlying_float), ] super(ComplexModel, self).__init__(dmm, fe_type, members) @register_default(types.LiteralList) @register_default(types.LiteralStrKeyDict) @register_default(types.Tuple) @register_default(types.NamedTuple) @register_default(types.StarArgTuple) class TupleModel(StructModel): def __init__(self, dmm, fe_type): members = [('f' + str(i), t) for i, t in enumerate(fe_type)] super(TupleModel, self).__init__(dmm, fe_type, members) @register_default(types.UnionType) class UnionModel(StructModel): def __init__(self, dmm, fe_type): members = [ ('tag', types.uintp), # XXX: it should really be a MemInfoPointer(types.voidptr) ('payload', types.Tuple.from_types(fe_type.types)), ] super(UnionModel, self).__init__(dmm, fe_type, members) @register_default(types.Pair) class PairModel(StructModel): def __init__(self, dmm, fe_type): members = [('first', fe_type.first_type), ('second', fe_type.second_type)] super(PairModel, self).__init__(dmm, fe_type, members) @register_default(types.ListPayload) class ListPayloadModel(StructModel): def __init__(self, dmm, fe_type): # The fields are mutable but the payload is always manipulated # by reference. This scheme allows mutations of an array to # be seen by its iterators. 
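        # The resulting payload layout is roughly
        # { intp size; intp allocated; bool dirty; dtype data[allocated]; }
        # where `data` is the inlined var-sized tail noted in the member
        # list below.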
members = [ ('size', types.intp), ('allocated', types.intp), # This member is only used only for reflected lists ('dirty', types.boolean), # Actually an inlined var-sized array ('data', fe_type.container.dtype), ] super(ListPayloadModel, self).__init__(dmm, fe_type, members) @register_default(types.List) class ListModel(StructModel): def __init__(self, dmm, fe_type): payload_type = types.ListPayload(fe_type) members = [ # The meminfo data points to a ListPayload ('meminfo', types.MemInfoPointer(payload_type)), # This member is only used only for reflected lists ('parent', types.pyobject), ] super(ListModel, self).__init__(dmm, fe_type, members) @register_default(types.ListIter) class ListIterModel(StructModel): def __init__(self, dmm, fe_type): payload_type = types.ListPayload(fe_type.container) members = [ # The meminfo data points to a ListPayload (shared with the # original list object) ('meminfo', types.MemInfoPointer(payload_type)), ('index', types.EphemeralPointer(types.intp)), ] super(ListIterModel, self).__init__(dmm, fe_type, members) @register_default(types.SetEntry) class SetEntryModel(StructModel): def __init__(self, dmm, fe_type): dtype = fe_type.set_type.dtype members = [ # -1 = empty, -2 = deleted ('hash', types.intp), ('key', dtype), ] super(SetEntryModel, self).__init__(dmm, fe_type, members) @register_default(types.SetPayload) class SetPayloadModel(StructModel): def __init__(self, dmm, fe_type): entry_type = types.SetEntry(fe_type.container) members = [ # Number of active + deleted entries ('fill', types.intp), # Number of active entries ('used', types.intp), # Allocated size - 1 (size being a power of 2) ('mask', types.intp), # Search finger ('finger', types.intp), # This member is only used only for reflected sets ('dirty', types.boolean), # Actually an inlined var-sized array ('entries', entry_type), ] super(SetPayloadModel, self).__init__(dmm, fe_type, members) @register_default(types.Set) class SetModel(StructModel): def __init__(self, dmm, fe_type): payload_type = types.SetPayload(fe_type) members = [ # The meminfo data points to a SetPayload ('meminfo', types.MemInfoPointer(payload_type)), # This member is only used only for reflected sets ('parent', types.pyobject), ] super(SetModel, self).__init__(dmm, fe_type, members) @register_default(types.SetIter) class SetIterModel(StructModel): def __init__(self, dmm, fe_type): payload_type = types.SetPayload(fe_type.container) members = [ # The meminfo data points to a SetPayload (shared with the # original set object) ('meminfo', types.MemInfoPointer(payload_type)), # The index into the entries table ('index', types.EphemeralPointer(types.intp)), ] super(SetIterModel, self).__init__(dmm, fe_type, members) @register_default(types.Array) @register_default(types.Buffer) @register_default(types.ByteArray) @register_default(types.Bytes) @register_default(types.MemoryView) @register_default(types.PyArray) class ArrayModel(StructModel): def __init__(self, dmm, fe_type): ndim = fe_type.ndim members = [ ('meminfo', types.MemInfoPointer(fe_type.dtype)), ('parent', types.pyobject), ('nitems', types.intp), ('itemsize', types.intp), ('data', types.CPointer(fe_type.dtype)), ('shape', types.UniTuple(types.intp, ndim)), ('strides', types.UniTuple(types.intp, ndim)), ] super(ArrayModel, self).__init__(dmm, fe_type, members) @register_default(types.ArrayFlags) class ArrayFlagsModel(StructModel): def __init__(self, dmm, fe_type): members = [ ('parent', fe_type.array_type), ] super(ArrayFlagsModel, self).__init__(dmm, fe_type, members) 
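# ---------------------------------------------------------------------------
# Illustrative sketch (not part of numba; kept commented out so import-time
# behaviour is unchanged, but runnable if uncommented): inspect the LLVM
# struct that ArrayModel above assigns to a 1d C-contiguous float64 array,
# via the default data model manager. The printed layout assumes a 64-bit
# platform (intp == i64).
#
#   from numba.core import types
#   from numba.core.datamodel import default_manager
#
#   arr_ty = types.Array(types.float64, 1, 'C')
#   model = default_manager.lookup(arr_ty)
#   print(model.get_value_type())
#   # -> {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}
#   #    meminfo, parent, nitems, itemsize, data, shape, strides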
@register_default(types.NestedArray) class NestedArrayModel(ArrayModel): def __init__(self, dmm, fe_type): self._be_type = dmm.lookup(fe_type.dtype).get_data_type() super(NestedArrayModel, self).__init__(dmm, fe_type) @register_default(types.Optional) class OptionalModel(StructModel): def __init__(self, dmm, fe_type): members = [ ('data', fe_type.type), ('valid', types.boolean), ] self._value_model = dmm.lookup(fe_type.type) super(OptionalModel, self).__init__(dmm, fe_type, members) def get_return_type(self): return self._value_model.get_return_type() def as_return(self, builder, value): raise NotImplementedError def from_return(self, builder, value): return self._value_model.from_return(builder, value) def traverse(self, builder): def get_data(value): valid = get_valid(value) data = self.get(builder, value, "data") return builder.select(valid, data, ir.Constant(data.type, None)) def get_valid(value): return self.get(builder, value, "valid") return [(self.get_type("data"), get_data), (self.get_type("valid"), get_valid)] @register_default(types.Record) class RecordModel(CompositeModel): def __init__(self, dmm, fe_type): super(RecordModel, self).__init__(dmm, fe_type) self._models = [self._dmm.lookup(t) for _, t in fe_type.members] self._be_type = ir.ArrayType(ir.IntType(8), fe_type.size) self._be_ptr_type = self._be_type.as_pointer() def get_value_type(self): """Passed around as reference to underlying data """ return self._be_ptr_type def get_argument_type(self): return self._be_ptr_type def get_return_type(self): return self._be_ptr_type def get_data_type(self): return self._be_type def as_data(self, builder, value): return builder.load(value) def from_data(self, builder, value): raise NotImplementedError("use load_from_data_pointer() instead") def as_argument(self, builder, value): return value def from_argument(self, builder, value): return value def as_return(self, builder, value): return value def from_return(self, builder, value): return value def load_from_data_pointer(self, builder, ptr, align=None): return builder.bitcast(ptr, self.get_value_type()) @register_default(types.UnicodeCharSeq) class UnicodeCharSeq(DataModel): def __init__(self, dmm, fe_type): super(UnicodeCharSeq, self).__init__(dmm, fe_type) charty = ir.IntType(numpy_support.sizeof_unicode_char * 8) self._be_type = ir.ArrayType(charty, fe_type.count) def get_value_type(self): return self._be_type def get_data_type(self): return self._be_type def as_data(self, builder, value): return value def from_data(self, builder, value): return value def as_return(self, builder, value): return value def from_return(self, builder, value): return value def as_argument(self, builder, value): return value def from_argument(self, builder, value): return value @register_default(types.CharSeq) class CharSeq(DataModel): def __init__(self, dmm, fe_type): super(CharSeq, self).__init__(dmm, fe_type) charty = ir.IntType(8) self._be_type = ir.ArrayType(charty, fe_type.count) def get_value_type(self): return self._be_type def get_data_type(self): return self._be_type def as_data(self, builder, value): return value def from_data(self, builder, value): return value def as_return(self, builder, value): return value def from_return(self, builder, value): return value def as_argument(self, builder, value): return value def from_argument(self, builder, value): return value class CContiguousFlatIter(StructModel): def __init__(self, dmm, fe_type, need_indices): assert fe_type.array_type.layout == 'C' array_type = fe_type.array_type dtype = 
array_type.dtype ndim = array_type.ndim members = [('array', array_type), ('stride', types.intp), ('index', types.EphemeralPointer(types.intp)), ] if need_indices: # For ndenumerate() members.append(('indices', types.EphemeralArray(types.intp, ndim))) super(CContiguousFlatIter, self).__init__(dmm, fe_type, members) class FlatIter(StructModel): def __init__(self, dmm, fe_type): array_type = fe_type.array_type dtype = array_type.dtype ndim = array_type.ndim members = [('array', array_type), ('pointers', types.EphemeralArray(types.CPointer(dtype), ndim)), ('indices', types.EphemeralArray(types.intp, ndim)), ('exhausted', types.EphemeralPointer(types.boolean)), ] super(FlatIter, self).__init__(dmm, fe_type, members) @register_default(types.UniTupleIter) class UniTupleIter(StructModel): def __init__(self, dmm, fe_type): members = [('index', types.EphemeralPointer(types.intp)), ('tuple', fe_type.container,)] super(UniTupleIter, self).__init__(dmm, fe_type, members) @register_default(types.misc.SliceLiteral) @register_default(types.SliceType) class SliceModel(StructModel): def __init__(self, dmm, fe_type): members = [('start', types.intp), ('stop', types.intp), ('step', types.intp), ] super(SliceModel, self).__init__(dmm, fe_type, members) @register_default(types.NPDatetime) @register_default(types.NPTimedelta) class NPDatetimeModel(PrimitiveModel): def __init__(self, dmm, fe_type): be_type = ir.IntType(64) super(NPDatetimeModel, self).__init__(dmm, fe_type, be_type) @register_default(types.ArrayIterator) class ArrayIterator(StructModel): def __init__(self, dmm, fe_type): # We use an unsigned index to avoid the cost of negative index tests. members = [('index', types.EphemeralPointer(types.uintp)), ('array', fe_type.array_type)] super(ArrayIterator, self).__init__(dmm, fe_type, members) @register_default(types.EnumerateType) class EnumerateType(StructModel): def __init__(self, dmm, fe_type): members = [('count', types.EphemeralPointer(types.intp)), ('iter', fe_type.source_type)] super(EnumerateType, self).__init__(dmm, fe_type, members) @register_default(types.ZipType) class ZipType(StructModel): def __init__(self, dmm, fe_type): members = [('iter%d' % i, source_type.iterator_type) for i, source_type in enumerate(fe_type.source_types)] super(ZipType, self).__init__(dmm, fe_type, members) @register_default(types.RangeIteratorType) class RangeIteratorType(StructModel): def __init__(self, dmm, fe_type): int_type = fe_type.yield_type members = [('iter', types.EphemeralPointer(int_type)), ('stop', int_type), ('step', int_type), ('count', types.EphemeralPointer(int_type))] super(RangeIteratorType, self).__init__(dmm, fe_type, members) @register_default(types.Generator) class GeneratorModel(CompositeModel): def __init__(self, dmm, fe_type): super(GeneratorModel, self).__init__(dmm, fe_type) # XXX Fold this in DataPacker? self._arg_models = [self._dmm.lookup(t) for t in fe_type.arg_types if not isinstance(t, types.Omitted)] self._state_models = [self._dmm.lookup(t) for t in fe_type.state_types] self._args_be_type = ir.LiteralStructType( [t.get_data_type() for t in self._arg_models]) self._state_be_type = ir.LiteralStructType( [t.get_data_type() for t in self._state_models]) # The whole generator closure self._be_type = ir.LiteralStructType( [self._dmm.lookup(types.int32).get_value_type(), self._args_be_type, self._state_be_type]) self._be_ptr_type = self._be_type.as_pointer() def get_value_type(self): """ The generator closure is passed around as a reference. 
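        (i.e. a pointer to the {i32 counter, packed args, packed state}
        closure struct assembled in __init__ above)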
""" return self._be_ptr_type def get_argument_type(self): return self._be_ptr_type def get_return_type(self): return self._be_type def get_data_type(self): return self._be_type def as_argument(self, builder, value): return value def from_argument(self, builder, value): return value def as_return(self, builder, value): return self.as_data(builder, value) def from_return(self, builder, value): return self.from_data(builder, value) def as_data(self, builder, value): return builder.load(value) def from_data(self, builder, value): stack = cgutils.alloca_once(builder, value.type) builder.store(value, stack) return stack @register_default(types.ArrayCTypes) class ArrayCTypesModel(StructModel): def __init__(self, dmm, fe_type): # ndim = fe_type.ndim members = [('data', types.CPointer(fe_type.dtype)), ('meminfo', types.MemInfoPointer(fe_type.dtype))] super(ArrayCTypesModel, self).__init__(dmm, fe_type, members) @register_default(types.RangeType) class RangeModel(StructModel): def __init__(self, dmm, fe_type): int_type = fe_type.iterator_type.yield_type members = [('start', int_type), ('stop', int_type), ('step', int_type)] super(RangeModel, self).__init__(dmm, fe_type, members) # ============================================================================= @register_default(types.NumpyNdIndexType) class NdIndexModel(StructModel): def __init__(self, dmm, fe_type): ndim = fe_type.ndim members = [('shape', types.UniTuple(types.intp, ndim)), ('indices', types.EphemeralArray(types.intp, ndim)), ('exhausted', types.EphemeralPointer(types.boolean)), ] super(NdIndexModel, self).__init__(dmm, fe_type, members) @register_default(types.NumpyFlatType) def handle_numpy_flat_type(dmm, ty): if ty.array_type.layout == 'C': return CContiguousFlatIter(dmm, ty, need_indices=False) else: return FlatIter(dmm, ty) @register_default(types.NumpyNdEnumerateType) def handle_numpy_ndenumerate_type(dmm, ty): if ty.array_type.layout == 'C': return CContiguousFlatIter(dmm, ty, need_indices=True) else: return FlatIter(dmm, ty) @register_default(types.BoundFunction) def handle_bound_function(dmm, ty): # The same as the underlying type return dmm[ty.this] @register_default(types.NumpyNdIterType) class NdIter(StructModel): def __init__(self, dmm, fe_type): array_types = fe_type.arrays ndim = fe_type.ndim shape_len = ndim if fe_type.need_shaped_indexing else 1 members = [('exhausted', types.EphemeralPointer(types.boolean)), ('arrays', types.Tuple(array_types)), # The iterator's main shape and indices ('shape', types.UniTuple(types.intp, shape_len)), ('indices', types.EphemeralArray(types.intp, shape_len)), ] # Indexing state for the various sub-iterators # XXX use a tuple instead? for i, sub in enumerate(fe_type.indexers): kind, start_dim, end_dim, _ = sub member_name = 'index%d' % i if kind == 'flat': # A single index into the flattened array members.append((member_name, types.EphemeralPointer(types.intp))) elif kind in ('scalar', 'indexed', '0d'): # Nothing required pass else: assert 0 # Slots holding values of the scalar args # XXX use a tuple instead? 
for i, ty in enumerate(fe_type.arrays): if not isinstance(ty, types.Array): member_name = 'scalar%d' % i members.append((member_name, types.EphemeralPointer(ty))) super(NdIter, self).__init__(dmm, fe_type, members) @register_default(types.DeferredType) class DeferredStructModel(CompositeModel): def __init__(self, dmm, fe_type): super(DeferredStructModel, self).__init__(dmm, fe_type) self.typename = "deferred.{0}".format(id(fe_type)) self.actual_fe_type = fe_type.get() def get_value_type(self): return ir.global_context.get_identified_type(self.typename + '.value') def get_data_type(self): return ir.global_context.get_identified_type(self.typename + '.data') def get_argument_type(self): return self._actual_model.get_argument_type() def as_argument(self, builder, value): inner = self.get(builder, value) return self._actual_model.as_argument(builder, inner) def from_argument(self, builder, value): res = self._actual_model.from_argument(builder, value) return self.set(builder, self.make_uninitialized(), res) def from_data(self, builder, value): self._define() elem = self.get(builder, value) value = self._actual_model.from_data(builder, elem) out = self.make_uninitialized() return self.set(builder, out, value) def as_data(self, builder, value): self._define() elem = self.get(builder, value) value = self._actual_model.as_data(builder, elem) out = self.make_uninitialized(kind='data') return self.set(builder, out, value) def from_return(self, builder, value): return value def as_return(self, builder, value): return value def get(self, builder, value): return builder.extract_value(value, [0]) def set(self, builder, value, content): return builder.insert_value(value, content, [0]) def make_uninitialized(self, kind='value'): self._define() if kind == 'value': ty = self.get_value_type() else: ty = self.get_data_type() return ir.Constant(ty, ir.Undefined) def _define(self): valty = self.get_value_type() self._define_value_type(valty) datty = self.get_data_type() self._define_data_type(datty) def _define_value_type(self, value_type): if value_type.is_opaque: value_type.set_body(self._actual_model.get_value_type()) def _define_data_type(self, data_type): if data_type.is_opaque: data_type.set_body(self._actual_model.get_data_type()) @property def _actual_model(self): return self._dmm.lookup(self.actual_fe_type) def traverse(self, builder): return [(self.actual_fe_type, lambda value: builder.extract_value(value, [0]))] @register_default(types.StructRefPayload) class StructPayloadModel(StructModel): """Model for the payload of a mutable struct """ def __init__(self, dmm, fe_typ): members = tuple(fe_typ.field_dict.items()) super().__init__(dmm, fe_typ, members) class StructRefModel(StructModel): """Model for a mutable struct. A reference to the payload """ def __init__(self, dmm, fe_typ): dtype = fe_typ.get_data_type() members = [ ("meminfo", types.MemInfoPointer(dtype)), ] super().__init__(dmm, fe_typ, members) numba-0.55.1/numba/core/datamodel/packer.py000664 000000 000000 00000014765 14174536160 020513 0ustar00rootroot000000 000000 from collections import deque from numba.core import types, cgutils class DataPacker(object): """ A helper to pack a number of typed arguments into a data structure. Omitted arguments (i.e. values with the type `Omitted`) are automatically skipped. """ # XXX should DataPacker be a model for a dedicated type? 
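# --- Illustrative sketch (not part of the numba source) ---
# The core bookkeeping in DataPacker is a "pack map": the indices of the
# frontend types that survive packing once Omitted arguments are skipped.
# Below is a simplified pure-Python analogue, using a hypothetical OMITTED
# sentinel in place of types.Omitted; build_pack_map/pack/unpack_into are
# illustrative names only, not numba APIs.
OMITTED = object()

def build_pack_map(fe_types):
    # positions of the non-omitted entries, in order
    return [i for i, ty in enumerate(fe_types) if ty is not OMITTED]

def pack(values, pack_map):
    # keep only the surviving positions, flattened into a tuple
    return tuple(values[i] for i in pack_map)

def unpack_into(packed, pack_map, formal_list):
    # scatter packed values back onto their formal argument positions
    for packed_idx, formal_idx in enumerate(pack_map):
        formal_list[formal_idx] = packed[packed_idx]

# e.g. pack(['a', 'b', 'c'], build_pack_map(['T0', OMITTED, 'T2'])) == ('a', 'c')
# --- end sketch ---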
def __init__(self, dmm, fe_types): self._dmm = dmm self._fe_types = fe_types self._models = [dmm.lookup(ty) for ty in fe_types] self._pack_map = [] self._be_types = [] for i, ty in enumerate(fe_types): if not isinstance(ty, types.Omitted): self._pack_map.append(i) self._be_types.append(self._models[i].get_data_type()) def as_data(self, builder, values): """ Return the given values packed as a data structure. """ elems = [self._models[i].as_data(builder, values[i]) for i in self._pack_map] return cgutils.make_anonymous_struct(builder, elems) def _do_load(self, builder, ptr, formal_list=None): res = [] for i, i_formal in enumerate(self._pack_map): elem_ptr = cgutils.gep_inbounds(builder, ptr, 0, i) val = self._models[i_formal].load_from_data_pointer(builder, elem_ptr) if formal_list is None: res.append((self._fe_types[i_formal], val)) else: formal_list[i_formal] = val return res def load(self, builder, ptr): """ Load the packed values and return a list of (type, value) tuples. """ return self._do_load(builder, ptr) def load_into(self, builder, ptr, formal_list): """ Load the packed values into a sequence indexed by formal argument number (skipping any Omitted position). """ self._do_load(builder, ptr, formal_list) class ArgPacker(object): """ Compute the position for each high-level typed argument. It flattens every composite argument into primitive types. It maintains a position map for unflattening the arguments. Since structs (esp. nested structs) have specific ABI requirements (e.g. alignment, pointer address-space, ...) on different architectures (e.g. OpenCL, CUDA), flattening composite argument types simplifies the call setup from the Python side. Functions then receive only simple primitive types, of which there are only a handful. """ def __init__(self, dmm, fe_args): self._dmm = dmm self._fe_args = fe_args self._nargs = len(fe_args) self._dm_args = [] argtys = [] for ty in fe_args: dm = self._dmm.lookup(ty) self._dm_args.append(dm) argtys.append(dm.get_argument_type()) self._unflattener = _Unflattener(argtys) self._be_args = list(_flatten(argtys)) def as_arguments(self, builder, values): """Flatten all argument values """ if len(values) != self._nargs: raise TypeError("invalid number of args: expected %d, got %d" % (self._nargs, len(values))) if not values: return () args = [dm.as_argument(builder, val) for dm, val in zip(self._dm_args, values) ] args = tuple(_flatten(args)) return args def from_arguments(self, builder, args): """Unflatten all argument values """ valtree = self._unflattener.unflatten(args) values = [dm.from_argument(builder, val) for dm, val in zip(self._dm_args, valtree) ] return values def assign_names(self, args, names): """Assign a name to each flattened argument value. """ valtree = self._unflattener.unflatten(args) for aval, aname in zip(valtree, names): self._assign_names(aval, aname) def _assign_names(self, val_or_nested, name, depth=()): if isinstance(val_or_nested, (tuple, list)): for pos, aval in enumerate(val_or_nested): self._assign_names(aval, name, depth=depth + (pos,)) else: postfix = '.'.join(map(str, depth)) parts = [name, postfix] val_or_nested.name = '.'.join(filter(bool, parts)) @property def argument_types(self): """Return a list of LLVM types that are results of flattening composite types. """ return tuple(ty for ty in self._be_args if ty != ()) def _flatten(iterable): """ Flatten a nested iterable of (tuple, list).
""" def rec(iterable): for i in iterable: if isinstance(i, (tuple, list)): for j in rec(i): yield j else: yield i return rec(iterable) _PUSH_LIST = 1 _APPEND_NEXT_VALUE = 2 _APPEND_EMPTY_TUPLE = 3 _POP = 4 class _Unflattener(object): """ An object used to unflatten nested sequences after a given pattern (an arbitrarily nested sequence). The pattern shows the nested sequence shape desired when unflattening; the values it contains are irrelevant. """ def __init__(self, pattern): self._code = self._build_unflatten_code(pattern) def _build_unflatten_code(self, iterable): """Build the unflatten opcode sequence for the given *iterable* structure (an iterable of nested sequences). """ code = [] def rec(iterable): for i in iterable: if isinstance(i, (tuple, list)): if len(i) > 0: code.append(_PUSH_LIST) rec(i) code.append(_POP) else: code.append(_APPEND_EMPTY_TUPLE) else: code.append(_APPEND_NEXT_VALUE) rec(iterable) return code def unflatten(self, flatiter): """Rebuild a nested tuple structure. """ vals = deque(flatiter) res = [] cur = res stack = [] for op in self._code: if op is _PUSH_LIST: stack.append(cur) cur.append([]) cur = cur[-1] elif op is _APPEND_NEXT_VALUE: cur.append(vals.popleft()) elif op is _APPEND_EMPTY_TUPLE: cur.append(()) elif op is _POP: cur = stack.pop() assert not stack, stack assert not vals, vals return res numba-0.55.1/numba/core/datamodel/registry.py000664 000000 000000 00000000640 14174536160 021101 0ustar00rootroot000000 000000 import functools from .manager import DataModelManager def register(dmm, typecls): """Used as decorator to simplify datamodel registration. Returns the object being decorated so that chaining is possible. """ def wraps(fn): dmm.register(typecls, fn) return fn return wraps default_manager = DataModelManager() register_default = functools.partial(register, default_manager) numba-0.55.1/numba/core/datamodel/testing.py000664 000000 000000 00000012342 14174536160 020710 0ustar00rootroot000000 000000 from llvmlite import ir from llvmlite import binding as ll from numba.core import datamodel import unittest class DataModelTester(unittest.TestCase): """ Test the implementation of a DataModel for a frontend type. """ fe_type = NotImplemented def setUp(self): self.module = ir.Module() self.datamodel = datamodel.default_manager[self.fe_type] def test_as_arg(self): """ - Is as_arg() and from_arg() implemented? - Are they the inverse of each other? 
""" fnty = ir.FunctionType(ir.VoidType(), []) function = ir.Function(self.module, fnty, name="test_as_arg") builder = ir.IRBuilder() builder.position_at_end(function.append_basic_block()) undef_value = ir.Constant(self.datamodel.get_value_type(), None) args = self.datamodel.as_argument(builder, undef_value) self.assertIsNot(args, NotImplemented, "as_argument returned " "NotImplementedError") if isinstance(args, (tuple, list)): def recur_tuplize(args, func=None): for arg in args: if isinstance(arg, (tuple, list)): yield tuple(recur_tuplize(arg, func=func)) else: if func is None: yield arg else: yield func(arg) argtypes = tuple(recur_tuplize(args, func=lambda x: x.type)) exptypes = tuple(recur_tuplize( self.datamodel.get_argument_type())) self.assertEqual(exptypes, argtypes) else: self.assertEqual(args.type, self.datamodel.get_argument_type()) rev_value = self.datamodel.from_argument(builder, args) self.assertEqual(rev_value.type, self.datamodel.get_value_type()) builder.ret_void() # end function # Ensure valid LLVM generation materialized = ll.parse_assembly(str(self.module)) str(materialized) def test_as_return(self): """ - Is as_return() and from_return() implemented? - Are they the inverse of each other? """ fnty = ir.FunctionType(ir.VoidType(), []) function = ir.Function(self.module, fnty, name="test_as_return") builder = ir.IRBuilder() builder.position_at_end(function.append_basic_block()) undef_value = ir.Constant(self.datamodel.get_value_type(), None) ret = self.datamodel.as_return(builder, undef_value) self.assertIsNot(ret, NotImplemented, "as_return returned " "NotImplementedError") self.assertEqual(ret.type, self.datamodel.get_return_type()) rev_value = self.datamodel.from_return(builder, ret) self.assertEqual(rev_value.type, self.datamodel.get_value_type()) builder.ret_void() # end function # Ensure valid LLVM generation materialized = ll.parse_assembly(str(self.module)) str(materialized) class SupportAsDataMixin(object): """Test as_data() and from_data() """ # XXX test load_from_data_pointer() as well def test_as_data(self): fnty = ir.FunctionType(ir.VoidType(), []) function = ir.Function(self.module, fnty, name="test_as_data") builder = ir.IRBuilder() builder.position_at_end(function.append_basic_block()) undef_value = ir.Constant(self.datamodel.get_value_type(), None) data = self.datamodel.as_data(builder, undef_value) self.assertIsNot(data, NotImplemented, "as_data returned NotImplemented") self.assertEqual(data.type, self.datamodel.get_data_type()) rev_value = self.datamodel.from_data(builder, data) self.assertEqual(rev_value.type, self.datamodel.get_value_type()) builder.ret_void() # end function # Ensure valid LLVM generation materialized = ll.parse_assembly(str(self.module)) str(materialized) class NotSupportAsDataMixin(object): """Ensure as_data() and from_data() raise NotImplementedError. 
""" def test_as_data_not_supported(self): fnty = ir.FunctionType(ir.VoidType(), []) function = ir.Function(self.module, fnty, name="test_as_data") builder = ir.IRBuilder() builder.position_at_end(function.append_basic_block()) undef_value = ir.Constant(self.datamodel.get_value_type(), None) with self.assertRaises(NotImplementedError): data = self.datamodel.as_data(builder, undef_value) with self.assertRaises(NotImplementedError): rev_data = self.datamodel.from_data(builder, undef_value) class DataModelTester_SupportAsDataMixin(DataModelTester, SupportAsDataMixin): pass class DataModelTester_NotSupportAsDataMixin(DataModelTester, NotSupportAsDataMixin): pass def test_factory(support_as_data=True): """A helper for returning a unittest TestCase for testing """ if support_as_data: return DataModelTester_SupportAsDataMixin else: return DataModelTester_NotSupportAsDataMixin numba-0.55.1/numba/core/debuginfo.py000664 000000 000000 00000056053 14174536160 017252 0ustar00rootroot000000 000000 """ Implements helpers to build LLVM debuginfo. """ import abc import os.path from contextlib import contextmanager from llvmlite import ir from numba.core import cgutils, types from numba.core.datamodel.models import ComplexModel, UniTupleModel from numba.core import config @contextmanager def suspend_emission(builder): """Suspends the emission of debug_metadata for the duration of the context managed block.""" ref = builder.debug_metadata builder.debug_metadata = None try: yield finally: builder.debug_metadata = ref class AbstractDIBuilder(metaclass=abc.ABCMeta): @abc.abstractmethod def mark_variable(self, builder, allocavalue, name, lltype, size, line, datamodel=None, argidx=None): """Emit debug info for the variable. """ pass @abc.abstractmethod def mark_location(self, builder, line): """Emit source location information to the given IRBuilder. """ pass @abc.abstractmethod def mark_subprogram(self, function, qualname, argnames, argtypes, line): """Emit source location information for the given function. """ pass @abc.abstractmethod def initialize(self): """Initialize the debug info. An opportunity for the debuginfo to prepare any necessary data structures. """ @abc.abstractmethod def finalize(self): """Finalize the debuginfo by emitting all necessary metadata. """ pass class DummyDIBuilder(AbstractDIBuilder): def __init__(self, module, filepath, cgctx): pass def mark_variable(self, builder, allocavalue, name, lltype, size, line, datamodel=None, argidx=None): pass def mark_location(self, builder, line): pass def mark_subprogram(self, function, qualname, argnames, argtypes, line): pass def initialize(self): pass def finalize(self): pass _BYTE_SIZE = 8 class DIBuilder(AbstractDIBuilder): DWARF_VERSION = 4 DEBUG_INFO_VERSION = 3 DBG_CU_NAME = 'llvm.dbg.cu' _DEBUG = False def __init__(self, module, filepath, cgctx): self.module = module self.filepath = os.path.abspath(filepath) self.difile = self._di_file() self.subprograms = [] self.cgctx = cgctx self.initialize() def initialize(self): # Create the compile unit now because it is referenced when # constructing subprograms self.dicompileunit = self._di_compile_unit() def _var_type(self, lltype, size, datamodel=None): if self._DEBUG: print("-->", lltype, size, datamodel, getattr(datamodel, 'fe_type', 'NO FE TYPE')) m = self.module bitsize = _BYTE_SIZE * size int_type = ir.IntType, real_type = ir.FloatType, ir.DoubleType # For simple numeric types, choose the closest encoding. # We treat all integers as unsigned when there's no known datamodel. 
if isinstance(lltype, int_type + real_type): if datamodel is None: # This is probably something like an `i8*` member of a struct name = str(lltype) if isinstance(lltype, int_type): ditok = 'DW_ATE_unsigned' else: ditok = 'DW_ATE_float' else: # This is probably a known int/float scalar type name = str(datamodel.fe_type) if isinstance(datamodel.fe_type, types.Integer): if datamodel.fe_type.signed: ditok = 'DW_ATE_signed' else: ditok = 'DW_ATE_unsigned' else: ditok = 'DW_ATE_float' mdtype = m.add_debug_info('DIBasicType', { 'name': name, 'size': bitsize, 'encoding': ir.DIToken(ditok), }) elif isinstance(datamodel, ComplexModel): # TODO: Is there a better way of determining "this is a complex # number"? # # NOTE: Commented below is the way to generate the metadata for a # C99 complex type that's directly supported by DWARF. Numba however # generates a struct with real/imag cf. CPython to give a more # pythonic feel to inspection. # # mdtype = m.add_debug_info('DIBasicType', { # 'name': f"{datamodel.fe_type} ({str(lltype)})", # 'size': bitsize, # 'encoding': ir.DIToken('DW_ATE_complex_float'), #}) meta = [] offset = 0 for ix, name in enumerate(('real', 'imag')): component = lltype.elements[ix] component_size = self.cgctx.get_abi_sizeof(component) component_basetype = m.add_debug_info('DIBasicType', { 'name': str(component), 'size': _BYTE_SIZE * component_size, # bits 'encoding': ir.DIToken('DW_ATE_float'), }) derived_type = m.add_debug_info('DIDerivedType', { 'tag': ir.DIToken('DW_TAG_member'), 'name': name, 'baseType': component_basetype, 'size': _BYTE_SIZE * component_size, # DW_TAG_member size is in bits 'offset': offset, }) meta.append(derived_type) offset += (_BYTE_SIZE * component_size) # offset is in bits mdtype = m.add_debug_info('DICompositeType', { 'tag': ir.DIToken('DW_TAG_structure_type'), 'name': f"{datamodel.fe_type} ({str(lltype)})", 'identifier': str(lltype), 'elements': m.add_metadata(meta), 'size': offset, }, is_distinct=True) elif isinstance(datamodel, UniTupleModel): element = lltype.element el_size = self.cgctx.get_abi_sizeof(element) basetype = self._var_type(element, el_size) name = f"{datamodel.fe_type} ({str(lltype)})" count = size // el_size mdrange = m.add_debug_info('DISubrange', { 'count': count, }) mdtype = m.add_debug_info('DICompositeType', { 'tag': ir.DIToken('DW_TAG_array_type'), 'baseType': basetype, 'name': name, 'size': bitsize, 'identifier': str(lltype), 'elements': m.add_metadata([mdrange]), }) elif isinstance(lltype, ir.PointerType): model = getattr(datamodel, '_pointee_model', None) basetype = self._var_type(lltype.pointee, self.cgctx.get_abi_sizeof(lltype.pointee), model) mdtype = m.add_debug_info('DIDerivedType', { 'tag': ir.DIToken('DW_TAG_pointer_type'), 'baseType': basetype, 'size': _BYTE_SIZE * self.cgctx.get_abi_sizeof(lltype) }) elif isinstance(lltype, ir.LiteralStructType): # Struct type meta = [] offset = 0 if datamodel is None or not datamodel.inner_models(): name = f"Anonymous struct ({str(lltype)})" for field_id, element in enumerate(lltype.elements): size = self.cgctx.get_abi_sizeof(element) basetype = self._var_type(element, size) derived_type = m.add_debug_info('DIDerivedType', { 'tag': ir.DIToken('DW_TAG_member'), 'name': f'', 'baseType': basetype, 'size': _BYTE_SIZE * size, # DW_TAG_member size is in bits 'offset': offset, }) meta.append(derived_type) offset += (_BYTE_SIZE * size) # offset is in bits else: name = f"{datamodel.fe_type} ({str(lltype)})" for element, field, model in zip(lltype.elements, datamodel._fields, 
datamodel.inner_models()): size = self.cgctx.get_abi_sizeof(element) basetype = self._var_type(element, size, datamodel=model) derived_type = m.add_debug_info('DIDerivedType', { 'tag': ir.DIToken('DW_TAG_member'), 'name': field, 'baseType': basetype, 'size': _BYTE_SIZE * size, # DW_TAG_member size is in bits 'offset': offset, }) meta.append(derived_type) offset += (_BYTE_SIZE * size) # offset is in bits mdtype = m.add_debug_info('DICompositeType', { 'tag': ir.DIToken('DW_TAG_structure_type'), 'name': name, 'identifier': str(lltype), 'elements': m.add_metadata(meta), 'size': offset, }, is_distinct=True) elif isinstance(lltype, ir.ArrayType): element = lltype.element el_size = self.cgctx.get_abi_sizeof(element) basetype = self._var_type(element, el_size) count = size // el_size mdrange = m.add_debug_info('DISubrange', { 'count': count, }) mdtype = m.add_debug_info('DICompositeType', { 'tag': ir.DIToken('DW_TAG_array_type'), 'baseType': basetype, 'name': str(lltype), 'size': bitsize, 'identifier': str(lltype), 'elements': m.add_metadata([mdrange]), }) else: # For all other types, describe it as sequence of bytes count = size mdrange = m.add_debug_info('DISubrange', { 'count': count, }) mdbase = m.add_debug_info('DIBasicType', { 'name': 'byte', 'size': _BYTE_SIZE, 'encoding': ir.DIToken('DW_ATE_unsigned_char'), }) mdtype = m.add_debug_info('DICompositeType', { 'tag': ir.DIToken('DW_TAG_array_type'), 'baseType': mdbase, 'name': str(lltype), 'size': bitsize, 'identifier': str(lltype), 'elements': m.add_metadata([mdrange]), }) return mdtype def mark_variable(self, builder, allocavalue, name, lltype, size, line, datamodel=None, argidx=None): arg_index = 0 if argidx is None else argidx m = self.module fnty = ir.FunctionType(ir.VoidType(), [ir.MetaDataType()] * 3) decl = cgutils.get_or_insert_function(m, fnty, 'llvm.dbg.declare') mdtype = self._var_type(lltype, size, datamodel=datamodel) name = name.replace('.', '$') # for gdb to work correctly mdlocalvar = m.add_debug_info('DILocalVariable', { 'name': name, 'arg': arg_index, 'scope': self.subprograms[-1], 'file': self.difile, 'line': line, 'type': mdtype, }) mdexpr = m.add_debug_info('DIExpression', {}) return builder.call(decl, [allocavalue, mdlocalvar, mdexpr]) def mark_location(self, builder, line): builder.debug_metadata = self._add_location(line) def mark_subprogram(self, function, qualname, argnames, argtypes, line): name = qualname argmap = dict(zip(argnames, argtypes)) di_subp = self._add_subprogram(name=name, linkagename=function.name, line=line, function=function, argmap=argmap) function.set_metadata("dbg", di_subp) # Don't marked alwaysinline functions as noinline. 
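# LLVM rejects a function carrying both 'alwaysinline' and 'noinline', hence
# 'noinline' is only added when 'alwaysinline' is absent.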
if 'alwaysinline' not in function.attributes: # disable inlining for this function for easier debugging function.attributes.add('noinline') def finalize(self): dbgcu = cgutils.get_or_insert_named_metadata(self.module, self.DBG_CU_NAME) dbgcu.add(self.dicompileunit) self._set_module_flags() # # Internal APIs # def _set_module_flags(self): """Set the module flags metadata """ module = self.module mflags = cgutils.get_or_insert_named_metadata(module, 'llvm.module.flags') # Set *require* behavior to warning # See http://llvm.org/docs/LangRef.html#module-flags-metadata require_warning_behavior = self._const_int(2) if self.DWARF_VERSION is not None: dwarf_version = module.add_metadata([ require_warning_behavior, "Dwarf Version", self._const_int(self.DWARF_VERSION) ]) if dwarf_version not in mflags.operands: mflags.add(dwarf_version) debuginfo_version = module.add_metadata([ require_warning_behavior, "Debug Info Version", self._const_int(self.DEBUG_INFO_VERSION) ]) if debuginfo_version not in mflags.operands: mflags.add(debuginfo_version) def _add_subprogram(self, name, linkagename, line, function, argmap): """Emit subprogram metadata """ subp = self._di_subprogram(name, linkagename, line, function, argmap) self.subprograms.append(subp) return subp def _add_location(self, line): """Emit location metatdaa """ loc = self._di_location(line) return loc @classmethod def _const_int(cls, num, bits=32): """Util to create constant int in metadata """ return ir.IntType(bits)(num) @classmethod def _const_bool(cls, boolean): """Util to create constant boolean in metadata """ return ir.IntType(1)(boolean) # # Helpers to emit the metadata nodes # def _di_file(self): return self.module.add_debug_info('DIFile', { 'directory': os.path.dirname(self.filepath), 'filename': os.path.basename(self.filepath), }) def _di_compile_unit(self): return self.module.add_debug_info('DICompileUnit', { 'language': ir.DIToken('DW_LANG_C_plus_plus'), 'file': self.difile, # Numba has to pretend to be clang to ensure the prologue is skipped # correctly in gdb. See: # https://sourceware.org/git/?p=binutils-gdb.git;a=blob;f=gdb/amd64-tdep.c;h=e563d369d8cb3eb3c2f732c2fa850ec70ba8d63b;hb=a4b0231e179607e47b1cdf1fe15c5dc25e482fad#l2521 # Note the "producer_is_llvm" call to specialise the prologue # handling, this is defined here: # https://sourceware.org/git/?p=binutils-gdb.git;a=blob;f=gdb/producer.c;h=cdfd80d904c09394febd18749bb90359b2d128cc;hb=a4b0231e179607e47b1cdf1fe15c5dc25e482fad#l124 # and to get a match for this condition the 'producer' must start # with "clang ", hence the following... 'producer': 'clang (Numba)', 'runtimeVersion': 0, 'isOptimized': config.OPT != 0, 'emissionKind': 1, # 0-NoDebug, 1-FullDebug }, is_distinct=True) def _di_subroutine_type(self, line, function, argmap): # The function call conv needs encoding. 
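# The 'types' metadata list built below first describes the LLVM parameters
# that do not correspond to user arguments (names not prefixed with 'arg.'),
# then adds one entry per user-level argument from argmap, each encoded via
# its Numba datamodel.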
llfunc = function md = [] for idx, llarg in enumerate(llfunc.args): if not llarg.name.startswith('arg.'): name = llarg.name.replace('.', '$') # for gdb to work correctly lltype = llarg.type size = self.cgctx.get_abi_sizeof(lltype) mdtype = self._var_type(lltype, size, datamodel=None) md.append(mdtype) for idx, (name, nbtype) in enumerate(argmap.items()): name = name.replace('.', '$') # for gdb to work correctly datamodel = self.cgctx.data_model_manager[nbtype] lltype = self.cgctx.get_value_type(nbtype) size = self.cgctx.get_abi_sizeof(lltype) mdtype = self._var_type(lltype, size, datamodel=datamodel) md.append(mdtype) return self.module.add_debug_info('DISubroutineType', { 'types': self.module.add_metadata(md), }) def _di_subprogram(self, name, linkagename, line, function, argmap): return self.module.add_debug_info('DISubprogram', { 'name': name, 'linkageName': linkagename, 'scope': self.difile, 'file': self.difile, 'line': line, 'type': self._di_subroutine_type(line, function, argmap), 'isLocal': False, 'isDefinition': True, 'scopeLine': line, 'isOptimized': config.OPT != 0, 'unit': self.dicompileunit, }, is_distinct=True) def _di_location(self, line): return self.module.add_debug_info('DILocation', { 'line': line, 'column': 1, 'scope': self.subprograms[-1], }) class NvvmDIBuilder(DIBuilder): """ Only implemented the minimal metadata to get line number information. See http://llvm.org/releases/3.4/docs/LangRef.html """ # These constants are copied from llvm3.4 DW_LANG_Python = 0x0014 DI_Compile_unit = 786449 DI_Subroutine_type = 786453 DI_Subprogram = 786478 DI_File = 786473 DWARF_VERSION = None # don't emit DWARF version DEBUG_INFO_VERSION = 1 # as required by NVVM IR Spec # Rename DIComputeUnit MD to hide it from llvm.parse_assembly() # which strips invalid/outdated debug metadata DBG_CU_NAME = 'numba.llvm.dbg.cu' # Default member # Used in mark_location to remember last lineno to avoid duplication _last_lineno = None def mark_variable(self, builder, allocavalue, name, lltype, size, line, datamodel=None, argidx=None): # unsupported pass def mark_location(self, builder, line): # Avoid duplication if self._last_lineno == line: return self._last_lineno = line # Add call to an inline asm to mark line location asmty = ir.FunctionType(ir.VoidType(), []) asm = ir.InlineAsm(asmty, "// dbg {}".format(line), "", side_effect=True) call = builder.call(asm, []) md = self._di_location(line) call.set_metadata('numba.dbg', md) def mark_subprogram(self, function, qualname, argnames, argtypes, line): argmap = dict(zip(argnames, argtypes)) self._add_subprogram(name=qualname, linkagename=function.name, line=line) def _add_subprogram(self, name, linkagename, line): """Emit subprogram metadata """ subp = self._di_subprogram(name, linkagename, line) self.subprograms.append(subp) return subp # # Helper methods to create the metadata nodes. # def _filepair(self): return self.module.add_metadata([ os.path.basename(self.filepath), os.path.dirname(self.filepath), ]) def _di_file(self): return self.module.add_metadata([ self._const_int(self.DI_File), self._filepair(), ]) def _di_compile_unit(self): filepair = self._filepair() empty = self.module.add_metadata([self._const_int(0)]) sp_metadata = self.module.add_metadata(self.subprograms) return self.module.add_metadata([ self._const_int(self.DI_Compile_unit), # tag filepair, # source directory and file pair self._const_int(self.DW_LANG_Python), # language 'Numba', # producer self._const_bool(True), # optimized "", # flags?? 
self._const_int(0), # runtime version empty, # enums types empty, # retained types self.module.add_metadata(self.subprograms), # subprograms empty, # global variables empty, # imported entities "", # split debug filename ]) def _di_subroutine_type(self): types = self.module.add_metadata([None]) return self.module.add_metadata([ self._const_int(self.DI_Subroutine_type), # tag self._const_int(0), None, "", self._const_int(0), # line of definition self._const_int(0, 64), # size in bits self._const_int(0, 64), # offset in bits self._const_int(0, 64), # align in bits self._const_int(0), # flags None, types, self._const_int(0), None, None, None, ]) def _di_subprogram(self, name, linkagename, line): function_ptr = self.module.get_global(linkagename) subroutine_type = self._di_subroutine_type() funcvars = self.module.add_metadata([self._const_int(0)]) context = self._di_file() return self.module.add_metadata([ self._const_int(self.DI_Subprogram), # tag self._filepair(), # source dir & file context, # context descriptor name, # name name, # display name linkagename, # linkage name self._const_int(line), # line subroutine_type, # type descriptor self._const_bool(False), # is local self._const_bool(True), # is definition self._const_int(0), # virtuality self._const_int(0), # virtual function index None, # vtable base type self._const_int(0), # flags self._const_bool(True), # is optimized function_ptr, # pointer to function None, # function template parameters None, # function declaration descriptor funcvars, # function variables self._const_int(line) # scope line ]) def _di_location(self, line): return self.module.add_metadata([ self._const_int(line), # line self._const_int(0), # column self.subprograms[-1], # scope None, # original scope ]) def initialize(self): pass def finalize(self): # We create the compile unit at this point because subprograms is # populated and can be referred to by the compile unit. self.dicompileunit = self._di_compile_unit() super().finalize() numba-0.55.1/numba/core/decorators.py000664 000000 000000 00000026225 14174536160 017453 0ustar00rootroot000000 000000 """ Define @jit and related decorators. """ import sys import warnings import inspect import logging from numba.core.errors import DeprecationError, NumbaDeprecationWarning from numba.stencils.stencil import stencil from numba.core import config, extending, sigutils, registry _logger = logging.getLogger(__name__) # ----------------------------------------------------------------------------- # Decorators _msg_deprecated_signature_arg = ("Deprecated keyword argument `{0}`. " "Signatures should be passed as the first " "positional argument.") def jit(signature_or_function=None, locals={}, cache=False, pipeline_class=None, boundscheck=None, **options): """ This decorator is used to compile a Python function into native code. Args ----- signature_or_function: The (optional) signature or list of signatures to be compiled. If not passed, required signatures will be compiled when the decorated function is called, depending on the argument values. As a convenience, you can directly pass the function to be compiled instead. locals: dict Mapping of local variable names to Numba types. Used to override the types deduced by Numba's type inference engine. pipeline_class: type numba.compiler.CompilerBase The compiler pipeline type for customizing the compilation stages. options: For a cpu target, valid options are: nopython: bool Set to True to disable the use of PyObjects and Python API calls. 
The default behavior is to allow the use of PyObjects and Python API. Default value is False. forceobj: bool Set to True to force the use of PyObjects for every value. Default value is False. looplift: bool Set to True to enable jitting loops in nopython mode while leaving surrounding code in object mode. This allows functions to allocate NumPy arrays and use Python objects, while the tight loops in the function can still be compiled in nopython mode. Any arrays that the tight loop uses should be created before the loop is entered. Default value is True. error_model: str The error-model affects divide-by-zero behavior. Valid values are 'python' and 'numpy'. The 'python' model raises an exception. The 'numpy' model sets the result to *+/-inf* or *nan*. Default value is 'python'. inline: str or callable The inline option determines whether a function is inlined into its caller when called. String options are 'never' (default), which will never inline, and 'always', which will always inline. If a callable is provided, it will be called with the call expression node that is requesting inlining, the caller's IR and the callee's IR as arguments; it is expected to return a truthy value indicating whether to inline. NOTE: This inlining is performed at the Numba IR level and is in no way related to LLVM inlining. boundscheck: bool or None Set to True to enable bounds checking for array indices. Out of bounds accesses will raise IndexError. The default is to not do bounds checking. If False, bounds checking is disabled; out of bounds accesses can produce garbage results or segfaults. However, enabling bounds checking will slow down typical functions, so it is recommended to only use this flag for debugging. You can also set the NUMBA_BOUNDSCHECK environment variable to 0 or 1 to globally override this flag. The default value is None, which under normal execution equates to False, but if debug is set to True then bounds checking will be enabled. Returns -------- A callable usable as a compiled function. Actual compiling will be done lazily if no explicit signatures are passed. Examples -------- The function can be used in the following ways: 1) jit(signatures, **targetoptions) -> jit(function) Equivalent to: d = dispatcher(function, targetoptions) for signature in signatures: d.compile(signature) Create a dispatcher object for a Python function. Then, compile the function with the given signature(s). Example: @jit("int32(int32, int32)") def foo(x, y): return x + y @jit(["int32(int32, int32)", "float32(float32, float32)"]) def bar(x, y): return x + y 2) jit(function, **targetoptions) -> dispatcher Create a dispatcher function object that specializes at the call site. Examples: @jit def foo(x, y): return x + y @jit(nopython=True) def bar(x, y): return x + y """ if 'argtypes' in options: raise DeprecationError(_msg_deprecated_signature_arg.format('argtypes')) if 'restype' in options: raise DeprecationError(_msg_deprecated_signature_arg.format('restype')) if options.get('nopython', False) and options.get('forceobj', False): raise ValueError("Only one of 'nopython' or 'forceobj' can be True.") if "_target" in options: # Set the "target_backend" option if "_target" is defined.
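# The value is copied under the current "target_backend" option name, and the
# "_target" key itself is then removed by the pop() that follows.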
options['target_backend'] = options['_target'] target = options.pop('_target', 'cpu') options['boundscheck'] = boundscheck # Handle signature if signature_or_function is None: # No signature, no function pyfunc = None sigs = None elif isinstance(signature_or_function, list): # A list of signatures is passed pyfunc = None sigs = signature_or_function elif sigutils.is_signature(signature_or_function): # A single signature is passed pyfunc = None sigs = [signature_or_function] else: # A function is passed pyfunc = signature_or_function sigs = None dispatcher_args = {} if pipeline_class is not None: dispatcher_args['pipeline_class'] = pipeline_class wrapper = _jit(sigs, locals=locals, target=target, cache=cache, targetoptions=options, **dispatcher_args) if pyfunc is not None: return wrapper(pyfunc) else: return wrapper def _jit(sigs, locals, target, cache, targetoptions, **dispatcher_args): from numba.core.target_extension import resolve_dispatcher_from_str dispatcher = resolve_dispatcher_from_str(target) def wrapper(func): if extending.is_jitted(func): raise TypeError( "A jit decorator was called on an already jitted function " f"{func}. If trying to access the original python " f"function, use the {func}.py_func attribute." ) if not inspect.isfunction(func): raise TypeError( "The decorated object is not a function (got type " f"{type(func)})." ) if config.ENABLE_CUDASIM and target == 'cuda': from numba import cuda return cuda.jit(func) if config.DISABLE_JIT and not target == 'npyufunc': return func disp = dispatcher(py_func=func, locals=locals, targetoptions=targetoptions, **dispatcher_args) if cache: disp.enable_caching() if sigs is not None: # Register the Dispatcher to the type inference mechanism, # even though the decorator hasn't returned yet. from numba.core import typeinfer with typeinfer.register_dispatcher(disp): for sig in sigs: disp.compile(sig) disp.disable_compile() return disp return wrapper def generated_jit(function=None, cache=False, pipeline_class=None, **options): """ This decorator allows flexible type-based compilation of a jitted function. It works as `@jit`, except that the decorated function is called at compile-time with the *types* of the arguments and should return an implementation function for those types. """ dispatcher_args = {} if pipeline_class is not None: dispatcher_args['pipeline_class'] = pipeline_class wrapper = _jit(sigs=None, locals={}, target='cpu', cache=cache, targetoptions=options, impl_kind='generated', **dispatcher_args) if function is not None: return wrapper(function) else: return wrapper def njit(*args, **kws): """ Equivalent to jit(nopython=True) See documentation for jit function/decorator for full description. """ if 'nopython' in kws: warnings.warn('nopython is set for njit and is ignored', RuntimeWarning) if 'forceobj' in kws: warnings.warn('forceobj is set for njit and is ignored', RuntimeWarning) del kws['forceobj'] kws.update({'nopython': True}) return jit(*args, **kws) def cfunc(sig, locals={}, cache=False, pipeline_class=None, **options): """ This decorator is used to compile a Python function into a C callback usable with foreign C libraries. 
Usage:: @cfunc("float64(float64, float64)", nopython=True, cache=True) def add(a, b): return a + b """ sig = sigutils.normalize_signature(sig) def wrapper(func): from numba.core.ccallback import CFunc additional_args = {} if pipeline_class is not None: additional_args['pipeline_class'] = pipeline_class res = CFunc(func, sig, locals=locals, options=options, **additional_args) if cache: res.enable_caching() res.compile() return res return wrapper def jit_module(**kwargs): """ Automatically ``jit``-wraps functions defined in a Python module Note that ``jit_module`` should only be called at the end of the module to be jitted. In addition, only functions which are defined in the module ``jit_module`` is called from are considered for automatic jit-wrapping. See the Numba documentation for more information about what can/cannot be jitted. :param kwargs: Keyword arguments to pass to ``jit`` such as ``nopython`` or ``error_model``. """ # Get the module jit_module is being called from frame = inspect.stack()[1] module = inspect.getmodule(frame[0]) # Replace functions in module with jit-wrapped versions for name, obj in module.__dict__.items(): if inspect.isfunction(obj) and inspect.getmodule(obj) == module: _logger.debug("Auto decorating function {} from module {} with jit " "and options: {}".format(obj, module.__name__, kwargs)) module.__dict__[name] = jit(obj, **kwargs) numba-0.55.1/numba/core/descriptors.py000664 000000 000000 00000000531 14174536160 017637 0ustar00rootroot000000 000000 """ Target Descriptors """ from abc import ABCMeta, abstractmethod class TargetDescriptor(metaclass=ABCMeta): def __init__(self, target_name): self._target_name = target_name @property @abstractmethod def typing_context(self): ... @property @abstractmethod def target_context(self): ... numba-0.55.1/numba/core/dispatcher.py000664 000000 000000 00000141265 14174536160 017436 0ustar00rootroot000000 000000 # -*- coding: utf-8 -*- import collections import functools import sys import types as pytypes import uuid import weakref from contextlib import ExitStack from numba import _dispatcher from numba.core import ( utils, types, errors, typing, serialize, config, compiler, sigutils ) from numba.core.compiler_lock import global_compiler_lock from numba.core.typeconv.rules import default_type_manager from numba.core.typing.templates import fold_arguments from numba.core.typing.typeof import Purpose, typeof from numba.core.bytecode import get_code_object from numba.core.caching import NullCache, FunctionCache from numba.core import entrypoints from numba.core.retarget import BaseRetarget import numba.core.event as ev class _RetargetStack(utils.ThreadLocalStack, stack_name="retarget"): def push(self, state): super().push(state) _dispatcher.set_use_tls_target_stack(len(self) > 0) def pop(self): super().pop() _dispatcher.set_use_tls_target_stack(len(self) > 0) class TargetConfigurationStack: """The target configuration stack. Uses the BORG pattern and stores states in threadlocal storage. WARNING: features associated with this class are experimental. The API may change without notice. """ def __init__(self): self._stack = _RetargetStack() def get(self): """Get the current target from the top of the stack. May raise IndexError if the stack is empty. Users should check the size of the stack beforehand. 
""" return self._stack.top() def __len__(self): """Size of the stack """ return len(self._stack) @classmethod def switch_target(cls, retarget: BaseRetarget): """Returns a contextmanager that pushes a new retarget handler, an instance of `numba.core.retarget.BaseRetarget`, onto the target-config stack for the duration of the context-manager. """ return cls()._stack.enter(retarget) class OmittedArg(object): """ A placeholder for omitted arguments with a default value. """ def __init__(self, value): self.value = value def __repr__(self): return "omitted arg(%r)" % (self.value,) @property def _numba_type_(self): return types.Omitted(self.value) class _FunctionCompiler(object): def __init__(self, py_func, targetdescr, targetoptions, locals, pipeline_class): self.py_func = py_func self.targetdescr = targetdescr self.targetoptions = targetoptions self.locals = locals self.pysig = utils.pysignature(self.py_func) self.pipeline_class = pipeline_class # Remember key=(args, return_type) combinations that will fail # compilation to avoid compilation attempt on them. The values are # the exceptions. self._failed_cache = {} def fold_argument_types(self, args, kws): """ Given positional and named argument types, fold keyword arguments and resolve defaults by inserting types.Omitted() instances. A (pysig, argument types) tuple is returned. """ def normal_handler(index, param, value): return value def default_handler(index, param, default): return types.Omitted(default) def stararg_handler(index, param, values): return types.StarArgTuple(values) # For now, we take argument values from the @jit function, even # in the case of generated jit. args = fold_arguments(self.pysig, args, kws, normal_handler, default_handler, stararg_handler) return self.pysig, args def compile(self, args, return_type): status, retval = self._compile_cached(args, return_type) if status: return retval else: raise retval def _compile_cached(self, args, return_type): key = tuple(args), return_type try: return False, self._failed_cache[key] except KeyError: pass try: retval = self._compile_core(args, return_type) except errors.TypingError as e: self._failed_cache[key] = e return False, e else: return True, retval def _compile_core(self, args, return_type): flags = compiler.Flags() self.targetdescr.options.parse_as_flags(flags, self.targetoptions) flags = self._customize_flags(flags) impl = self._get_implementation(args, {}) cres = compiler.compile_extra(self.targetdescr.typing_context, self.targetdescr.target_context, impl, args=args, return_type=return_type, flags=flags, locals=self.locals, pipeline_class=self.pipeline_class) # Check typing error if object mode is used if cres.typing_error is not None and not flags.enable_pyobject: raise cres.typing_error return cres def get_globals_for_reduction(self): return serialize._get_function_globals_for_reduction(self.py_func) def _get_implementation(self, args, kws): return self.py_func def _customize_flags(self, flags): return flags class _GeneratedFunctionCompiler(_FunctionCompiler): def __init__(self, py_func, targetdescr, targetoptions, locals, pipeline_class): super(_GeneratedFunctionCompiler, self).__init__( py_func, targetdescr, targetoptions, locals, pipeline_class) self.impls = set() def get_globals_for_reduction(self): # This will recursively get the globals used by any nested # implementation function. 
return serialize._get_function_globals_for_reduction(self.py_func) def _get_implementation(self, args, kws): impl = self.py_func(*args, **kws) # Check the generating function and implementation signatures are # compatible, otherwise compiling would fail later. pysig = utils.pysignature(self.py_func) implsig = utils.pysignature(impl) ok = len(pysig.parameters) == len(implsig.parameters) if ok: for pyparam, implparam in zip(pysig.parameters.values(), implsig.parameters.values()): # We allow the implementation to omit default values, but # if it mentions them, they should have the same value... if (pyparam.name != implparam.name or pyparam.kind != implparam.kind or (implparam.default is not implparam.empty and implparam.default != pyparam.default)): ok = False if not ok: raise TypeError("generated implementation %s should be compatible " "with signature '%s', but has signature '%s'" % (impl, pysig, implsig)) self.impls.add(impl) return impl _CompileStats = collections.namedtuple( '_CompileStats', ('cache_path', 'cache_hits', 'cache_misses')) class CompilingCounter(object): """ A simple counter that increment in __enter__ and decrement in __exit__. """ def __init__(self): self.counter = 0 def __enter__(self): assert self.counter >= 0 self.counter += 1 def __exit__(self, *args, **kwargs): self.counter -= 1 assert self.counter >= 0 def __bool__(self): return self.counter > 0 __nonzero__ = __bool__ class _DispatcherBase(_dispatcher.Dispatcher): """ Common base class for dispatcher Implementations. """ __numba__ = "py_func" def __init__(self, arg_count, py_func, pysig, can_fallback, exact_match_required): self._tm = default_type_manager # A mapping of signatures to compile results self.overloads = collections.OrderedDict() self.py_func = py_func # other parts of Numba assume the old Python 2 name for code object self.func_code = get_code_object(py_func) # but newer python uses a different name self.__code__ = self.func_code # a place to keep an active reference to the types of the active call self._types_active_call = [] # Default argument values match the py_func self.__defaults__ = py_func.__defaults__ argnames = tuple(pysig.parameters) default_values = self.py_func.__defaults__ or () defargs = tuple(OmittedArg(val) for val in default_values) try: lastarg = list(pysig.parameters.values())[-1] except IndexError: has_stararg = False else: has_stararg = lastarg.kind == lastarg.VAR_POSITIONAL _dispatcher.Dispatcher.__init__(self, self._tm.get_pointer(), arg_count, self._fold_args, argnames, defargs, can_fallback, has_stararg, exact_match_required) self.doc = py_func.__doc__ self._compiling_counter = CompilingCounter() weakref.finalize(self, self._make_finalizer()) def _compilation_chain_init_hook(self): """ This will be called ahead of any part of compilation taking place (this even includes being ahead of working out the types of the arguments). This permits activities such as initialising extension entry points so that the compiler knows about additional externally defined types etc before it does anything. """ entrypoints.init_all() def _reset_overloads(self): self._clear() self.overloads.clear() def _make_finalizer(self): """ Return a finalizer function that will release references to related compiled functions. 
""" overloads = self.overloads targetctx = self.targetctx # Early-bind utils.shutting_down() into the function's local namespace # (see issue #689) def finalizer(shutting_down=utils.shutting_down): # The finalizer may crash at shutdown, skip it (resources # will be cleared by the process exiting, anyway). if shutting_down(): return # This function must *not* hold any reference to self: # we take care to bind the necessary objects in the closure. for cres in overloads.values(): try: targetctx.remove_user_function(cres.entry_point) except KeyError: pass return finalizer @property def signatures(self): """ Returns a list of compiled function signatures. """ return list(self.overloads) @property def nopython_signatures(self): return [cres.signature for cres in self.overloads.values() if not cres.objectmode] def disable_compile(self, val=True): """Disable the compilation of new signatures at call time. """ # If disabling compilation then there must be at least one signature assert (not val) or len(self.signatures) > 0 self._can_compile = not val def add_overload(self, cres): args = tuple(cres.signature.args) sig = [a._code for a in args] self._insert(sig, cres.entry_point, cres.objectmode) self.overloads[args] = cres def fold_argument_types(self, args, kws): return self._compiler.fold_argument_types(args, kws) def get_call_template(self, args, kws): """ Get a typing.ConcreteTemplate for this dispatcher and the given *args* and *kws* types. This allows to resolve the return type. A (template, pysig, args, kws) tuple is returned. """ # XXX how about a dispatcher template class automating the # following? # Fold keyword arguments and resolve default values pysig, args = self._compiler.fold_argument_types(args, kws) kws = {} # Ensure an overload is available if self._can_compile: self.compile(tuple(args)) # Create function type for typing func_name = self.py_func.__name__ name = "CallTemplate({0})".format(func_name) # The `key` isn't really used except for diagnosis here, # so avoid keeping a reference to `cfunc`. call_template = typing.make_concrete_template( name, key=func_name, signatures=self.nopython_signatures) return call_template, pysig, args, kws def get_overload(self, sig): """ Return the compiled function for the given signature. """ args, return_type = sigutils.normalize_signature(sig) return self.overloads[tuple(args)].entry_point @property def is_compiling(self): """ Whether a specialization is currently being compiled. """ return self._compiling_counter def _compile_for_args(self, *args, **kws): """ For internal use. Compile a specialized version of the function for the given *args* and *kws*, and return the resulting callable. """ assert not kws # call any initialisation required for the compilation chain (e.g. # extension point registration). self._compilation_chain_init_hook() def error_rewrite(e, issue_type): """ Rewrite and raise Exception `e` with help supplied based on the specified issue_type. """ if config.SHOW_HELP: help_msg = errors.error_extras[issue_type] e.patch_message('\n'.join((str(e).rstrip(), help_msg))) if config.FULL_TRACEBACKS: raise e else: raise e.with_traceback(None) argtypes = [] for a in args: if isinstance(a, OmittedArg): argtypes.append(types.Omitted(a.value)) else: argtypes.append(self.typeof_pyval(a)) return_val = None try: return_val = self.compile(tuple(argtypes)) except errors.ForceLiteralArg as e: # Received request for compiler re-entry with the list of arguments # indicated by e.requested_args. 
# First, check if any of these args are already Literal-ized already_lit_pos = [i for i in e.requested_args if isinstance(args[i], types.Literal)] if already_lit_pos: # Abort compilation if any argument is already a Literal. # Letting this continue will cause infinite compilation loop. m = ("Repeated literal typing request.\n" "{}.\n" "This is likely caused by an error in typing. " "Please see nested and suppressed exceptions.") info = ', '.join('Arg #{} is {}'.format(i, args[i]) for i in sorted(already_lit_pos)) raise errors.CompilerError(m.format(info)) # Convert requested arguments into a Literal. args = [(types.literal if i in e.requested_args else lambda x: x)(args[i]) for i, v in enumerate(args)] # Re-enter compilation with the Literal-ized arguments return_val = self._compile_for_args(*args) except errors.TypingError as e: # Intercept typing error that may be due to an argument # that failed inferencing as a Numba type failed_args = [] for i, arg in enumerate(args): val = arg.value if isinstance(arg, OmittedArg) else arg try: tp = typeof(val, Purpose.argument) except ValueError as typeof_exc: failed_args.append((i, str(typeof_exc))) else: if tp is None: failed_args.append( (i, f"cannot determine Numba type of value {val}")) if failed_args: # Patch error message to ease debugging args_str = "\n".join( f"- argument {i}: {err}" for i, err in failed_args ) msg = (f"{str(e).rstrip()} \n\nThis error may have been caused " f"by the following argument(s):\n{args_str}\n") e.patch_message(msg) error_rewrite(e, 'typing') except errors.UnsupportedError as e: # Something unsupported is present in the user code, add help info error_rewrite(e, 'unsupported_error') except (errors.NotDefinedError, errors.RedefinedError, errors.VerificationError) as e: # These errors are probably from an issue with either the code # supplied being syntactically or otherwise invalid error_rewrite(e, 'interpreter') except errors.ConstantInferenceError as e: # this is from trying to infer something as constant when it isn't # or isn't supported as a constant error_rewrite(e, 'constant_inference') except Exception as e: if config.SHOW_HELP: if hasattr(e, 'patch_message'): help_msg = errors.error_extras['reportable'] e.patch_message('\n'.join((str(e).rstrip(), help_msg))) # ignore the FULL_TRACEBACKS config, this needs reporting! raise e finally: self._types_active_call = [] return return_val def inspect_llvm(self, signature=None): """Get the LLVM intermediate representation generated by compilation. Parameters ---------- signature : tuple of numba types, optional Specify a signature for which to obtain the LLVM IR. If None, the IR is returned for all available signatures. Returns ------- llvm : dict[signature, str] or str Either the LLVM IR string for the specified signature, or, if no signature was given, a dictionary mapping signatures to LLVM IR strings. """ if signature is not None: lib = self.overloads[signature].library return lib.get_llvm_str() return dict((sig, self.inspect_llvm(sig)) for sig in self.signatures) def inspect_asm(self, signature=None): """Get the generated assembly code. Parameters ---------- signature : tuple of numba types, optional Specify a signature for which to obtain the assembly code. If None, the assembly code is returned for all available signatures. Returns ------- asm : dict[signature, str] or str Either the assembly code for the specified signature, or, if no signature was given, a dictionary mapping signatures to assembly code. 
""" if signature is not None: lib = self.overloads[signature].library return lib.get_asm_str() return dict((sig, self.inspect_asm(sig)) for sig in self.signatures) def inspect_types(self, file=None, signature=None, pretty=False, style='default', **kwargs): """Print/return Numba intermediate representation (IR)-annotated code. Parameters ---------- file : file-like object, optional File to which to print. Defaults to sys.stdout if None. Must be None if ``pretty=True``. signature : tuple of numba types, optional Print/return the intermediate representation for only the given signature. If None, the IR is printed for all available signatures. pretty : bool, optional If True, an Annotate object will be returned that can render the IR with color highlighting in Jupyter and IPython. ``file`` must be None if ``pretty`` is True. Additionally, the ``pygments`` library must be installed for ``pretty=True``. style : str, optional Choose a style for rendering. Ignored if ``pretty`` is ``False``. This is directly consumed by ``pygments`` formatters. To see a list of available styles, import ``pygments`` and run ``list(pygments.styles.get_all_styles())``. Returns ------- annotated : Annotate object, optional Only returned if ``pretty=True``, otherwise this function is only used for its printing side effect. If ``pretty=True``, an Annotate object is returned that can render itself in Jupyter and IPython. """ overloads = self.overloads if signature is not None: overloads = {signature: self.overloads[signature]} if not pretty: if file is None: file = sys.stdout for ver, res in overloads.items(): print("%s %s" % (self.py_func.__name__, ver), file=file) print('-' * 80, file=file) print(res.type_annotation, file=file) print('=' * 80, file=file) else: if file is not None: raise ValueError("`file` must be None if `pretty=True`") from numba.core.annotations.pretty_annotate import Annotate return Annotate(self, signature=signature, style=style) def inspect_cfg(self, signature=None, show_wrapper=None, **kwargs): """ For inspecting the CFG of the function. By default the CFG of the user function is shown. The *show_wrapper* option can be set to "python" or "cfunc" to show the python wrapper function or the *cfunc* wrapper function, respectively. Parameters accepted in kwargs ----------------------------- filename : string, optional the name of the output file, if given this will write the output to filename view : bool, optional whether to immediately view the optional output file highlight : bool, set, dict, optional what, if anything, to highlight, options are: { incref : bool, # highlight NRT_incref calls decref : bool, # highlight NRT_decref calls returns : bool, # highlight exits which are normal returns raises : bool, # highlight exits which are from raise meminfo : bool, # highlight calls to NRT*meminfo branches : bool, # highlight true/false branches } Default is True which sets all of the above to True. Supplying a set of strings is also accepted, these are interpreted as key:True with respect to the above dictionary. e.g. {'incref', 'decref'} would switch on highlighting on increfs and decrefs. interleave: bool, set, dict, optional what, if anything, to interleave in the LLVM IR, options are: { python: bool # interleave python source code with the LLVM IR lineinfo: bool # interleave line information markers with the LLVM # IR } Default is True which sets all of the above to True. Supplying a set of strings is also accepted, these are interpreted as key:True with respect to the above dictionary. e.g. 
{'python',} would switch on interleaving of python source code in the LLVM IR. strip_ir : bool, optional Default is False. If set to True all LLVM IR that is superfluous to that requested in kwarg `highlight` will be removed. show_key : bool, optional Default is True. Create a "key" for the highlighting in the rendered CFG. fontsize : int, optional Default is 8. Set the fontsize in the output to this value. """ if signature is not None: cres = self.overloads[signature] lib = cres.library if show_wrapper == 'python': fname = cres.fndesc.llvm_cpython_wrapper_name elif show_wrapper == 'cfunc': fname = cres.fndesc.llvm_cfunc_wrapper_name else: fname = cres.fndesc.mangled_name return lib.get_function_cfg(fname, py_func=self.py_func, **kwargs) return dict((sig, self.inspect_cfg(sig, show_wrapper=show_wrapper)) for sig in self.signatures) def inspect_disasm_cfg(self, signature=None): """ For inspecting the CFG of the disassembly of the function. Requires python package: r2pipe Requires radare2 binary on $PATH. Notebook rendering requires python package: graphviz signature : tuple of Numba types, optional Print/return the disassembly CFG for only the given signatures. If None, the IR is printed for all available signatures. """ if signature is not None: cres = self.overloads[signature] lib = cres.library return lib.get_disasm_cfg(cres.fndesc.mangled_name) return dict((sig, self.inspect_disasm_cfg(sig)) for sig in self.signatures) def get_annotation_info(self, signature=None): """ Gets the annotation information for the function specified by signature. If no signature is supplied a dictionary of signature to annotation information is returned. """ signatures = self.signatures if signature is None else [signature] out = collections.OrderedDict() for sig in signatures: cres = self.overloads[sig] ta = cres.type_annotation key = (ta.func_id.filename + ':' + str(ta.func_id.firstlineno + 1), ta.signature) out[key] = ta.annotate_raw()[key] return out def _explain_ambiguous(self, *args, **kws): """ Callback for the C _Dispatcher object. """ assert not kws, "kwargs not handled" args = tuple([self.typeof_pyval(a) for a in args]) # The order here must be deterministic for testing purposes, which # is ensured by the OrderedDict. sigs = self.nopython_signatures # This will raise self.typingctx.resolve_overload(self.py_func, sigs, args, kws, allow_ambiguous=False) def _explain_matching_error(self, *args, **kws): """ Callback for the C _Dispatcher object. """ assert not kws, "kwargs not handled" args = [self.typeof_pyval(a) for a in args] msg = ("No matching definition for argument type(s) %s" % ', '.join(map(str, args))) raise TypeError(msg) def _search_new_conversions(self, *args, **kws): """ Callback for the C _Dispatcher object. Search for approximately matching signatures for the given arguments, and ensure the corresponding conversions are registered in the C++ type manager. """ assert not kws, "kwargs not handled" args = [self.typeof_pyval(a) for a in args] found = False for sig in self.nopython_signatures: conv = self.typingctx.install_possible_conversions(args, sig.args) if conv: found = True return found def __repr__(self): return "%s(%s)" % (type(self).__name__, self.py_func) def typeof_pyval(self, val): """ Resolve the Numba type of Python value *val*. This is called from numba._dispatcher as a fallback if the native code cannot decide the type. """ # Not going through the resolve_argument_type() indirection # can save a couple µs. 
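        # A value whose Numba type cannot be determined is mapped to
        # types.pyobject below, so that object-mode fallback remains possible.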
        try:
            tp = typeof(val, Purpose.argument)
        except ValueError:
            tp = types.pyobject
        else:
            if tp is None:
                tp = types.pyobject
        self._types_active_call.append(tp)
        return tp

    def _callback_add_timer(self, duration, cres, lock_name):
        md = cres.metadata
        # md can be None when code is loaded from cache
        if md is not None:
            timers = md.setdefault("timers", {})
            if lock_name not in timers:
                # Only write if the metadata does not exist
                timers[lock_name] = duration
            else:
                msg = f"'{lock_name}' metadata is already defined."
                raise AssertionError(msg)

    def _callback_add_compiler_timer(self, duration, cres):
        return self._callback_add_timer(duration, cres,
                                        lock_name="compiler_lock")

    def _callback_add_llvm_timer(self, duration, cres):
        return self._callback_add_timer(duration, cres, lock_name="llvm_lock")


class _MemoMixin:
    __uuid = None
    # A {uuid -> instance} mapping, for deserialization
    _memo = weakref.WeakValueDictionary()
    # hold refs to last N functions deserialized, retaining them in _memo
    # regardless of whether there is another reference
    _recent = collections.deque(maxlen=config.FUNCTION_CACHE_SIZE)

    @property
    def _uuid(self):
        """
        An instance-specific UUID, to avoid multiple deserializations of
        a given instance.

        Note: this is lazily-generated, for performance reasons.
        """
        u = self.__uuid
        if u is None:
            u = str(uuid.uuid1())
            self._set_uuid(u)
        return u

    def _set_uuid(self, u):
        assert self.__uuid is None
        self.__uuid = u
        self._memo[u] = self
        self._recent.append(self)


class Dispatcher(serialize.ReduceMixin, _MemoMixin, _DispatcherBase):
    """
    Implementation of user-facing dispatcher objects (i.e. created using
    the @jit decorator).
    This is an abstract base class. Subclasses should define the targetdescr
    class attribute.
    """
    _fold_args = True
    _impl_kinds = {
        'direct': _FunctionCompiler,
        'generated': _GeneratedFunctionCompiler,
    }

    __numba__ = 'py_func'

    def __init__(self, py_func, locals={}, targetoptions={},
                 impl_kind='direct', pipeline_class=compiler.Compiler):
        """
        Parameters
        ----------
        py_func: function object to be compiled
        locals: dict, optional
            Mapping of local variable names to Numba types. Used to override
            the types deduced by the type inference engine.
        targetoptions: dict, optional
            Target-specific config options.
        impl_kind: str
            Select the compiler mode for `@jit` and `@generated_jit`
        pipeline_class: type numba.compiler.CompilerBase
            The compiler pipeline type.
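        Notes
        -----
        Instances are normally created via the ``@jit`` decorator rather
        than by calling this constructor directly, e.g.::

            @jit(nopython=True)
            def add(a, b):
                return a + b   # ``add`` is now a Dispatcher instance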
""" self.typingctx = self.targetdescr.typing_context self.targetctx = self.targetdescr.target_context pysig = utils.pysignature(py_func) arg_count = len(pysig.parameters) can_fallback = not targetoptions.get('nopython', False) _DispatcherBase.__init__(self, arg_count, py_func, pysig, can_fallback, exact_match_required=False) functools.update_wrapper(self, py_func) self.targetoptions = targetoptions self.locals = locals self._cache = NullCache() compiler_class = self._impl_kinds[impl_kind] self._impl_kind = impl_kind self._compiler = compiler_class(py_func, self.targetdescr, targetoptions, locals, pipeline_class) self._cache_hits = collections.Counter() self._cache_misses = collections.Counter() self._type = types.Dispatcher(self) self.typingctx.insert_global(self, self._type) # Remember target restriction self._required_target_backend = targetoptions.get('target_backend') def dump(self, tab=''): print(f'{tab}DUMP {type(self).__name__}[{self.py_func.__name__}' f', type code={self._type._code}]') for cres in self.overloads.values(): cres.dump(tab=tab + ' ') print(f'{tab}END DUMP {type(self).__name__}[{self.py_func.__name__}]') @property def _numba_type_(self): return types.Dispatcher(self) def enable_caching(self): self._cache = FunctionCache(self.py_func) def __get__(self, obj, objtype=None): '''Allow a JIT function to be bound as a method to an object''' if obj is None: # Unbound method return self else: # Bound method return pytypes.MethodType(self, obj) def _reduce_states(self): """ Reduce the instance for pickling. This will serialize the original function as well the compilation options and compiled signatures, but not the compiled code itself. NOTE: part of ReduceMixin protocol """ if self._can_compile: sigs = [] else: sigs = [cr.signature for cr in self.overloads.values()] return dict( uuid=str(self._uuid), py_func=self.py_func, locals=self.locals, targetoptions=self.targetoptions, impl_kind=self._impl_kind, can_compile=self._can_compile, sigs=sigs, ) @classmethod def _rebuild(cls, uuid, py_func, locals, targetoptions, impl_kind, can_compile, sigs): """ Rebuild an Dispatcher instance after it was __reduce__'d. NOTE: part of ReduceMixin protocol """ try: return cls._memo[uuid] except KeyError: pass self = cls(py_func, locals, targetoptions, impl_kind) # Make sure this deserialization will be merged with subsequent ones self._set_uuid(uuid) for sig in sigs: self.compile(sig) self._can_compile = can_compile return self def compile(self, sig): disp = self._get_dispatcher_for_current_target() if disp is not self: return disp.compile(sig) with ExitStack() as scope: cres = None def cb_compiler(dur): if cres is not None: self._callback_add_compiler_timer(dur, cres) def cb_llvm(dur): if cres is not None: self._callback_add_llvm_timer(dur, cres) scope.enter_context(ev.install_timer("numba:compiler_lock", cb_compiler)) scope.enter_context(ev.install_timer("numba:llvm_lock", cb_llvm)) scope.enter_context(global_compiler_lock) if not self._can_compile: raise RuntimeError("compilation disabled") # Use counter to track recursion compilation depth with self._compiling_counter: args, return_type = sigutils.normalize_signature(sig) # Don't recompile if signature already exists existing = self.overloads.get(tuple(args)) if existing is not None: return existing.entry_point # Try to load from disk cache cres = self._cache.load_overload(sig, self.targetctx) if cres is not None: self._cache_hits[sig] += 1 # XXX fold this in add_overload()? 
(also see compiler.py) if not cres.objectmode: self.targetctx.insert_user_function(cres.entry_point, cres.fndesc, [cres.library]) self.add_overload(cres) return cres.entry_point self._cache_misses[sig] += 1 ev_details = dict( dispatcher=self, args=args, return_type=return_type, ) with ev.trigger_event("numba:compile", data=ev_details): try: cres = self._compiler.compile(args, return_type) except errors.ForceLiteralArg as e: def folded(args, kws): return self._compiler.fold_argument_types(args, kws)[1] raise e.bind_fold_arguments(folded) self.add_overload(cres) self._cache.save_overload(sig, cres) return cres.entry_point def get_compile_result(self, sig): """Compile (if needed) and return the compilation result with the given signature. """ atypes = tuple(sig.args) if atypes not in self.overloads: self.compile(atypes) return self.overloads[atypes] def recompile(self): """ Recompile all signatures afresh. """ sigs = list(self.overloads) old_can_compile = self._can_compile # Ensure the old overloads are disposed of, # including compiled functions. self._make_finalizer()() self._reset_overloads() self._cache.flush() self._can_compile = True try: for sig in sigs: self.compile(sig) finally: self._can_compile = old_can_compile @property def stats(self): return _CompileStats( cache_path=self._cache.cache_path, cache_hits=self._cache_hits, cache_misses=self._cache_misses, ) def parallel_diagnostics(self, signature=None, level=1): """ Print parallel diagnostic information for the given signature. If no signature is present it is printed for all known signatures. level is used to adjust the verbosity, level=1 (default) is minimal verbosity, and 2, 3, and 4 provide increasing levels of verbosity. """ def dump(sig): ol = self.overloads[sig] pfdiag = ol.metadata.get('parfor_diagnostics', None) if pfdiag is None: msg = "No parfors diagnostic available, is 'parallel=True' set?" raise ValueError(msg) pfdiag.dump(level) if signature is not None: dump(signature) else: [dump(sig) for sig in self.signatures] def get_metadata(self, signature=None): """ Obtain the compilation metadata for a given signature. """ if signature is not None: return self.overloads[signature].metadata else: return dict( (sig,self.overloads[sig].metadata) for sig in self.signatures ) def get_function_type(self): """Return unique function type of dispatcher when possible, otherwise return None. A Dispatcher instance has unique function type when it contains exactly one compilation result and its compilation has been disabled (via its disable_compile method). """ if not self._can_compile and len(self.overloads) == 1: cres = tuple(self.overloads.values())[0] return types.FunctionType(cres.signature) def _get_retarget_dispatcher(self): """Returns a dispatcher for the retarget request. """ # Check TLS target configuration tc = TargetConfigurationStack() retarget = tc.get() retarget.check_compatible(self) disp = retarget.retarget(self) return disp def _get_dispatcher_for_current_target(self): """Returns a dispatcher for the current target registered in `TargetConfigurationStack`. `self` is returned if no target is specified. """ tc = TargetConfigurationStack() if tc: return self._get_retarget_dispatcher() else: return self def _call_tls_target(self, *args, **kwargs): """This is called when the C dispatcher logic sees a retarget request. 
""" disp = self._get_retarget_dispatcher() # Call the new dispatcher return disp(*args, **kwargs) class LiftedCode(serialize.ReduceMixin, _MemoMixin, _DispatcherBase): """ Implementation of the hidden dispatcher objects used for lifted code (a lifted loop is really compiled as a separate function). """ _fold_args = False can_cache = False def __init__(self, func_ir, typingctx, targetctx, flags, locals): self.func_ir = func_ir self.lifted_from = None self.typingctx = typingctx self.targetctx = targetctx self.flags = flags self.locals = locals _DispatcherBase.__init__(self, self.func_ir.arg_count, self.func_ir.func_id.func, self.func_ir.func_id.pysig, can_fallback=True, exact_match_required=False) def _reduce_states(self): """ Reduce the instance for pickling. This will serialize the original function as well the compilation options and compiled signatures, but not the compiled code itself. NOTE: part of ReduceMixin protocol """ return dict( uuid=self._uuid, func_ir=self.func_ir, flags=self.flags, locals=self.locals, extras=self._reduce_extras(), ) def _reduce_extras(self): """ NOTE: sub-class can override to add extra states """ return {} @classmethod def _rebuild(cls, uuid, func_ir, flags, locals, extras): """ Rebuild an Dispatcher instance after it was __reduce__'d. NOTE: part of ReduceMixin protocol """ try: return cls._memo[uuid] except KeyError: pass # NOTE: We are assuming that this is must be cpu_target, which is true # for now. # TODO: refactor this to not assume on `cpu_target` from numba.core import registry typingctx = registry.cpu_target.typing_context targetctx = registry.cpu_target.target_context self = cls(func_ir, typingctx, targetctx, flags, locals, **extras) self._set_uuid(uuid) return self def get_source_location(self): """Return the starting line number of the loop. """ return self.func_ir.loc.line def _pre_compile(self, args, return_type, flags): """Pre-compile actions """ pass def compile(self, sig): with ExitStack() as scope: cres = None def cb_compiler(dur): if cres is not None: self._callback_add_compiler_timer(dur, cres) def cb_llvm(dur): if cres is not None: self._callback_add_llvm_timer(dur, cres) scope.enter_context(ev.install_timer("numba:compiler_lock", cb_compiler)) scope.enter_context(ev.install_timer("numba:llvm_lock", cb_llvm)) scope.enter_context(global_compiler_lock) # Use counter to track recursion compilation depth with self._compiling_counter: # XXX this is mostly duplicated from Dispatcher. flags = self.flags args, return_type = sigutils.normalize_signature(sig) # Don't recompile if signature already exists # (e.g. if another thread compiled it before we got the lock) existing = self.overloads.get(tuple(args)) if existing is not None: return existing.entry_point self._pre_compile(args, return_type, flags) # Clone IR to avoid (some of the) mutation in the rewrite pass cloned_func_ir = self.func_ir.copy() ev_details = dict( dispatcher=self, args=args, return_type=return_type, ) with ev.trigger_event("numba:compile", data=ev_details): cres = compiler.compile_ir(typingctx=self.typingctx, targetctx=self.targetctx, func_ir=cloned_func_ir, args=args, return_type=return_type, flags=flags, locals=self.locals, lifted=(), lifted_from=self.lifted_from, is_lifted_loop=True,) # Check typing error if object mode is used if (cres.typing_error is not None and not flags.enable_pyobject): raise cres.typing_error self.add_overload(cres) return cres.entry_point def _get_dispatcher_for_current_target(self): # Lifted code does not honor the target switch currently. 
# No work has been done to check if this can be allowed. return self class LiftedLoop(LiftedCode): def _pre_compile(self, args, return_type, flags): assert not flags.enable_looplift, "Enable looplift flags is on" class LiftedWith(LiftedCode): can_cache = True def _reduce_extras(self): return dict(output_types=self.output_types) @property def _numba_type_(self): return types.Dispatcher(self) def get_call_template(self, args, kws): """ Get a typing.ConcreteTemplate for this dispatcher and the given *args* and *kws* types. This enables the resolving of the return type. A (template, pysig, args, kws) tuple is returned. """ # Ensure an overload is available if self._can_compile: self.compile(tuple(args)) pysig = None # Create function type for typing func_name = self.py_func.__name__ name = "CallTemplate({0})".format(func_name) # The `key` isn't really used except for diagnosis here, # so avoid keeping a reference to `cfunc`. call_template = typing.make_concrete_template( name, key=func_name, signatures=self.nopython_signatures) return call_template, pysig, args, kws class ObjModeLiftedWith(LiftedWith): def __init__(self, *args, **kwargs): self.output_types = kwargs.pop('output_types', None) super(LiftedWith, self).__init__(*args, **kwargs) if not self.flags.force_pyobject: raise ValueError("expecting `flags.force_pyobject`") if self.output_types is None: raise TypeError('`output_types` must be provided') # switch off rewrites, they have no effect self.flags.no_rewrites = True @property def _numba_type_(self): return types.ObjModeDispatcher(self) def get_call_template(self, args, kws): """ Get a typing.ConcreteTemplate for this dispatcher and the given *args* and *kws* types. This enables the resolving of the return type. A (template, pysig, args, kws) tuple is returned. """ assert not kws self._legalize_arg_types(args) # Coerce to object mode args = [types.ffi_forced_object] * len(args) if self._can_compile: self.compile(tuple(args)) signatures = [typing.signature(self.output_types, *args)] pysig = None func_name = self.py_func.__name__ name = "CallTemplate({0})".format(func_name) call_template = typing.make_concrete_template( name, key=func_name, signatures=signatures) return call_template, pysig, args, kws def _legalize_arg_types(self, args): for i, a in enumerate(args, start=1): if isinstance(a, types.List): msg = ( 'Does not support list type inputs into ' 'with-context for arg {}' ) raise errors.TypingError(msg.format(i)) elif isinstance(a, types.Dispatcher): msg = ( 'Does not support function type inputs into ' 'with-context for arg {}' ) raise errors.TypingError(msg.format(i)) @global_compiler_lock def compile(self, sig): args, _ = sigutils.normalize_signature(sig) sig = (types.ffi_forced_object,) * len(args) return super().compile(sig) # Initialize typeof machinery _dispatcher.typeof_init( OmittedArg, dict((str(t), t._code) for t in types.number_domain)) numba-0.55.1/numba/core/entrypoints.py000664 000000 000000 00000002003 14174536160 017670 0ustar00rootroot000000 000000 import logging import warnings from pkg_resources import iter_entry_points _already_initialized = False logger = logging.getLogger(__name__) def init_all(): '''Execute all `numba_extensions` entry points with the name `init` If extensions have already been initialized, this function does nothing. 
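    Extensions advertise an ``init`` entry point in the ``numba_extensions``
    group of their package metadata; e.g. in ``setup.py`` (the package and
    function names below are illustrative only)::

        setup(
            ...,
            entry_points={
                "numba_extensions": [
                    "init = my_extension_package:init_func",
                ],
            },
        )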
    '''
    global _already_initialized
    if _already_initialized:
        return
    # Must put this here to avoid extensions re-triggering initialization
    _already_initialized = True

    for entry_point in iter_entry_points('numba_extensions', 'init'):
        logger.debug('Loading extension: %s', entry_point)
        try:
            func = entry_point.load()
            func()
        except Exception as e:
            msg = "Numba extension module '{}' failed to load due to '{}({})'."
            warnings.warn(msg.format(entry_point.module_name,
                                     type(e).__name__, str(e)),
                          stacklevel=2)
            logger.debug('Extension loading failed for: %s', entry_point)
numba-0.55.1/numba/core/environment.py000664 000000 000000 00000003147 14174536160 017650 0ustar00rootroot000000 000000 import weakref
import importlib

from numba import _dynfunc


class Environment(_dynfunc.Environment):
    """Stores globals and constant pyobjects for runtime.

    It is often needed to convert between nopython objects and pyobjects.
    """
    __slots__ = ('env_name', '__weakref__')
    # A weak-value dictionary to store live environment with env_name as the
    # key.
    _memo = weakref.WeakValueDictionary()

    @classmethod
    def from_fndesc(cls, fndesc):
        try:
            # Avoid creating new Env
            return cls._memo[fndesc.env_name]
        except KeyError:
            inst = cls(fndesc.lookup_globals())
            inst.env_name = fndesc.env_name
            cls._memo[fndesc.env_name] = inst
            return inst

    def can_cache(self):
        is_dyn = '__name__' not in self.globals
        return not is_dyn

    def __reduce__(self):
        return _rebuild_env, (
            self.globals.get('__name__'),
            self.consts,
            self.env_name,
        )

    def __del__(self):
        return

    def __repr__(self):
        return f"<Environment {self.env_name!r}>"


def _rebuild_env(modname, consts, env_name):
    env = lookup_environment(env_name)
    if env is not None:
        return env

    mod = importlib.import_module(modname)
    env = Environment(mod.__dict__)
    env.consts[:] = consts
    env.env_name = env_name
    # Cache loaded object
    Environment._memo[env_name] = env
    return env


def lookup_environment(env_name):
    """Returns the Environment object for the given name;
    or None if not found
    """
    return Environment._memo.get(env_name)
numba-0.55.1/numba/core/errors.py000664 000000 000000 00000060323 14174536160 016617 0ustar00rootroot000000 000000 """
Numba-specific errors and warnings.
"""

import abc
import contextlib
import os
import sys
import warnings
import numba.core.config
import numpy as np
from collections import defaultdict
from numba.core.utils import (chain_exception, use_old_style_errors,
                              use_new_style_errors)
from functools import wraps
from abc import abstractmethod

# Filled at the end
__all__ = []


class NumbaWarning(Warning):
    """
    Base category for all Numba compiler warnings.
    """

    def __init__(self, msg, loc=None, highlighting=True, ):
        self.msg = msg
        self.loc = loc

        if highlighting:
            highlight = termcolor().errmsg
        else:
            def highlight(x):
                return x
        if loc:
            super(NumbaWarning, self).__init__(
                highlight("%s\n%s\n" % (msg, loc.strformat())))
        else:
            super(NumbaWarning, self).__init__(highlight("%s" % (msg,)))


class NumbaPerformanceWarning(NumbaWarning):
    """
    Warning category for when an operation might not be as fast as expected.
    """


class NumbaDeprecationWarning(NumbaWarning):
    """
    Warning category for use of a deprecated feature.
    """


class NumbaPendingDeprecationWarning(NumbaWarning):
    """
    Warning category for use of a feature that is pending deprecation.
    """


class NumbaParallelSafetyWarning(NumbaWarning):
    """
    Warning category for when an operation in a prange might not have
    parallel semantics.
    """


class NumbaTypeSafetyWarning(NumbaWarning):
    """
    Warning category for unsafe casting operations.
""" class NumbaExperimentalFeatureWarning(NumbaWarning): """ Warning category for using an experimental feature. """ class NumbaInvalidConfigWarning(NumbaWarning): """ Warning category for using an invalid configuration. """ class NumbaPedanticWarning(NumbaWarning): """ Warning category for reporting pedantic messages. """ def __init__(self, msg, **kwargs): super().__init__(f"{msg}\n{pedantic_warning_info}") class NumbaIRAssumptionWarning(NumbaPedanticWarning): """ Warning category for reporting an IR assumption violation. """ class NumbaDebugInfoWarning(NumbaWarning): """ Warning category for an issue with the emission of debug information. """ # These are needed in the color formatting of errors setup class _ColorScheme(metaclass=abc.ABCMeta): @abstractmethod def code(self, msg): pass @abstractmethod def errmsg(self, msg): pass @abstractmethod def filename(self, msg): pass @abstractmethod def indicate(self, msg): pass @abstractmethod def highlight(self, msg): pass @abstractmethod def reset(self, msg): pass class _DummyColorScheme(_ColorScheme): def __init__(self, theme=None): pass def code(self, msg): pass def errmsg(self, msg): pass def filename(self, msg): pass def indicate(self, msg): pass def highlight(self, msg): pass def reset(self, msg): pass # holds reference to the instance of the terminal color scheme in use _termcolor_inst = None try: import colorama # If the colorama version is < 0.3.9 it can break stdout/stderr in some # situations, as a result if this condition is met colorama is disabled and # the user is warned. Note that early versions did not have a __version__. colorama_version = getattr(colorama, '__version__', '0.0.0') if tuple([int(x) for x in colorama_version.split('.')]) < (0, 3, 9): msg = ("Insufficiently recent colorama version found. " "Numba requires colorama >= 0.3.9") # warn the user warnings.warn(msg) # trip the exception to disable color errors raise ImportError # If Numba is running in testsuite mode then do not use error message # coloring so CI system output is consistently readable without having # to read between shell escape characters. if os.environ.get('NUMBA_DISABLE_ERROR_MESSAGE_HIGHLIGHTING', None): raise ImportError # just to trigger the exception handler below except ImportError: class NOPColorScheme(_DummyColorScheme): def __init__(self, theme=None): if theme is not None: raise ValueError("specifying a theme has no effect") _DummyColorScheme.__init__(self, theme=theme) def code(self, msg): return msg def errmsg(self, msg): return msg def filename(self, msg): return msg def indicate(self, msg): return msg def highlight(self, msg): return msg def reset(self, msg): return msg def termcolor(): global _termcolor_inst if _termcolor_inst is None: _termcolor_inst = NOPColorScheme() return _termcolor_inst else: from colorama import init, reinit, deinit, Fore, Style class ColorShell(object): _has_initialized = False def __init__(self): init() self._has_initialized = True def __enter__(self): if self._has_initialized: reinit() def __exit__(self, *exc_detail): Style.RESET_ALL deinit() class reset_terminal(object): def __init__(self): self._buf = bytearray(b'') def __enter__(self): return self._buf def __exit__(self, *exc_detail): self._buf += bytearray(Style.RESET_ALL.encode('utf-8')) # define some default themes, if more are added, update the envvars docs! 
themes = {} # No color added, just bold weighting themes['no_color'] = {'code': None, 'errmsg': None, 'filename': None, 'indicate': None, 'highlight': None, 'reset': None, } # suitable for terminals with a dark background themes['dark_bg'] = {'code': Fore.BLUE, 'errmsg': Fore.YELLOW, 'filename': Fore.WHITE, 'indicate': Fore.GREEN, 'highlight': Fore.RED, 'reset': Style.RESET_ALL, } # suitable for terminals with a light background themes['light_bg'] = {'code': Fore.BLUE, 'errmsg': Fore.BLACK, 'filename': Fore.MAGENTA, 'indicate': Fore.BLACK, 'highlight': Fore.RED, 'reset': Style.RESET_ALL, } # suitable for terminals with a blue background themes['blue_bg'] = {'code': Fore.WHITE, 'errmsg': Fore.YELLOW, 'filename': Fore.MAGENTA, 'indicate': Fore.CYAN, 'highlight': Fore.RED, 'reset': Style.RESET_ALL, } # suitable for use in jupyter notebooks themes['jupyter_nb'] = {'code': Fore.BLACK, 'errmsg': Fore.BLACK, 'filename': Fore.GREEN, 'indicate': Fore.CYAN, 'highlight': Fore.RED, 'reset': Style.RESET_ALL, } default_theme = themes['no_color'] class HighlightColorScheme(_DummyColorScheme): def __init__(self, theme=default_theme): self._code = theme['code'] self._errmsg = theme['errmsg'] self._filename = theme['filename'] self._indicate = theme['indicate'] self._highlight = theme['highlight'] self._reset = theme['reset'] _DummyColorScheme.__init__(self, theme=theme) def _markup(self, msg, color=None, style=Style.BRIGHT): features = '' if color: features += color if style: features += style with ColorShell(): with reset_terminal() as mu: mu += features.encode('utf-8') mu += (msg).encode('utf-8') return mu.decode('utf-8') def code(self, msg): return self._markup(msg, self._code) def errmsg(self, msg): return self._markup(msg, self._errmsg) def filename(self, msg): return self._markup(msg, self._filename) def indicate(self, msg): return self._markup(msg, self._indicate) def highlight(self, msg): return self._markup(msg, self._highlight) def reset(self, msg): return self._markup(msg, self._reset) def termcolor(): global _termcolor_inst if _termcolor_inst is None: scheme = themes[numba.core.config.COLOR_SCHEME] _termcolor_inst = HighlightColorScheme(scheme) return _termcolor_inst pedantic_warning_info = """ This warning came from an internal pedantic check. Please report the warning message and traceback, along with a minimal reproducer at: https://github.com/numba/numba/issues/new?template=bug_report.md """ feedback_details = """ Please report the error message and traceback, along with a minimal reproducer at: https://github.com/numba/numba/issues/new?template=bug_report.md If more help is needed please feel free to speak to the Numba core developers directly at: https://gitter.im/numba/numba Thanks in advance for your help in improving Numba! """ unsupported_error_info = """ Unsupported functionality was found in the code Numba was trying to compile. If this functionality is important to you please file a feature request at: https://github.com/numba/numba/issues/new?template=feature_request.md """ interpreter_error_info = """ Unsupported Python functionality was found in the code Numba was trying to compile. This error could be due to invalid code, does the code work without Numba? (To temporarily disable Numba JIT, set the `NUMBA_DISABLE_JIT` environment variable to non-zero, and then rerun the code). 
If the code is valid and the unsupported functionality is important to you please file a feature request at: https://github.com/numba/numba/issues/new?template=feature_request.md To see Python/NumPy features supported by the latest release of Numba visit: https://numba.readthedocs.io/en/stable/reference/pysupported.html and https://numba.readthedocs.io/en/stable/reference/numpysupported.html """ constant_inference_info = """ Numba could not make a constant out of something that it decided should be a constant. This could well be a current limitation in Numba's internals, however please first check that your code is valid for compilation, particularly with respect to string interpolation (not supported!) and the requirement of compile time constants as arguments to exceptions: https://numba.readthedocs.io/en/stable/reference/pysupported.html?highlight=exceptions#constructs If the code is valid and the unsupported functionality is important to you please file a feature request at: https://github.com/numba/numba/issues/new?template=feature_request.md If you think your code should work with Numba. %s """ % feedback_details typing_error_info = """ This is not usually a problem with Numba itself but instead often caused by the use of unsupported features or an issue in resolving types. To see Python/NumPy features supported by the latest release of Numba visit: https://numba.readthedocs.io/en/stable/reference/pysupported.html and https://numba.readthedocs.io/en/stable/reference/numpysupported.html For more information about typing errors and how to debug them visit: https://numba.readthedocs.io/en/stable/user/troubleshoot.html#my-code-doesn-t-compile If you think your code should work with Numba, please report the error message and traceback, along with a minimal reproducer at: https://github.com/numba/numba/issues/new?template=bug_report.md """ reportable_issue_info = """ ------------------------------------------------------------------------------- This should not have happened, a problem has occurred in Numba's internals. You are currently using Numba version %s. %s """ % (numba.__version__, feedback_details) error_extras = dict() error_extras['unsupported_error'] = unsupported_error_info error_extras['typing'] = typing_error_info error_extras['reportable'] = reportable_issue_info error_extras['interpreter'] = interpreter_error_info error_extras['constant_inference'] = constant_inference_info def deprecated(arg): """Define a deprecation decorator. An optional string should refer to the new API to be used instead. Example: @deprecated def old_func(): ... @deprecated('new_func') def old_func(): ...""" subst = arg if isinstance(arg, str) else None def decorator(func): def wrapper(*args, **kwargs): msg = "Call to deprecated function \"{}\"." if subst: msg += "\n Use \"{}\" instead." warnings.warn(msg.format(func.__name__, subst), category=DeprecationWarning, stacklevel=2) return func(*args, **kwargs) return wraps(func)(wrapper) if not subst: return decorator(arg) else: return decorator class WarningsFixer(object): """ An object "fixing" warnings of a given category caught during certain phases. The warnings can have their filename and lineno fixed, and they are deduplicated as well. """ def __init__(self, category): self._category = category # {(filename, lineno, category) -> messages} self._warnings = defaultdict(set) @contextlib.contextmanager def catch_warnings(self, filename=None, lineno=None): """ Store warnings and optionally fix their filename and lineno. 
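        A usage sketch (illustrative)::

            fixer = WarningsFixer(NumbaWarning)
            with fixer.catch_warnings(filename="f.py", lineno=3):
                ...  # code that may emit NumbaWarning
            fixer.flush()  # emit the stored, deduplicated warnings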
""" with warnings.catch_warnings(record=True) as wlist: warnings.simplefilter('always', self._category) yield for w in wlist: msg = str(w.message) if issubclass(w.category, self._category): # Store warnings of this category for deduplication filename = filename or w.filename lineno = lineno or w.lineno self._warnings[filename, lineno, w.category].add(msg) else: # Simply emit other warnings again warnings.warn_explicit(msg, w.category, w.filename, w.lineno) def flush(self): """ Emit all stored warnings. """ def key(arg): # It is possible through codegen to create entirely identical # warnings, this leads to comparing types when sorting which breaks # on Python 3. Key as str() and if the worse happens then `id` # creates some uniqueness return str(arg) + str(id(arg)) for (filename, lineno, category), messages in sorted( self._warnings.items(), key=key): for msg in sorted(messages): warnings.warn_explicit(msg, category, filename, lineno) self._warnings.clear() class NumbaError(Exception): def __init__(self, msg, loc=None, highlighting=True): self.msg = msg self.loc = loc if highlighting: highlight = termcolor().errmsg else: def highlight(x): return x if loc: new_msg = "%s\n%s\n" % (msg, loc.strformat()) else: new_msg = "%s" % (msg,) super(NumbaError, self).__init__(highlight(new_msg)) @property def contexts(self): try: return self._contexts except AttributeError: self._contexts = lst = [] return lst def add_context(self, msg): """ Add contextual info. The exception message is expanded with the new contextual information. """ self.contexts.append(msg) f = termcolor().errmsg('{0}\n') + termcolor().filename('During: {1}') newmsg = f.format(self, msg) self.args = (newmsg,) return self def patch_message(self, new_message): """ Change the error message to the given new message. """ self.args = (new_message,) + self.args[1:] class UnsupportedError(NumbaError): """ Numba does not have an implementation for this functionality. """ pass class UnsupportedRewriteError(UnsupportedError): """UnsupportedError from rewrite passes """ pass class IRError(NumbaError): """ An error occurred during Numba IR generation. """ pass class RedefinedError(IRError): """ An error occurred during interpretation of IR due to variable redefinition. """ pass class NotDefinedError(IRError): """ An undefined variable is encountered during interpretation of IR. """ def __init__(self, name, loc=None): self.name = name msg = ("The compiler failed to analyze the bytecode. " "Variable '%s' is not defined." % name) super(NotDefinedError, self).__init__(msg, loc=loc) class VerificationError(IRError): """ An error occurred during IR verification. Once Numba's internal representation (IR) is constructed it is then verified to ensure that terminators are both present and in the correct places within the IR. If it is the case that this condition is not met, a VerificationError is raised. """ pass class DeprecationError(NumbaError): """ Functionality is deprecated. """ pass class LoweringError(NumbaError): """ An error occurred during lowering. """ def __init__(self, msg, loc=None): super(LoweringError, self).__init__(msg, loc=loc) class UnsupportedParforsError(NumbaError): """ An error ocurred because parfors is not supported on the platform. """ pass class ForbiddenConstruct(LoweringError): """ A forbidden Python construct was encountered (e.g. use of locals()). """ pass class TypingError(NumbaError): """ A type inference failure. 
""" pass class UntypedAttributeError(TypingError): def __init__(self, value, attr, loc=None): module = getattr(value, 'pymod', None) if module is not None and module == np: # unsupported numpy feature. msg = ("Use of unsupported NumPy function 'numpy.%s' " "or unsupported use of the function.") % attr else: msg = "Unknown attribute '{attr}' of type {type}" msg = msg.format(type=value, attr=attr) super(UntypedAttributeError, self).__init__(msg, loc=loc) class ByteCodeSupportError(NumbaError): """ Failure to extract the bytecode of the user's function. """ def __init__(self, msg, loc=None): super(ByteCodeSupportError, self).__init__(msg, loc=loc) class CompilerError(NumbaError): """ Some high-level error in the compiler. """ pass class ConstantInferenceError(NumbaError): """ Failure during constant inference. """ def __init__(self, value, loc=None): super(ConstantInferenceError, self).__init__(value, loc=loc) class InternalError(NumbaError): """ For wrapping internal error occured within the compiler """ def __init__(self, exception): super(InternalError, self).__init__(str(exception)) self.old_exception = exception class InternalTargetMismatchError(InternalError): """For signalling a target mismatch error occurred internally within the compiler. """ def __init__(self, kind, target_hw, hw_clazz): msg = (f"{kind.title()} being resolved on a target from which it does " f"not inherit. Local target is {target_hw}, declared " f"target class is {hw_clazz}.") super().__init__(msg) class RequireLiteralValue(TypingError): """ For signalling that a function's typing requires a constant value for some of its arguments. """ pass class ForceLiteralArg(NumbaError): """A Pseudo-exception to signal the dispatcher to type an argument literally Attributes ---------- requested_args : frozenset[int] requested positions of the arguments. """ def __init__(self, arg_indices, fold_arguments=None, loc=None): """ Parameters ---------- arg_indices : Sequence[int] requested positions of the arguments. fold_arguments: callable A function ``(tuple, dict) -> tuple`` that binds and flattens the ``args`` and ``kwargs``. loc : numba.ir.Loc or None """ super(ForceLiteralArg, self).__init__( "Pseudo-exception to force literal arguments in the dispatcher", loc=loc, ) self.requested_args = frozenset(arg_indices) self.fold_arguments = fold_arguments def bind_fold_arguments(self, fold_arguments): """Bind the fold_arguments function """ e = ForceLiteralArg(self.requested_args, fold_arguments, loc=self.loc) return chain_exception(e, self) def combine(self, other): """Returns a new instance by or'ing the requested_args. """ if not isinstance(other, ForceLiteralArg): m = '*other* must be a {} but got a {} instead' raise TypeError(m.format(ForceLiteralArg, type(other))) return ForceLiteralArg(self.requested_args | other.requested_args) def __or__(self, other): """Same as self.combine(other) """ return self.combine(other) class LiteralTypingError(TypingError): """ Failure in typing a Literal type """ pass # These Exception classes are just Numba copies of their Python equivalents for # use internally in cases where we want e.g. type inference to keep on trying. # Exceptions extending from NumbaError are considered "special" by Numba's # internals and are treated differently to standard Python exceptions which are # permitted to just propagate up the stack. 
class NumbaValueError(TypingError): pass class NumbaTypeError(TypingError): pass class NumbaAttributeError(TypingError): pass class NumbaAssertionError(TypingError): pass class NumbaNotImplementedError(TypingError): pass class NumbaKeyError(TypingError): pass class NumbaIndexError(TypingError): pass class NumbaRuntimeError(NumbaError): pass def _format_msg(fmt, args, kwargs): return fmt.format(*args, **kwargs) _numba_path = os.path.dirname(__file__) loc_info = {} @contextlib.contextmanager def new_error_context(fmt_, *args, **kwargs): """ A contextmanager that prepend contextual information to any exception raised within. If the exception type is not an instance of NumbaError, it will be wrapped into a InternalError. The exception class can be changed by providing a "errcls_" keyword argument with the exception constructor. The first argument is a message that describes the context. It can be a format string. If there are additional arguments, it will be used as ``fmt_.format(*args, **kwargs)`` to produce the final message string. """ errcls = kwargs.pop('errcls_', InternalError) loc = kwargs.get('loc', None) if loc is not None and not loc.filename.startswith(_numba_path): loc_info.update(kwargs) try: yield except NumbaError as e: e.add_context(_format_msg(fmt_, args, kwargs)) raise except AssertionError: # Let assertion error pass through for shorter traceback in debugging raise except Exception as e: if use_old_style_errors(): newerr = errcls(e).add_context(_format_msg(fmt_, args, kwargs)) if numba.core.config.FULL_TRACEBACKS: tb = sys.exc_info()[2] else: tb = None raise newerr.with_traceback(tb) elif use_new_style_errors(): raise e else: msg = ("Unknown CAPTURED_ERRORS style: " f"'{numba.core.config.CAPTURED_ERRORS}'.") assert 0, msg __all__ += [name for (name, value) in globals().items() if not name.startswith('_') and isinstance(value, type) and issubclass(value, (Exception, Warning))] numba-0.55.1/numba/core/event.py000664 000000 000000 00000023744 14174536160 016432 0ustar00rootroot000000 000000 """ The ``numba.core.event`` module provides a simple event system for applications to register callbacks to listen to specific compiler events. The following events are built in: - ``"numba:compile"`` is broadcast when a dispatcher is compiling. Events of this kind have ``data`` defined to be a ``dict`` with the following key-values: - ``"dispatcher"``: the dispatcher object that is compiling. - ``"args"``: the argument types. - ``"return_type"``: the return type. - ``"numba:compiler_lock"`` is broadcast when the internal compiler-lock is acquired. This is mostly used internally to measure time spent with the lock acquired. - ``"numba:llvm_lock"`` is broadcast when the internal LLVM-lock is acquired. This is used internally to measure time spent with the lock acquired. Applications can register callbacks that are listening for specific events using ``register(kind: str, listener: Listener)``, where ``listener`` is an instance of ``Listener`` that defines custom actions on occurrence of the specific event. """ import abc import enum import time from timeit import default_timer as timer from contextlib import contextmanager, ExitStack from collections import defaultdict class EventStatus(enum.Enum): """Status of an event. """ START = enum.auto() END = enum.auto() # Builtin event kinds. _builtin_kinds = frozenset([ "numba:compiler_lock", "numba:compile", "numba:llvm_lock", ]) def _guard_kind(kind): """Guard to ensure that an event kind is valid. 
    All event kinds with a "numba:" prefix must be defined in the pre-defined
    ``numba.core.event._builtin_kinds``.
    Custom event kinds are allowed by not using the above prefix.

    Parameters
    ----------
    kind : str

    Returns
    -------
    res : str
    """
    if kind.startswith("numba:") and kind not in _builtin_kinds:
        msg = (f"{kind} is not a valid event kind, "
               "it starts with the reserved prefix 'numba:'")
        raise ValueError(msg)
    return kind


class Event:
    """An event.

    Parameters
    ----------
    kind : str
    status : EventStatus
    data : any; optional
        Additional data for the event.
    exc_details : 3-tuple; optional
        Same 3-tuple for ``__exit__``.
    """
    def __init__(self, kind, status, data=None, exc_details=None):
        self._kind = _guard_kind(kind)
        self._status = status
        self._data = data
        self._exc_details = (None
                             if exc_details is None or exc_details[0] is None
                             else exc_details)

    @property
    def kind(self):
        """Event kind

        Returns
        -------
        res : str
        """
        return self._kind

    @property
    def status(self):
        """Event status

        Returns
        -------
        res : EventStatus
        """
        return self._status

    @property
    def data(self):
        """Event data

        Returns
        -------
        res : object
        """
        return self._data

    @property
    def is_start(self):
        """Is it a *START* event?

        Returns
        -------
        res : bool
        """
        return self._status == EventStatus.START

    @property
    def is_end(self):
        """Is it an *END* event?

        Returns
        -------
        res : bool
        """
        return self._status == EventStatus.END

    @property
    def is_failed(self):
        """Is the event carrying an exception?

        This is used for *END* event. This method will never return ``True``
        in a *START* event.

        Returns
        -------
        res : bool
        """
        return self._exc_details is not None

    def __str__(self):
        data = (f"{type(self.data).__qualname__}"
                if self.data is not None else "None")
        return f"Event({self._kind}, {self._status}, data: {data})"

    __repr__ = __str__


_registered = defaultdict(list)


def register(kind, listener):
    """Register a listener for a given event kind.

    Parameters
    ----------
    kind : str
    listener : Listener
    """
    assert isinstance(listener, Listener)
    kind = _guard_kind(kind)
    _registered[kind].append(listener)


def unregister(kind, listener):
    """Unregister a listener for a given event kind.

    Parameters
    ----------
    kind : str
    listener : Listener
    """
    assert isinstance(listener, Listener)
    kind = _guard_kind(kind)
    lst = _registered[kind]
    lst.remove(listener)


def broadcast(event):
    """Broadcast an event to all registered listeners.

    Parameters
    ----------
    event : Event
    """
    for listener in _registered[event.kind]:
        listener.notify(event)


class Listener(abc.ABC):
    """Base class for all event listeners.
    """
    @abc.abstractmethod
    def on_start(self, event):
        """Called when there is a *START* event.

        Parameters
        ----------
        event : Event
        """
        pass

    @abc.abstractmethod
    def on_end(self, event):
        """Called when there is a *END* event.

        Parameters
        ----------
        event : Event
        """
        pass

    def notify(self, event):
        """Notify this Listener with the given Event.

        Parameters
        ----------
        event : Event
        """
        if event.is_start:
            self.on_start(event)
        elif event.is_end:
            self.on_end(event)
        else:
            raise AssertionError("unreachable")


class TimingListener(Listener):
    """A listener that measures the total time spent between *START* and
    *END* events during the time this listener is active.
    """
    def __init__(self):
        self._depth = 0

    def on_start(self, event):
        if self._depth == 0:
            self._ts = timer()
        self._depth += 1

    def on_end(self, event):
        self._depth -= 1
        if self._depth == 0:
            last = getattr(self, "_duration", 0)
            self._duration = (timer() - self._ts) + last

    @property
    def done(self):
        """Returns a ``bool`` indicating whether a measurement has been made.
When this returns ``False``, the matching event has never fired. If and only if this returns ``True``, ``.duration`` can be read without error. """ return hasattr(self, "_duration") @property def duration(self): """Returns the measured duration. This may raise ``AttributeError``. Users can use ``.done`` to check that a measurement has been made. """ return self._duration class RecordingListener(Listener): """A listener that records all events and stores them in the ``.buffer`` attribute as a list of 2-tuple ``(float, Event)``, where the first element is the time the event occurred as returned by ``time.time()`` and the second element is the event. """ def __init__(self): self.buffer = [] def on_start(self, event): self.buffer.append((time.time(), event)) def on_end(self, event): self.buffer.append((time.time(), event)) @contextmanager def install_listener(kind, listener): """Install a listener for event "kind" temporarily within the duration of the context. Returns ------- res : Listener The *listener* provided. Examples -------- >>> with install_listener("numba:compile", listener): >>> some_code() # listener will be active here. >>> other_code() # listener will be unregistered by this point. """ register(kind, listener) try: yield listener finally: unregister(kind, listener) @contextmanager def install_timer(kind, callback): """Install a TimingListener temporarily to measure the duration of an event. If the context completes successfully, the *callback* function is executed. The *callback* function is expected to take a float argument for the duration in seconds. Returns ------- res : TimingListener Examples -------- This is equivalent to: >>> with install_listener(kind, TimingListener()) as res: >>> ... """ tl = TimingListener() with install_listener(kind, tl): yield tl if tl.done: callback(tl.duration) @contextmanager def install_recorder(kind): """Install a RecordingListener temporarily to record all events. Once the context is closed, users can use ``RecordingListener.buffer`` to access the recorded events. Returns ------- res : RecordingListener Examples -------- This is equivalent to: >>> with install_listener(kind, RecordingListener()) as res: >>> ... """ rl = RecordingListener() with install_listener(kind, rl): yield rl def start_event(kind, data=None): """Trigger the start of an event of *kind* with *data*. Parameters ---------- kind : str Event kind. data : any; optional Extra event data. """ evt = Event(kind=kind, status=EventStatus.START, data=data) broadcast(evt) def end_event(kind, data=None, exc_details=None): """Trigger the end of an event of *kind*, *exc_details*. Parameters ---------- kind : str Event kind. data : any; optional Extra event data. exc_details : 3-tuple; optional Same 3-tuple for ``__exit__``. Or, ``None`` if no error. """ evt = Event( kind=kind, status=EventStatus.END, data=data, exc_details=exc_details, ) broadcast(evt) @contextmanager def trigger_event(kind, data=None): """A context manager to trigger the start and end events of *kind* with *data*. The start event is triggered when entering the context. The end event is triggered when exiting the context. Parameters ---------- kind : str Event kind. data : any; optional Extra event data. 
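    Examples
    --------
    A sketch with a custom event kind (``do_work`` is illustrative only)::

        with trigger_event("my_app:work", data={"item": 1}):
            do_work()   # listeners see the START event before this call
                        # and the END event after it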
""" with ExitStack() as scope: @scope.push def on_exit(*exc_details): end_event(kind, data=data, exc_details=exc_details) start_event(kind, data=data) yield numba-0.55.1/numba/core/extending.py000664 000000 000000 00000045603 14174536160 017274 0ustar00rootroot000000 000000 import os import uuid import weakref import collections import functools import numba from numba.core import types, errors, utils, config # Exported symbols from numba.core.typing.typeof import typeof_impl # noqa: F401 from numba.core.typing.asnumbatype import as_numba_type # noqa: F401 from numba.core.typing.templates import infer, infer_getattr # noqa: F401 from numba.core.imputils import ( # noqa: F401 lower_builtin, lower_getattr, lower_getattr_generic, # noqa: F401 lower_setattr, lower_setattr_generic, lower_cast) # noqa: F401 from numba.core.datamodel import models # noqa: F401 from numba.core.datamodel import register_default as register_model # noqa: F401, E501 from numba.core.pythonapi import box, unbox, reflect, NativeValue # noqa: F401 from numba._helperlib import _import_cython_function # noqa: F401 from numba.core.serialize import ReduceMixin def type_callable(func): """ Decorate a function as implementing typing for the callable *func*. *func* can be a callable object (probably a global) or a string denoting a built-in operation (such 'getitem' or '__array_wrap__') """ from numba.core.typing.templates import (CallableTemplate, infer, infer_global) if not callable(func) and not isinstance(func, str): raise TypeError("`func` should be a function or string") try: func_name = func.__name__ except AttributeError: func_name = str(func) def decorate(typing_func): def generic(self): return typing_func(self.context) name = "%s_CallableTemplate" % (func_name,) bases = (CallableTemplate,) class_dict = dict(key=func, generic=generic) template = type(name, bases, class_dict) infer(template) if callable(func): infer_global(func, types.Function(template)) return typing_func return decorate # By default, an *overload* does not have a cpython wrapper because it is not # callable from python. _overload_default_jit_options = {'no_cpython_wrapper': True} def overload(func, jit_options={}, strict=True, inline='never', prefer_literal=False, **kwargs): """ A decorator marking the decorated function as typing and implementing *func* in nopython mode. The decorated function will have the same formal parameters as *func* and be passed the Numba types of those parameters. It should return a function implementing *func* for the given types. Here is an example implementing len() for tuple types:: @overload(len) def tuple_len(seq): if isinstance(seq, types.BaseTuple): n = len(seq) def len_impl(seq): return n return len_impl Compiler options can be passed as an dictionary using the **jit_options** argument. Overloading strictness (that the typing and implementing signatures match) is enforced by the **strict** keyword argument, it is recommended that this is set to True (default). To handle a function that accepts imprecise types, an overload definition can return 2-tuple of ``(signature, impl_function)``, where the ``signature`` is a ``typing.Signature`` specifying the precise signature to be used; and ``impl_function`` is the same implementation function as in the simple case. If the kwarg inline determines whether the overload is inlined in the calling function and can be one of three values: * 'never' (default) - the overload is never inlined. * 'always' - the overload is always inlined. 
* a function that takes two arguments, both of which are instances of a namedtuple with fields: * func_ir * typemap * calltypes * signature The first argument holds the information from the caller, the second holds the information from the callee. The function should return Truthy to determine whether to inline, this essentially permitting custom inlining rules (typical use might be cost models). The *prefer_literal* option allows users to control if literal types should be tried first or last. The default (`False`) is to use non-literal types. Implementations that can specialize based on literal values should set the option to `True`. Note, this option maybe expanded in the near future to allow for more control (e.g. disabling non-literal types). **kwargs prescribes additional arguments passed through to the overload template. The only accepted key at present is 'target' which is a string corresponding to the target that this overload should be bound against. """ from numba.core.typing.templates import make_overload_template, infer_global # set default options opts = _overload_default_jit_options.copy() opts.update(jit_options) # let user options override # TODO: abort now if the kwarg 'target' relates to an unregistered target, # this requires sorting out the circular imports first. def decorate(overload_func): template = make_overload_template(func, overload_func, opts, strict, inline, prefer_literal, **kwargs) infer(template) if callable(func): infer_global(func, types.Function(template)) return overload_func return decorate def register_jitable(*args, **kwargs): """ Register a regular python function that can be executed by the python interpreter and can be compiled into a nopython function when referenced by other jit'ed functions. Can be used as:: @register_jitable def foo(x, y): return x + y Or, with compiler options:: @register_jitable(_nrt=False) # disable runtime allocation def foo(x, y): return x + y """ def wrap(fn): # It is just a wrapper for @overload inline = kwargs.pop('inline', 'never') @overload(fn, jit_options=kwargs, inline=inline, strict=False) def ov_wrap(*args, **kwargs): return fn return fn if kwargs: return wrap else: return wrap(*args) def overload_attribute(typ, attr, **kwargs): """ A decorator marking the decorated function as typing and implementing attribute *attr* for the given Numba type in nopython mode. *kwargs* are passed to the underlying `@overload` call. 
Here is an example implementing .nbytes for array types:: @overload_attribute(types.Array, 'nbytes') def array_nbytes(arr): def get(arr): return arr.size * arr.itemsize return get """ # TODO implement setters from numba.core.typing.templates import make_overload_attribute_template def decorate(overload_func): template = make_overload_attribute_template( typ, attr, overload_func, inline=kwargs.get('inline', 'never'), ) infer_getattr(template) overload(overload_func, **kwargs)(overload_func) return overload_func return decorate def _overload_method_common(typ, attr, **kwargs): """Common code for overload_method and overload_classmethod """ from numba.core.typing.templates import make_overload_method_template def decorate(overload_func): copied_kwargs = kwargs.copy() # avoid mutating parent dict template = make_overload_method_template( typ, attr, overload_func, inline=copied_kwargs.pop('inline', 'never'), prefer_literal=copied_kwargs.pop('prefer_literal', False), **copied_kwargs, ) infer_getattr(template) overload(overload_func, **kwargs)(overload_func) return overload_func return decorate def overload_method(typ, attr, **kwargs): """ A decorator marking the decorated function as typing and implementing method *attr* for the given Numba type in nopython mode. *kwargs* are passed to the underlying `@overload` call. Here is an example implementing .take() for array types:: @overload_method(types.Array, 'take') def array_take(arr, indices): if isinstance(indices, types.Array): def take_impl(arr, indices): n = indices.shape[0] res = np.empty(n, arr.dtype) for i in range(n): res[i] = arr[indices[i]] return res return take_impl """ return _overload_method_common(typ, attr, **kwargs) def overload_classmethod(typ, attr, **kwargs): """ A decorator marking the decorated function as typing and implementing classmethod *attr* for the given Numba type in nopython mode. Similar to ``overload_method``. Here is an example implementing a classmethod on the Array type to call ``np.arange()``:: @overload_classmethod(types.Array, "make") def ov_make(cls, nitems): def impl(cls, nitems): return np.arange(nitems) return impl The above code will allow the following to work in jit-compiled code:: @njit def foo(n): return types.Array.make(n) """ return _overload_method_common(types.TypeRef(typ), attr, **kwargs) def make_attribute_wrapper(typeclass, struct_attr, python_attr): """ Make an automatic attribute wrapper exposing member named *struct_attr* as a read-only attribute named *python_attr*. The given *typeclass*'s model must be a StructModel subclass. """ from numba.core.typing.templates import AttributeTemplate from numba.core.datamodel import default_manager from numba.core.datamodel.models import StructModel from numba.core.imputils import impl_ret_borrowed from numba.core import cgutils if not isinstance(typeclass, type) or not issubclass(typeclass, types.Type): raise TypeError("typeclass should be a Type subclass, got %s" % (typeclass,)) def get_attr_fe_type(typ): """ Get the Numba type of member *struct_attr* in *typ*. 
""" model = default_manager.lookup(typ) if not isinstance(model, StructModel): raise TypeError("make_struct_attribute_wrapper() needs a type " "with a StructModel, but got %s" % (model,)) return model.get_member_fe_type(struct_attr) @infer_getattr class StructAttribute(AttributeTemplate): key = typeclass def generic_resolve(self, typ, attr): if attr == python_attr: return get_attr_fe_type(typ) @lower_getattr(typeclass, python_attr) def struct_getattr_impl(context, builder, typ, val): val = cgutils.create_struct_proxy(typ)(context, builder, value=val) attrty = get_attr_fe_type(typ) attrval = getattr(val, struct_attr) return impl_ret_borrowed(context, builder, attrty, attrval) class _Intrinsic(ReduceMixin): """ Dummy callable for intrinsic """ _memo = weakref.WeakValueDictionary() # hold refs to last N functions deserialized, retaining them in _memo # regardless of whether there is another reference _recent = collections.deque(maxlen=config.FUNCTION_CACHE_SIZE) __uuid = None def __init__(self, name, defn, **kwargs): self._ctor_kwargs = kwargs self._name = name self._defn = defn functools.update_wrapper(self, defn) @property def _uuid(self): """ An instance-specific UUID, to avoid multiple deserializations of a given instance. Note this is lazily-generated, for performance reasons. """ u = self.__uuid if u is None: u = str(uuid.uuid1()) self._set_uuid(u) return u def _set_uuid(self, u): assert self.__uuid is None self.__uuid = u self._memo[u] = self self._recent.append(self) def _register(self): # _ctor_kwargs from numba.core.typing.templates import (make_intrinsic_template, infer_global) template = make_intrinsic_template(self, self._defn, self._name, self._ctor_kwargs) infer(template) infer_global(self, types.Function(template)) def __call__(self, *args, **kwargs): """ This is only defined to pretend to be a callable from CPython. """ msg = '{0} is not usable in pure-python'.format(self) raise NotImplementedError(msg) def __repr__(self): return "".format(self._name) def __deepcopy__(self, memo): # NOTE: Intrinsic are immutable and we don't need to copy. # This is triggered from deepcopy of statements. return self def _reduce_states(self): """ NOTE: part of ReduceMixin protocol """ return dict(uuid=self._uuid, name=self._name, defn=self._defn) @classmethod def _rebuild(cls, uuid, name, defn): """ NOTE: part of ReduceMixin protocol """ try: return cls._memo[uuid] except KeyError: llc = cls(name=name, defn=defn) llc._register() llc._set_uuid(uuid) return llc def intrinsic(*args, **kwargs): """ A decorator marking the decorated function as typing and implementing *func* in nopython mode using the llvmlite IRBuilder API. This is an escape hatch for expert users to build custom LLVM IR that will be inlined to the caller. The first argument to *func* is the typing context. The rest of the arguments corresponds to the type of arguments of the decorated function. These arguments are also used as the formal argument of the decorated function. If *func* has the signature ``foo(typing_context, arg0, arg1)``, the decorated function will have the signature ``foo(arg0, arg1)``. The return values of *func* should be a 2-tuple of expected type signature, and a code-generation function that will passed to ``lower_builtin``. For unsupported operation, return None. 
Here is an example implementing a ``cast_int_to_byte_ptr`` that casts any integer to a byte pointer:: @intrinsic def cast_int_to_byte_ptr(typingctx, src): # check for accepted types if isinstance(src, types.Integer): # create the expected type signature result_type = types.CPointer(types.uint8) sig = result_type(types.uintp) # defines the custom code generation def codegen(context, builder, signature, args): # llvm IRBuilder code here [src] = args rtype = signature.return_type llrtype = context.get_value_type(rtype) return builder.inttoptr(src, llrtype) return sig, codegen """ # Make inner function for the actual work def _intrinsic(func): name = getattr(func, '__name__', str(func)) llc = _Intrinsic(name, func, **kwargs) llc._register() return llc if not kwargs: # No option is given return _intrinsic(*args) else: # options are given, create a new callable to receive the # definition function def wrapper(func): return _intrinsic(func) return wrapper def get_cython_function_address(module_name, function_name): """ Get the address of a Cython function. Args ---- module_name: Name of the Cython module function_name: Name of the Cython function Returns ------- A Python int containing the address of the function """ return _import_cython_function(module_name, function_name) def include_path(): """Returns the C include directory path. """ include_dir = os.path.dirname(os.path.dirname(numba.__file__)) path = os.path.abspath(include_dir) return path def sentry_literal_args(pysig, literal_args, args, kwargs): """Ensures that the given argument types (in *args* and *kwargs*) are literally typed for a function with the python signature *pysig* and the list of literal argument names in *literal_args*. Alternatively, this is the same as:: SentryLiteralArgs(literal_args).for_pysig(pysig).bind(*args, **kwargs) """ boundargs = pysig.bind(*args, **kwargs) # Find literal argument positions and whether each is satisfied. request_pos = set() missing = False for i, (k, v) in enumerate(boundargs.arguments.items()): if k in literal_args: request_pos.add(i) if not isinstance(v, types.Literal): missing = True if missing: # Yes, there are missing required literal arguments e = errors.ForceLiteralArg(request_pos) # A helper function to fold arguments def folded(args, kwargs): out = pysig.bind(*args, **kwargs).arguments.values() return tuple(out) raise e.bind_fold_arguments(folded) class SentryLiteralArgs(collections.namedtuple( '_SentryLiteralArgs', ['literal_args'])): """ Parameters ---------- literal_args : Sequence[str] A sequence of names for literal arguments Examples -------- The following line: >>> SentryLiteralArgs(literal_args).for_pysig(pysig).bind(*args, **kwargs) is equivalent to: >>> sentry_literal_args(pysig, literal_args, args, kwargs) """ def for_function(self, func): """Bind the sentry to the signature of *func*. Parameters ---------- func : Function A python function. Returns ------- obj : BoundLiteralArgs """ return self.for_pysig(utils.pysignature(func)) def for_pysig(self, pysig): """Bind the sentry to the given signature *pysig*. Parameters ---------- pysig : inspect.Signature Returns ------- obj : BoundLiteralArgs """ return BoundLiteralArgs( pysig=pysig, literal_args=self.literal_args, ) class BoundLiteralArgs(collections.namedtuple( 'BoundLiteralArgs', ['pysig', 'literal_args'])): """ This class is usually created by SentryLiteralArgs. """ def bind(self, *args, **kwargs): """Bind to argument types.
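For example (a sketch; ``my_func`` and its argument names are hypothetical), an overload requiring *axis* to be a literal can guard itself with::

    @overload(my_func)
    def ov_my_func(arr, axis):
        SentryLiteralArgs(['axis']).for_function(ov_my_func).bind(arr, axis)
        ...

If *axis* is not typed as a Literal, this raises ForceLiteralArg to request re-typing with the literal value.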
""" return sentry_literal_args( self.pysig, self.literal_args, args, kwargs, ) def is_jitted(function): """Returns True if a function is wrapped by one of the Numba @jit decorators, for example: numba.jit, numba.njit The purpose of this function is to provide a means to check if a function is already JIT decorated. """ # don't want to export this so import locally from numba.core.dispatcher import Dispatcher return isinstance(function, Dispatcher) numba-0.55.1/numba/core/externals.py000664 000000 000000 00000011364 14174536160 017311 0ustar00rootroot000000 000000 """ Register external C functions necessary for Numba code generation. """ import sys from llvmlite import ir import llvmlite.binding as ll from numba.core import utils, intrinsics from numba import _helperlib def _add_missing_symbol(symbol, addr): """Add missing symbol into LLVM internal symtab """ if not ll.address_of_symbol(symbol): ll.add_symbol(symbol, addr) def _get_msvcrt_symbol(symbol): """ Under Windows, look up a symbol inside the C runtime and return the raw pointer value as an integer. """ from ctypes import cdll, cast, c_void_p f = getattr(cdll.msvcrt, symbol) return cast(f, c_void_p).value def compile_multi3(context): """ Compile the multi3() helper function used by LLVM for 128-bit multiplication on 32-bit platforms. """ codegen = context.codegen() library = codegen.create_library("multi3") ir_mod = library.create_ir_module("multi3") i64 = ir.IntType(64) i128 = ir.IntType(128) lower_mask = ir.Constant(i64, 0xffffffff) _32 = ir.Constant(i64, 32) _64 = ir.Constant(i128, 64) fn_type = ir.FunctionType(i128, [i128, i128]) fn = ir.Function(ir_mod, fn_type, name="multi3") a, b = fn.args bb = fn.append_basic_block() builder = ir.IRBuilder(bb) # This implementation mimicks compiler-rt's. al = builder.trunc(a, i64) bl = builder.trunc(b, i64) ah = builder.trunc(builder.ashr(a, _64), i64) bh = builder.trunc(builder.ashr(b, _64), i64) # Compute {rh, rl} = al * bl (unsigned 64-bit multiplication) # rl = (al & 0xffffffff) * (bl & 0xffffffff) rl = builder.mul(builder.and_(al, lower_mask), builder.and_(bl, lower_mask)) # t = rl >> 32 t = builder.lshr(rl, _32) # rl &= 0xffffffff rl = builder.and_(rl, lower_mask) # t += (al >> 32) * (bl & 0xffffffff) t = builder.add(t, builder.mul(builder.lshr(al, _32), builder.and_(bl, lower_mask))) # rl += t << 32 rl = builder.add(rl, builder.shl(t, _32)) # rh = t >> 32 rh = builder.lshr(t, _32) # t = rl >> 32 t = builder.lshr(rl, _32) # rl &= 0xffffffff rl = builder.and_(rl, lower_mask) # t += (bl >> 32) * (al & 0xffffffff) t = builder.add(t, builder.mul(builder.lshr(bl, _32), builder.and_(al, lower_mask))) # rl += t << 32 rl = builder.add(rl, builder.shl(t, _32)) # rh += t >> 32 rh = builder.add(rh, builder.lshr(t, _32)) # rh += (al >> 32) * (bl >> 32) rh = builder.add(rh, builder.mul(builder.lshr(al, _32), builder.lshr(bl, _32))) # rh += (bh * al) + (bl * ah) rh = builder.add(rh, builder.mul(bh, al)) rh = builder.add(rh, builder.mul(bl, ah)) # r = rl + (rh << 64) r = builder.zext(rl, i128) r = builder.add(r, builder.shl(builder.zext(rh, i128), _64)) builder.ret(r) library.add_ir_module(ir_mod) library.finalize() return library class _Installer(object): _installed = False def install(self, context): """ Install the functions into LLVM. This only needs to be done once, as the mappings are persistent during the process lifetime. 
""" if not self._installed: self._do_install(context) self._installed = True class _ExternalMathFunctions(_Installer): """ Map the math functions from the C runtime library into the LLVM execution environment. """ def _do_install(self, context): is32bit = utils.MACHINE_BITS == 32 c_helpers = _helperlib.c_helpers if sys.platform.startswith('win32') and is32bit: # For Windows XP _ftol2 is not defined, we will just use # _ftol as a replacement. # On Windows 7, this is not necessary but will work anyway. ftol = _get_msvcrt_symbol("_ftol") _add_missing_symbol("_ftol2", ftol) elif sys.platform.startswith('linux') and is32bit: _add_missing_symbol("__fixunsdfdi", c_helpers["fptoui"]) _add_missing_symbol("__fixunssfdi", c_helpers["fptouif"]) if is32bit: # Make the library immortal self._multi3_lib = compile_multi3(context) ptr = self._multi3_lib.get_pointer_to_function("multi3") assert ptr _add_missing_symbol("__multi3", ptr) # List available C-math for fname in intrinsics.INTR_MATH: # Force binding from CPython's C runtime library. # (under Windows, different versions of the C runtime can # be loaded at the same time, for example msvcrt100 by # CPython and msvcrt120 by LLVM) ll.add_symbol(fname, c_helpers[fname]) c_math_functions = _ExternalMathFunctions() numba-0.55.1/numba/core/fastmathpass.py000664 000000 000000 00000002273 14174536160 020001 0ustar00rootroot000000 000000 from llvmlite import ir from llvmlite.ir.transforms import Visitor, CallVisitor class FastFloatBinOpVisitor(Visitor): """ A pass to add fastmath flag to float-binop instruction if they don't have any flags. """ float_binops = frozenset(['fadd', 'fsub', 'fmul', 'fdiv', 'frem', 'fcmp']) def __init__(self, flags): self.flags = flags def visit_Instruction(self, instr): if instr.opname in self.float_binops: if not instr.flags: for flag in self.flags: instr.flags.append(flag) class FastFloatCallVisitor(CallVisitor): """ A pass to change all float function calls to use fastmath. """ def __init__(self, flags): self.flags = flags def visit_Call(self, instr): # Add to any call that has float/double return type if instr.type in (ir.FloatType(), ir.DoubleType()): for flag in self.flags: instr.fastmath.add(flag) def rewrite_module(mod, options): """ Rewrite the given LLVM module to use fastmath everywhere. """ flags = options.flags FastFloatBinOpVisitor(flags).visit(mod) FastFloatCallVisitor(flags).visit(mod) numba-0.55.1/numba/core/funcdesc.py000664 000000 000000 00000020117 14174536160 017072 0ustar00rootroot000000 000000 """ Function descriptors. """ from collections import defaultdict import importlib from numba.core import types, itanium_mangler from numba.core.utils import _dynamic_modname, _dynamic_module def default_mangler(name, argtypes, *, abi_tags=()): return itanium_mangler.mangle(name, argtypes, abi_tags=abi_tags) def qualifying_prefix(modname, qualname): """ Returns a new string that is used for the first half of the mangled name. """ # XXX choose a different convention for object mode return '{}.{}'.format(modname, qualname) if modname else qualname class FunctionDescriptor(object): """ Base class for function descriptors: an object used to carry useful metadata about a natively callable function. Note that while `FunctionIdentity` denotes a Python function which is being concretely compiled by Numba, `FunctionDescriptor` may be more "abstract": e.g. a function decorated with `@generated_jit`. 
""" __slots__ = ('native', 'modname', 'qualname', 'doc', 'typemap', 'calltypes', 'args', 'kws', 'restype', 'argtypes', 'mangled_name', 'unique_name', 'env_name', 'global_dict', 'inline', 'noalias', 'abi_tags') def __init__(self, native, modname, qualname, unique_name, doc, typemap, restype, calltypes, args, kws, mangler=None, argtypes=None, inline=False, noalias=False, env_name=None, global_dict=None, abi_tags=()): self.native = native self.modname = modname self.global_dict = global_dict self.qualname = qualname self.unique_name = unique_name self.doc = doc # XXX typemap and calltypes should be on the compile result, # not the FunctionDescriptor self.typemap = typemap self.calltypes = calltypes self.args = args self.kws = kws self.restype = restype # Argument types if argtypes is not None: assert isinstance(argtypes, tuple), argtypes self.argtypes = argtypes else: # Get argument types from the type inference result # (note the "arg.FOO" convention as used in typeinfer self.argtypes = tuple(self.typemap['arg.' + a] for a in args) mangler = default_mangler if mangler is None else mangler # The mangled name *must* be unique, else the wrong function can # be chosen at link time. qualprefix = qualifying_prefix(self.modname, self.unique_name) self.mangled_name = mangler( qualprefix, self.argtypes, abi_tags=abi_tags, ) if env_name is None: env_name = mangler(".NumbaEnv.{}".format(qualprefix), self.argtypes, abi_tags=abi_tags) self.env_name = env_name self.inline = inline self.noalias = noalias self.abi_tags = abi_tags def lookup_globals(self): """ Return the global dictionary of the function. It may not match the Module's globals if the function is created dynamically (i.e. exec) """ return self.global_dict or self.lookup_module().__dict__ def lookup_module(self): """ Return the module in which this function is supposed to exist. This may be a dummy module if the function was dynamically generated. Raise exception if the module can't be found. """ if self.modname == _dynamic_modname: return _dynamic_module else: try: # ensure module exist return importlib.import_module(self.modname) except ImportError: raise ModuleNotFoundError( f"can't compile {self.qualname}: " f"import of module {self.modname} failed") from None def lookup_function(self): """ Return the original function object described by this object. """ return getattr(self.lookup_module(), self.qualname) @property def llvm_func_name(self): """ The LLVM-registered name for the raw function. """ return self.mangled_name # XXX refactor this @property def llvm_cpython_wrapper_name(self): """ The LLVM-registered name for a CPython-compatible wrapper of the raw function (i.e. a PyCFunctionWithKeywords). """ return itanium_mangler.prepend_namespace(self.mangled_name, ns='cpython') @property def llvm_cfunc_wrapper_name(self): """ The LLVM-registered name for a C-compatible wrapper of the raw function. """ return 'cfunc.' + self.mangled_name def __repr__(self): return "" % (self.unique_name) @classmethod def _get_function_info(cls, func_ir): """ Returns ------- qualname, unique_name, modname, doc, args, kws, globals ``unique_name`` must be a unique name. """ func = func_ir.func_id.func qualname = func_ir.func_id.func_qualname # XXX to func_id modname = func.__module__ doc = func.__doc__ or '' args = tuple(func_ir.arg_names) kws = () # TODO global_dict = None if modname is None: # Dynamically generated function. modname = _dynamic_modname # Retain a reference to the dictionary of the function. # This disables caching, serialization and pickling. 
global_dict = func_ir.func_id.func.__globals__ unique_name = func_ir.func_id.unique_name return qualname, unique_name, modname, doc, args, kws, global_dict @classmethod def _from_python_function(cls, func_ir, typemap, restype, calltypes, native, mangler=None, inline=False, noalias=False, abi_tags=()): (qualname, unique_name, modname, doc, args, kws, global_dict, ) = cls._get_function_info(func_ir) self = cls(native, modname, qualname, unique_name, doc, typemap, restype, calltypes, args, kws, mangler=mangler, inline=inline, noalias=noalias, global_dict=global_dict, abi_tags=abi_tags) return self class PythonFunctionDescriptor(FunctionDescriptor): """ A FunctionDescriptor subclass for Numba-compiled functions. """ __slots__ = () @classmethod def from_specialized_function(cls, func_ir, typemap, restype, calltypes, mangler, inline, noalias, abi_tags): """ Build a FunctionDescriptor for a given specialization of a Python function (in nopython mode). """ return cls._from_python_function(func_ir, typemap, restype, calltypes, native=True, mangler=mangler, inline=inline, noalias=noalias, abi_tags=abi_tags) @classmethod def from_object_mode_function(cls, func_ir): """ Build a FunctionDescriptor for an object mode variant of a Python function. """ typemap = defaultdict(lambda: types.pyobject) calltypes = typemap.copy() restype = types.pyobject return cls._from_python_function(func_ir, typemap, restype, calltypes, native=False) class ExternalFunctionDescriptor(FunctionDescriptor): """ A FunctionDescriptor subclass for opaque external functions (e.g. raw C functions). """ __slots__ = () def __init__(self, name, restype, argtypes): args = ["arg%d" % i for i in range(len(argtypes))] super(ExternalFunctionDescriptor, self ).__init__(native=True, modname=None, qualname=name, unique_name=name, doc='', typemap=None, restype=restype, calltypes=None, args=args, kws=None, mangler=lambda a, x, abi_tags: a, argtypes=argtypes) numba-0.55.1/numba/core/generators.py000664 000000 000000 00000033407 14174536160 017457 0ustar00rootroot000000 000000 """ Support for lowering generators. """ from llvmlite.llvmpy.core import Constant, Type, Builder from numba.core import types, config, cgutils from numba.core.funcdesc import FunctionDescriptor class GeneratorDescriptor(FunctionDescriptor): """ The descriptor for a generator's next function. """ __slots__ = () @classmethod def from_generator_fndesc(cls, func_ir, fndesc, gentype, mangler): """ Build a GeneratorDescriptor for the generator returned by the function described by *fndesc*, with type *gentype*. The generator inherits the env_name from the *fndesc*. All emitted functions for the generator shares the same Env. """ assert isinstance(gentype, types.Generator) restype = gentype.yield_type args = ['gen'] argtypes = (gentype,) qualname = fndesc.qualname + '.next' unique_name = fndesc.unique_name + '.next' self = cls(fndesc.native, fndesc.modname, qualname, unique_name, fndesc.doc, fndesc.typemap, restype, fndesc.calltypes, args, fndesc.kws, argtypes=argtypes, mangler=mangler, inline=False, env_name=fndesc.env_name) return self @property def llvm_finalizer_name(self): """ The LLVM name of the generator's finalizer function (if .has_finalizer is true). """ return 'finalize_' + self.mangled_name class BaseGeneratorLower(object): """ Base support class for lowering generators. 
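The generator is lowered as a structure with three members, accessed through the ``get_*_ptr`` helpers below (layout as per the lowering code in this class)::

    gen_struct:
        [0] resume_index   # int32: 0 = start, -1 = exhausted
        [1] args           # data-packed, non-omitted arguments
        [2] state          # saved state variables, zero-initialized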
""" def __init__(self, lower): self.context = lower.context self.fndesc = lower.fndesc self.library = lower.library self.call_conv = lower.call_conv self.func_ir = lower.func_ir self.geninfo = lower.generator_info self.gentype = self.get_generator_type() self.gendesc = GeneratorDescriptor.from_generator_fndesc( lower.func_ir, self.fndesc, self.gentype, self.context.mangler) # Helps packing non-omitted arguments into a structure self.arg_packer = self.context.get_data_packer(self.fndesc.argtypes) self.resume_blocks = {} def get_args_ptr(self, builder, genptr): return cgutils.gep_inbounds(builder, genptr, 0, 1) def get_resume_index_ptr(self, builder, genptr): return cgutils.gep_inbounds(builder, genptr, 0, 0, name='gen.resume_index') def get_state_ptr(self, builder, genptr): return cgutils.gep_inbounds(builder, genptr, 0, 2, name='gen.state') def lower_init_func(self, lower): """ Lower the generator's initialization function (which will fill up the passed-by-reference generator structure). """ lower.setup_function(self.fndesc) builder = lower.builder # Insert the generator into the target context in order to allow # calling from other Numba-compiled functions. lower.context.insert_generator(self.gentype, self.gendesc, [self.library]) # Init argument values lower.extract_function_arguments() lower.pre_lower() # Initialize the return structure (i.e. the generator structure). retty = self.context.get_return_type(self.gentype) # Structure index #0: the initial resume index (0 == start of generator) resume_index = self.context.get_constant(types.int32, 0) # Structure index #1: the function arguments argsty = retty.elements[1] statesty = retty.elements[2] lower.debug_print("# low_init_func incref") # Incref all NRT arguments before storing into generator states if self.context.enable_nrt: for argty, argval in zip(self.fndesc.argtypes, lower.fnargs): self.context.nrt.incref(builder, argty, argval) # Filter out omitted arguments argsval = self.arg_packer.as_data(builder, lower.fnargs) # Zero initialize states statesval = Constant.null(statesty) gen_struct = cgutils.make_anonymous_struct(builder, [resume_index, argsval, statesval], retty) retval = self.box_generator_struct(lower, gen_struct) lower.debug_print("# low_init_func before return") self.call_conv.return_value(builder, retval) lower.post_lower() def lower_next_func(self, lower): """ Lower the generator's next() function (which takes the passed-by-reference generator structure and returns the next yielded value). 
""" lower.setup_function(self.gendesc) lower.debug_print("# lower_next_func: {0}".format(self.gendesc.unique_name)) assert self.gendesc.argtypes[0] == self.gentype builder = lower.builder function = lower.function # Extract argument values and other information from generator struct genptr, = self.call_conv.get_arguments(function) self.arg_packer.load_into(builder, self.get_args_ptr(builder, genptr), lower.fnargs) self.resume_index_ptr = self.get_resume_index_ptr(builder, genptr) self.gen_state_ptr = self.get_state_ptr(builder, genptr) prologue = function.append_basic_block("generator_prologue") # Lower the generator's Python code entry_block_tail = lower.lower_function_body() # Add block for StopIteration on entry stop_block = function.append_basic_block("stop_iteration") builder.position_at_end(stop_block) self.call_conv.return_stop_iteration(builder) # Add prologue switch to resume blocks builder.position_at_end(prologue) # First Python block is also the resume point on first next() call first_block = self.resume_blocks[0] = lower.blkmap[lower.firstblk] # Create front switch to resume points switch = builder.switch(builder.load(self.resume_index_ptr), stop_block) for index, block in self.resume_blocks.items(): switch.add_case(index, block) # Close tail of entry block builder.position_at_end(entry_block_tail) builder.branch(prologue) def lower_finalize_func(self, lower): """ Lower the generator's finalizer. """ fnty = Type.function(Type.void(), [self.context.get_value_type(self.gentype)]) function = cgutils.get_or_insert_function( lower.module, fnty, self.gendesc.llvm_finalizer_name) entry_block = function.append_basic_block('entry') builder = Builder(entry_block) genptrty = self.context.get_value_type(self.gentype) genptr = builder.bitcast(function.args[0], genptrty) self.lower_finalize_func_body(builder, genptr) def return_from_generator(self, lower): """ Emit a StopIteration at generator end and mark the generator exhausted. """ indexval = Constant.int(self.resume_index_ptr.type.pointee, -1) lower.builder.store(indexval, self.resume_index_ptr) self.call_conv.return_stop_iteration(lower.builder) def create_resumption_block(self, lower, index): block_name = "generator_resume%d" % (index,) block = lower.function.append_basic_block(block_name) lower.builder.position_at_end(block) self.resume_blocks[index] = block def debug_print(self, builder, msg): if config.DEBUG_JIT: self.context.debug_print(builder, "DEBUGJIT: {0}".format(msg)) class GeneratorLower(BaseGeneratorLower): """ Support class for lowering nopython generators. """ def get_generator_type(self): return self.fndesc.restype def box_generator_struct(self, lower, gen_struct): return gen_struct def lower_finalize_func_body(self, builder, genptr): """ Lower the body of the generator's finalizer: decref all live state variables. """ self.debug_print(builder, "# generator: finalize") if self.context.enable_nrt: # Always dereference all arguments # self.debug_print(builder, "# generator: clear args") args_ptr = self.get_args_ptr(builder, genptr) for ty, val in self.arg_packer.load(builder, args_ptr): self.context.nrt.decref(builder, ty, val) self.debug_print(builder, "# generator: finalize end") builder.ret_void() class PyGeneratorLower(BaseGeneratorLower): """ Support class for lowering object mode generators. """ def get_generator_type(self): """ Compute the actual generator type (the generator function's return type is simply "pyobject"). 
""" return types.Generator( gen_func=self.func_ir.func_id.func, yield_type=types.pyobject, arg_types=(types.pyobject,) * self.func_ir.arg_count, state_types=(types.pyobject,) * len(self.geninfo.state_vars), has_finalizer=True, ) def box_generator_struct(self, lower, gen_struct): """ Box the raw *gen_struct* as a Python object. """ gen_ptr = cgutils.alloca_once_value(lower.builder, gen_struct) return lower.pyapi.from_native_generator(gen_ptr, self.gentype, lower.envarg) def init_generator_state(self, lower): """ NULL-initialize all generator state variables, to avoid spurious decref's on cleanup. """ lower.builder.store(Constant.null(self.gen_state_ptr.type.pointee), self.gen_state_ptr) def lower_finalize_func_body(self, builder, genptr): """ Lower the body of the generator's finalizer: decref all live state variables. """ pyapi = self.context.get_python_api(builder) resume_index_ptr = self.get_resume_index_ptr(builder, genptr) resume_index = builder.load(resume_index_ptr) # If resume_index is 0, next() was never called # If resume_index is -1, generator terminated cleanly # (note function arguments are saved in state variables, # so they don't need a separate cleanup step) need_cleanup = builder.icmp_signed( '>', resume_index, Constant.int(resume_index.type, 0)) with cgutils.if_unlikely(builder, need_cleanup): # Decref all live vars (some may be NULL) gen_state_ptr = self.get_state_ptr(builder, genptr) for state_index in range(len(self.gentype.state_types)): state_slot = cgutils.gep_inbounds(builder, gen_state_ptr, 0, state_index) ty = self.gentype.state_types[state_index] val = self.context.unpack_value(builder, ty, state_slot) pyapi.decref(val) builder.ret_void() class LowerYield(object): """ Support class for lowering a particular yield point. """ def __init__(self, lower, yield_point, live_vars): self.lower = lower self.context = lower.context self.builder = lower.builder self.genlower = lower.genlower self.gentype = self.genlower.gentype self.gen_state_ptr = self.genlower.gen_state_ptr self.resume_index_ptr = self.genlower.resume_index_ptr self.yp = yield_point self.inst = self.yp.inst self.live_vars = live_vars self.live_var_indices = [lower.generator_info.state_vars.index(v) for v in live_vars] def lower_yield_suspend(self): self.lower.debug_print("# generator suspend") # Save live vars in state for state_index, name in zip(self.live_var_indices, self.live_vars): state_slot = cgutils.gep_inbounds(self.builder, self.gen_state_ptr, 0, state_index) ty = self.gentype.state_types[state_index] # The yield might be in a loop, in which case the state might # contain a predicate var that branches back to the loop head, in # this case the var is live but in sequential lowering won't have # been alloca'd yet, so do this here. 
fetype = self.lower.typeof(name) self.lower._alloca_var(name, fetype) val = self.lower.loadvar(name) # IncRef newly stored value if self.context.enable_nrt: self.context.nrt.incref(self.builder, ty, val) self.context.pack_value(self.builder, ty, val, state_slot) # Save resume index indexval = Constant.int(self.resume_index_ptr.type.pointee, self.inst.index) self.builder.store(indexval, self.resume_index_ptr) self.lower.debug_print("# generator suspend end") def lower_yield_resume(self): # Emit resumption point self.genlower.create_resumption_block(self.lower, self.inst.index) self.lower.debug_print("# generator resume") # Reload live vars from state for state_index, name in zip(self.live_var_indices, self.live_vars): state_slot = cgutils.gep_inbounds(self.builder, self.gen_state_ptr, 0, state_index) ty = self.gentype.state_types[state_index] val = self.context.unpack_value(self.builder, ty, state_slot) self.lower.storevar(val, name) # Previous storevar is making an extra incref if self.context.enable_nrt: self.context.nrt.decref(self.builder, ty, val) self.lower.debug_print("# generator resume end") numba-0.55.1/numba/core/imputils.py000664 000000 000000 00000035051 14174536160 017151 0ustar00rootroot000000 000000 """ Utilities to simplify the boilerplate for native lowering. """ import collections import contextlib import inspect import functools from enum import Enum from numba.core import typing, types, utils, cgutils from numba.core.typing.templates import BaseRegistryLoader class Registry(object): """ A registry of function and attribute implementations. """ def __init__(self, name='unspecified'): self.name = name self.functions = [] self.getattrs = [] self.setattrs = [] self.casts = [] self.constants = [] def lower(self, func, *argtys): """ Decorate an implementation of *func* for the given argument types. *func* may be an actual global function object, or any pseudo-function supported by Numba, such as "getitem". The decorated implementation has the signature (context, builder, sig, args). """ def decorate(impl): self.functions.append((impl, func, argtys)) return impl return decorate def _decorate_attr(self, impl, ty, attr, impl_list, decorator): real_impl = decorator(impl, ty, attr) impl_list.append((real_impl, attr, real_impl.signature)) return impl def lower_getattr(self, ty, attr): """ Decorate an implementation of __getattr__ for type *ty* and the attribute *attr*. The decorated implementation will have the signature (context, builder, typ, val). """ def decorate(impl): return self._decorate_attr(impl, ty, attr, self.getattrs, _decorate_getattr) return decorate def lower_getattr_generic(self, ty): """ Decorate the fallback implementation of __getattr__ for type *ty*. The decorated implementation will have the signature (context, builder, typ, val, attr). The implementation is called for attributes which haven't been explicitly registered with lower_getattr(). """ return self.lower_getattr(ty, None) def lower_setattr(self, ty, attr): """ Decorate an implementation of __setattr__ for type *ty* and the attribute *attr*. The decorated implementation will have the signature (context, builder, sig, args). """ def decorate(impl): return self._decorate_attr(impl, ty, attr, self.setattrs, _decorate_setattr) return decorate def lower_setattr_generic(self, ty): """ Decorate the fallback implementation of __setattr__ for type *ty*. The decorated implementation will have the signature (context, builder, sig, args, attr). 
The implementation is called for attributes which haven't been explicitly registered with lower_setattr(). """ return self.lower_setattr(ty, None) def lower_cast(self, fromty, toty): """ Decorate the implementation of implicit conversion between *fromty* and *toty*. The decorated implementation will have the signature (context, builder, fromty, toty, val). """ def decorate(impl): self.casts.append((impl, (fromty, toty))) return impl return decorate def lower_constant(self, ty): """ Decorate the implementation for creating a constant of type *ty*. The decorated implementation will have the signature (context, builder, ty, pyval). """ def decorate(impl): self.constants.append((impl, (ty,))) return impl return decorate def __repr__(self): return f"Lowering Registry<{self.name}>" class RegistryLoader(BaseRegistryLoader): """ An incremental loader for a target registry. """ registry_items = ('functions', 'getattrs', 'setattrs', 'casts', 'constants') # Global registry for implementations of builtin operations # (functions, attributes, type casts) builtin_registry = Registry('builtin_registry') lower_builtin = builtin_registry.lower lower_getattr = builtin_registry.lower_getattr lower_getattr_generic = builtin_registry.lower_getattr_generic lower_setattr = builtin_registry.lower_setattr lower_setattr_generic = builtin_registry.lower_setattr_generic lower_cast = builtin_registry.lower_cast lower_constant = builtin_registry.lower_constant def _decorate_getattr(impl, ty, attr): real_impl = impl if attr is not None: def res(context, builder, typ, value, attr): return real_impl(context, builder, typ, value) else: def res(context, builder, typ, value, attr): return real_impl(context, builder, typ, value, attr) res.signature = (ty,) res.attr = attr return res def _decorate_setattr(impl, ty, attr): real_impl = impl if attr is not None: def res(context, builder, sig, args, attr): return real_impl(context, builder, sig, args) else: def res(context, builder, sig, args, attr): return real_impl(context, builder, sig, args, attr) res.signature = (ty, types.Any) res.attr = attr return res def fix_returning_optional(context, builder, sig, status, retval): # Reconstruct optional return type if isinstance(sig.return_type, types.Optional): value_type = sig.return_type.type optional_none = context.make_optional_none(builder, value_type) retvalptr = cgutils.alloca_once_value(builder, optional_none) with builder.if_then(builder.not_(status.is_none)): optional_value = context.make_optional_value( builder, value_type, retval, ) builder.store(optional_value, retvalptr) retval = builder.load(retvalptr) return retval def user_function(fndesc, libs): """ A wrapper inserting code calling Numba-compiled *fndesc*. 
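As a usage sketch of the registry decorators above (``my_func`` is a hypothetical function, assumed to be already known to the typing layer), an implementation follows the (context, builder, sig, args) convention documented on ``Registry.lower``::

    @lower_builtin(my_func, types.int64, types.int64)
    def impl_my_func(context, builder, sig, args):
        a, b = args
        return impl_ret_untracked(context, builder, sig.return_type,
                                  builder.add(a, b))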
""" def imp(context, builder, sig, args): func = context.declare_function(builder.module, fndesc) # env=None assumes this is a nopython function status, retval = context.call_conv.call_function( builder, func, fndesc.restype, fndesc.argtypes, args) with cgutils.if_unlikely(builder, status.is_error): context.call_conv.return_status_propagate(builder, status) assert sig.return_type == fndesc.restype # Reconstruct optional return type retval = fix_returning_optional(context, builder, sig, status, retval) # If the data representations don't match up if retval.type != context.get_value_type(sig.return_type): msg = "function returned {0} but expect {1}" raise TypeError(msg.format(retval.type, sig.return_type)) return impl_ret_new_ref(context, builder, fndesc.restype, retval) imp.signature = fndesc.argtypes imp.libs = tuple(libs) return imp def user_generator(gendesc, libs): """ A wrapper inserting code calling Numba-compiled *gendesc*. """ def imp(context, builder, sig, args): func = context.declare_function(builder.module, gendesc) # env=None assumes this is a nopython function status, retval = context.call_conv.call_function( builder, func, gendesc.restype, gendesc.argtypes, args) # Return raw status for caller to process StopIteration return status, retval imp.libs = tuple(libs) return imp def iterator_impl(iterable_type, iterator_type): """ Decorator a given class as implementing *iterator_type* (by providing an `iternext()` method). """ def wrapper(cls): # These are unbound methods iternext = cls.iternext @iternext_impl(RefType.BORROWED) def iternext_wrapper(context, builder, sig, args, result): (value,) = args iterobj = cls(context, builder, value) return iternext(iterobj, context, builder, result) lower_builtin('iternext', iterator_type)(iternext_wrapper) return cls return wrapper class _IternextResult(object): """ A result wrapper for iteration, passed by iternext_impl() into the wrapped function. """ __slots__ = ('_context', '_builder', '_pairobj') def __init__(self, context, builder, pairobj): self._context = context self._builder = builder self._pairobj = pairobj def set_exhausted(self): """ Mark the iterator as exhausted. """ self._pairobj.second = self._context.get_constant(types.boolean, False) def set_valid(self, is_valid=True): """ Mark the iterator as valid according to *is_valid* (which must be either a Python boolean or a LLVM inst). """ if is_valid in (False, True): is_valid = self._context.get_constant(types.boolean, is_valid) self._pairobj.second = is_valid def yield_(self, value): """ Mark the iterator as yielding the given *value* (a LLVM inst). """ self._pairobj.first = value def is_valid(self): """ Return whether the iterator is marked valid. """ return self._context.get_argument_value(self._builder, types.boolean, self._pairobj.second) def yielded_value(self): """ Return the iterator's yielded value, if any. """ return self._pairobj.first class RefType(Enum): """ Enumerate the reference type """ """ A new reference """ NEW = 1 """ A borrowed reference """ BORROWED = 2 """ An untracked reference """ UNTRACKED = 3 def iternext_impl(ref_type=None): """ Wrap the given iternext() implementation so that it gets passed an _IternextResult() object easing the returning of the iternext() result pair. ref_type: a numba.targets.imputils.RefType value, the reference type used is that specified through the RefType enum. 
The wrapped function will be called with the following signature: (context, builder, sig, args, iternext_result) """ if ref_type not in [x for x in RefType]: raise ValueError("ref_type must be an enum member of imputils.RefType") def outer(func): def wrapper(context, builder, sig, args): pair_type = sig.return_type pairobj = context.make_helper(builder, pair_type) func(context, builder, sig, args, _IternextResult(context, builder, pairobj)) if ref_type == RefType.NEW: impl_ret = impl_ret_new_ref elif ref_type == RefType.BORROWED: impl_ret = impl_ret_borrowed elif ref_type == RefType.UNTRACKED: impl_ret = impl_ret_untracked else: raise ValueError("Unknown ref_type encountered") return impl_ret(context, builder, pair_type, pairobj._getvalue()) return wrapper return outer def call_getiter(context, builder, iterable_type, val): """ Call the `getiter()` implementation for the given *iterable_type* of value *val*, and return the corresponding LLVM inst. """ getiter_sig = typing.signature(iterable_type.iterator_type, iterable_type) getiter_impl = context.get_function('getiter', getiter_sig) return getiter_impl(builder, (val,)) def call_iternext(context, builder, iterator_type, val): """ Call the `iternext()` implementation for the given *iterator_type* of value *val*, and return a convenience _IternextResult() object reflecting the results. """ itemty = iterator_type.yield_type pair_type = types.Pair(itemty, types.boolean) iternext_sig = typing.signature(pair_type, iterator_type) iternext_impl = context.get_function('iternext', iternext_sig) val = iternext_impl(builder, (val,)) pairobj = context.make_helper(builder, pair_type, val) return _IternextResult(context, builder, pairobj) def call_len(context, builder, ty, val): """ Call len() on the given value. Return None if len() isn't defined on this type. """ try: len_impl = context.get_function(len, typing.signature(types.intp, ty,)) except NotImplementedError: return None else: return len_impl(builder, (val,)) _ForIterLoop = collections.namedtuple('_ForIterLoop', ('value', 'do_break')) @contextlib.contextmanager def for_iter(context, builder, iterable_type, val): """ Simulate a for loop on the given iterable. Yields a namedtuple with the given members: - `value` is the value being yielded - `do_break` is a callable to early out of the loop """ iterator_type = iterable_type.iterator_type iterval = call_getiter(context, builder, iterable_type, val) bb_body = builder.append_basic_block('for_iter.body') bb_end = builder.append_basic_block('for_iter.end') def do_break(): builder.branch(bb_end) builder.branch(bb_body) with builder.goto_block(bb_body): res = call_iternext(context, builder, iterator_type, iterval) with builder.if_then(builder.not_(res.is_valid()), likely=False): builder.branch(bb_end) yield _ForIterLoop(res.yielded_value(), do_break) builder.branch(bb_body) builder.position_at_end(bb_end) if context.enable_nrt: context.nrt.decref(builder, iterator_type, iterval) def impl_ret_new_ref(ctx, builder, retty, ret): """ The implementation returns a new reference. """ return ret def impl_ret_borrowed(ctx, builder, retty, ret): """ The implementation returns a borrowed reference. This function automatically incref so that the implementation is returning a new reference. """ if ctx.enable_nrt: ctx.nrt.incref(builder, retty, ret) return ret def impl_ret_untracked(ctx, builder, retty, ret): """ The return type is not a NRT object. 
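As a usage sketch of ``for_iter`` above (``emit_body`` is a hypothetical helper emitting IR for one element)::

    with for_iter(context, builder, seq_ty, seq_val) as loop:
        emit_body(loop.value)   # IR for the loop body
        # loop.do_break() may be called to emit an early exit

The context manager emits the getiter/iternext loop skeleton; the body of the ``with`` block runs once at compile time, generating the IR placed inside the loop.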
""" return ret @contextlib.contextmanager def force_error_model(context, model_name='numpy'): """ Temporarily change the context's error model. """ from numba.core import callconv old_error_model = context.error_model context.error_model = callconv.create_error_model(model_name, context) try: yield finally: context.error_model = old_error_model def numba_typeref_ctor(*args, **kwargs): """A stub for use internally by Numba when a call is emitted on a TypeRef. """ raise NotImplementedError("This function should not be executed.") numba-0.55.1/numba/core/inline_closurecall.py000664 000000 000000 00000205423 14174536160 021153 0ustar00rootroot000000 000000 import types as pytypes # avoid confusion with numba.types import copy import ctypes import numba.core.analysis from numba.core import utils, types, typing, errors, ir, rewrites, config, ir_utils from numba import prange from numba.parfors.parfor import internal_prange from numba.core.ir_utils import ( mk_unique_var, next_label, add_offset_to_labels, replace_vars, remove_dels, rename_labels, find_topo_order, merge_adjacent_blocks, GuardException, require, guard, get_definition, find_callname, find_build_sequence, get_np_ufunc_typ, get_ir_of_code, simplify_CFG, canonicalize_array_math, dead_code_elimination, ) from numba.core.analysis import ( compute_cfg_from_blocks, compute_use_defs, compute_live_variables) from numba.core import postproc from numba.np.unsafe.ndarray import empty_inferred as unsafe_empty_inferred import numpy as np import operator import numba.misc.special """ Variable enable_inline_arraycall is only used for testing purpose. """ enable_inline_arraycall = True def callee_ir_validator(func_ir): """Checks the IR of a callee is supported for inlining """ for blk in func_ir.blocks.values(): for stmt in blk.find_insts(ir.Assign): if isinstance(stmt.value, ir.Yield): msg = "The use of yield in a closure is unsupported." raise errors.UnsupportedError(msg, loc=stmt.loc) def _created_inlined_var_name(function_name, var_name): """Creates a name for an inlined variable based on the function name and the variable name. It does this "safely" to avoid the use of characters that are illegal in python variable names as there are occasions when function generation needs valid python name tokens.""" inlined_name = f'{function_name}.{var_name}' # Replace angle brackets, e.g. "" is replaced with "_locals_" new_name = inlined_name.replace('<', '_').replace('>', '_') # The version "version" of the closure function e.g. foo$2 (id 2) is # rewritten as "foo_v2". Further "." is also replaced with "_". new_name = new_name.replace('.', '_').replace('$', '_v') return new_name class InlineClosureCallPass(object): """InlineClosureCallPass class looks for direct calls to locally defined closures, and inlines the body of the closure function to the call site. """ def __init__(self, func_ir, parallel_options, swapped={}, typed=False): self.func_ir = func_ir self.parallel_options = parallel_options self.swapped = swapped self._processed_stencils = [] self.typed = typed def run(self): """Run inline closure call pass. 
""" # Analysis relies on ir.Del presence, strip out later pp = postproc.PostProcessor(self.func_ir) pp.run(True) modified = False work_list = list(self.func_ir.blocks.items()) debug_print = _make_debug_print("InlineClosureCallPass") debug_print("START") while work_list: label, block = work_list.pop() for i, instr in enumerate(block.body): if isinstance(instr, ir.Assign): lhs = instr.target expr = instr.value if isinstance(expr, ir.Expr) and expr.op == 'call': call_name = guard(find_callname, self.func_ir, expr) func_def = guard(get_definition, self.func_ir, expr.func) if guard(self._inline_reduction, work_list, block, i, expr, call_name): modified = True break # because block structure changed if guard(self._inline_closure, work_list, block, i, func_def): modified = True break # because block structure changed if guard(self._inline_stencil, instr, call_name, func_def): modified = True if enable_inline_arraycall: # Identify loop structure if modified: # Need to do some cleanups if closure inlining kicked in merge_adjacent_blocks(self.func_ir.blocks) cfg = compute_cfg_from_blocks(self.func_ir.blocks) debug_print("start inline arraycall") _debug_dump(cfg) loops = cfg.loops() sized_loops = [(k, len(loops[k].body)) for k in loops.keys()] visited = [] # We go over all loops, bigger loops first (outer first) for k, s in sorted(sized_loops, key=lambda tup: tup[1], reverse=True): visited.append(k) if guard(_inline_arraycall, self.func_ir, cfg, visited, loops[k], self.swapped, self.parallel_options.comprehension, self.typed): modified = True if modified: _fix_nested_array(self.func_ir) if modified: # clean up now dead/unreachable blocks, e.g. unconditionally raising # an exception in an inlined function would render some parts of the # inliner unreachable cfg = compute_cfg_from_blocks(self.func_ir.blocks) for dead in cfg.dead_nodes(): del self.func_ir.blocks[dead] # run dead code elimination dead_code_elimination(self.func_ir) # do label renaming self.func_ir.blocks = rename_labels(self.func_ir.blocks) # inlining done, strip dels remove_dels(self.func_ir.blocks) debug_print("END") def _inline_reduction(self, work_list, block, i, expr, call_name): # only inline reduction in sequential execution, parallel handling # is done in ParforPass. require(not self.parallel_options.reduction) require(call_name == ('reduce', 'builtins') or call_name == ('reduce', '_functools')) if len(expr.args) not in (2, 3): raise TypeError("invalid reduce call, " "two arguments are required (optional initial " "value can also be specified)") check_reduce_func(self.func_ir, expr.args[0]) def reduce_func(f, A, v=None): it = iter(A) if v is not None: s = v else: s = next(it) for a in it: s = f(s, a) return s inline_closure_call(self.func_ir, self.func_ir.func_id.func.__globals__, block, i, reduce_func, work_list=work_list, callee_validator=callee_ir_validator) return True def _inline_stencil(self, instr, call_name, func_def): from numba.stencils.stencil import StencilFunc lhs = instr.target expr = instr.value # We keep the escaping variables of the stencil kernel # alive by adding them to the actual kernel call as extra # keyword arguments, which is ignored anyway. 
if (isinstance(func_def, ir.Global) and func_def.name == 'stencil' and isinstance(func_def.value, StencilFunc)): if expr.kws: expr.kws += func_def.value.kws else: expr.kws = func_def.value.kws return True # Otherwise we proceed to check if it is a call to numba.stencil require(call_name == ('stencil', 'numba.stencils.stencil') or call_name == ('stencil', 'numba')) require(expr not in self._processed_stencils) self._processed_stencils.append(expr) if not len(expr.args) == 1: raise ValueError("As a minimum Stencil requires" " a kernel as an argument") stencil_def = guard(get_definition, self.func_ir, expr.args[0]) require(isinstance(stencil_def, ir.Expr) and stencil_def.op == "make_function") kernel_ir = get_ir_of_code(self.func_ir.func_id.func.__globals__, stencil_def.code) options = dict(expr.kws) if 'neighborhood' in options: fixed = guard(self._fix_stencil_neighborhood, options) if not fixed: raise ValueError("stencil neighborhood option should be a tuple" " with constant structure such as ((-w, w),)") if 'index_offsets' in options: fixed = guard(self._fix_stencil_index_offsets, options) if not fixed: raise ValueError("stencil index_offsets option should be a tuple" " with constant structure such as (offset, )") sf = StencilFunc(kernel_ir, 'constant', options) sf.kws = expr.kws # hack to keep variables live sf_global = ir.Global('stencil', sf, expr.loc) self.func_ir._definitions[lhs.name] = [sf_global] instr.value = sf_global return True def _fix_stencil_neighborhood(self, options): """ Extract the two-level tuple representing the stencil neighborhood from the program IR to provide a tuple to StencilFunc. """ # build_tuple node with neighborhood for each dimension dims_build_tuple = get_definition(self.func_ir, options['neighborhood']) require(hasattr(dims_build_tuple, 'items')) res = [] for window_var in dims_build_tuple.items: win_build_tuple = get_definition(self.func_ir, window_var) require(hasattr(win_build_tuple, 'items')) res.append(tuple(win_build_tuple.items)) options['neighborhood'] = tuple(res) return True def _fix_stencil_index_offsets(self, options): """ Extract the tuple representing the stencil index offsets from the program IR to provide to StencilFunc. """ offset_tuple = get_definition(self.func_ir, options['index_offsets']) require(hasattr(offset_tuple, 'items')) options['index_offsets'] = tuple(offset_tuple.items) return True def _inline_closure(self, work_list, block, i, func_def): require(isinstance(func_def, ir.Expr) and func_def.op == "make_function") inline_closure_call(self.func_ir, self.func_ir.func_id.func.__globals__, block, i, func_def, work_list=work_list, callee_validator=callee_ir_validator) return True def check_reduce_func(func_ir, func_var): """Checks the function at func_var in func_ir to make sure it's amenable for inlining. 
Returns the function itself""" reduce_func = guard(get_definition, func_ir, func_var) if reduce_func is None: raise ValueError("Reduce function cannot be found for njit analysis") if isinstance(reduce_func, (ir.FreeVar, ir.Global)): if not isinstance(reduce_func.value, numba.core.registry.CPUDispatcher): raise ValueError("Invalid reduction function") # pull out the python function for inlining reduce_func = reduce_func.value.py_func elif not (hasattr(reduce_func, 'code') or hasattr(reduce_func, '__code__')): raise ValueError("Invalid reduction function") f_code = (reduce_func.code if hasattr(reduce_func, 'code') else reduce_func.__code__) if not f_code.co_argcount == 2: raise TypeError("Reduction function should take 2 arguments") return reduce_func class InlineWorker(object): """ A worker class for inlining. This is a more advanced version of `inline_closure_call` in that it permits inlining from a function type, Numba IR, or a code object. It also runs the entire untyped compiler pipeline on the inlinee to ensure that it is transformed as though it were compiled directly. """ def __init__(self, typingctx=None, targetctx=None, locals=None, pipeline=None, flags=None, validator=callee_ir_validator, typemap=None, calltypes=None): """ Instantiate a new InlineWorker; all arguments are optional, though some must be supplied together for certain use cases. The methods will refuse to run if the object isn't configured in the manner needed. Args are the same as those in a numba.core.Compiler.state, except *validator*, which is a function taking Numba IR and validating it for use when inlining (this is optional and exists mainly to provide better error messages about things which the inliner cannot handle, like yield in a closure). """ def check(arg, name): if arg is None: raise TypeError("{} must not be None".format(name)) from numba.core.compiler import DefaultPassBuilder # check the stuff needed to run the more advanced compilation pipeline # is valid if any of it is provided compiler_args = (targetctx, locals, pipeline, flags) compiler_group = [x is not None for x in compiler_args] if any(compiler_group) and not all(compiler_group): check(targetctx, 'targetctx') check(locals, 'locals') check(pipeline, 'pipeline') check(flags, 'flags') elif all(compiler_group): check(typingctx, 'typingctx') self._compiler_pipeline = DefaultPassBuilder.define_untyped_pipeline self.typingctx = typingctx self.targetctx = targetctx self.locals = locals self.pipeline = pipeline self.flags = flags self.validator = validator self.debug_print = _make_debug_print("InlineWorker") # check whether this inliner can also support typemap and calltypes # update and if what's provided is valid pair = (typemap, calltypes) pair_is_none = [x is None for x in pair] if any(pair_is_none) and not all(pair_is_none): msg = ("typemap and calltypes must both be either None or have a " "value, got: %s, %s") raise TypeError(msg % pair) self._permit_update_type_and_call_maps = not all(pair_is_none) self.typemap = typemap self.calltypes = calltypes def inline_ir(self, caller_ir, block, i, callee_ir, callee_freevars, arg_typs=None): """ Inlines the callee_ir in the caller_ir at statement index i of block `block`; callee_freevars are the free variables for the callee_ir. If the callee_ir is derived from a function `func` then this is `func.__code__.co_freevars`. If `arg_typs` is given and the InlineWorker instance was initialized with a typemap and calltypes then they will be appropriately updated based on the arg_typs.
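A typical call sequence from a typed pass might look like the following sketch (all names are assumed to come from the pass state)::

    worker = InlineWorker(typingctx, targetctx, locals, pipeline, flags,
                          typemap=typemap, calltypes=calltypes)
    _, _, _, new_blocks = worker.inline_function(
        caller_ir, block, i, py_func, arg_typs=arg_typs)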
""" # Always copy the callee IR, it gets mutated def copy_ir(the_ir): kernel_copy = the_ir.copy() kernel_copy.blocks = {} for block_label, block in the_ir.blocks.items(): new_block = copy.deepcopy(the_ir.blocks[block_label]) new_block.body = [] for stmt in the_ir.blocks[block_label].body: scopy = copy.deepcopy(stmt) new_block.body.append(scopy) kernel_copy.blocks[block_label] = new_block return kernel_copy callee_ir = copy_ir(callee_ir) # check that the contents of the callee IR is something that can be # inlined if a validator is present if self.validator is not None: self.validator(callee_ir) # save an unmutated copy of the callee_ir to return callee_ir_original = callee_ir.copy() scope = block.scope instr = block.body[i] call_expr = instr.value callee_blocks = callee_ir.blocks # 1. relabel callee_ir by adding an offset max_label = max(ir_utils._the_max_label.next(), max(caller_ir.blocks.keys())) callee_blocks = add_offset_to_labels(callee_blocks, max_label + 1) callee_blocks = simplify_CFG(callee_blocks) callee_ir.blocks = callee_blocks min_label = min(callee_blocks.keys()) max_label = max(callee_blocks.keys()) # reset globals in ir_utils before we use it ir_utils._the_max_label.update(max_label) self.debug_print("After relabel") _debug_dump(callee_ir) # 2. rename all local variables in callee_ir with new locals created in # caller_ir callee_scopes = _get_all_scopes(callee_blocks) self.debug_print("callee_scopes = ", callee_scopes) # one function should only have one local scope assert(len(callee_scopes) == 1) callee_scope = callee_scopes[0] var_dict = {} for var in callee_scope.localvars._con.values(): if not (var.name in callee_freevars): inlined_name = _created_inlined_var_name( callee_ir.func_id.unique_name, var.name) new_var = scope.redefine(inlined_name, loc=var.loc) var_dict[var.name] = new_var self.debug_print("var_dict = ", var_dict) replace_vars(callee_blocks, var_dict) self.debug_print("After local var rename") _debug_dump(callee_ir) # 3. replace formal parameters with actual arguments callee_func = callee_ir.func_id.func args = _get_callee_args(call_expr, callee_func, block.body[i].loc, caller_ir) # 4. Update typemap if self._permit_update_type_and_call_maps: if arg_typs is None: raise TypeError('arg_typs should have a value not None') self.update_type_and_call_maps(callee_ir, arg_typs) # update_type_and_call_maps replaces blocks callee_blocks = callee_ir.blocks self.debug_print("After arguments rename: ") _debug_dump(callee_ir) _replace_args_with(callee_blocks, args) # 5. split caller blocks into two new_blocks = [] new_block = ir.Block(scope, block.loc) new_block.body = block.body[i + 1:] new_label = next_label() caller_ir.blocks[new_label] = new_block new_blocks.append((new_label, new_block)) block.body = block.body[:i] block.body.append(ir.Jump(min_label, instr.loc)) # 6. replace Return with assignment to LHS topo_order = find_topo_order(callee_blocks) _replace_returns(callee_blocks, instr.target, new_label) # remove the old definition of instr.target too if (instr.target.name in caller_ir._definitions and call_expr in caller_ir._definitions[instr.target.name]): # NOTE: target can have multiple definitions due to control flow caller_ir._definitions[instr.target.name].remove(call_expr) # 7. 
insert all new blocks, and add back definitions for label in topo_order: # block scope must point to parent's block = callee_blocks[label] block.scope = scope _add_definitions(caller_ir, block) caller_ir.blocks[label] = block new_blocks.append((label, block)) self.debug_print("After merge in") _debug_dump(caller_ir) return callee_ir_original, callee_blocks, var_dict, new_blocks def inline_function(self, caller_ir, block, i, function, arg_typs=None): """ Inlines the function in the caller_ir at statement index i of block `block`. If `arg_typs` is given and the InlineWorker instance was initialized with a typemap and calltypes then they will be appropriately updated based on the arg_typs. """ callee_ir = self.run_untyped_passes(function) freevars = function.__code__.co_freevars return self.inline_ir(caller_ir, block, i, callee_ir, freevars, arg_typs=arg_typs) def run_untyped_passes(self, func, enable_ssa=False): """ Run the compiler frontend's untyped passes over the given Python function, and return the function's canonical Numba IR. Disable SSA transformation by default, since the call site won't be in SSA form and self.inline_ir depends on this being the case. """ from numba.core.compiler import StateDict, _CompileStatus from numba.core.untyped_passes import ExtractByteCode, WithLifting from numba.core import bytecode from numba.parfors.parfor import ParforDiagnostics state = StateDict() state.func_ir = None state.typingctx = self.typingctx state.targetctx = self.targetctx state.locals = self.locals state.pipeline = self.pipeline state.flags = self.flags state.flags.enable_ssa = enable_ssa state.func_id = bytecode.FunctionIdentity.from_function(func) state.typemap = None state.calltypes = None state.type_annotation = None state.status = _CompileStatus(False) state.return_type = None state.parfor_diagnostics = ParforDiagnostics() state.metadata = {} ExtractByteCode().run_pass(state) # This is a lie, just need *some* args for the case where an obj mode # with lift is needed state.args = len(state.bc.func_id.pysig.parameters) * (types.pyobject,) pm = self._compiler_pipeline(state) pm.finalize() pm.run(state) return state.func_ir def update_type_and_call_maps(self, callee_ir, arg_typs): """ Updates the type and call maps based on calling callee_ir with arguments from arg_typs""" from numba.core.ssa import reconstruct_ssa from numba.core.typed_passes import PreLowerStripPhis if not self._permit_update_type_and_call_maps: msg = ("InlineWorker instance not configured correctly, typemap or " "calltypes missing in initialization.") raise ValueError(msg) from numba.core import typed_passes # call branch pruning to simplify IR and avoid inference errors callee_ir._definitions = ir_utils.build_definitions(callee_ir.blocks) numba.core.analysis.dead_branch_prune(callee_ir, arg_typs) # callee's typing may require SSA callee_ir = reconstruct_ssa(callee_ir) callee_ir._definitions = ir_utils.build_definitions(callee_ir.blocks) f_typemap, f_return_type, f_calltypes, _ = typed_passes.type_inference_stage( self.typingctx, self.targetctx, callee_ir, arg_typs, None) callee_ir = PreLowerStripPhis()._strip_phi_nodes(callee_ir) callee_ir._definitions = ir_utils.build_definitions(callee_ir.blocks) canonicalize_array_math(callee_ir, f_typemap, f_calltypes, self.typingctx) # remove argument entries like arg.a from typemap arg_names = [vname for vname in f_typemap if vname.startswith("arg.")] for a in arg_names: f_typemap.pop(a) self.typemap.update(f_typemap) self.calltypes.update(f_calltypes) def 
inline_closure_call(func_ir, glbls, block, i, callee, typingctx=None, targetctx=None, arg_typs=None, typemap=None, calltypes=None, work_list=None, callee_validator=None, replace_freevars=True): """Inline the body of `callee` at its callsite (`i`-th instruction of `block`) `func_ir` is the func_ir object of the caller function and `glbls` is its global variable environment (func_ir.func_id.func.__globals__). `block` is the IR block of the callsite and `i` is the index of the callsite's node. `callee` is either the called function or a make_function node. `typingctx`, `typemap` and `calltypes` are typing data structures of the caller, available if we are in a typed pass. `arg_typs` includes the types of the arguments at the callsite. `callee_validator` is an optional callable which can be used to validate the IR of the callee to ensure that it contains IR supported for inlining, it takes one argument, the func_ir of the callee Returns IR blocks of the callee and the variable renaming dictionary used for them to facilitate further processing of new blocks. """ scope = block.scope instr = block.body[i] call_expr = instr.value debug_print = _make_debug_print("inline_closure_call") debug_print("Found closure call: ", instr, " with callee = ", callee) # support both function object and make_function Expr callee_code = callee.code if hasattr(callee, 'code') else callee.__code__ callee_closure = callee.closure if hasattr(callee, 'closure') else callee.__closure__ # first, get the IR of the callee if isinstance(callee, pytypes.FunctionType): from numba.core import compiler callee_ir = compiler.run_frontend(callee, inline_closures=True) else: callee_ir = get_ir_of_code(glbls, callee_code) # check that the contents of the callee IR is something that can be inlined # if a validator is supplied if callee_validator is not None: callee_validator(callee_ir) callee_blocks = callee_ir.blocks # 1. relabel callee_ir by adding an offset max_label = max(ir_utils._the_max_label.next(), max(func_ir.blocks.keys())) callee_blocks = add_offset_to_labels(callee_blocks, max_label + 1) callee_blocks = simplify_CFG(callee_blocks) callee_ir.blocks = callee_blocks min_label = min(callee_blocks.keys()) max_label = max(callee_blocks.keys()) # reset globals in ir_utils before we use it ir_utils._the_max_label.update(max_label) debug_print("After relabel") _debug_dump(callee_ir) # 2. rename all local variables in callee_ir with new locals created in func_ir callee_scopes = _get_all_scopes(callee_blocks) debug_print("callee_scopes = ", callee_scopes) # one function should only have one local scope assert(len(callee_scopes) == 1) callee_scope = callee_scopes[0] var_dict = {} for var in callee_scope.localvars._con.values(): if not (var.name in callee_code.co_freevars): inlined_name = _created_inlined_var_name( callee_ir.func_id.unique_name, var.name) new_var = scope.redefine(inlined_name, loc=var.loc) var_dict[var.name] = new_var debug_print("var_dict = ", var_dict) replace_vars(callee_blocks, var_dict) debug_print("After local var rename") _debug_dump(callee_ir) # 3. replace formal parameters with actual arguments args = _get_callee_args(call_expr, callee, block.body[i].loc, func_ir) debug_print("After arguments rename: ") _debug_dump(callee_ir) # 4. 
    # 4. replace freevar with actual closure var
    if callee_closure and replace_freevars:
        closure = func_ir.get_definition(callee_closure)
        debug_print("callee's closure = ", closure)
        if isinstance(closure, tuple):
            cellget = ctypes.pythonapi.PyCell_Get
            cellget.restype = ctypes.py_object
            cellget.argtypes = (ctypes.py_object,)
            items = tuple(cellget(x) for x in closure)
        else:
            assert(isinstance(closure, ir.Expr)
                   and closure.op == 'build_tuple')
            items = closure.items
        assert(len(callee_code.co_freevars) == len(items))
        _replace_freevars(callee_blocks, items)
        debug_print("After closure rename")
        _debug_dump(callee_ir)

    if typingctx:
        from numba.core import typed_passes
        # call branch pruning to simplify IR and avoid inference errors
        callee_ir._definitions = ir_utils.build_definitions(callee_ir.blocks)
        numba.core.analysis.dead_branch_prune(callee_ir, arg_typs)
        f_typemap, f_return_type, f_calltypes, _ = \
            typed_passes.type_inference_stage(
                typingctx, targetctx, callee_ir, arg_typs, None)
        canonicalize_array_math(callee_ir, f_typemap,
                                f_calltypes, typingctx)
        # remove argument entries like arg.a from typemap
        arg_names = [vname for vname in f_typemap if vname.startswith("arg.")]
        for a in arg_names:
            f_typemap.pop(a)
        typemap.update(f_typemap)
        calltypes.update(f_calltypes)

    _replace_args_with(callee_blocks, args)

    # 5. split caller blocks into two
    new_blocks = []
    new_block = ir.Block(scope, block.loc)
    new_block.body = block.body[i + 1:]
    new_label = next_label()
    func_ir.blocks[new_label] = new_block
    new_blocks.append((new_label, new_block))
    block.body = block.body[:i]
    block.body.append(ir.Jump(min_label, instr.loc))

    # 6. replace Return with assignment to LHS
    topo_order = find_topo_order(callee_blocks)
    _replace_returns(callee_blocks, instr.target, new_label)

    # remove the old definition of instr.target too
    if (instr.target.name in func_ir._definitions
            and call_expr in func_ir._definitions[instr.target.name]):
        # NOTE: target can have multiple definitions due to control flow
        func_ir._definitions[instr.target.name].remove(call_expr)

    # 7. insert all new blocks, and add back definitions
    for label in topo_order:
        # block scope must point to parent's
        block = callee_blocks[label]
        block.scope = scope
        _add_definitions(func_ir, block)
        func_ir.blocks[label] = block
        new_blocks.append((label, block))
    debug_print("After merge in")
    _debug_dump(func_ir)

    if work_list is not None:
        for block in new_blocks:
            work_list.append(block)
    return callee_blocks, var_dict


def _get_callee_args(call_expr, callee, loc, func_ir):
    """Get arguments for calling 'callee', including the default arguments.
    Keyword arguments are currently only handled when 'callee' is a function.
    """
    if call_expr.op == 'call':
        args = list(call_expr.args)
        if call_expr.vararg:
            msg = "Calling a closure with *args is unsupported."
            raise errors.UnsupportedError(msg, call_expr.loc)
    elif call_expr.op == 'getattr':
        args = [call_expr.value]
    elif ir_utils.is_operator_or_getitem(call_expr):
        args = call_expr.list_vars()
    else:
        raise TypeError("Unsupported ir.Expr.{}".format(call_expr.op))

    debug_print = _make_debug_print("inline_closure_call default handling")

    # handle defaults and kw arguments using pysignature if callee is function
    if isinstance(callee, pytypes.FunctionType):
        pysig = numba.core.utils.pysignature(callee)
        normal_handler = lambda index, param, default: default
        default_handler = lambda index, param, default: ir.Const(default, loc)

        # Throw error for stararg
        # TODO: handle stararg
        def stararg_handler(index, param, default):
            raise NotImplementedError(
                "Stararg not supported in inliner for arg {} {}".format(
                    index, param))
        if call_expr.op == 'call':
            kws = dict(call_expr.kws)
        else:
            kws = {}
        return numba.core.typing.fold_arguments(
            pysig, args, kws, normal_handler, default_handler,
            stararg_handler)
    else:
        # TODO: handle arguments for make_function case similar to function
        # case above
        callee_defaults = (callee.defaults if hasattr(callee, 'defaults')
                           else callee.__defaults__)
        if callee_defaults:
            debug_print("defaults = ", callee_defaults)
            if isinstance(callee_defaults, tuple):  # Python 3.5
                defaults_list = []
                for x in callee_defaults:
                    if isinstance(x, ir.Var):
                        defaults_list.append(x)
                    else:
                        # this branch is predominantly for kwargs from
                        # inlinable functions
                        defaults_list.append(ir.Const(value=x, loc=loc))
                args = args + defaults_list
            elif (isinstance(callee_defaults, ir.Var)
                    or isinstance(callee_defaults, str)):
                default_tuple = func_ir.get_definition(callee_defaults)
                assert(isinstance(default_tuple, ir.Expr))
                assert(default_tuple.op == "build_tuple")
                const_vals = [func_ir.get_definition(x)
                              for x in default_tuple.items]
                args = args + const_vals
            else:
                raise NotImplementedError(
                    "Unsupported defaults to make_function: {}".format(
                        callee_defaults))
        return args


def _make_debug_print(prefix):
    def debug_print(*args):
        if config.DEBUG_INLINE_CLOSURE:
            print(prefix + ": " + "".join(str(x) for x in args))
    return debug_print


def _debug_dump(func_ir):
    if config.DEBUG_INLINE_CLOSURE:
        func_ir.dump()


def _get_all_scopes(blocks):
    """Get all block-local scopes from an IR.
    """
    all_scopes = []
    for label, block in blocks.items():
        if not (block.scope in all_scopes):
            all_scopes.append(block.scope)
    return all_scopes


def _replace_args_with(blocks, args):
    """
    Replace ir.Arg(...) with real arguments from call site
    """
    for label, block in blocks.items():
        assigns = block.find_insts(ir.Assign)
        for stmt in assigns:
            if isinstance(stmt.value, ir.Arg):
                idx = stmt.value.index
                assert(idx < len(args))
                stmt.value = args[idx]


def _replace_freevars(blocks, args):
    """
    Replace ir.FreeVar(...) with real variables from parent function
    """
    for label, block in blocks.items():
        assigns = block.find_insts(ir.Assign)
        for stmt in assigns:
            if isinstance(stmt.value, ir.FreeVar):
                idx = stmt.value.index
                assert(idx < len(args))
                if isinstance(args[idx], ir.Var):
                    stmt.value = args[idx]
                else:
                    stmt.value = ir.Const(args[idx], stmt.loc)


def _replace_returns(blocks, target, return_label):
    """
    Replace each return statement with an assignment directly to `target`,
    followed by a jump to `return_label`.
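
    For example (illustrative), a block ending in::

        return $retvar

    becomes::

        target = $retvar
        jump return_label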
""" for label, block in blocks.items(): casts = [] for i in range(len(block.body)): stmt = block.body[i] if isinstance(stmt, ir.Return): assert(i + 1 == len(block.body)) block.body[i] = ir.Assign(stmt.value, target, stmt.loc) block.body.append(ir.Jump(return_label, stmt.loc)) # remove cast of the returned value for cast in casts: if cast.target.name == stmt.value.name: cast.value = cast.value.value elif isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr) and stmt.value.op == 'cast': casts.append(stmt) def _add_definitions(func_ir, block): """ Add variable definitions found in a block to parent func_ir. """ definitions = func_ir._definitions assigns = block.find_insts(ir.Assign) for stmt in assigns: definitions[stmt.target.name].append(stmt.value) def _find_arraycall(func_ir, block): """Look for statement like "x = numpy.array(y)" or "x[..] = y" immediately after the closure call that creates list y (the i-th statement in block). Return the statement index if found, or raise GuardException. """ array_var = None array_call_index = None list_var_dead_after_array_call = False list_var = None i = 0 while i < len(block.body): instr = block.body[i] if isinstance(instr, ir.Del): # Stop the process if list_var becomes dead if list_var and array_var and instr.value == list_var.name: list_var_dead_after_array_call = True break pass elif isinstance(instr, ir.Assign): # Found array_var = array(list_var) lhs = instr.target expr = instr.value if (guard(find_callname, func_ir, expr) == ('array', 'numpy') and isinstance(expr.args[0], ir.Var)): list_var = expr.args[0] array_var = lhs array_stmt_index = i array_kws = dict(expr.kws) elif (isinstance(instr, ir.SetItem) and isinstance(instr.value, ir.Var) and not list_var): list_var = instr.value # Found array_var[..] = list_var, the case for nested array array_var = instr.target array_def = get_definition(func_ir, array_var) require(guard(_find_unsafe_empty_inferred, func_ir, array_def)) array_stmt_index = i array_kws = {} else: # Bail out otherwise break i = i + 1 # require array_var is found, and list_var is dead after array_call. require(array_var and list_var_dead_after_array_call) _make_debug_print("find_array_call")(block.body[array_stmt_index]) return list_var, array_stmt_index, array_kws def _find_iter_range(func_ir, range_iter_var, swapped): """Find the iterator's actual range if it is either range(n), or range(m, n), otherwise return raise GuardException. 
""" debug_print = _make_debug_print("find_iter_range") range_iter_def = get_definition(func_ir, range_iter_var) debug_print("range_iter_var = ", range_iter_var, " def = ", range_iter_def) require(isinstance(range_iter_def, ir.Expr) and range_iter_def.op == 'getiter') range_var = range_iter_def.value range_def = get_definition(func_ir, range_var) debug_print("range_var = ", range_var, " range_def = ", range_def) require(isinstance(range_def, ir.Expr) and range_def.op == 'call') func_var = range_def.func func_def = get_definition(func_ir, func_var) debug_print("func_var = ", func_var, " func_def = ", func_def) require(isinstance(func_def, ir.Global) and (func_def.value == range or func_def.value == numba.misc.special.prange)) nargs = len(range_def.args) swapping = [('"array comprehension"', 'closure of'), range_def.func.loc] if nargs == 1: swapped[range_def.func.name] = swapping stop = get_definition(func_ir, range_def.args[0], lhs_only=True) return (0, range_def.args[0], func_def) elif nargs == 2: swapped[range_def.func.name] = swapping start = get_definition(func_ir, range_def.args[0], lhs_only=True) stop = get_definition(func_ir, range_def.args[1], lhs_only=True) return (start, stop, func_def) else: raise GuardException def _inline_arraycall(func_ir, cfg, visited, loop, swapped, enable_prange=False, typed=False): """Look for array(list) call in the exit block of a given loop, and turn list operations into array operations in the loop if the following conditions are met: 1. The exit block contains an array call on the list; 2. The list variable is no longer live after array call; 3. The list is created in the loop entry block; 4. The loop is created from an range iterator whose length is known prior to the loop; 5. There is only one list_append operation on the list variable in the loop body; 6. The block that contains list_append dominates the loop head, which ensures list length is the same as loop length; If any condition check fails, no modification will be made to the incoming IR. """ debug_print = _make_debug_print("inline_arraycall") # There should only be one loop exit require(len(loop.exits) == 1) exit_block = next(iter(loop.exits)) list_var, array_call_index, array_kws = _find_arraycall(func_ir, func_ir.blocks[exit_block]) # check if dtype is present in array call dtype_def = None dtype_mod_def = None if 'dtype' in array_kws: require(isinstance(array_kws['dtype'], ir.Var)) # We require that dtype argument to be a constant of getattr Expr, and we'll # remember its definition for later use. dtype_def = get_definition(func_ir, array_kws['dtype']) require(isinstance(dtype_def, ir.Expr) and dtype_def.op == 'getattr') dtype_mod_def = get_definition(func_ir, dtype_def.value) list_var_def = get_definition(func_ir, list_var) debug_print("list_var = ", list_var, " def = ", list_var_def) if isinstance(list_var_def, ir.Expr) and list_var_def.op == 'cast': list_var_def = get_definition(func_ir, list_var_def.value) # Check if the definition is a build_list require(isinstance(list_var_def, ir.Expr) and list_var_def.op == 'build_list') # The build_list must be empty require(len(list_var_def.items) == 0) # Look for list_append in "last" block in loop body, which should be a block that is # a post-dominator of the loop header. list_append_stmts = [] for label in loop.body: # We have to consider blocks of this loop, but not sub-loops. # To achieve this, we require the set of "in_loops" of "label" to be visited loops. 
in_visited_loops = [l.header in visited for l in cfg.in_loops(label)] if not all(in_visited_loops): continue block = func_ir.blocks[label] debug_print("check loop body block ", label) for stmt in block.find_insts(ir.Assign): lhs = stmt.target expr = stmt.value if isinstance(expr, ir.Expr) and expr.op == 'call': func_def = get_definition(func_ir, expr.func) if isinstance(func_def, ir.Expr) and func_def.op == 'getattr' \ and func_def.attr == 'append': list_def = get_definition(func_ir, func_def.value) debug_print("list_def = ", list_def, list_def is list_var_def) if list_def is list_var_def: # found matching append call list_append_stmts.append((label, block, stmt)) # Require only one list_append, otherwise we won't know the indices require(len(list_append_stmts) == 1) append_block_label, append_block, append_stmt = list_append_stmts[0] # Check if append_block (besides loop entry) dominates loop header. # Since CFG doesn't give us this info without loop entry, we approximate # by checking if the predecessor set of the header block is the same # as loop_entries plus append_block, which is certainly more restrictive # than necessary, and can be relaxed if needed. preds = set(l for l, b in cfg.predecessors(loop.header)) debug_print("preds = ", preds, (loop.entries | set([append_block_label]))) require(preds == (loop.entries | set([append_block_label]))) # Find iterator in loop header iter_vars = [] iter_first_vars = [] loop_header = func_ir.blocks[loop.header] for stmt in loop_header.find_insts(ir.Assign): expr = stmt.value if isinstance(expr, ir.Expr): if expr.op == 'iternext': iter_def = get_definition(func_ir, expr.value) debug_print("iter_def = ", iter_def) iter_vars.append(expr.value) elif expr.op == 'pair_first': iter_first_vars.append(stmt.target) # Require only one iterator in loop header require(len(iter_vars) == 1 and len(iter_first_vars) == 1) iter_var = iter_vars[0] # variable that holds the iterator object iter_first_var = iter_first_vars[0] # variable that holds the value out of iterator # Final requirement: only one loop entry, and we're going to modify it by: # 1. replacing the list definition with an array definition; # 2. adding a counter for the array iteration. require(len(loop.entries) == 1) loop_entry = func_ir.blocks[next(iter(loop.entries))] terminator = loop_entry.terminator scope = loop_entry.scope loc = loop_entry.loc stmts = [] removed = [] def is_removed(val, removed): if isinstance(val, ir.Var): for x in removed: if x.name == val.name: return True return False # Skip list construction and skip terminator, add the rest to stmts for i in range(len(loop_entry.body) - 1): stmt = loop_entry.body[i] if isinstance(stmt, ir.Assign) and (stmt.value is list_def or is_removed(stmt.value, removed)): removed.append(stmt.target) else: stmts.append(stmt) debug_print("removed variables: ", removed) # Define an index_var to index the array. # If the range happens to be single step ranges like range(n), or range(m, n), # then the index_var correlates to iterator index; otherwise we'll have to # define a new counter. 
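    # Illustrative example (not from the original source): conceptually,
    # this pass rewrites a comprehension-style loop such as
    #
    #     acc = []
    #     for i in range(n):
    #         acc.append(i * 2.0)
    #     a = numpy.array(acc)
    #
    # into a pre-allocated form along the lines of
    #
    #     a = numpy.empty((n,), ...)  # or unsafe_empty_inferred when typed
    #     for i in range(n):
    #         a[i] = i * 2.0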
    range_def = guard(_find_iter_range, func_ir, iter_var, swapped)
    index_var = ir.Var(scope, mk_unique_var("index"), loc)
    if range_def and range_def[0] == 0:
        # iterator starts with 0, index_var can just be iter_first_var
        index_var = iter_first_var
    else:
        # index_var = -1 # starting the index with -1 since it will be
        # incremented in the loop header
        stmts.append(_new_definition(func_ir, index_var,
                     ir.Const(value=-1, loc=loc), loc))

    # Insert statement to get the size of the loop iterator
    size_var = ir.Var(scope, mk_unique_var("size"), loc)
    if range_def:
        start, stop, range_func_def = range_def
        if start == 0:
            size_val = stop
        else:
            size_val = ir.Expr.binop(fn=operator.sub, lhs=stop, rhs=start,
                                     loc=loc)
        # we can parallelize this loop if enable_prange is True, by changing
        # the range function from range to prange.
        if enable_prange and isinstance(range_func_def, ir.Global):
            range_func_def.name = 'internal_prange'
            range_func_def.value = internal_prange
    else:
        # this doesn't work in objmode as it's effectively untyped
        if typed:
            len_func_var = ir.Var(scope, mk_unique_var("len_func"), loc)
            from numba.cpython.rangeobj import length_of_iterator
            stmts.append(_new_definition(func_ir, len_func_var,
                         ir.Global('length_of_iterator',
                                   length_of_iterator,
                                   loc=loc),
                         loc))
            size_val = ir.Expr.call(len_func_var, (iter_var,), (), loc=loc)
        else:
            raise GuardException

    stmts.append(_new_definition(func_ir, size_var, size_val, loc))

    size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc)
    stmts.append(_new_definition(func_ir, size_tuple_var,
                 ir.Expr.build_tuple(items=[size_var], loc=loc), loc))

    # Insert array allocation
    array_var = ir.Var(scope, mk_unique_var("array"), loc)
    empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc)
    if dtype_def and dtype_mod_def:
        # when dtype is present, we'll call empty with dtype
        dtype_mod_var = ir.Var(scope, mk_unique_var("dtype_mod"), loc)
        dtype_var = ir.Var(scope, mk_unique_var("dtype"), loc)
        stmts.append(_new_definition(func_ir, dtype_mod_var, dtype_mod_def,
                                     loc))
        stmts.append(_new_definition(func_ir, dtype_var,
                     ir.Expr.getattr(dtype_mod_var, dtype_def.attr, loc),
                     loc))
        stmts.append(_new_definition(func_ir, empty_func,
                     ir.Global('empty', np.empty, loc=loc), loc))
        array_kws = [('dtype', dtype_var)]
    else:
        # this doesn't work in objmode as it's effectively untyped
        if typed:
            # otherwise we'll call unsafe_empty_inferred
            stmts.append(_new_definition(func_ir, empty_func,
                         ir.Global('unsafe_empty_inferred',
                                   unsafe_empty_inferred,
                                   loc=loc),
                         loc))
            array_kws = []
        else:
            raise GuardException

    # array_var = empty_func(size_tuple_var)
    stmts.append(_new_definition(func_ir, array_var,
                 ir.Expr.call(empty_func, (size_tuple_var,),
                              list(array_kws), loc=loc),
                 loc))

    # Add back removed just in case they are used by something else
    for var in removed:
        stmts.append(_new_definition(func_ir, var, array_var, loc))

    # Add back terminator
    stmts.append(terminator)
    # Modify loop_entry
    loop_entry.body = stmts

    if range_def:
        if range_def[0] != 0:
            # when range doesn't start from 0, index_var becomes loop index
            # (iter_first_var) minus an offset (range_def[0])
            terminator = loop_header.terminator
            assert(isinstance(terminator, ir.Branch))
            # find the block in the loop body that header jumps to
            block_id = terminator.truebr
            blk = func_ir.blocks[block_id]
            loc = blk.loc
            blk.body.insert(0, _new_definition(func_ir, index_var,
                            ir.Expr.binop(fn=operator.sub,
                                          lhs=iter_first_var,
                                          rhs=range_def[0],
                                          loc=loc),
                            loc))
    else:
        # Insert index_var increment to the end of loop header
        loc = loop_header.loc
        terminator = loop_header.terminator
        stmts = loop_header.body[0:-1]
        next_index_var = ir.Var(scope, mk_unique_var("next_index"), loc)
        one = ir.Var(scope, mk_unique_var("one"), loc)
        # one = 1
        stmts.append(_new_definition(func_ir, one,
                     ir.Const(value=1, loc=loc), loc))
        # next_index_var = index_var + 1
        stmts.append(_new_definition(func_ir, next_index_var,
                     ir.Expr.binop(fn=operator.add, lhs=index_var,
                                   rhs=one, loc=loc),
                     loc))
        # index_var = next_index_var
        stmts.append(_new_definition(func_ir, index_var, next_index_var,
                                     loc))
        stmts.append(terminator)
        loop_header.body = stmts

    # In append_block, change list_append into array assign
    for i in range(len(append_block.body)):
        if append_block.body[i] is append_stmt:
            debug_print("Replace append with SetItem")
            append_block.body[i] = ir.SetItem(
                target=array_var, index=index_var,
                value=append_stmt.value.args[0], loc=append_stmt.loc)

    # replace array call, by changing "a = array(b)" to "a = b"
    stmt = func_ir.blocks[exit_block].body[array_call_index]
    # stmt can be either array call or SetItem, we only replace array call
    if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr):
        stmt.value = array_var
        func_ir._definitions[stmt.target.name] = [stmt.value]

    return True


def _find_unsafe_empty_inferred(func_ir, expr):
    unsafe_empty_inferred
    require(isinstance(expr, ir.Expr) and expr.op == 'call')
    callee = expr.func
    callee_def = get_definition(func_ir, callee)
    require(isinstance(callee_def, ir.Global))
    _make_debug_print("_find_unsafe_empty_inferred")(callee_def.value)
    return callee_def.value == unsafe_empty_inferred


def _fix_nested_array(func_ir):
    """Look for assignment like: a[..] = b, where both a and b are numpy
    arrays, and try to eliminate array b by expanding a with an extra
    dimension.
    """
    blocks = func_ir.blocks
    cfg = compute_cfg_from_blocks(blocks)
    usedefs = compute_use_defs(blocks)
    empty_deadmap = dict([(label, set()) for label in blocks.keys()])
    livemap = compute_live_variables(cfg, blocks, usedefs.defmap,
                                     empty_deadmap)

    def find_array_def(arr):
        """Find numpy array definition such as
        arr = numba.unsafe.ndarray.empty_inferred(...).
        If it is arr = b[...], find array definition of b recursively.
        """
        arr_def = get_definition(func_ir, arr)
        _make_debug_print("find_array_def")(arr, arr_def)
        if isinstance(arr_def, ir.Expr):
            if guard(_find_unsafe_empty_inferred, func_ir, arr_def):
                return arr_def
            elif arr_def.op == 'getitem':
                return find_array_def(arr_def.value)
        raise GuardException

    def fix_dependencies(expr, varlist):
        """Double check if all variables in varlist are defined before
        expr is used. Try to move constant definitions when the check
        fails. Bails out by raising GuardException if it can't be moved.
        """
        debug_print = _make_debug_print("fix_dependencies")
        for label, block in blocks.items():
            scope = block.scope
            body = block.body
            defined = set()
            for i in range(len(body)):
                inst = body[i]
                if isinstance(inst, ir.Assign):
                    defined.add(inst.target.name)
                    if inst.value is expr:
                        new_varlist = []
                        for var in varlist:
                            # var must be defined before this inst, or live
                            # and not later defined.
if (var.name in defined or (var.name in livemap[label] and not (var.name in usedefs.defmap[label]))): debug_print(var.name, " already defined") new_varlist.append(var) else: debug_print(var.name, " not yet defined") var_def = get_definition(func_ir, var.name) if isinstance(var_def, ir.Const): loc = var.loc new_var = ir.Var(scope, mk_unique_var("new_var"), loc) new_const = ir.Const(var_def.value, loc) new_vardef = _new_definition(func_ir, new_var, new_const, loc) new_body = [] new_body.extend(body[:i]) new_body.append(new_vardef) new_body.extend(body[i:]) block.body = new_body new_varlist.append(new_var) else: raise GuardException return new_varlist # when expr is not found in block raise GuardException def fix_array_assign(stmt): """For assignment like lhs[idx] = rhs, where both lhs and rhs are arrays, do the following: 1. find the definition of rhs, which has to be a call to numba.unsafe.ndarray.empty_inferred 2. find the source array creation for lhs, insert an extra dimension of size of b. 3. replace the definition of rhs = numba.unsafe.ndarray.empty_inferred(...) with rhs = lhs[idx] """ require(isinstance(stmt, ir.SetItem)) require(isinstance(stmt.value, ir.Var)) debug_print = _make_debug_print("fix_array_assign") debug_print("found SetItem: ", stmt) lhs = stmt.target # Find the source array creation of lhs lhs_def = find_array_def(lhs) debug_print("found lhs_def: ", lhs_def) rhs_def = get_definition(func_ir, stmt.value) debug_print("found rhs_def: ", rhs_def) require(isinstance(rhs_def, ir.Expr)) if rhs_def.op == 'cast': rhs_def = get_definition(func_ir, rhs_def.value) require(isinstance(rhs_def, ir.Expr)) require(_find_unsafe_empty_inferred(func_ir, rhs_def)) # Find the array dimension of rhs dim_def = get_definition(func_ir, rhs_def.args[0]) require(isinstance(dim_def, ir.Expr) and dim_def.op == 'build_tuple') debug_print("dim_def = ", dim_def) extra_dims = [ get_definition(func_ir, x, lhs_only=True) for x in dim_def.items ] debug_print("extra_dims = ", extra_dims) # Expand size tuple when creating lhs_def with extra_dims size_tuple_def = get_definition(func_ir, lhs_def.args[0]) require(isinstance(size_tuple_def, ir.Expr) and size_tuple_def.op == 'build_tuple') debug_print("size_tuple_def = ", size_tuple_def) extra_dims = fix_dependencies(size_tuple_def, extra_dims) size_tuple_def.items += extra_dims # In-place modify rhs_def to be getitem rhs_def.op = 'getitem' rhs_def.fn = operator.getitem rhs_def.value = get_definition(func_ir, lhs, lhs_only=True) rhs_def.index = stmt.index del rhs_def._kws['func'] del rhs_def._kws['args'] del rhs_def._kws['vararg'] del rhs_def._kws['kws'] # success return True for label in find_topo_order(func_ir.blocks): block = func_ir.blocks[label] for stmt in block.body: if guard(fix_array_assign, stmt): block.body.remove(stmt) def _new_definition(func_ir, var, value, loc): func_ir._definitions[var.name] = [value] return ir.Assign(value=value, target=var, loc=loc) @rewrites.register_rewrite('after-inference') class RewriteArrayOfConsts(rewrites.Rewrite): '''The RewriteArrayOfConsts class is responsible for finding 1D array creations from a constant list, and rewriting it into direct initialization of array elements without creating the list. 
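
    For example (illustrative), ``a = np.array([1, 2, 3])`` becomes,
    conceptually::

        a = np.empty(3, <dtype>)   # dtype taken from the inferred call type
        a[0] = 1
        a[1] = 2
        a[2] = 3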
    '''
    def __init__(self, state, *args, **kws):
        self.typingctx = state.typingctx
        super(RewriteArrayOfConsts, self).__init__(*args, **kws)

    def match(self, func_ir, block, typemap, calltypes):
        if len(calltypes) == 0:
            return False
        self.crnt_block = block
        self.new_body = guard(_inline_const_arraycall, block, func_ir,
                              self.typingctx, typemap, calltypes)
        return self.new_body is not None

    def apply(self):
        self.crnt_block.body = self.new_body
        return self.crnt_block


def _inline_const_arraycall(block, func_ir, context, typemap, calltypes):
    """Look for array(list) call where list is a constant list created by
    build_list, and turn them into direct array creation and initialization,
    if the following conditions are met:
      1. The build_list call immediately precedes the array call;
      2. The list variable is no longer live after the array call;
    If any condition check fails, no modification will be made.
    """
    debug_print = _make_debug_print("inline_const_arraycall")
    scope = block.scope

    def inline_array(array_var, expr, stmts, list_vars, dels):
        """Check to see if the given "array_var" is created from a list of
        constants, and try to inline the list definition as array
        initialization. Extra statements produced will be appended to
        "stmts".
        """
        callname = guard(find_callname, func_ir, expr)
        require(callname and callname[1] == 'numpy'
                and callname[0] == 'array')
        require(expr.args[0].name in list_vars)
        ret_type = calltypes[expr].return_type
        require(isinstance(ret_type, types.ArrayCompatible)
                and ret_type.ndim == 1)
        loc = expr.loc
        list_var = expr.args[0]
        # Get the type of the array to be created.
        array_typ = typemap[array_var.name]
        debug_print("inline array_var = ", array_var, " list_var = ",
                    list_var)
        # Get the element type of the array to be created.
        dtype = array_typ.dtype
        # Get the sequence of operations to provide values to the new array.
        seq, _ = find_build_sequence(func_ir, list_var)
        size = len(seq)
        # Create a tuple to pass to empty below to specify the new array
        # size.
        size_var = ir.Var(scope, mk_unique_var("size"), loc)
        size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc)
        size_typ = types.intp
        size_tuple_typ = types.UniTuple(size_typ, 1)
        typemap[size_var.name] = size_typ
        typemap[size_tuple_var.name] = size_tuple_typ
        stmts.append(_new_definition(func_ir, size_var,
                     ir.Const(size, loc=loc), loc))
        stmts.append(_new_definition(func_ir, size_tuple_var,
                     ir.Expr.build_tuple(items=[size_var], loc=loc), loc))

        # The general approach is to create an empty array and then fill
        # the elements in one-by-one from their specification.

        # Get the numpy type to pass to empty.
        nptype = types.DType(dtype)

        # Create a variable to hold the numpy empty function.
        empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc)
        fnty = get_np_ufunc_typ(np.empty)
        sig = context.resolve_function_type(fnty, (size_typ,),
                                            {'dtype': nptype})
        typemap[empty_func.name] = fnty
        stmts.append(_new_definition(func_ir, empty_func,
                     ir.Global('empty', np.empty, loc=loc), loc))

        # We pass two arguments to empty, first the size tuple and second
        # the dtype of the new array. Here, we created typ_var which is
        # the dtype argument of the new array. typ_var in turn is created
        # by getattr of the dtype string on the numpy module.

        # Create var for numpy module.
        g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
        typemap[g_np_var.name] = types.misc.Module(np)
        g_np = ir.Global('np', np, loc)
        stmts.append(_new_definition(func_ir, g_np_var, g_np, loc))

        # Create var for the result of the dtype getattr on the numpy
        # module.
typ_var = ir.Var(scope, mk_unique_var("$np_typ_var"), loc) typemap[typ_var.name] = nptype dtype_str = str(dtype) if dtype_str == 'bool': dtype_str = 'bool_' # Get dtype attribute of numpy module. np_typ_getattr = ir.Expr.getattr(g_np_var, dtype_str, loc) stmts.append(_new_definition(func_ir, typ_var, np_typ_getattr, loc)) # Create the call to numpy.empty passing the size tuple and dtype var. empty_call = ir.Expr.call(empty_func, [size_var, typ_var], {}, loc=loc) calltypes[empty_call] = typing.signature(array_typ, size_typ, nptype) stmts.append(_new_definition(func_ir, array_var, empty_call, loc)) # Fill in the new empty array one-by-one. for i in range(size): index_var = ir.Var(scope, mk_unique_var("index"), loc) index_typ = types.intp typemap[index_var.name] = index_typ stmts.append(_new_definition(func_ir, index_var, ir.Const(i, loc), loc)) setitem = ir.SetItem(array_var, index_var, seq[i], loc) calltypes[setitem] = typing.signature(types.none, array_typ, index_typ, dtype) stmts.append(setitem) stmts.extend(dels) return True class State(object): """ This class is used to hold the state in the following loop so as to make it easy to reset the state of the variables tracking the various statement kinds """ def __init__(self): # list_vars keep track of the variable created from the latest # build_list instruction, as well as its synonyms. self.list_vars = [] # dead_vars keep track of those in list_vars that are considered dead. self.dead_vars = [] # list_items keep track of the elements used in build_list. self.list_items = [] self.stmts = [] # dels keep track of the deletion of list_items, which will need to be # moved after array initialization. self.dels = [] # tracks if a modification has taken place self.modified = False def reset(self): """ Resets the internal state of the variables used for tracking """ self.list_vars = [] self.dead_vars = [] self.list_items = [] self.dels = [] def list_var_used(self, inst): """ Returns True if the list being analysed is used between the build_list and the array call. """ return any([x.name in self.list_vars for x in inst.list_vars()]) state = State() for inst in block.body: if isinstance(inst, ir.Assign): if isinstance(inst.value, ir.Var): if inst.value.name in state.list_vars: state.list_vars.append(inst.target.name) state.stmts.append(inst) continue elif isinstance(inst.value, ir.Expr): expr = inst.value if expr.op == 'build_list': # new build_list encountered, reset state state.reset() state.list_items = [x.name for x in expr.items] state.list_vars = [inst.target.name] state.stmts.append(inst) continue elif expr.op == 'call' and expr in calltypes: arr_var = inst.target if guard(inline_array, inst.target, expr, state.stmts, state.list_vars, state.dels): state.modified = True continue elif isinstance(inst, ir.Del): removed_var = inst.value if removed_var in state.list_items: state.dels.append(inst) continue elif removed_var in state.list_vars: # one of the list_vars is considered dead. state.dead_vars.append(removed_var) state.list_vars.remove(removed_var) state.stmts.append(inst) if state.list_vars == []: # if all list_vars are considered dead, we need to filter # them out from existing stmts to completely remove # build_list. # Note that if a translation didn't take place, dead_vars # will also be empty when we reach this point. 
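                    # Illustrative sketch (assumed variable names, not from
                    # the original source): for a sequence like
                    #     $lst = build_list(x, y)
                    #     a = np.array($lst)
                    #     del $lst
                    # the Del of $lst leaves list_vars empty, and the
                    # pending build_list assignment is filtered out of
                    # state.stmts below so the list is never materialized.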
body = [] for inst in state.stmts: if ((isinstance(inst, ir.Assign) and inst.target.name in state.dead_vars) or (isinstance(inst, ir.Del) and inst.value in state.dead_vars)): continue body.append(inst) state.stmts = body state.dead_vars = [] state.modified = True continue state.stmts.append(inst) # If the list is used in any capacity between build_list and array # call, then we must call off the translation for this list because # it could be mutated and list_items would no longer be applicable. if state.list_var_used(inst): state.reset() return state.stmts if state.modified else None numba-0.55.1/numba/core/interpreter.py000664 000000 000000 00000220251 14174536160 017644 0ustar00rootroot000000 000000 import builtins import collections import dis import operator import logging from numba.core import errors, dataflow, controlflow, ir, config from numba.core.errors import NotDefinedError, error_extras from numba.core.utils import (PYVERSION, BINOPS_TO_OPERATORS, INPLACE_BINOPS_TO_OPERATORS,) from numba.core.byteflow import Flow, AdaptDFA, AdaptCFA from numba.core.unsafe import eh from numba.cpython.unsafe.tuple import unpack_single_tuple class _UNKNOWN_VALUE(object): """Represents an unknown value, this is for ease of debugging purposes only. """ def __init__(self, varname): self._varname = varname def __repr__(self): return "_UNKNOWN_VALUE({})".format(self._varname) _logger = logging.getLogger(__name__) class Assigner(object): """ This object keeps track of potential assignment simplifications inside a code block. For example `$O.1 = x` followed by `y = $0.1` can be simplified into `y = x`, but it's not possible anymore if we have `x = z` in-between those two instructions. NOTE: this is not only an optimization, but is actually necessary due to certain limitations of Numba - such as only accepting the returning of an array passed as function argument. """ def __init__(self): # { destination variable name -> source Var object } self.dest_to_src = {} # Basically a reverse mapping of dest_to_src: # { source variable name -> all destination names in dest_to_src } self.src_invalidate = collections.defaultdict(list) self.unused_dests = set() def assign(self, srcvar, destvar): """ Assign *srcvar* to *destvar*. Return either *srcvar* or a possible simplified assignment source (earlier assigned to *srcvar*). """ srcname = srcvar.name destname = destvar.name if destname in self.src_invalidate: # destvar will change, invalidate all previously known # simplifications for d in self.src_invalidate.pop(destname): self.dest_to_src.pop(d) if srcname in self.dest_to_src: srcvar = self.dest_to_src[srcname] if destvar.is_temp: self.dest_to_src[destname] = srcvar self.src_invalidate[srcname].append(destname) self.unused_dests.add(destname) return srcvar def get_assignment_source(self, destname): """ Get a possible assignment source (a ir.Var instance) to replace *destname*, otherwise None. """ if destname in self.dest_to_src: return self.dest_to_src[destname] self.unused_dests.discard(destname) return None def peep_hole_list_to_tuple(func_ir): """ This peephole rewrites a bytecode sequence new to Python 3.9 that looks like e.g.: def foo(a): return (*a,) 41 0 BUILD_LIST 0 2 LOAD_FAST 0 (a) 4 LIST_EXTEND 1 6 LIST_TO_TUPLE 8 RETURN_VAL essentially, the unpacking of tuples is written as a list which is appended to/extended and then "magicked" into a tuple by the new LIST_TO_TUPLE opcode. This peephole repeatedly analyses the bytecode in a block looking for a window between a `LIST_TO_TUPLE` and `BUILD_LIST` and... 
    1. Turns the BUILD_LIST into a BUILD_TUPLE
    2. Sets an accumulator's initial value as the target of the BUILD_TUPLE
    3. Searches for 'extend' on the original list and turns these into
       binary additions on the accumulator.
    4. Searches for 'append' on the original list and turns these into a
       `BUILD_TUPLE` which is then appended via binary addition to the
       accumulator.
    5. Assigns the accumulator to the variable that exits the peephole and
       that the rest of the block/code refers to as the result of the
       unpack operation.
    6. Patches up the remaining definitions and writes the result back into
       the block body.
    """
    _DEBUG = False

    # For all blocks
    for offset, blk in func_ir.blocks.items():
        # keep doing the peephole rewrite until nothing is left that matches
        while True:
            # first try and find a matching region
            # i.e. BUILD_LIST......LIST_TO_TUPLE
            def find_positive_region():
                found = False
                for idx in reversed(range(len(blk.body))):
                    stmt = blk.body[idx]
                    if isinstance(stmt, ir.Assign):
                        value = stmt.value
                        if (isinstance(value, ir.Expr)
                                and value.op == 'list_to_tuple'):
                            target_list = value.info[0]
                            found = True
                            bt = (idx, stmt)
                    if found:
                        if isinstance(stmt, ir.Assign):
                            if stmt.target.name == target_list:
                                region = (bt, (idx, stmt))
                                return region

            region = find_positive_region()

            # if there's a peep hole region then do something with it
            if region is not None:
                peep_hole = blk.body[region[1][0]: region[0][0]]

                if _DEBUG:
                    print("\nWINDOW:")
                    for x in peep_hole:
                        print(x)
                    print("")

                appends = []
                extends = []
                init = region[1][1]
                const_list = init.target.name

                # Walk through the peep_hole and find things that are being
                # "extend"ed and "append"ed to the BUILD_LIST
                for x in peep_hole:
                    if isinstance(x, ir.Assign):
                        if isinstance(x.value, ir.Expr):
                            expr = x.value
                            if (expr.op == 'getattr'
                                    and expr.value.name == const_list):
                                # it's not strictly necessary to split out
                                # extends and appends, but it helps with
                                # debugging to do so!
                                if expr.attr == 'extend':
                                    extends.append(x.target.name)
                                elif expr.attr == 'append':
                                    appends.append(x.target.name)
                                else:
                                    assert 0

                # go back through the peep hole and build new IR based on it
                new_hole = []

                def append_and_fix(x):
                    """ Adds to the new_hole and fixes up definitions"""
                    new_hole.append(x)
                    if x.target.name in func_ir._definitions:
                        # if there's already a definition, drop it, should
                        # only be 1 as the way cpython emits the sequence
                        # for `list_to_tuple` should ensure this.
                        assert len(func_ir._definitions[x.target.name]) == 1
                        func_ir._definitions[x.target.name].clear()
                    func_ir._definitions[x.target.name].append(x.value)

                the_build_list = init.target

                # Do the transform on the peep hole
                if _DEBUG:
                    print("\nBLOCK:")
                    blk.dump()

                # This section basically accumulates list appends and
                # extends as binop(+) on tuples, it drops all the getattr()
                # for extend and append as they are now dead and replaced
                # with binop(+). It also switches out the build_list for a
                # build_tuple and then ensures everything is wired up and
                # defined ok.
                t2l_agn = region[0][1]
                acc = the_build_list
                for x in peep_hole:
                    if isinstance(x, ir.Assign):
                        if isinstance(x.value, ir.Expr):
                            expr = x.value
                            if expr.op == 'getattr':
                                if (x.target.name in extends
                                        or x.target.name in appends):
                                    # drop definition, it's being wholesale
                                    # replaced.
func_ir._definitions.pop(x.target.name) continue else: # a getattr on something we're not # interested in new_hole.append(x) elif expr.op == 'call': fname = expr.func.name if fname in extends or fname in appends: arg = expr.args[0] if isinstance(arg, ir.Var): tmp_name = "%s_var_%s" % (fname, arg.name) if fname in appends: bt = ir.Expr.build_tuple([arg,], expr.loc) else: bt = arg var = ir.Var(arg.scope, tmp_name, expr.loc) asgn = ir.Assign(bt, var, expr.loc) append_and_fix(asgn) arg = var # this needs to be a binary add new = ir.Expr.binop(fn=operator.add, lhs=acc, rhs=arg, loc=x.loc) asgn = ir.Assign(new, x.target, expr.loc) append_and_fix(asgn) acc = asgn.target else: # there could be a call in the unpack, like # *(a, x.append(y)) new_hole.append(x) elif (expr.op == 'build_list' and x.target.name == const_list): new = ir.Expr.build_tuple(expr.items, expr.loc) asgn = ir.Assign(new, x.target, expr.loc) # Not a temporary any more append_and_fix(asgn) else: new_hole.append(x) else: new_hole.append(x) else: # stick everything else in as-is new_hole.append(x) # Finally write the result back into the original build list as # everything refers to it. new_hole.append(ir.Assign(acc, t2l_agn.target, the_build_list.loc)) if _DEBUG: print("\nNEW HOLE:") for x in new_hole: print(x) # and then update the block body with the modified region cpy = blk.body[:] head = cpy[:region[1][0]] tail = blk.body[region[0][0] + 1:] tmp = head + new_hole + tail blk.body.clear() blk.body.extend(tmp) if _DEBUG: print("\nDUMP post hole:") blk.dump() else: # else escape break return func_ir def peep_hole_delete_with_exit(func_ir): """ This rewrite removes variables used to store the `__exit__` function loaded by SETUP_WITH. """ dead_vars = set() for blk in func_ir.blocks.values(): for stmt in blk.body: # Any statement that uses a variable with the '$setup_with_exitfn' # prefix is considered dead. used = set(stmt.list_vars()) for v in used: if v.name.startswith('$setup_with_exitfn'): dead_vars.add(v) # Any assignment that uses any of the dead variable is considered # dead. if used & dead_vars: if isinstance(stmt, ir.Assign): dead_vars.add(stmt.target) new_body = [] for stmt in blk.body: # Skip any statements that uses anyone of the dead variable. if not (set(stmt.list_vars()) & dead_vars): new_body.append(stmt) blk.body.clear() blk.body.extend(new_body) return func_ir class Interpreter(object): """A bytecode interpreter that builds up the IR. """ def __init__(self, func_id): self.func_id = func_id self.arg_count = func_id.arg_count self.arg_names = func_id.arg_names self.loc = self.first_loc = ir.Loc.from_function_id(func_id) self.is_generator = func_id.is_generator # { inst offset : ir.Block } self.blocks = {} # { name: [definitions] } of local variables self.definitions = collections.defaultdict(list) # A set to keep track of all exception variables. # To be used in _legalize_exception_vars() self._exception_vars = set() def interpret(self, bytecode): """ Generate IR for this bytecode. 
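
        Illustrative sketch of assumed driver code (roughly what
        numba.core.compiler.run_frontend does; not part of this method)::

            from numba.core import bytecode, interpreter
            func_id = bytecode.FunctionIdentity.from_function(pyfunc)
            bc = bytecode.ByteCode(func_id=func_id)
            func_ir = interpreter.Interpreter(func_id).interpret(bc)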
""" self.bytecode = bytecode self.scopes = [] global_scope = ir.Scope(parent=None, loc=self.loc) self.scopes.append(global_scope) if PYVERSION < (3, 7): # Control flow analysis self.cfa = controlflow.ControlFlowAnalysis(bytecode) self.cfa.run() if config.DUMP_CFG: self.cfa.dump() # Data flow analysis self.dfa = dataflow.DataFlowAnalysis(self.cfa) self.dfa.run() else: flow = Flow(bytecode) flow.run() self.dfa = AdaptDFA(flow) self.cfa = AdaptCFA(flow) if config.DUMP_CFG: self.cfa.dump() # Temp states during interpretation self.current_block = None self.current_block_offset = None self.syntax_blocks = [] self.dfainfo = None self.scopes.append(ir.Scope(parent=self.current_scope, loc=self.loc)) # Interpret loop for inst, kws in self._iter_inst(): self._dispatch(inst, kws) self._legalize_exception_vars() # Prepare FunctionIR func_ir = ir.FunctionIR(self.blocks, self.is_generator, self.func_id, self.first_loc, self.definitions, self.arg_count, self.arg_names) _logger.debug(func_ir.dump_to_string()) # post process the IR to rewrite opcodes/byte sequences that are too # involved to risk handling as part of direct interpretation peepholes = [] if PYVERSION in [(3, 9), (3, 10)]: peepholes.append(peep_hole_list_to_tuple) peepholes.append(peep_hole_delete_with_exit) post_processed_ir = self.post_process(peepholes, func_ir) return post_processed_ir def post_process(self, peepholes, func_ir): for peep in peepholes: func_ir = peep(func_ir) return func_ir def _legalize_exception_vars(self): """Search for unsupported use of exception variables. Note, they cannot be stored into user variable. """ # Build a set of exception variables excvars = self._exception_vars.copy() # Propagate the exception variables to LHS of assignment for varname, defnvars in self.definitions.items(): for v in defnvars: if isinstance(v, ir.Var): k = v.name if k in excvars: excvars.add(varname) # Filter out the user variables. uservar = list(filter(lambda x: not x.startswith('$'), excvars)) if uservar: # Complain about the first user-variable storing an exception first = uservar[0] loc = self.current_scope.get(first).loc msg = "Exception object cannot be stored into variable ({})." raise errors.UnsupportedError(msg.format(first), loc=loc) def init_first_block(self): # Define variables receiving the function arguments for index, name in enumerate(self.arg_names): val = ir.Arg(index=index, name=name, loc=self.loc) self.store(val, name) def _iter_inst(self): for blkct, block in enumerate(self.cfa.iterliveblocks()): firstinst = self.bytecode[block.offset] self.loc = self.loc.with_lineno(firstinst.lineno) self._start_new_block(block.offset) if blkct == 0: # Is first block self.init_first_block() for offset, kws in self.dfainfo.insts: inst = self.bytecode[offset] self.loc = self.loc.with_lineno(inst.lineno) yield inst, kws self._end_current_block() def _start_new_block(self, offset): oldblock = self.current_block self.insert_block(offset) # Ensure the last block is terminated if oldblock is not None and not oldblock.is_terminated: # Handle ending try block. tryblk = self.dfainfo.active_try_block # If there's an active try-block and the handler block is live. if tryblk is not None and tryblk['end'] in self.cfa.graph.nodes(): # We are in a try-block, insert a branch to except-block. # This logic cannot be in self._end_current_block() # because we the non-raising next block-offset. 
branch = ir.Branch( cond=self.get('$exception_check'), truebr=tryblk['end'], falsebr=offset, loc=self.loc, ) oldblock.append(branch) # Handle normal case else: jmp = ir.Jump(offset, loc=self.loc) oldblock.append(jmp) # Get DFA block info self.dfainfo = self.dfa.infos[self.current_block_offset] self.assigner = Assigner() # Check out-of-scope syntactic-block while self.syntax_blocks: if offset >= self.syntax_blocks[-1].exit: self.syntax_blocks.pop() else: break def _end_current_block(self): # Handle try block if not self.current_block.is_terminated: tryblk = self.dfainfo.active_try_block if tryblk is not None: self._insert_exception_check() # Handle normal block cleanup self._remove_unused_temporaries() self._insert_outgoing_phis() def _inject_call(self, func, gv_name, res_name=None): """A helper function to inject a call to *func* which is a python function. Parameters ---------- func : callable The function object to be called. gv_name : str The variable name to be used to store the function object. res_name : str; optional The variable name to be used to store the call result. If ``None``, a name is created automatically. """ gv_fn = ir.Global(gv_name, func, loc=self.loc) self.store(value=gv_fn, name=gv_name, redefine=True) callres = ir.Expr.call(self.get(gv_name), (), (), loc=self.loc) res_name = res_name or '$callres_{}'.format(gv_name) self.store(value=callres, name=res_name, redefine=True) def _insert_try_block_begin(self): """Insert IR-nodes to mark the start of a `try` block. """ self._inject_call(eh.mark_try_block, 'mark_try_block') def _insert_try_block_end(self): """Insert IR-nodes to mark the end of a `try` block. """ self._inject_call(eh.end_try_block, 'end_try_block') def _insert_exception_variables(self): """Insert IR-nodes to initialize the exception variables. """ tryblk = self.dfainfo.active_try_block # Get exception variables endblk = tryblk['end'] edgepushed = self.dfainfo.outgoing_edgepushed.get(endblk) # Note: the last value on the stack is the exception value # Note: due to the current limitation, all exception variables are None if edgepushed: const_none = ir.Const(value=None, loc=self.loc) # For each variable going to the handler block. for var in edgepushed: if var in self.definitions: raise AssertionError( "exception variable CANNOT be defined by other code", ) self.store(value=const_none, name=var) self._exception_vars.add(var) def _insert_exception_check(self): """Called before the end of a block to inject checks if raised. """ self._insert_exception_variables() # Do exception check self._inject_call(eh.exception_check, 'exception_check', '$exception_check') def _remove_unused_temporaries(self): """ Remove assignments to unused temporary variables from the current block. 
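
        For example (illustrative), the bytecode-induced temporary in::

            $1 = f()
            a = $1

        is folded so that the block only contains ``a = f()``.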
""" new_body = [] replaced_var = {} for inst in self.current_block.body: # the same temporary is assigned to multiple variables in cases # like a = b[i] = 1, so need to handle replaced temporaries in # later setitem/setattr nodes if (isinstance(inst, (ir.SetItem, ir.SetAttr)) and inst.value.name in replaced_var): inst.value = replaced_var[inst.value.name] elif isinstance(inst, ir.Assign): if (inst.target.is_temp and inst.target.name in self.assigner.unused_dests): continue # the same temporary is assigned to multiple variables in cases # like a = b = 1, so need to handle replaced temporaries in # later assignments if (isinstance(inst.value, ir.Var) and inst.value.name in replaced_var): inst.value = replaced_var[inst.value.name] new_body.append(inst) continue # chained unpack cases may reuse temporary # e.g. a = (b, c) = (x, y) if (isinstance(inst.value, ir.Expr) and inst.value.op == "exhaust_iter" and inst.value.value.name in replaced_var): inst.value.value = replaced_var[inst.value.value.name] new_body.append(inst) continue # eliminate temporary variables that are assigned to user # variables right after creation. E.g.: # $1 = f(); a = $1 -> a = f() # the temporary variable is not reused elsewhere since CPython # bytecode is stack-based and this pattern corresponds to a pop if (isinstance(inst.value, ir.Var) and inst.value.is_temp and new_body and isinstance(new_body[-1], ir.Assign)): prev_assign = new_body[-1] # _var_used_in_binop check makes sure we don't create a new # inplace binop operation which can fail # (see TestFunctionType.test_in_iter_func_call) if (prev_assign.target.name == inst.value.name and not self._var_used_in_binop( inst.target.name, prev_assign.value)): replaced_var[inst.value.name] = inst.target prev_assign.target = inst.target # replace temp var definition in target with proper defs self.definitions[inst.target.name].remove(inst.value) self.definitions[inst.target.name].extend( self.definitions.pop(inst.value.name) ) continue new_body.append(inst) self.current_block.body = new_body def _var_used_in_binop(self, varname, expr): """return True if 'expr' is a binary expression and 'varname' is used in it as an argument """ return (isinstance(expr, ir.Expr) and expr.op in ("binop", "inplace_binop") and (varname == expr.lhs.name or varname == expr.rhs.name)) def _insert_outgoing_phis(self): """ Add assignments to forward requested outgoing values to subsequent blocks. """ for phiname, varname in self.dfainfo.outgoing_phis.items(): target = self.current_scope.get_or_define(phiname, loc=self.loc) stmt = ir.Assign(value=self.get(varname), target=target, loc=self.loc) self.definitions[target.name].append(stmt.value) if not self.current_block.is_terminated: self.current_block.append(stmt) else: self.current_block.insert_before_terminator(stmt) def get_global_value(self, name): """ Get a global value from the func_global (first) or as a builtins (second). If both failed, return a ir.UNDEFINED. """ try: return self.func_id.func.__globals__[name] except KeyError: return getattr(builtins, name, ir.UNDEFINED) def get_closure_value(self, index): """ Get a value from the cell contained in this function's closure. If not set, return a ir.UNDEFINED. 
""" cell = self.func_id.func.__closure__[index] try: return cell.cell_contents except ValueError: return ir.UNDEFINED @property def current_scope(self): return self.scopes[-1] @property def code_consts(self): return self.bytecode.co_consts @property def code_locals(self): return self.bytecode.co_varnames @property def code_names(self): return self.bytecode.co_names @property def code_cellvars(self): return self.bytecode.co_cellvars @property def code_freevars(self): return self.bytecode.co_freevars def _dispatch(self, inst, kws): assert self.current_block is not None fname = "op_%s" % inst.opname.replace('+', '_') try: fn = getattr(self, fname) except AttributeError: raise NotImplementedError(inst) else: try: return fn(inst, **kws) except errors.NotDefinedError as e: if e.loc is None: loc = self.loc else: loc = e.loc err = errors.NotDefinedError(e.name, loc=loc) if not config.FULL_TRACEBACKS: raise err from None else: raise err # --- Scope operations --- def store(self, value, name, redefine=False): """ Store *value* (a Expr or Var instance) into the variable named *name* (a str object). Returns the target variable. """ if redefine or self.current_block_offset in self.cfa.backbone: rename = not (name in self.code_cellvars) target = self.current_scope.redefine(name, loc=self.loc, rename=rename) else: target = self.current_scope.get_or_define(name, loc=self.loc) if isinstance(value, ir.Var): value = self.assigner.assign(value, target) stmt = ir.Assign(value=value, target=target, loc=self.loc) self.current_block.append(stmt) self.definitions[target.name].append(value) return target def get(self, name): """ Get the variable (a Var instance) with the given *name*. """ # Implicit argument for comprehension starts with '.' # See Parameter class in inspect.py (from Python source) if name[0] == '.' and name[1:].isdigit(): name = 'implicit{}'.format(name[1:]) # Try to simplify the variable lookup by returning an earlier # variable assigned to *name*. var = self.assigner.get_assignment_source(name) if var is None: var = self.current_scope.get(name) return var # --- Block operations --- def insert_block(self, offset, scope=None, loc=None): scope = scope or self.current_scope loc = loc or self.loc blk = ir.Block(scope=scope, loc=loc) self.blocks[offset] = blk self.current_block = blk self.current_block_offset = offset return blk # --- Bytecode handlers --- def op_NOP(self, inst): pass def op_PRINT_ITEM(self, inst, item, printvar, res): item = self.get(item) printgv = ir.Global("print", print, loc=self.loc) self.store(value=printgv, name=printvar) call = ir.Expr.call(self.get(printvar), (item,), (), loc=self.loc) self.store(value=call, name=res) def op_PRINT_NEWLINE(self, inst, printvar, res): printgv = ir.Global("print", print, loc=self.loc) self.store(value=printgv, name=printvar) call = ir.Expr.call(self.get(printvar), (), (), loc=self.loc) self.store(value=call, name=res) def op_UNPACK_SEQUENCE(self, inst, iterable, stores, tupleobj): count = len(stores) # Exhaust the iterable into a tuple-like object tup = ir.Expr.exhaust_iter(value=self.get(iterable), loc=self.loc, count=count) self.store(name=tupleobj, value=tup) # then index the tuple-like object to extract the values for i, st in enumerate(stores): expr = ir.Expr.static_getitem(self.get(tupleobj), index=i, index_var=None, loc=self.loc) self.store(expr, st) def op_FORMAT_VALUE(self, inst, value, res, strvar): """ FORMAT_VALUE(flags): flags argument specifies format spec which is not supported yet. Currently, str() is simply called on the value. 
https://docs.python.org/3/library/dis.html#opcode-FORMAT_VALUE """ value = self.get(value) strgv = ir.Global("str", str, loc=self.loc) self.store(value=strgv, name=strvar) call = ir.Expr.call(self.get(strvar), (value,), (), loc=self.loc) self.store(value=call, name=res) def op_BUILD_STRING(self, inst, strings, tmps): """ BUILD_STRING(count): Concatenates count strings. Required for supporting f-strings. https://docs.python.org/3/library/dis.html#opcode-BUILD_STRING """ count = inst.arg # corner case: f"" if count == 0: const = ir.Const("", loc=self.loc) self.store(const, tmps[-1]) return prev = self.get(strings[0]) for other, tmp in zip(strings[1:], tmps): other = self.get(other) expr = ir.Expr.binop( operator.add, lhs=prev, rhs=other, loc=self.loc ) self.store(expr, tmp) prev = self.get(tmp) def op_BUILD_SLICE(self, inst, start, stop, step, res, slicevar): start = self.get(start) stop = self.get(stop) slicegv = ir.Global("slice", slice, loc=self.loc) self.store(value=slicegv, name=slicevar) if step is None: sliceinst = ir.Expr.call(self.get(slicevar), (start, stop), (), loc=self.loc) else: step = self.get(step) sliceinst = ir.Expr.call(self.get(slicevar), (start, stop, step), (), loc=self.loc) self.store(value=sliceinst, name=res) def op_SLICE_0(self, inst, base, res, slicevar, indexvar, nonevar): base = self.get(base) slicegv = ir.Global("slice", slice, loc=self.loc) self.store(value=slicegv, name=slicevar) nonegv = ir.Const(None, loc=self.loc) self.store(value=nonegv, name=nonevar) none = self.get(nonevar) index = ir.Expr.call(self.get(slicevar), (none, none), (), loc=self.loc) self.store(value=index, name=indexvar) expr = ir.Expr.getitem(base, self.get(indexvar), loc=self.loc) self.store(value=expr, name=res) def op_SLICE_1(self, inst, base, start, nonevar, res, slicevar, indexvar): base = self.get(base) start = self.get(start) nonegv = ir.Const(None, loc=self.loc) self.store(value=nonegv, name=nonevar) none = self.get(nonevar) slicegv = ir.Global("slice", slice, loc=self.loc) self.store(value=slicegv, name=slicevar) index = ir.Expr.call(self.get(slicevar), (start, none), (), loc=self.loc) self.store(value=index, name=indexvar) expr = ir.Expr.getitem(base, self.get(indexvar), loc=self.loc) self.store(value=expr, name=res) def op_SLICE_2(self, inst, base, nonevar, stop, res, slicevar, indexvar): base = self.get(base) stop = self.get(stop) nonegv = ir.Const(None, loc=self.loc) self.store(value=nonegv, name=nonevar) none = self.get(nonevar) slicegv = ir.Global("slice", slice, loc=self.loc) self.store(value=slicegv, name=slicevar) index = ir.Expr.call(self.get(slicevar), (none, stop,), (), loc=self.loc) self.store(value=index, name=indexvar) expr = ir.Expr.getitem(base, self.get(indexvar), loc=self.loc) self.store(value=expr, name=res) def op_SLICE_3(self, inst, base, start, stop, res, slicevar, indexvar): base = self.get(base) start = self.get(start) stop = self.get(stop) slicegv = ir.Global("slice", slice, loc=self.loc) self.store(value=slicegv, name=slicevar) index = ir.Expr.call(self.get(slicevar), (start, stop), (), loc=self.loc) self.store(value=index, name=indexvar) expr = ir.Expr.getitem(base, self.get(indexvar), loc=self.loc) self.store(value=expr, name=res) def op_STORE_SLICE_0(self, inst, base, value, slicevar, indexvar, nonevar): base = self.get(base) slicegv = ir.Global("slice", slice, loc=self.loc) self.store(value=slicegv, name=slicevar) nonegv = ir.Const(None, loc=self.loc) self.store(value=nonegv, name=nonevar) none = self.get(nonevar) index = 
ir.Expr.call(self.get(slicevar), (none, none), (), loc=self.loc) self.store(value=index, name=indexvar) stmt = ir.SetItem(base, self.get(indexvar), self.get(value), loc=self.loc) self.current_block.append(stmt) def op_STORE_SLICE_1(self, inst, base, start, nonevar, value, slicevar, indexvar): base = self.get(base) start = self.get(start) nonegv = ir.Const(None, loc=self.loc) self.store(value=nonegv, name=nonevar) none = self.get(nonevar) slicegv = ir.Global("slice", slice, loc=self.loc) self.store(value=slicegv, name=slicevar) index = ir.Expr.call(self.get(slicevar), (start, none), (), loc=self.loc) self.store(value=index, name=indexvar) stmt = ir.SetItem(base, self.get(indexvar), self.get(value), loc=self.loc) self.current_block.append(stmt) def op_STORE_SLICE_2(self, inst, base, nonevar, stop, value, slicevar, indexvar): base = self.get(base) stop = self.get(stop) nonegv = ir.Const(None, loc=self.loc) self.store(value=nonegv, name=nonevar) none = self.get(nonevar) slicegv = ir.Global("slice", slice, loc=self.loc) self.store(value=slicegv, name=slicevar) index = ir.Expr.call(self.get(slicevar), (none, stop,), (), loc=self.loc) self.store(value=index, name=indexvar) stmt = ir.SetItem(base, self.get(indexvar), self.get(value), loc=self.loc) self.current_block.append(stmt) def op_STORE_SLICE_3(self, inst, base, start, stop, value, slicevar, indexvar): base = self.get(base) start = self.get(start) stop = self.get(stop) slicegv = ir.Global("slice", slice, loc=self.loc) self.store(value=slicegv, name=slicevar) index = ir.Expr.call(self.get(slicevar), (start, stop), (), loc=self.loc) self.store(value=index, name=indexvar) stmt = ir.SetItem(base, self.get(indexvar), self.get(value), loc=self.loc) self.current_block.append(stmt) def op_DELETE_SLICE_0(self, inst, base, slicevar, indexvar, nonevar): base = self.get(base) slicegv = ir.Global("slice", slice, loc=self.loc) self.store(value=slicegv, name=slicevar) nonegv = ir.Const(None, loc=self.loc) self.store(value=nonegv, name=nonevar) none = self.get(nonevar) index = ir.Expr.call(self.get(slicevar), (none, none), (), loc=self.loc) self.store(value=index, name=indexvar) stmt = ir.DelItem(base, self.get(indexvar), loc=self.loc) self.current_block.append(stmt) def op_DELETE_SLICE_1(self, inst, base, start, nonevar, slicevar, indexvar): base = self.get(base) start = self.get(start) nonegv = ir.Const(None, loc=self.loc) self.store(value=nonegv, name=nonevar) none = self.get(nonevar) slicegv = ir.Global("slice", slice, loc=self.loc) self.store(value=slicegv, name=slicevar) index = ir.Expr.call(self.get(slicevar), (start, none), (), loc=self.loc) self.store(value=index, name=indexvar) stmt = ir.DelItem(base, self.get(indexvar), loc=self.loc) self.current_block.append(stmt) def op_DELETE_SLICE_2(self, inst, base, nonevar, stop, slicevar, indexvar): base = self.get(base) stop = self.get(stop) nonegv = ir.Const(None, loc=self.loc) self.store(value=nonegv, name=nonevar) none = self.get(nonevar) slicegv = ir.Global("slice", slice, loc=self.loc) self.store(value=slicegv, name=slicevar) index = ir.Expr.call(self.get(slicevar), (none, stop,), (), loc=self.loc) self.store(value=index, name=indexvar) stmt = ir.DelItem(base, self.get(indexvar), loc=self.loc) self.current_block.append(stmt) def op_DELETE_SLICE_3(self, inst, base, start, stop, slicevar, indexvar): base = self.get(base) start = self.get(start) stop = self.get(stop) slicegv = ir.Global("slice", slice, loc=self.loc) self.store(value=slicegv, name=slicevar) index = ir.Expr.call(self.get(slicevar), (start, 
stop), (), loc=self.loc) self.store(value=index, name=indexvar) stmt = ir.DelItem(base, self.get(indexvar), loc=self.loc) self.current_block.append(stmt) def op_LOAD_FAST(self, inst, res): srcname = self.code_locals[inst.arg] self.store(value=self.get(srcname), name=res) def op_STORE_FAST(self, inst, value): dstname = self.code_locals[inst.arg] value = self.get(value) self.store(value=value, name=dstname) def op_DELETE_FAST(self, inst): dstname = self.code_locals[inst.arg] self.current_block.append(ir.Del(dstname, loc=self.loc)) def op_DUP_TOPX(self, inst, orig, duped): for src, dst in zip(orig, duped): self.store(value=self.get(src), name=dst) op_DUP_TOP = op_DUP_TOPX op_DUP_TOP_TWO = op_DUP_TOPX def op_STORE_ATTR(self, inst, target, value): attr = self.code_names[inst.arg] sa = ir.SetAttr(target=self.get(target), value=self.get(value), attr=attr, loc=self.loc) self.current_block.append(sa) def op_DELETE_ATTR(self, inst, target): attr = self.code_names[inst.arg] sa = ir.DelAttr(target=self.get(target), attr=attr, loc=self.loc) self.current_block.append(sa) def op_LOAD_ATTR(self, inst, item, res): item = self.get(item) attr = self.code_names[inst.arg] getattr = ir.Expr.getattr(item, attr, loc=self.loc) self.store(getattr, res) def op_LOAD_CONST(self, inst, res): value = self.code_consts[inst.arg] if isinstance(value, tuple): st = [] for x in value: nm = '$const_%s' % str(x) val_const = ir.Const(x, loc=self.loc) target = self.store(val_const, name=nm, redefine=True) st.append(target) const = ir.Expr.build_tuple(st, loc=self.loc) elif isinstance(value, frozenset): st = [] for x in value: nm = '$const_%s' % str(x) val_const = ir.Const(x, loc=self.loc) target = self.store(val_const, name=nm, redefine=True) st.append(target) const = ir.Expr.build_set(st, loc=self.loc) else: const = ir.Const(value, loc=self.loc) self.store(const, res) def op_LOAD_GLOBAL(self, inst, res): name = self.code_names[inst.arg] value = self.get_global_value(name) gl = ir.Global(name, value, loc=self.loc) self.store(gl, res) def op_LOAD_DEREF(self, inst, res): n_cellvars = len(self.code_cellvars) if inst.arg < n_cellvars: name = self.code_cellvars[inst.arg] gl = self.get(name) else: idx = inst.arg - n_cellvars name = self.code_freevars[idx] value = self.get_closure_value(idx) gl = ir.FreeVar(idx, name, value, loc=self.loc) self.store(gl, res) def op_STORE_DEREF(self, inst, value): n_cellvars = len(self.code_cellvars) if inst.arg < n_cellvars: dstname = self.code_cellvars[inst.arg] else: dstname = self.code_freevars[inst.arg - n_cellvars] value = self.get(value) self.store(value=value, name=dstname) def op_SETUP_LOOP(self, inst): assert self.blocks[inst.offset] is self.current_block loop = ir.Loop(inst.offset, exit=(inst.next + inst.arg)) self.syntax_blocks.append(loop) def op_SETUP_WITH(self, inst, contextmanager, exitfn=None): assert self.blocks[inst.offset] is self.current_block # Handle with exitpt = inst.next + inst.arg wth = ir.With(inst.offset, exit=exitpt) self.syntax_blocks.append(wth) ctxmgr = self.get(contextmanager) self.current_block.append(ir.EnterWith(contextmanager=ctxmgr, begin=inst.offset, end=exitpt, loc=self.loc,)) # Store exit fn exit_fn_obj = ir.Const(None, loc=self.loc) self.store(value=exit_fn_obj, name=exitfn) def op_SETUP_EXCEPT(self, inst): # Removed since python3.8 self._insert_try_block_begin() def op_SETUP_FINALLY(self, inst): self._insert_try_block_begin() def op_WITH_CLEANUP(self, inst): "no-op" def op_WITH_CLEANUP_START(self, inst): "no-op" def op_WITH_CLEANUP_FINISH(self, inst): "no-op" 
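# --- Added illustrative note (not part of the original source) ---
# The WITH_CLEANUP* handlers above can be no-ops because Numba rebuilds the
# whole `with` construct from the ir.EnterWith marker emitted in
# op_SETUP_WITH and the ir.PopBlock marker emitted in op_POP_BLOCK, rather
# than from CPython's cleanup opcodes. A minimal sketch of the bytecode this
# covers (assuming CPython 3.7-3.9 opcode names):
#
#     import dis
#
#     def f(cm):
#         with cm:
#             pass
#
#     dis.dis(f)
#     # SETUP_WITH ... POP_BLOCK ... WITH_CLEANUP_START / WITH_CLEANUP_FINISH
#     # Only SETUP_WITH and POP_BLOCK carry information that is used here.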
def op_END_FINALLY(self, inst): "no-op" def op_BEGIN_FINALLY(self, inst, temps): # The *temps* are the exception variables const_none = ir.Const(None, loc=self.loc) for tmp in temps: # Set to None for now self.store(const_none, name=tmp) self._exception_vars.add(tmp) if PYVERSION < (3, 6): def op_CALL_FUNCTION(self, inst, func, args, kws, res, vararg): func = self.get(func) args = [self.get(x) for x in args] if vararg is not None: vararg = self.get(vararg) # Process keywords keyvalues = [] removethese = [] for k, v in kws: k, v = self.get(k), self.get(v) for inst in self.current_block.body: if isinstance(inst, ir.Assign) and inst.target is k: removethese.append(inst) keyvalues.append((inst.value.value, v)) # Remove keyword constant statements for inst in removethese: self.current_block.remove(inst) expr = ir.Expr.call(func, args, keyvalues, loc=self.loc, vararg=vararg) self.store(expr, res) op_CALL_FUNCTION_VAR = op_CALL_FUNCTION else: def op_CALL_FUNCTION(self, inst, func, args, res): func = self.get(func) args = [self.get(x) for x in args] expr = ir.Expr.call(func, args, (), loc=self.loc) self.store(expr, res) def op_CALL_FUNCTION_KW(self, inst, func, args, names, res): func = self.get(func) args = [self.get(x) for x in args] # Find the names const names = self.get(names) for inst in self.current_block.body: if isinstance(inst, ir.Assign) and inst.target is names: self.current_block.remove(inst) # scan up the block looking for the values, remove them # and find their name strings named_items = [] for x in inst.value.items: for y in self.current_block.body[::-1]: if x == y.target: self.current_block.remove(y) named_items.append(y.value.value) break keys = named_items break nkeys = len(keys) posvals = args[:-nkeys] kwvals = args[-nkeys:] keyvalues = list(zip(keys, kwvals)) expr = ir.Expr.call(func, posvals, keyvalues, loc=self.loc) self.store(expr, res) def op_CALL_FUNCTION_EX(self, inst, func, vararg, res): func = self.get(func) vararg = self.get(vararg) expr = ir.Expr.call(func, [], [], loc=self.loc, vararg=vararg) self.store(expr, res) def _build_tuple_unpack(self, inst, tuples, temps, is_assign): first = self.get(tuples[0]) if is_assign: # it's assign-like, defer handling to an intrinsic that will have # type information. # Can deal with tuples only, i.e. y = (*x,), where x = <tuple> gv_name = "unpack_single_tuple" gv_fn = ir.Global(gv_name, unpack_single_tuple, loc=self.loc,) self.store(value=gv_fn, name=gv_name, redefine=True) exc = ir.Expr.call(self.get(gv_name), args=(first,), kws=(), loc=self.loc,) self.store(exc, temps[0]) else: for other, tmp in zip(map(self.get, tuples[1:]), temps): out = ir.Expr.binop(fn=operator.add, lhs=first, rhs=other, loc=self.loc) self.store(out, tmp) first = self.get(tmp) def op_BUILD_TUPLE_UNPACK_WITH_CALL(self, inst, tuples, temps, is_assign): # just unpack the input tuple; the call inst will be handled afterwards self._build_tuple_unpack(inst, tuples, temps, is_assign) def op_BUILD_TUPLE_UNPACK(self, inst, tuples, temps, is_assign): self._build_tuple_unpack(inst, tuples, temps, is_assign) def op_LIST_TO_TUPLE(self, inst, const_list, res): expr = ir.Expr.dummy('list_to_tuple', (const_list,), loc=self.loc) self.store(expr, res) def op_BUILD_CONST_KEY_MAP(self, inst, keys, keytmps, values, res): # Unpack the constant key-tuple and reuse build_map, which takes # a sequence of (key, value) pairs. keyvar = self.get(keys) # TODO: refactor this pattern; it occurs several times.
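# Added illustrative sketch (not part of the original source): the scan
# below recovers the constant key tuple that CPython pushes for
# BUILD_CONST_KEY_MAP. For example (output abridged, CPython 3.6+):
#
#     import dis
#     dis.dis(compile("{'a': x, 'b': y}", "<expr>", "eval"))
#     #   LOAD_NAME            x
#     #   LOAD_NAME            y
#     #   LOAD_CONST           ('a', 'b')    <- key tuple located below
#     #   BUILD_CONST_KEY_MAP  2
#
# The LOAD_CONST of ('a', 'b') arrives in the IR as a build_tuple over
# per-key constants (see op_LOAD_CONST), which is why the loop removes the
# tuple assignment and then walks the block backwards to collect each key.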
for inst in self.current_block.body: if isinstance(inst, ir.Assign) and inst.target is keyvar: self.current_block.remove(inst) # scan up the block looking for the values, remove them # and find their name strings named_items = [] for x in inst.value.items: for y in self.current_block.body[::-1]: if x == y.target: self.current_block.remove(y) named_items.append(y.value.value) break keytup = named_items break assert len(keytup) == len(values) keyconsts = [ir.Const(value=x, loc=self.loc) for x in keytup] for kval, tmp in zip(keyconsts, keytmps): self.store(kval, tmp) items = list(zip(map(self.get, keytmps), map(self.get, values))) # sort out literal values literal_items = [] for v in values: defns = self.definitions[v] if len(defns) != 1: break defn = defns[0] if not isinstance(defn, ir.Const): break literal_items.append(defn.value) def resolve_const(v): defns = self.definitions[v] if len(defns) != 1: return _UNKNOWN_VALUE(self.get(v).name) defn = defns[0] if not isinstance(defn, ir.Const): return _UNKNOWN_VALUE(self.get(v).name) return defn.value if len(literal_items) != len(values): literal_dict = {x: resolve_const(y) for x, y in zip(keytup, values)} else: literal_dict = {x: y for x, y in zip(keytup, literal_items)} # to deal with things like {'a': 1, 'a': 'cat', 'b': 2, 'a': 2j} # store the index of the actual used value for a given key; this is # used when lowering to pull the right value out into the tuple repr # of a mixed value type dictionary. value_indexes = {} for i, k in enumerate(keytup): value_indexes[k] = i expr = ir.Expr.build_map(items=items, size=2, literal_value=literal_dict, value_indexes=value_indexes, loc=self.loc) self.store(expr, res) def op_GET_ITER(self, inst, value, res): expr = ir.Expr.getiter(value=self.get(value), loc=self.loc) self.store(expr, res) def op_FOR_ITER(self, inst, iterator, pair, indval, pred): """ Start a new block at this instruction.
""" assert inst.offset in self.blocks, "FOR_ITER must be block head" # Emit code val = self.get(iterator) pairval = ir.Expr.iternext(value=val, loc=self.loc) self.store(pairval, pair) iternext = ir.Expr.pair_first(value=self.get(pair), loc=self.loc) self.store(iternext, indval) isvalid = ir.Expr.pair_second(value=self.get(pair), loc=self.loc) self.store(isvalid, pred) # Conditional jump br = ir.Branch(cond=self.get(pred), truebr=inst.next, falsebr=inst.get_jump_target(), loc=self.loc) self.current_block.append(br) def op_BINARY_SUBSCR(self, inst, target, index, res): index = self.get(index) target = self.get(target) expr = ir.Expr.getitem(target, index=index, loc=self.loc) self.store(expr, res) def op_STORE_SUBSCR(self, inst, target, index, value): index = self.get(index) target = self.get(target) value = self.get(value) stmt = ir.SetItem(target=target, index=index, value=value, loc=self.loc) self.current_block.append(stmt) def op_DELETE_SUBSCR(self, inst, target, index): index = self.get(index) target = self.get(target) stmt = ir.DelItem(target=target, index=index, loc=self.loc) self.current_block.append(stmt) def op_BUILD_TUPLE(self, inst, items, res): expr = ir.Expr.build_tuple(items=[self.get(x) for x in items], loc=self.loc) self.store(expr, res) def op_BUILD_LIST(self, inst, items, res): expr = ir.Expr.build_list(items=[self.get(x) for x in items], loc=self.loc) self.store(expr, res) def op_BUILD_SET(self, inst, items, res): expr = ir.Expr.build_set(items=[self.get(x) for x in items], loc=self.loc) self.store(expr, res) def op_SET_UPDATE(self, inst, target, value, updatevar, res): target = self.get(target) value = self.get(value) updateattr = ir.Expr.getattr(target, 'update', loc=self.loc) self.store(value=updateattr, name=updatevar) updateinst = ir.Expr.call(self.get(updatevar), (value,), (), loc=self.loc) self.store(value=updateinst, name=res) def op_BUILD_MAP(self, inst, items, size, res): got_items = [(self.get(k), self.get(v)) for k, v in items] # sort out literal values, this is a bit contrived but is to handle # situations like `{1: 10, 1: 10}` where the size of the literal dict # is smaller than the definition def get_literals(target): literal_items = [] values = [self.get(v.name) for v in target] for v in values: defns = self.definitions[v.name] if len(defns) != 1: break defn = defns[0] if not isinstance(defn, ir.Const): break literal_items.append(defn.value) return literal_items literal_keys = get_literals(x[0] for x in got_items) literal_values = get_literals(x[1] for x in got_items) has_literal_keys = len(literal_keys) == len(got_items) has_literal_values = len(literal_values) == len(got_items) value_indexes = {} if not has_literal_keys and not has_literal_values: literal_dict = None elif has_literal_keys and not has_literal_values: literal_dict = {x: _UNKNOWN_VALUE(y[1]) for x, y in zip(literal_keys, got_items)} for i, k in enumerate(literal_keys): value_indexes[k] = i else: literal_dict = {x: y for x, y in zip(literal_keys, literal_values)} for i, k in enumerate(literal_keys): value_indexes[k] = i expr = ir.Expr.build_map(items=got_items, size=size, literal_value=literal_dict, value_indexes=value_indexes, loc=self.loc) self.store(expr, res) def op_STORE_MAP(self, inst, dct, key, value): stmt = ir.StoreMap(dct=self.get(dct), key=self.get(key), value=self.get(value), loc=self.loc) self.current_block.append(stmt) def op_UNARY_NEGATIVE(self, inst, value, res): value = self.get(value) expr = ir.Expr.unary('-', value=value, loc=self.loc) return self.store(expr, res) def 
op_UNARY_POSITIVE(self, inst, value, res): value = self.get(value) expr = ir.Expr.unary('+', value=value, loc=self.loc) return self.store(expr, res) def op_UNARY_INVERT(self, inst, value, res): value = self.get(value) expr = ir.Expr.unary('~', value=value, loc=self.loc) return self.store(expr, res) def op_UNARY_NOT(self, inst, value, res): value = self.get(value) expr = ir.Expr.unary('not', value=value, loc=self.loc) return self.store(expr, res) def _binop(self, op, lhs, rhs, res): op = BINOPS_TO_OPERATORS[op] lhs = self.get(lhs) rhs = self.get(rhs) expr = ir.Expr.binop(op, lhs=lhs, rhs=rhs, loc=self.loc) self.store(expr, res) def _inplace_binop(self, op, lhs, rhs, res): immuop = BINOPS_TO_OPERATORS[op] op = INPLACE_BINOPS_TO_OPERATORS[op + '='] lhs = self.get(lhs) rhs = self.get(rhs) expr = ir.Expr.inplace_binop(op, immuop, lhs=lhs, rhs=rhs, loc=self.loc) self.store(expr, res) def op_BINARY_ADD(self, inst, lhs, rhs, res): self._binop('+', lhs, rhs, res) def op_BINARY_SUBTRACT(self, inst, lhs, rhs, res): self._binop('-', lhs, rhs, res) def op_BINARY_MULTIPLY(self, inst, lhs, rhs, res): self._binop('*', lhs, rhs, res) def op_BINARY_DIVIDE(self, inst, lhs, rhs, res): self._binop('/?', lhs, rhs, res) def op_BINARY_TRUE_DIVIDE(self, inst, lhs, rhs, res): self._binop('/', lhs, rhs, res) def op_BINARY_FLOOR_DIVIDE(self, inst, lhs, rhs, res): self._binop('//', lhs, rhs, res) def op_BINARY_MODULO(self, inst, lhs, rhs, res): self._binop('%', lhs, rhs, res) def op_BINARY_POWER(self, inst, lhs, rhs, res): self._binop('**', lhs, rhs, res) def op_BINARY_MATRIX_MULTIPLY(self, inst, lhs, rhs, res): self._binop('@', lhs, rhs, res) def op_BINARY_LSHIFT(self, inst, lhs, rhs, res): self._binop('<<', lhs, rhs, res) def op_BINARY_RSHIFT(self, inst, lhs, rhs, res): self._binop('>>', lhs, rhs, res) def op_BINARY_AND(self, inst, lhs, rhs, res): self._binop('&', lhs, rhs, res) def op_BINARY_OR(self, inst, lhs, rhs, res): self._binop('|', lhs, rhs, res) def op_BINARY_XOR(self, inst, lhs, rhs, res): self._binop('^', lhs, rhs, res) def op_INPLACE_ADD(self, inst, lhs, rhs, res): self._inplace_binop('+', lhs, rhs, res) def op_INPLACE_SUBTRACT(self, inst, lhs, rhs, res): self._inplace_binop('-', lhs, rhs, res) def op_INPLACE_MULTIPLY(self, inst, lhs, rhs, res): self._inplace_binop('*', lhs, rhs, res) def op_INPLACE_DIVIDE(self, inst, lhs, rhs, res): self._inplace_binop('/?', lhs, rhs, res) def op_INPLACE_TRUE_DIVIDE(self, inst, lhs, rhs, res): self._inplace_binop('/', lhs, rhs, res) def op_INPLACE_FLOOR_DIVIDE(self, inst, lhs, rhs, res): self._inplace_binop('//', lhs, rhs, res) def op_INPLACE_MODULO(self, inst, lhs, rhs, res): self._inplace_binop('%', lhs, rhs, res) def op_INPLACE_POWER(self, inst, lhs, rhs, res): self._inplace_binop('**', lhs, rhs, res) def op_INPLACE_MATRIX_MULTIPLY(self, inst, lhs, rhs, res): self._inplace_binop('@', lhs, rhs, res) def op_INPLACE_LSHIFT(self, inst, lhs, rhs, res): self._inplace_binop('<<', lhs, rhs, res) def op_INPLACE_RSHIFT(self, inst, lhs, rhs, res): self._inplace_binop('>>', lhs, rhs, res) def op_INPLACE_AND(self, inst, lhs, rhs, res): self._inplace_binop('&', lhs, rhs, res) def op_INPLACE_OR(self, inst, lhs, rhs, res): self._inplace_binop('|', lhs, rhs, res) def op_INPLACE_XOR(self, inst, lhs, rhs, res): self._inplace_binop('^', lhs, rhs, res) def op_JUMP_ABSOLUTE(self, inst): jmp = ir.Jump(inst.get_jump_target(), loc=self.loc) self.current_block.append(jmp) def op_JUMP_FORWARD(self, inst): jmp = ir.Jump(inst.get_jump_target(), loc=self.loc) self.current_block.append(jmp) def 
op_POP_BLOCK(self, inst, kind=None): if kind is None: self.syntax_blocks.pop() elif kind == 'with': d = ir.PopBlock(loc=self.loc) self.current_block.append(d) elif kind == 'try': self._insert_try_block_end() def op_RETURN_VALUE(self, inst, retval, castval): self.store(ir.Expr.cast(self.get(retval), loc=self.loc), castval) ret = ir.Return(self.get(castval), loc=self.loc) self.current_block.append(ret) def op_COMPARE_OP(self, inst, lhs, rhs, res): op = dis.cmp_op[inst.arg] if op == 'in' or op == 'not in': lhs, rhs = rhs, lhs if op == 'not in': self._binop('in', lhs, rhs, res) tmp = self.get(res) out = ir.Expr.unary('not', value=tmp, loc=self.loc) self.store(out, res) elif op == 'exception match': gv_fn = ir.Global( "exception_match", eh.exception_match, loc=self.loc, ) exc_match_name = '$exc_match' self.store(value=gv_fn, name=exc_match_name, redefine=True) lhs = self.get(lhs) rhs = self.get(rhs) exc = ir.Expr.call( self.get(exc_match_name), args=(lhs, rhs), kws=(), loc=self.loc, ) self.store(exc, res) else: self._binop(op, lhs, rhs, res) def op_IS_OP(self, inst, lhs, rhs, res): # invert if op case is 1 op = 'is not' if inst.arg == 1 else 'is' self._binop(op, lhs, rhs, res) def op_CONTAINS_OP(self, inst, lhs, rhs, res): lhs, rhs = rhs, lhs self._binop('in', lhs, rhs, res) # invert if op case is 1 if inst.arg == 1: tmp = self.get(res) out = ir.Expr.unary('not', value=tmp, loc=self.loc) self.store(out, res) def op_BREAK_LOOP(self, inst, end=None): if end is None: loop = self.syntax_blocks[-1] assert isinstance(loop, ir.Loop) end = loop.exit jmp = ir.Jump(target=end, loc=self.loc) self.current_block.append(jmp) def _op_JUMP_IF(self, inst, pred, iftrue): brs = { True: inst.get_jump_target(), False: inst.next, } truebr = brs[iftrue] falsebr = brs[not iftrue] name = "bool%s" % (inst.offset) gv_fn = ir.Global("bool", bool, loc=self.loc) self.store(value=gv_fn, name=name) callres = ir.Expr.call(self.get(name), (self.get(pred),), (), loc=self.loc) pname = "$%spred" % (inst.offset) predicate = self.store(value=callres, name=pname) bra = ir.Branch(cond=predicate, truebr=truebr, falsebr=falsebr, loc=self.loc) self.current_block.append(bra) def op_JUMP_IF_FALSE(self, inst, pred): self._op_JUMP_IF(inst, pred=pred, iftrue=False) def op_JUMP_IF_TRUE(self, inst, pred): self._op_JUMP_IF(inst, pred=pred, iftrue=True) def op_POP_JUMP_IF_FALSE(self, inst, pred): self._op_JUMP_IF(inst, pred=pred, iftrue=False) def op_POP_JUMP_IF_TRUE(self, inst, pred): self._op_JUMP_IF(inst, pred=pred, iftrue=True) def op_JUMP_IF_FALSE_OR_POP(self, inst, pred): self._op_JUMP_IF(inst, pred=pred, iftrue=False) def op_JUMP_IF_TRUE_OR_POP(self, inst, pred): self._op_JUMP_IF(inst, pred=pred, iftrue=True) def op_JUMP_IF_NOT_EXC_MATCH(self, inst, pred, tos, tos1): truebr = inst.next falsebr = inst.get_jump_target() gv_fn = ir.Global( "exception_match", eh.exception_match, loc=self.loc, ) exc_match_name = '$exc_match' self.store(value=gv_fn, name=exc_match_name, redefine=True) lhs = self.get(tos1) rhs = self.get(tos) exc = ir.Expr.call( self.get(exc_match_name), args=(lhs, rhs), kws=(), loc=self.loc, ) predicate = self.store(exc, pred) bra = ir.Branch(cond=predicate, truebr=truebr, falsebr=falsebr, loc=self.loc) self.current_block.append(bra) def op_RERAISE(self, inst, exc): # Numba can't handle this case and it's caught else where, this is a # runtime guard in case this is reached by unknown means. 
msg = (f"Unreachable condition reached (op code RERAISE executed)" f"{error_extras['reportable']}") stmt = ir.StaticRaise(AssertionError, (msg,), self.loc) self.current_block.append(stmt) def op_RAISE_VARARGS(self, inst, exc): if exc is not None: exc = self.get(exc) tryblk = self.dfainfo.active_try_block if tryblk is not None: # In a try block stmt = ir.TryRaise(exception=exc, loc=self.loc) self.current_block.append(stmt) self._insert_try_block_end() self.current_block.append(ir.Jump(tryblk['end'], loc=self.loc)) else: # Not in a try block stmt = ir.Raise(exception=exc, loc=self.loc) self.current_block.append(stmt) def op_YIELD_VALUE(self, inst, value, res): # initialize index to None. it's being set later in post-processing index = None inst = ir.Yield(value=self.get(value), index=index, loc=self.loc) return self.store(inst, res) def op_MAKE_FUNCTION(self, inst, name, code, closure, annotations, kwdefaults, defaults, res): # annotations are ignored by numba but useful for static analysis # re. https://github.com/numba/numba/issues/7269 if kwdefaults is not None: msg = "op_MAKE_FUNCTION with kwdefaults is not implemented" raise NotImplementedError(msg) if defaults: if isinstance(defaults, tuple): defaults = tuple([self.get(name) for name in defaults]) else: defaults = self.get(defaults) assume_code_const = self.definitions[code][0] if not isinstance(assume_code_const, ir.Const): msg = ( "Unsupported use of closure. " "Probably caused by complex control-flow constructs; " "e.g. try-except" ) raise errors.UnsupportedError(msg, loc=self.loc) fcode = assume_code_const.value if name: name = self.get(name) if closure: closure = self.get(closure) expr = ir.Expr.make_function(name, fcode, closure, defaults, self.loc) self.store(expr, res) def op_MAKE_CLOSURE(self, inst, name, code, closure, annotations, kwdefaults, defaults, res): self.op_MAKE_FUNCTION(inst, name, code, closure, annotations, kwdefaults, defaults, res) def op_LOAD_CLOSURE(self, inst, res): n_cellvars = len(self.code_cellvars) if inst.arg < n_cellvars: name = self.code_cellvars[inst.arg] try: gl = self.get(name) except NotDefinedError: msg = "Unsupported use of op_LOAD_CLOSURE encountered" raise NotImplementedError(msg) else: idx = inst.arg - n_cellvars name = self.code_freevars[idx] value = self.get_closure_value(idx) gl = ir.FreeVar(idx, name, value, loc=self.loc) self.store(gl, res) def op_LIST_APPEND(self, inst, target, value, appendvar, res): target = self.get(target) value = self.get(value) appendattr = ir.Expr.getattr(target, 'append', loc=self.loc) self.store(value=appendattr, name=appendvar) appendinst = ir.Expr.call(self.get(appendvar), (value,), (), loc=self.loc) self.store(value=appendinst, name=res) def op_LIST_EXTEND(self, inst, target, value, extendvar, res): target = self.get(target) value = self.get(value) # If the statements between the current instruction and the target # are N * consts followed by build_tuple AND the target has no items, # it's a situation where a list is being statically initialised, rewrite # the build_tuple as a build_list, drop the extend, and wire up the # target as the result from the build_tuple that's been rewritten. 
# See if this is the first statement in a block; if so it's probably from # control flow in a tuple unpack like: # `(*(1, (2,) if predicate else (3,)))` # this cannot be handled at present, so raise msg = ("An unsupported bytecode sequence has been encountered: " "op_LIST_EXTEND at the start of a block.\n\nThis could be " "due to the use of a branch in a tuple unpacking statement.") if not self.current_block.body: raise errors.UnsupportedError(msg) # is the last emitted statement a build_tuple? stmt = self.current_block.body[-1] ok = isinstance(stmt.value, ir.Expr) and stmt.value.op == "build_tuple" # check statements from self.current_block.body[-1] through to target, # make sure they are consts build_empty_list = None if ok: for stmt in reversed(self.current_block.body[:-1]): if not isinstance(stmt, ir.Assign): ok = False break # if it's not a const, it needs to be the `build_list` for the # target, else it's something else we don't know about so just # bail if isinstance(stmt.value, ir.Const): continue # it's not a const, check for target elif isinstance(stmt.value, ir.Expr) and stmt.target == target: build_empty_list = stmt # it's only ok to do this if the target has no initializer # already ok = not stmt.value.items break else: ok = False break if ok and build_empty_list is None: raise errors.UnsupportedError(msg) if ok: stmts = self.current_block.body build_tuple_asgn = self.current_block.body[-1] # move the build list to the last issued statement stmts.append(stmts.pop(stmts.index(build_empty_list))) # fix the build list build_tuple = build_tuple_asgn.value build_list = build_empty_list.value build_list.items = build_tuple.items else: # it's just a list extend with no static init, let it be extendattr = ir.Expr.getattr(target, 'extend', loc=self.loc) self.store(value=extendattr, name=extendvar) extendinst = ir.Expr.call(self.get(extendvar), (value,), (), loc=self.loc) self.store(value=extendinst, name=res) def op_MAP_ADD(self, inst, target, key, value, setitemvar, res): target = self.get(target) key = self.get(key) value = self.get(value) setitemattr = ir.Expr.getattr(target, '__setitem__', loc=self.loc) self.store(value=setitemattr, name=setitemvar) appendinst = ir.Expr.call(self.get(setitemvar), (key, value,), (), loc=self.loc) self.store(value=appendinst, name=res) def op_LOAD_ASSERTION_ERROR(self, inst, res): gv_fn = ir.Global("AssertionError", AssertionError, loc=self.loc) self.store(value=gv_fn, name=res) # NOTE: The LOAD_METHOD opcode is implemented as a LOAD_ATTR for ease; # however, this means a new object (the bound-method instance) could be # created. Conversely, with a pure LOAD_METHOD no intermediary is present # and it is essentially a pointer grab forwarded to CALL_METHOD. The net # outcome is that the implementation in Numba produces the same result, # but in object mode it may run more slowly than it would in CPython.
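# Added illustrative sketch (not part of the original source): with the
# LOAD_ATTR-based implementation, a call such as `obj.method(x)` is
# interpreted roughly as
#
#     $attr = getattr(obj, 'method')   # may materialise a bound method
#     $res  = call $attr(x)
#
# whereas CPython's LOAD_METHOD/CALL_METHOD pair can avoid creating the
# bound-method object. The results are identical; only object-mode
# performance may differ, as the note above explains.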
def op_LOAD_METHOD(self, *args, **kws): self.op_LOAD_ATTR(*args, **kws) def op_CALL_METHOD(self, *args, **kws): self.op_CALL_FUNCTION(*args, **kws) numba-0.55.1/numba/core/intrinsics.py000664 000000 000000 00000003730 14174536160 017467 0ustar00rootroot000000 000000 """ LLVM pass that converts intrinsics into other math calls """ from llvmlite import ir class _DivmodFixer(ir.Visitor): def visit_Instruction(self, instr): if instr.type == ir.IntType(64): if instr.opname in ['srem', 'urem', 'sdiv', 'udiv']: name = 'numba_{op}'.format(op=instr.opname) fn = self.module.globals.get(name) # Declare the function if it doesn't already exist if fn is None: opty = instr.type sdivfnty = ir.FunctionType(opty, [opty, opty]) fn = ir.Function(self.module, sdivfnty, name=name) # Replace the operation with a call to the builtin repl = ir.CallInstr(parent=instr.parent, func=fn, args=instr.operands, name=instr.name) instr.parent.replace(instr, repl) def fix_divmod(mod): """Replace division and remainder instructions with calls to builtins """ _DivmodFixer().visit(mod) INTR_TO_CMATH = { "llvm.pow.f32": "powf", "llvm.pow.f64": "pow", "llvm.sin.f32": "sinf", "llvm.sin.f64": "sin", "llvm.cos.f32": "cosf", "llvm.cos.f64": "cos", "llvm.sqrt.f32": "sqrtf", "llvm.sqrt.f64": "sqrt", "llvm.exp.f32": "expf", "llvm.exp.f64": "exp", "llvm.log.f32": "logf", "llvm.log.f64": "log", "llvm.log10.f32": "log10f", "llvm.log10.f64": "log10", "llvm.fabs.f32": "fabsf", "llvm.fabs.f64": "fabs", "llvm.floor.f32": "floorf", "llvm.floor.f64": "floor", "llvm.ceil.f32": "ceilf", "llvm.ceil.f64": "ceil", "llvm.trunc.f32": "truncf", "llvm.trunc.f64": "trunc", } OTHER_CMATHS = ''' tan tanf sinh sinhf cosh coshf tanh tanhf asin asinf acos acosf atan atanf atan2 atan2f asinh asinhf acosh acoshf atanh atanhf expm1 expm1f log1p log1pf log10 log10f fmod fmodf round roundf '''.split() INTR_MATH = frozenset(INTR_TO_CMATH.values()) | frozenset(OTHER_CMATHS) numba-0.55.1/numba/core/ir.py000664 000000 000000 00000141015 14174536160 015713 0ustar00rootroot000000 000000 from collections import defaultdict import copy import itertools import os import linecache import pprint import re import sys import operator from types import FunctionType, BuiltinFunctionType from functools import total_ordering from io import StringIO from numba.core import errors, config from numba.core.utils import (BINOPS_TO_OPERATORS, INPLACE_BINOPS_TO_OPERATORS, UNARY_BUITINS_TO_OPERATORS, OPERATORS_TO_BUILTINS) from numba.core.errors import (NotDefinedError, RedefinedError, VerificationError, ConstantInferenceError) from numba.core import consts # terminal color markup _termcolor = errors.termcolor() class Loc(object): """Source location """ _defmatcher = re.compile(r'def\s+(\w+)\(.*') def __init__(self, filename, line, col=None, maybe_decorator=False): """ Arguments: filename - name of the file line - line in file col - column maybe_decorator - Set to True if location is likely a jit decorator """ self.filename = filename self.line = line self.col = col self.lines = None # the source lines from the linecache self.maybe_decorator = maybe_decorator def __eq__(self, other): # equivalence is solely based on filename, line and col if type(self) is not type(other): return False if self.filename != other.filename: return False if self.line != other.line: return False if self.col != other.col: return False return True def __ne__(self, other): return not self.__eq__(other) @classmethod def from_function_id(cls, func_id): return cls(func_id.filename, func_id.firstlineno, maybe_decorator=True)
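# Added usage sketch (not part of the original source; values are
# illustrative):
#
#     loc = Loc(filename="example.py", line=3)
#     str(loc)     # -> "example.py (3)"   (no column given)
#     loc.short()  # -> "example.py:3"
#
# Equality ignores the cached `lines` and the `maybe_decorator` hint; as
# __eq__ above shows, it is based solely on filename, line and col.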
def __repr__(self): return "Loc(filename=%s, line=%s, col=%s)" % (self.filename, self.line, self.col) def __str__(self): if self.col is not None: return "%s (%s:%s)" % (self.filename, self.line, self.col) else: return "%s (%s)" % (self.filename, self.line) def _find_definition(self): # try and find a def, go backwards from error line fn_name = None lines = self.get_lines() for x in reversed(lines[:self.line - 1]): # the strip and startswith is to handle user code with commented out # 'def' or use of 'def' in a docstring. if x.strip().startswith('def '): fn_name = x break return fn_name def _raw_function_name(self): defn = self._find_definition() if defn: return self._defmatcher.match(defn.strip()).groups()[0] else: # Probably exec() or REPL. return None def get_lines(self): if self.lines is None: self.lines = linecache.getlines(self._get_path()) return self.lines def _get_path(self): path = None try: # Try to get a relative path # ipython/jupyter input just returns as self.filename path = os.path.relpath(self.filename) except ValueError: # Fallback to absolute path if error occurred in getting the # relative path. # This may happen on windows if the drive is different path = os.path.abspath(self.filename) return path def strformat(self, nlines_up=2): lines = self.get_lines() use_line = self.line if self.maybe_decorator: # try and sort out a better `loc`, if it's suspected that this loc # points at a jit decorator by virtue of # `__code__.co_firstlineno` # get lines, add a dummy entry at the start as lines count from # 1 but list index counts from 0 tmplines = [''] + lines if lines and use_line and 'def ' not in tmplines[use_line]: # look forward 10 lines, unlikely anyone managed to stretch # a jit call declaration over >10 lines?! min_line = max(0, use_line) max_line = use_line + 10 selected = tmplines[min_line : max_line] index = 0 for idx, x in enumerate(selected): if 'def ' in x: index = idx break use_line = use_line + index ret = [] # accumulates output if lines and use_line: def count_spaces(string): spaces = 0 for x in itertools.takewhile(str.isspace, str(string)): spaces += 1 return spaces # A few places in the code still use no `loc` or default to line 1 # this is often in places where exceptions are used for the purposes # of flow control. As a result max is in use to prevent slice from # `[negative: positive]` selected = lines[max(0, use_line - nlines_up):use_line] # see if selected contains a definition def_found = False for x in selected: if 'def ' in x: def_found = True # no definition found, try and find one if not def_found: # try and find a def, go backwards from error line fn_name = None for x in reversed(lines[:use_line - 1]): if 'def ' in x: fn_name = x break if fn_name: ret.append(fn_name) spaces = count_spaces(x) ret.append(' '*(4 + spaces) + '\n') if selected: ret.extend(selected[:-1]) ret.append(_termcolor.highlight(selected[-1])) # point at the problem with a caret spaces = count_spaces(selected[-1]) ret.append(' '*(spaces) + _termcolor.indicate("^")) # if in the REPL source may not be available if not ret: ret = "" err = _termcolor.filename('\nFile "%s", line %d:')+'\n%s' tmp = err % (self._get_path(), use_line, _termcolor.code(''.join(ret))) return tmp def with_lineno(self, line, col=None): """ Return a new Loc with this line number. 
""" return type(self)(self.filename, line, col) def short(self): """ Returns a short string """ shortfilename = os.path.basename(self.filename) return "%s:%s" % (shortfilename, self.line) # Used for annotating errors when source location is unknown. unknown_loc = Loc("unknown location", 0, 0) @total_ordering class SlotEqualityCheckMixin(object): # some ir nodes are __dict__ free using __slots__ instead, this mixin # should not trigger the unintended creation of __dict__. __slots__ = tuple() def __eq__(self, other): if type(self) is type(other): for name in self.__slots__: if getattr(self, name) != getattr(other, name): return False else: return True return False def __le__(self, other): return str(self) <= str(other) def __hash__(self): return id(self) @total_ordering class EqualityCheckMixin(object): """ Mixin for basic equality checking """ def __eq__(self, other): if type(self) is type(other): def fixup(adict): bad = ('loc', 'scope') d = dict(adict) for x in bad: d.pop(x, None) return d d1 = fixup(self.__dict__) d2 = fixup(other.__dict__) if d1 == d2: return True return False def __le__(self, other): return str(self) < str(other) def __hash__(self): return id(self) class VarMap(object): def __init__(self): self._con = {} def define(self, name, var): if name in self._con: raise RedefinedError(name) else: self._con[name] = var def get(self, name): try: return self._con[name] except KeyError: raise NotDefinedError(name) def __contains__(self, name): return name in self._con def __len__(self): return len(self._con) def __repr__(self): return pprint.pformat(self._con) def __hash__(self): return hash(self.name) def __iter__(self): return self._con.iterkeys() def __eq__(self, other): if type(self) is type(other): # check keys only, else __eq__ ref cycles, scope -> varmap -> var return self._con.keys() == other._con.keys() return False def __ne__(self, other): return not self.__eq__(other) class AbstractRHS(object): """Abstract base class for anything that can be the RHS of an assignment. This class **does not** define any methods. """ class Inst(EqualityCheckMixin, AbstractRHS): """ Base class for all IR instructions. """ def list_vars(self): """ List the variables used (read or written) by the instruction. """ raise NotImplementedError def _rec_list_vars(self, val): """ A recursive helper used to implement list_vars() in subclasses. """ if isinstance(val, Var): return [val] elif isinstance(val, Inst): return val.list_vars() elif isinstance(val, (list, tuple)): lst = [] for v in val: lst.extend(self._rec_list_vars(v)) return lst elif isinstance(val, dict): lst = [] for v in val.values(): lst.extend(self._rec_list_vars(v)) return lst else: return [] class Stmt(Inst): """ Base class for IR statements (instructions which can appear on their own in a Block). """ # Whether this statement ends its basic block (i.e. it will either jump # to another block or exit the function). is_terminator = False # Whether this statement exits the function. is_exit = False def list_vars(self): return self._rec_list_vars(self.__dict__) class Terminator(Stmt): """ IR statements that are terminators: the last statement in a block. A terminator must either: - exit the function - jump to a block All subclass of Terminator must override `.get_targets()` to return a list of jump targets. """ is_terminator = True def get_targets(self): raise NotImplementedError(type(self)) class Expr(Inst): """ An IR expression (an instruction which can only be part of a larger statement). 
""" def __init__(self, op, loc, **kws): assert isinstance(op, str) assert isinstance(loc, Loc) self.op = op self.loc = loc self._kws = kws def __getattr__(self, name): if name.startswith('_'): return Inst.__getattr__(self, name) return self._kws[name] def __setattr__(self, name, value): if name in ('op', 'loc', '_kws'): self.__dict__[name] = value else: self._kws[name] = value @classmethod def binop(cls, fn, lhs, rhs, loc): assert isinstance(fn, BuiltinFunctionType) assert isinstance(lhs, Var) assert isinstance(rhs, Var) assert isinstance(loc, Loc) op = 'binop' return cls(op=op, loc=loc, fn=fn, lhs=lhs, rhs=rhs, static_lhs=UNDEFINED, static_rhs=UNDEFINED) @classmethod def inplace_binop(cls, fn, immutable_fn, lhs, rhs, loc): assert isinstance(fn, BuiltinFunctionType) assert isinstance(immutable_fn, BuiltinFunctionType) assert isinstance(lhs, Var) assert isinstance(rhs, Var) assert isinstance(loc, Loc) op = 'inplace_binop' return cls(op=op, loc=loc, fn=fn, immutable_fn=immutable_fn, lhs=lhs, rhs=rhs, static_lhs=UNDEFINED, static_rhs=UNDEFINED) @classmethod def unary(cls, fn, value, loc): assert isinstance(value, (str, Var, FunctionType)) assert isinstance(loc, Loc) op = 'unary' fn = UNARY_BUITINS_TO_OPERATORS.get(fn, fn) return cls(op=op, loc=loc, fn=fn, value=value) @classmethod def call(cls, func, args, kws, loc, vararg=None, target=None): assert isinstance(func, Var) assert isinstance(loc, Loc) op = 'call' return cls(op=op, loc=loc, func=func, args=args, kws=kws, vararg=vararg, target=target) @classmethod def build_tuple(cls, items, loc): assert isinstance(loc, Loc) op = 'build_tuple' return cls(op=op, loc=loc, items=items) @classmethod def build_list(cls, items, loc): assert isinstance(loc, Loc) op = 'build_list' return cls(op=op, loc=loc, items=items) @classmethod def build_set(cls, items, loc): assert isinstance(loc, Loc) op = 'build_set' return cls(op=op, loc=loc, items=items) @classmethod def build_map(cls, items, size, literal_value, value_indexes, loc): assert isinstance(loc, Loc) op = 'build_map' return cls(op=op, loc=loc, items=items, size=size, literal_value=literal_value, value_indexes=value_indexes) @classmethod def pair_first(cls, value, loc): assert isinstance(value, Var) op = 'pair_first' return cls(op=op, loc=loc, value=value) @classmethod def pair_second(cls, value, loc): assert isinstance(value, Var) assert isinstance(loc, Loc) op = 'pair_second' return cls(op=op, loc=loc, value=value) @classmethod def getiter(cls, value, loc): assert isinstance(value, Var) assert isinstance(loc, Loc) op = 'getiter' return cls(op=op, loc=loc, value=value) @classmethod def iternext(cls, value, loc): assert isinstance(value, Var) assert isinstance(loc, Loc) op = 'iternext' return cls(op=op, loc=loc, value=value) @classmethod def exhaust_iter(cls, value, count, loc): assert isinstance(value, Var) assert isinstance(count, int) assert isinstance(loc, Loc) op = 'exhaust_iter' return cls(op=op, loc=loc, value=value, count=count) @classmethod def getattr(cls, value, attr, loc): assert isinstance(value, Var) assert isinstance(attr, str) assert isinstance(loc, Loc) op = 'getattr' return cls(op=op, loc=loc, value=value, attr=attr) @classmethod def getitem(cls, value, index, loc): assert isinstance(value, Var) assert isinstance(index, Var) assert isinstance(loc, Loc) op = 'getitem' fn = operator.getitem return cls(op=op, loc=loc, value=value, index=index, fn=fn) @classmethod def typed_getitem(cls, value, dtype, index, loc): assert isinstance(value, Var) assert isinstance(loc, Loc) op = 
'typed_getitem' return cls(op=op, loc=loc, value=value, dtype=dtype, index=index) @classmethod def static_getitem(cls, value, index, index_var, loc): assert isinstance(value, Var) assert index_var is None or isinstance(index_var, Var) assert isinstance(loc, Loc) op = 'static_getitem' fn = operator.getitem return cls(op=op, loc=loc, value=value, index=index, index_var=index_var, fn=fn) @classmethod def cast(cls, value, loc): """ A node for implicit casting at the return statement """ assert isinstance(value, Var) assert isinstance(loc, Loc) op = 'cast' return cls(op=op, value=value, loc=loc) @classmethod def phi(cls, loc): """Phi node """ assert isinstance(loc, Loc) return cls(op='phi', incoming_values=[], incoming_blocks=[], loc=loc) @classmethod def make_function(cls, name, code, closure, defaults, loc): """ A node for making a function object. """ assert isinstance(loc, Loc) op = 'make_function' return cls(op=op, name=name, code=code, closure=closure, defaults=defaults, loc=loc) @classmethod def null(cls, loc): """ A node for null value. This node is not handled by type inference. It is only added by post-typing passes. """ assert isinstance(loc, Loc) op = 'null' return cls(op=op, loc=loc) @classmethod def dummy(cls, op, info, loc): """ A node for a dummy value. This node is a place holder for carrying information through to a point where it is rewritten into something valid. This node is not handled by type inference or lowering. It's presence outside of the interpreter renders IR as illegal. """ assert isinstance(loc, Loc) assert isinstance(op, str) return cls(op=op, info=info, loc=loc) def __repr__(self): if self.op == 'call': args = ', '.join(str(a) for a in self.args) pres_order = self._kws.items() if config.DIFF_IR == 0 else sorted(self._kws.items()) kws = ', '.join('%s=%s' % (k, v) for k, v in pres_order) vararg = '*%s' % (self.vararg,) if self.vararg is not None else '' arglist = ', '.join(filter(None, [args, vararg, kws])) return 'call %s(%s)' % (self.func, arglist) elif self.op == 'binop': lhs, rhs = self.lhs, self.rhs if self.fn == operator.contains: lhs, rhs = rhs, lhs fn = OPERATORS_TO_BUILTINS.get(self.fn, self.fn) return '%s %s %s' % (lhs, fn, rhs) else: pres_order = self._kws.items() if config.DIFF_IR == 0 else sorted(self._kws.items()) args = ('%s=%s' % (k, v) for k, v in pres_order) return '%s(%s)' % (self.op, ', '.join(args)) def list_vars(self): return self._rec_list_vars(self._kws) def infer_constant(self): raise ConstantInferenceError('%s' % self, loc=self.loc) class SetItem(Stmt): """ target[index] = value """ def __init__(self, target, index, value, loc): assert isinstance(target, Var) assert isinstance(index, Var) assert isinstance(value, Var) assert isinstance(loc, Loc) self.target = target self.index = index self.value = value self.loc = loc def __repr__(self): return '%s[%s] = %s' % (self.target, self.index, self.value) class StaticSetItem(Stmt): """ target[constant index] = value """ def __init__(self, target, index, index_var, value, loc): assert isinstance(target, Var) assert not isinstance(index, Var) assert isinstance(index_var, Var) assert isinstance(value, Var) assert isinstance(loc, Loc) self.target = target self.index = index self.index_var = index_var self.value = value self.loc = loc def __repr__(self): return '%s[%r] = %s' % (self.target, self.index, self.value) class DelItem(Stmt): """ del target[index] """ def __init__(self, target, index, loc): assert isinstance(target, Var) assert isinstance(index, Var) assert isinstance(loc, Loc) self.target = 
target self.index = index self.loc = loc def __repr__(self): return 'del %s[%s]' % (self.target, self.index) class SetAttr(Stmt): def __init__(self, target, attr, value, loc): assert isinstance(target, Var) assert isinstance(attr, str) assert isinstance(value, Var) assert isinstance(loc, Loc) self.target = target self.attr = attr self.value = value self.loc = loc def __repr__(self): return '(%s).%s = %s' % (self.target, self.attr, self.value) class DelAttr(Stmt): def __init__(self, target, attr, loc): assert isinstance(target, Var) assert isinstance(attr, str) assert isinstance(loc, Loc) self.target = target self.attr = attr self.loc = loc def __repr__(self): return 'del (%s).%s' % (self.target, self.attr) class StoreMap(Stmt): def __init__(self, dct, key, value, loc): assert isinstance(dct, Var) assert isinstance(key, Var) assert isinstance(value, Var) assert isinstance(loc, Loc) self.dct = dct self.key = key self.value = value self.loc = loc def __repr__(self): return '%s[%s] = %s' % (self.dct, self.key, self.value) class Del(Stmt): def __init__(self, value, loc): assert isinstance(value, str) assert isinstance(loc, Loc) self.value = value self.loc = loc def __str__(self): return "del %s" % self.value class Raise(Terminator): is_exit = True def __init__(self, exception, loc): assert exception is None or isinstance(exception, Var) assert isinstance(loc, Loc) self.exception = exception self.loc = loc def __str__(self): return "raise %s" % self.exception def get_targets(self): return [] class StaticRaise(Terminator): """ Raise an exception class and arguments known at compile-time. Note that if *exc_class* is None, a bare "raise" statement is implied (i.e. re-raise the current exception). """ is_exit = True def __init__(self, exc_class, exc_args, loc): assert exc_class is None or isinstance(exc_class, type) assert isinstance(loc, Loc) assert exc_args is None or isinstance(exc_args, tuple) self.exc_class = exc_class self.exc_args = exc_args self.loc = loc def __str__(self): if self.exc_class is None: return "<static> raise" elif self.exc_args is None: return "<static> raise %s" % (self.exc_class,) else: return "<static> raise %s(%s)" % (self.exc_class, ", ".join(map(repr, self.exc_args))) def get_targets(self): return [] class TryRaise(Stmt): """A raise statement inside a try-block. Similar to ``Raise`` but does not terminate. """ def __init__(self, exception, loc): assert exception is None or isinstance(exception, Var) assert isinstance(loc, Loc) self.exception = exception self.loc = loc def __str__(self): return "try_raise %s" % self.exception class StaticTryRaise(Stmt): """A raise statement inside a try-block. Similar to ``StaticRaise`` but does not terminate. """ def __init__(self, exc_class, exc_args, loc): assert exc_class is None or isinstance(exc_class, type) assert isinstance(loc, Loc) assert exc_args is None or isinstance(exc_args, tuple) self.exc_class = exc_class self.exc_args = exc_args self.loc = loc def __str__(self): if self.exc_class is None: return "static_try_raise" elif self.exc_args is None: return "static_try_raise %s" % (self.exc_class,) else: return "static_try_raise %s(%s)" % (self.exc_class, ", ".join(map(repr, self.exc_args))) class Return(Terminator): """ Return to caller. """ is_exit = True def __init__(self, value, loc): assert isinstance(value, Var), type(value) assert isinstance(loc, Loc) self.value = value self.loc = loc def __str__(self): return 'return %s' % self.value def get_targets(self): return [] class Jump(Terminator): """ Unconditional branch.
""" def __init__(self, target, loc): assert isinstance(loc, Loc) self.target = target self.loc = loc def __str__(self): return 'jump %s' % self.target def get_targets(self): return [self.target] class Branch(Terminator): """ Conditional branch. """ def __init__(self, cond, truebr, falsebr, loc): assert isinstance(cond, Var) assert isinstance(loc, Loc) self.cond = cond self.truebr = truebr self.falsebr = falsebr self.loc = loc def __str__(self): return 'branch %s, %s, %s' % (self.cond, self.truebr, self.falsebr) def get_targets(self): return [self.truebr, self.falsebr] class Assign(Stmt): """ Assign to a variable. """ def __init__(self, value, target, loc): assert isinstance(value, AbstractRHS) assert isinstance(target, Var) assert isinstance(loc, Loc) self.value = value self.target = target self.loc = loc def __str__(self): return '%s = %s' % (self.target, self.value) class Print(Stmt): """ Print some values. """ def __init__(self, args, vararg, loc): assert all(isinstance(x, Var) for x in args) assert vararg is None or isinstance(vararg, Var) assert isinstance(loc, Loc) self.args = tuple(args) self.vararg = vararg # Constant-inferred arguments self.consts = {} self.loc = loc def __str__(self): return 'print(%s)' % ', '.join(str(v) for v in self.args) class Yield(Inst): def __init__(self, value, loc, index): assert isinstance(value, Var) assert isinstance(loc, Loc) self.value = value self.loc = loc self.index = index def __str__(self): return 'yield %s' % (self.value,) def list_vars(self): return [self.value] class EnterWith(Stmt): """Enter a "with" context """ def __init__(self, contextmanager, begin, end, loc): """ Parameters ---------- contextmanager : IR value begin, end : int The beginning and the ending offset of the with-body. loc : ir.Loc instance Source location """ assert isinstance(contextmanager, Var) assert isinstance(loc, Loc) self.contextmanager = contextmanager self.begin = begin self.end = end self.loc = loc def __str__(self): return 'enter_with {}'.format(self.contextmanager) def list_vars(self): return [self.contextmanager] class PopBlock(Stmt): """Marker statement for a pop block op code""" def __init__(self, loc): assert isinstance(loc, Loc) self.loc = loc def __str__(self): return 'pop_block' class Arg(EqualityCheckMixin, AbstractRHS): def __init__(self, name, index, loc): assert isinstance(name, str) assert isinstance(index, int) assert isinstance(loc, Loc) self.name = name self.index = index self.loc = loc def __repr__(self): return 'arg(%d, name=%s)' % (self.index, self.name) def infer_constant(self): raise ConstantInferenceError('%s' % self, loc=self.loc) class Const(EqualityCheckMixin, AbstractRHS): def __init__(self, value, loc, use_literal_type=True): assert isinstance(loc, Loc) self.value = value self.loc = loc # Note: need better way to tell if this is a literal or not. self.use_literal_type = use_literal_type def __repr__(self): return 'const(%s, %s)' % (type(self.value).__name__, self.value) def infer_constant(self): return self.value def __deepcopy__(self, memo): # Override to not copy constant values in code return Const( value=self.value, loc=self.loc, use_literal_type=self.use_literal_type, ) class Global(EqualityCheckMixin, AbstractRHS): def __init__(self, name, value, loc): assert isinstance(loc, Loc) self.name = name self.value = value self.loc = loc def __str__(self): return 'global(%s: %s)' % (self.name, self.value) def infer_constant(self): return self.value def __deepcopy__(self, memo): # don't copy value since it can fail (e.g. 
modules) # value is readonly and doesn't need copying return Global(self.name, self.value, copy.deepcopy(self.loc)) class FreeVar(EqualityCheckMixin, AbstractRHS): """ A freevar, as loaded by LOAD_DEREF. (i.e. a variable defined in an enclosing non-global scope) """ def __init__(self, index, name, value, loc): assert isinstance(index, int) assert isinstance(name, str) assert isinstance(loc, Loc) # index inside __code__.co_freevars self.index = index # variable name self.name = name # frozen value self.value = value self.loc = loc def __str__(self): return 'freevar(%s: %s)' % (self.name, self.value) def infer_constant(self): return self.value def __deepcopy__(self, memo): # Override to not copy constant values in code return FreeVar(index=self.index, name=self.name, value=self.value, loc=self.loc) class Var(EqualityCheckMixin, AbstractRHS): """ Attributes ----------- - scope: Scope - name: str - loc: Loc Definition location """ def __init__(self, scope, name, loc): # NOTE: Use of scope=None should be removed. assert scope is None or isinstance(scope, Scope) assert isinstance(name, str) assert isinstance(loc, Loc) self.scope = scope self.name = name self.loc = loc def __repr__(self): return 'Var(%s, %s)' % (self.name, self.loc.short()) def __str__(self): return self.name @property def is_temp(self): return self.name.startswith("$") @property def unversioned_name(self): """The unversioned name of this variable, i.e. SSA renaming removed """ for k, redef_set in self.scope.var_redefinitions.items(): if self.name in redef_set: return k return self.name @property def versioned_names(self): """Known versioned names for this variable, i.e. known variable names in the scope that have been formed from applying SSA to this variable """ return self.scope.get_versions_of(self.unversioned_name) @property def all_names(self): """All known versioned and unversioned names for this variable """ return self.versioned_names | {self.unversioned_name,} class Scope(EqualityCheckMixin): """ Attributes ----------- - parent: Scope Parent scope - localvars: VarMap Scope-local variable map - loc: Loc Start of scope location """ def __init__(self, parent, loc): assert parent is None or isinstance(parent, Scope) assert isinstance(loc, Loc) self.parent = parent self.localvars = VarMap() self.loc = loc self.redefined = defaultdict(int) self.var_redefinitions = defaultdict(set) def define(self, name, loc): """ Define a variable """ v = Var(scope=self, name=name, loc=loc) self.localvars.define(v.name, v) return v def get(self, name): """ Refer to a variable. Returns the latest version. """ if name in self.redefined: name = "%s.%d" % (name, self.redefined[name]) return self.get_exact(name) def get_exact(self, name): """ Refer to a variable. The returned variable has the exact name (exact variable version). """ try: return self.localvars.get(name) except NotDefinedError: if self.has_parent: return self.parent.get(name) else: raise def get_or_define(self, name, loc): if name in self.redefined: name = "%s.%d" % (name, self.redefined[name]) if name not in self.localvars: return self.define(name, loc) else: return self.localvars.get(name) def redefine(self, name, loc, rename=True): """ Redefine if the name is already defined """ if name not in self.localvars: return self.define(name, loc) elif not rename: # Must use the same name if the variable is a cellvar, which # means it could be captured in a closure.
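# Aside: a minimal sketch of how Scope.redefine() versions a name, using the
# Loc/Scope/Var classes of this module ("example.py" and the variable name
# "x" are illustrative only, not taken from the source).
_loc = Loc("example.py", 1)
_scope = Scope(parent=None, loc=_loc)
_scope.define("x", _loc)                 # -> Var named "x"
_x1 = _scope.redefine("x", _loc)         # -> Var named "x.1"
_x2 = _scope.redefine("x", _loc)         # -> Var named "x.2"
# get() resolves the latest version; unversioned_name strips the SSA suffix.
assert _scope.get("x").name == "x.2"
assert _x2.unversioned_name == "x"
assert _scope.get_versions_of("x") == {"x.1", "x.2"}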
return self.localvars.get(name) else: while True: ct = self.redefined[name] self.redefined[name] = ct + 1 newname = "%s.%d" % (name, ct + 1) try: res = self.define(newname, loc) except RedefinedError: continue else: self.var_redefinitions[name].add(newname) return res def get_versions_of(self, name): """ Gets all known versions of a given name """ vers = set() def walk(thename): redefs = self.var_redefinitions.get(thename, None) if redefs: for v in redefs: vers.add(v) walk(v) walk(name) return vers def make_temp(self, loc): n = len(self.localvars) v = Var(scope=self, name='$%d' % n, loc=loc) self.localvars.define(v.name, v) return v @property def has_parent(self): return self.parent is not None def __repr__(self): return "Scope(has_parent=%r, num_vars=%d, %s)" % (self.has_parent, len(self.localvars), self.loc) class Block(EqualityCheckMixin): """A code block """ def __init__(self, scope, loc): assert isinstance(scope, Scope) assert isinstance(loc, Loc) self.scope = scope self.body = [] self.loc = loc def copy(self): block = Block(self.scope, self.loc) block.body = self.body[:] return block def find_exprs(self, op=None): """ Iterate over exprs of the given *op* in this block. """ for inst in self.body: if isinstance(inst, Assign): expr = inst.value if isinstance(expr, Expr): if op is None or expr.op == op: yield expr def find_insts(self, cls=None): """ Iterate over insts of the given class in this block. """ for inst in self.body: if isinstance(inst, cls): yield inst def find_variable_assignment(self, name): """ Returns the assignment inst associated with variable "name", None if it cannot be found. """ for x in self.find_insts(cls=Assign): if x.target.name == name: return x return None def prepend(self, inst): assert isinstance(inst, Stmt) self.body.insert(0, inst) def append(self, inst): assert isinstance(inst, Stmt) self.body.append(inst) def remove(self, inst): assert isinstance(inst, Stmt) del self.body[self.body.index(inst)] def clear(self): del self.body[:] def dump(self, file=None): # Avoid early bind of sys.stdout as default value file = file or sys.stdout for inst in self.body: if hasattr(inst, 'dump'): inst.dump(file) else: inst_vars = sorted(str(v) for v in inst.list_vars()) print(' %-40s %s' % (inst, inst_vars), file=file) @property def terminator(self): return self.body[-1] @property def is_terminated(self): return self.body and self.body[-1].is_terminator def verify(self): if not self.is_terminated: raise VerificationError("Missing block terminator") # Only the last instruction can be a terminator for inst in self.body[:-1]: if inst.is_terminator: raise VerificationError("Terminator before the last " "instruction") def insert_after(self, stmt, other): """ Insert *stmt* after *other*. 
""" index = self.body.index(other) self.body.insert(index + 1, stmt) def insert_before_terminator(self, stmt): assert isinstance(stmt, Stmt) assert self.is_terminated self.body.insert(-1, stmt) def __repr__(self): return "" % (self.loc,) class Loop(SlotEqualityCheckMixin): """Describes a loop-block """ __slots__ = "entry", "exit" def __init__(self, entry, exit): self.entry = entry self.exit = exit def __repr__(self): args = self.entry, self.exit return "Loop(entry=%s, exit=%s)" % args class With(SlotEqualityCheckMixin): """Describes a with-block """ __slots__ = "entry", "exit" def __init__(self, entry, exit): self.entry = entry self.exit = exit def __repr__(self): args = self.entry, self.exit return "With(entry=%s, exit=%s)" % args class FunctionIR(object): def __init__(self, blocks, is_generator, func_id, loc, definitions, arg_count, arg_names): self.blocks = blocks self.is_generator = is_generator self.func_id = func_id self.loc = loc self.arg_count = arg_count self.arg_names = arg_names self._definitions = definitions self._reset_analysis_variables() def equal_ir(self, other): """ Checks that the IR contained within is equal to the IR in other. Equality is defined by being equal in fundamental structure (blocks, labels, IR node type and the order in which they are defined) and the IR nodes being equal. IR node equality essentially comes down to ensuring a node's `.__dict__` or `.__slots__` is equal, with the exception of ignoring 'loc' and 'scope' entries. The upshot is that the comparison is essentially location and scope invariant, but otherwise behaves as unsurprisingly as possible. """ if type(self) is type(other): return self.blocks == other.blocks return False def diff_str(self, other): """ Compute a human readable difference in the IR, returns a formatted string ready for printing. """ msg = [] for label, block in self.blocks.items(): other_blk = other.blocks.get(label, None) if other_blk is not None: if block != other_blk: msg.append(("Block %s differs" % label).center(80, '-')) # see if the instructions are just a permutation block_del = [x for x in block.body if isinstance(x, Del)] oth_del = [x for x in other_blk.body if isinstance(x, Del)] if block_del != oth_del: # this is a common issue, dels are all present, but # order shuffled. 
if sorted(block_del) == sorted(oth_del): msg.append(("Block %s contains the same dels but " "their order is different") % label) if len(block.body) > len(other_blk.body): msg.append("This block contains more statements") elif len(block.body) < len(other_blk.body): msg.append("Other block contains more statements") # find the indexes where they don't match tmp = [] for idx, stmts in enumerate(zip(block.body, other_blk.body)): b_s, o_s = stmts if b_s != o_s: tmp.append(idx) def get_pad(ablock, l): pointer = '-> ' sp = len(pointer) * ' ' pad = [] nstmt = len(ablock) for i in range(nstmt): if i in tmp: item = pointer elif i >= l: item = pointer else: item = sp pad.append(item) return pad min_stmt_len = min(len(block.body), len(other_blk.body)) with StringIO() as buf: it = [("self", block), ("other", other_blk)] for name, _block in it: buf.truncate(0) _block.dump(file=buf) stmts = buf.getvalue().splitlines() pad = get_pad(_block.body, min_stmt_len) title = ("%s: block %s" % (name, label)) msg.append(title.center(80, '-')) msg.extend(["{0}{1}".format(a, b) for a, b in zip(pad, stmts)]) if msg == []: msg.append("IR is considered equivalent.") return '\n'.join(msg) def _reset_analysis_variables(self): self._consts = consts.ConstantInference(self) # Will be computed by PostProcessor self.generator_info = None self.variable_lifetime = None # { ir.Block: { variable names (potentially) alive at start of block } } self.block_entry_vars = {} def derive(self, blocks, arg_count=None, arg_names=None, force_non_generator=False): """ Derive a new function IR from this one, using the given blocks, and possibly modifying the argument count and generator flag. Post-processing will have to be run again on the new IR. """ firstblock = blocks[min(blocks)] new_ir = copy.copy(self) new_ir.blocks = blocks new_ir.loc = firstblock.loc if force_non_generator: new_ir.is_generator = False if arg_count is not None: new_ir.arg_count = arg_count if arg_names is not None: new_ir.arg_names = arg_names new_ir._reset_analysis_variables() # Make fresh func_id new_ir.func_id = new_ir.func_id.derive() return new_ir def copy(self): new_ir = copy.copy(self) blocks = {} block_entry_vars = {} for label, block in self.blocks.items(): new_block = block.copy() blocks[label] = new_block if block in self.block_entry_vars: block_entry_vars[new_block] = self.block_entry_vars[block] new_ir.blocks = blocks new_ir.block_entry_vars = block_entry_vars return new_ir def get_block_entry_vars(self, block): """ Return a set of variable names possibly alive at the beginning of the block. """ return self.block_entry_vars[block] def infer_constant(self, name): """ Try to infer the constant value of a given variable. """ if isinstance(name, Var): name = name.name return self._consts.infer_constant(name) def get_definition(self, value, lhs_only=False): """ Get the definition site for the given variable name or instance. A Expr instance is returned by default, but if lhs_only is set to True, the left-hand-side variable is returned instead. """ lhs = value while True: if isinstance(value, Var): lhs = value name = value.name elif isinstance(value, str): lhs = value name = value else: return lhs if lhs_only else value defs = self._definitions[name] if len(defs) == 0: raise KeyError("no definition for %r" % (name,)) if len(defs) > 1: raise KeyError("more than one definition for %r" % (name,)) value = defs[0] def get_assignee(self, rhs_value, in_blocks=None): """ Finds the assignee for a given RHS value. 
If in_blocks is given the search will be limited to the specified blocks. """ if in_blocks is None: blocks = self.blocks.values() elif isinstance(in_blocks, int): blocks = [self.blocks[in_blocks]] else: blocks = [self.blocks[blk] for blk in list(in_blocks)] assert isinstance(rhs_value, AbstractRHS) for blk in blocks: for assign in blk.find_insts(Assign): if assign.value == rhs_value: return assign.target raise ValueError("Could not find an assignee for %s" % rhs_value) def dump(self, file=None): nofile = file is None # Avoid early bind of sys.stdout as default value file = file or StringIO() for offset, block in sorted(self.blocks.items()): print('label %s:' % (offset,), file=file) block.dump(file=file) if nofile: text = file.getvalue() if config.HIGHLIGHT_DUMPS: try: import pygments except ImportError: msg = "Please install pygments to see highlighted dumps" raise ValueError(msg) else: from pygments import highlight from numba.misc.dump_style import NumbaIRLexer as lexer from numba.misc.dump_style import by_colorscheme from pygments.formatters import Terminal256Formatter print(highlight(text, lexer(), Terminal256Formatter( style=by_colorscheme()))) else: print(text) def dump_to_string(self): with StringIO() as sb: self.dump(file=sb) return sb.getvalue() def dump_generator_info(self, file=None): file = file or sys.stdout gi = self.generator_info print("generator state variables:", sorted(gi.state_vars), file=file) for index, yp in sorted(gi.yield_points.items()): print("yield point #%d: live variables = %s, weak live variables = %s" % (index, sorted(yp.live_vars), sorted(yp.weak_live_vars)), file=file) def render_dot(self, filename_prefix="numba_ir", include_ir=True): """Render the CFG of the IR with GraphViz DOT via the ``graphviz`` python binding. Returns ------- g : graphviz.Digraph Use `g.view()` to open the graph in the default PDF application. """ try: import graphviz as gv except ImportError: raise ImportError( "The feature requires `graphviz` but it is not available. 
" "Please install with `pip install graphviz`" ) g = gv.Digraph( filename="{}{}.dot".format( filename_prefix, self.func_id.unique_name, ) ) # Populate the nodes for k, blk in self.blocks.items(): with StringIO() as sb: blk.dump(sb) label = sb.getvalue() if include_ir: label = ''.join( [r' {}\l'.format(x) for x in label.splitlines()], ) label = r"block {}\l".format(k) + label g.node(str(k), label=label, shape='rect') else: label = r"{}\l".format(k) g.node(str(k), label=label, shape='circle') # Populate the edges for src, blk in self.blocks.items(): for dst in blk.terminator.get_targets(): g.edge(str(src), str(dst)) return g # A stub for undefined global reference class UndefinedType(EqualityCheckMixin): _singleton = None def __new__(cls): obj = cls._singleton if obj is not None: return obj else: obj = object.__new__(cls) cls._singleton = obj return obj def __repr__(self): return "Undefined" UNDEFINED = UndefinedType() numba-0.55.1/numba/core/ir_utils.py000664 000000 000000 00000265532 14174536160 017146 0ustar00rootroot000000 000000 # # Copyright (c) 2017 Intel Corporation # SPDX-License-Identifier: BSD-2-Clause # import numpy import types as pytypes import collections import operator import warnings from llvmlite import ir as lir import numba from numba.core.extending import _Intrinsic from numba.core import types, utils, typing, ir, analysis, postproc, rewrites, config, cgutils from numba.core.typing.templates import (signature, infer_global, AbstractTemplate) from numba.core.imputils import impl_ret_untracked from numba.core.analysis import (compute_live_map, compute_use_defs, compute_cfg_from_blocks) from numba.core.errors import (TypingError, UnsupportedError, NumbaPendingDeprecationWarning, NumbaWarning, feedback_details, CompilerError) import copy _unique_var_count = 0 def mk_unique_var(prefix): global _unique_var_count var = prefix + "." + str(_unique_var_count) _unique_var_count = _unique_var_count + 1 return var class _MaxLabel: def __init__(self, value=0): self._value = value def next(self): self._value += 1 return self._value def update(self, newval): self._value = max(newval, self._value) _the_max_label = _MaxLabel() del _MaxLabel def get_unused_var_name(prefix, var_table): """ Get a new var name with a given prefix and make sure it is unused in the given variable table. """ cur = 0 while True: var = prefix + str(cur) if var not in var_table: return var cur += 1 def next_label(): return _the_max_label.next() def mk_alloc(typingctx, typemap, calltypes, lhs, size_var, dtype, scope, loc, lhs_typ): """generate an array allocation with np.empty() and return list of nodes. size_var can be an int variable or tuple of int variables. lhs_typ is the type of the array being allocated. 
""" out = [] ndims = 1 size_typ = types.intp if isinstance(size_var, tuple): if len(size_var) == 1: size_var = size_var[0] size_var = convert_size_to_var(size_var, typemap, scope, loc, out) else: # tuple_var = build_tuple([size_var...]) ndims = len(size_var) tuple_var = ir.Var(scope, mk_unique_var("$tuple_var"), loc) if typemap: typemap[tuple_var.name] = types.containers.UniTuple( types.intp, ndims) # constant sizes need to be assigned to vars new_sizes = [convert_size_to_var(s, typemap, scope, loc, out) for s in size_var] tuple_call = ir.Expr.build_tuple(new_sizes, loc) tuple_assign = ir.Assign(tuple_call, tuple_var, loc) out.append(tuple_assign) size_var = tuple_var size_typ = types.containers.UniTuple(types.intp, ndims) # g_np_var = Global(numpy) g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc) if typemap: typemap[g_np_var.name] = types.misc.Module(numpy) g_np = ir.Global('np', numpy, loc) g_np_assign = ir.Assign(g_np, g_np_var, loc) # attr call: empty_attr = getattr(g_np_var, empty) empty_attr_call = ir.Expr.getattr(g_np_var, "empty", loc) attr_var = ir.Var(scope, mk_unique_var("$empty_attr_attr"), loc) if typemap: typemap[attr_var.name] = get_np_ufunc_typ(numpy.empty) attr_assign = ir.Assign(empty_attr_call, attr_var, loc) # Assume str(dtype) returns a valid type dtype_str = str(dtype) # alloc call: lhs = empty_attr(size_var, typ_var) typ_var = ir.Var(scope, mk_unique_var("$np_typ_var"), loc) if typemap: typemap[typ_var.name] = types.functions.NumberClass(dtype) # If dtype is a datetime/timedelta with a unit, # then it won't return a valid type and instead can be created # with a string. i.e. "datetime64[ns]") if (isinstance(dtype, (types.NPDatetime, types.NPTimedelta)) and dtype.unit != ''): typename_const = ir.Const(dtype_str, loc) typ_var_assign = ir.Assign(typename_const, typ_var, loc) else: if dtype_str=='bool': # empty doesn't like 'bool' sometimes (e.g. kmeans example) dtype_str = 'bool_' np_typ_getattr = ir.Expr.getattr(g_np_var, dtype_str, loc) typ_var_assign = ir.Assign(np_typ_getattr, typ_var, loc) alloc_call = ir.Expr.call(attr_var, [size_var, typ_var], (), loc) if calltypes: cac = typemap[attr_var.name].get_call_type( typingctx, [size_typ, types.functions.NumberClass(dtype)], {}) # By default, all calls to "empty" are typed as returning a standard # NumPy ndarray. If we are allocating a ndarray subclass here then # just change the return type to be that of the subclass. 
cac._return_type = (lhs_typ.copy(layout='C') if lhs_typ.layout == 'F' else lhs_typ) calltypes[alloc_call] = cac if lhs_typ.layout == 'F': empty_c_typ = lhs_typ.copy(layout='C') empty_c_var = ir.Var(scope, mk_unique_var("$empty_c_var"), loc) if typemap: typemap[empty_c_var.name] = lhs_typ.copy(layout='C') empty_c_assign = ir.Assign(alloc_call, empty_c_var, loc) # attr call: asfortranarray = getattr(g_np_var, asfortranarray) asfortranarray_attr_call = ir.Expr.getattr(g_np_var, "asfortranarray", loc) afa_attr_var = ir.Var(scope, mk_unique_var("$asfortran_array_attr"), loc) if typemap: typemap[afa_attr_var.name] = get_np_ufunc_typ(numpy.asfortranarray) afa_attr_assign = ir.Assign(asfortranarray_attr_call, afa_attr_var, loc) # call asfortranarray asfortranarray_call = ir.Expr.call(afa_attr_var, [empty_c_var], (), loc) if calltypes: calltypes[asfortranarray_call] = typemap[afa_attr_var.name].get_call_type( typingctx, [empty_c_typ], {}) asfortranarray_assign = ir.Assign(asfortranarray_call, lhs, loc) out.extend([g_np_assign, attr_assign, typ_var_assign, empty_c_assign, afa_attr_assign, asfortranarray_assign]) else: alloc_assign = ir.Assign(alloc_call, lhs, loc) out.extend([g_np_assign, attr_assign, typ_var_assign, alloc_assign]) return out def convert_size_to_var(size_var, typemap, scope, loc, nodes): if isinstance(size_var, int): new_size = ir.Var(scope, mk_unique_var("$alloc_size"), loc) if typemap: typemap[new_size.name] = types.intp size_assign = ir.Assign(ir.Const(size_var, loc), new_size, loc) nodes.append(size_assign) return new_size assert isinstance(size_var, ir.Var) return size_var def get_np_ufunc_typ(func): """get type of the incoming function from builtin registry""" for (k, v) in typing.npydecl.registry.globals: if k == func: return v for (k, v) in typing.templates.builtin_registry.globals: if k == func: return v raise RuntimeError("type for func ", func, " not found") def mk_range_block(typemap, start, stop, step, calltypes, scope, loc): """make a block that initializes loop range and iteration variables. target label in jump needs to be set. 
""" # g_range_var = Global(range) g_range_var = ir.Var(scope, mk_unique_var("$range_g_var"), loc) typemap[g_range_var.name] = get_global_func_typ(range) g_range = ir.Global('range', range, loc) g_range_assign = ir.Assign(g_range, g_range_var, loc) arg_nodes, args = _mk_range_args(typemap, start, stop, step, scope, loc) # range_call_var = call g_range_var(start, stop, step) range_call = ir.Expr.call(g_range_var, args, (), loc) calltypes[range_call] = typemap[g_range_var.name].get_call_type( typing.Context(), [types.intp] * len(args), {}) #signature(types.range_state64_type, types.intp) range_call_var = ir.Var(scope, mk_unique_var("$range_c_var"), loc) typemap[range_call_var.name] = types.iterators.RangeType(types.intp) range_call_assign = ir.Assign(range_call, range_call_var, loc) # iter_var = getiter(range_call_var) iter_call = ir.Expr.getiter(range_call_var, loc) calltypes[iter_call] = signature(types.range_iter64_type, types.range_state64_type) iter_var = ir.Var(scope, mk_unique_var("$iter_var"), loc) typemap[iter_var.name] = types.iterators.RangeIteratorType(types.intp) iter_call_assign = ir.Assign(iter_call, iter_var, loc) # $phi = iter_var phi_var = ir.Var(scope, mk_unique_var("$phi"), loc) typemap[phi_var.name] = types.iterators.RangeIteratorType(types.intp) phi_assign = ir.Assign(iter_var, phi_var, loc) # jump to header jump_header = ir.Jump(-1, loc) range_block = ir.Block(scope, loc) range_block.body = arg_nodes + [g_range_assign, range_call_assign, iter_call_assign, phi_assign, jump_header] return range_block def _mk_range_args(typemap, start, stop, step, scope, loc): nodes = [] if isinstance(stop, ir.Var): g_stop_var = stop else: assert isinstance(stop, int) g_stop_var = ir.Var(scope, mk_unique_var("$range_stop"), loc) if typemap: typemap[g_stop_var.name] = types.intp stop_assign = ir.Assign(ir.Const(stop, loc), g_stop_var, loc) nodes.append(stop_assign) if start == 0 and step == 1: return nodes, [g_stop_var] if isinstance(start, ir.Var): g_start_var = start else: assert isinstance(start, int) g_start_var = ir.Var(scope, mk_unique_var("$range_start"), loc) if typemap: typemap[g_start_var.name] = types.intp start_assign = ir.Assign(ir.Const(start, loc), g_start_var, loc) nodes.append(start_assign) if step == 1: return nodes, [g_start_var, g_stop_var] if isinstance(step, ir.Var): g_step_var = step else: assert isinstance(step, int) g_step_var = ir.Var(scope, mk_unique_var("$range_step"), loc) if typemap: typemap[g_step_var.name] = types.intp step_assign = ir.Assign(ir.Const(step, loc), g_step_var, loc) nodes.append(step_assign) return nodes, [g_start_var, g_stop_var, g_step_var] def get_global_func_typ(func): """get type variable for func() from builtin registry""" for (k, v) in typing.templates.builtin_registry.globals: if k == func: return v raise RuntimeError("func type not found {}".format(func)) def mk_loop_header(typemap, phi_var, calltypes, scope, loc): """make a block that is a loop header updating iteration variables. target labels in branch need to be set. 
""" # iternext_var = iternext(phi_var) iternext_var = ir.Var(scope, mk_unique_var("$iternext_var"), loc) typemap[iternext_var.name] = types.containers.Pair( types.intp, types.boolean) iternext_call = ir.Expr.iternext(phi_var, loc) calltypes[iternext_call] = signature( types.containers.Pair( types.intp, types.boolean), types.range_iter64_type) iternext_assign = ir.Assign(iternext_call, iternext_var, loc) # pair_first_var = pair_first(iternext_var) pair_first_var = ir.Var(scope, mk_unique_var("$pair_first_var"), loc) typemap[pair_first_var.name] = types.intp pair_first_call = ir.Expr.pair_first(iternext_var, loc) pair_first_assign = ir.Assign(pair_first_call, pair_first_var, loc) # pair_second_var = pair_second(iternext_var) pair_second_var = ir.Var(scope, mk_unique_var("$pair_second_var"), loc) typemap[pair_second_var.name] = types.boolean pair_second_call = ir.Expr.pair_second(iternext_var, loc) pair_second_assign = ir.Assign(pair_second_call, pair_second_var, loc) # phi_b_var = pair_first_var phi_b_var = ir.Var(scope, mk_unique_var("$phi"), loc) typemap[phi_b_var.name] = types.intp phi_b_assign = ir.Assign(pair_first_var, phi_b_var, loc) # branch pair_second_var body_block out_block branch = ir.Branch(pair_second_var, -1, -1, loc) header_block = ir.Block(scope, loc) header_block.body = [iternext_assign, pair_first_assign, pair_second_assign, phi_b_assign, branch] return header_block def legalize_names(varnames): """returns a dictionary for conversion of variable names to legal parameter names. """ var_map = {} for var in varnames: new_name = var.replace("_", "__").replace("$", "_").replace(".", "_") assert new_name not in var_map var_map[var] = new_name return var_map def get_name_var_table(blocks): """create a mapping from variable names to their ir.Var objects""" def get_name_var_visit(var, namevar): namevar[var.name] = var return var namevar = {} visit_vars(blocks, get_name_var_visit, namevar) return namevar def replace_var_names(blocks, namedict): """replace variables (ir.Var to ir.Var) from dictionary (name -> name)""" # remove identity values to avoid infinite loop new_namedict = {} for l, r in namedict.items(): if l != r: new_namedict[l] = r def replace_name(var, namedict): assert isinstance(var, ir.Var) while var.name in namedict: var = ir.Var(var.scope, namedict[var.name], var.loc) return var visit_vars(blocks, replace_name, new_namedict) def replace_var_callback(var, vardict): assert isinstance(var, ir.Var) while var.name in vardict.keys(): assert(vardict[var.name].name != var.name) new_var = vardict[var.name] var = ir.Var(new_var.scope, new_var.name, new_var.loc) return var def replace_vars(blocks, vardict): """replace variables (ir.Var to ir.Var) from dictionary (name -> ir.Var)""" # remove identity values to avoid infinite loop new_vardict = {} for l, r in vardict.items(): if l != r.name: new_vardict[l] = r visit_vars(blocks, replace_var_callback, new_vardict) def replace_vars_stmt(stmt, vardict): visit_vars_stmt(stmt, replace_var_callback, vardict) def replace_vars_inner(node, vardict): return visit_vars_inner(node, replace_var_callback, vardict) # other packages that define new nodes add calls to visit variables in them # format: {type:function} visit_vars_extensions = {} def visit_vars(blocks, callback, cbdata): """go over statements of block bodies and replace variable names with dictionary. 
""" for block in blocks.values(): for stmt in block.body: visit_vars_stmt(stmt, callback, cbdata) return def visit_vars_stmt(stmt, callback, cbdata): # let external calls handle stmt if type matches for t, f in visit_vars_extensions.items(): if isinstance(stmt, t): f(stmt, callback, cbdata) return if isinstance(stmt, ir.Assign): stmt.target = visit_vars_inner(stmt.target, callback, cbdata) stmt.value = visit_vars_inner(stmt.value, callback, cbdata) elif isinstance(stmt, ir.Arg): stmt.name = visit_vars_inner(stmt.name, callback, cbdata) elif isinstance(stmt, ir.Return): stmt.value = visit_vars_inner(stmt.value, callback, cbdata) elif isinstance(stmt, ir.Raise): stmt.exception = visit_vars_inner(stmt.exception, callback, cbdata) elif isinstance(stmt, ir.Branch): stmt.cond = visit_vars_inner(stmt.cond, callback, cbdata) elif isinstance(stmt, ir.Jump): stmt.target = visit_vars_inner(stmt.target, callback, cbdata) elif isinstance(stmt, ir.Del): # Because Del takes only a var name, we make up by # constructing a temporary variable. var = ir.Var(None, stmt.value, stmt.loc) var = visit_vars_inner(var, callback, cbdata) stmt.value = var.name elif isinstance(stmt, ir.DelAttr): stmt.target = visit_vars_inner(stmt.target, callback, cbdata) stmt.attr = visit_vars_inner(stmt.attr, callback, cbdata) elif isinstance(stmt, ir.SetAttr): stmt.target = visit_vars_inner(stmt.target, callback, cbdata) stmt.attr = visit_vars_inner(stmt.attr, callback, cbdata) stmt.value = visit_vars_inner(stmt.value, callback, cbdata) elif isinstance(stmt, ir.DelItem): stmt.target = visit_vars_inner(stmt.target, callback, cbdata) stmt.index = visit_vars_inner(stmt.index, callback, cbdata) elif isinstance(stmt, ir.StaticSetItem): stmt.target = visit_vars_inner(stmt.target, callback, cbdata) stmt.index_var = visit_vars_inner(stmt.index_var, callback, cbdata) stmt.value = visit_vars_inner(stmt.value, callback, cbdata) elif isinstance(stmt, ir.SetItem): stmt.target = visit_vars_inner(stmt.target, callback, cbdata) stmt.index = visit_vars_inner(stmt.index, callback, cbdata) stmt.value = visit_vars_inner(stmt.value, callback, cbdata) elif isinstance(stmt, ir.Print): stmt.args = [visit_vars_inner(x, callback, cbdata) for x in stmt.args] else: # TODO: raise NotImplementedError("no replacement for IR node: ", stmt) pass return def visit_vars_inner(node, callback, cbdata): if isinstance(node, ir.Var): return callback(node, cbdata) elif isinstance(node, list): return [visit_vars_inner(n, callback, cbdata) for n in node] elif isinstance(node, tuple): return tuple([visit_vars_inner(n, callback, cbdata) for n in node]) elif isinstance(node, ir.Expr): # if node.op in ['binop', 'inplace_binop']: # lhs = node.lhs.name # rhs = node.rhs.name # node.lhs.name = callback, cbdata.get(lhs, lhs) # node.rhs.name = callback, cbdata.get(rhs, rhs) for arg in node._kws.keys(): node._kws[arg] = visit_vars_inner(node._kws[arg], callback, cbdata) elif isinstance(node, ir.Yield): node.value = visit_vars_inner(node.value, callback, cbdata) return node add_offset_to_labels_extensions = {} def add_offset_to_labels(blocks, offset): """add an offset to all block labels and jump/branch targets """ new_blocks = {} for l, b in blocks.items(): # some parfor last blocks might be empty term = None if b.body: term = b.body[-1] for inst in b.body: for T, f in add_offset_to_labels_extensions.items(): if isinstance(inst, T): f_max = f(inst, offset) if isinstance(term, ir.Jump): b.body[-1] = ir.Jump(term.target + offset, term.loc) if isinstance(term, ir.Branch): b.body[-1] = 
ir.Branch(term.cond, term.truebr + offset, term.falsebr + offset, term.loc) new_blocks[l + offset] = b return new_blocks find_max_label_extensions = {} def find_max_label(blocks): max_label = 0 for l, b in blocks.items(): term = None if b.body: term = b.body[-1] for inst in b.body: for T, f in find_max_label_extensions.items(): if isinstance(inst, T): f_max = f(inst) if f_max > max_label: max_label = f_max if l > max_label: max_label = l return max_label def flatten_labels(blocks): """makes the labels in range(0, len(blocks)), useful to compare CFGs """ # first bulk move the labels out of the rewrite range blocks = add_offset_to_labels(blocks, find_max_label(blocks) + 1) # order them in topo order because it's easier to read new_blocks = {} topo_order = find_topo_order(blocks) l_map = dict() idx = 0 for x in topo_order: l_map[x] = idx idx += 1 for t_node in topo_order: b = blocks[t_node] # some parfor last blocks might be empty term = None if b.body: term = b.body[-1] if isinstance(term, ir.Jump): b.body[-1] = ir.Jump(l_map[term.target], term.loc) if isinstance(term, ir.Branch): b.body[-1] = ir.Branch(term.cond, l_map[term.truebr], l_map[term.falsebr], term.loc) new_blocks[l_map[t_node]] = b return new_blocks def remove_dels(blocks): """remove ir.Del nodes""" for block in blocks.values(): new_body = [] for stmt in block.body: if not isinstance(stmt, ir.Del): new_body.append(stmt) block.body = new_body return def remove_args(blocks): """remove ir.Arg nodes""" for block in blocks.values(): new_body = [] for stmt in block.body: if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Arg): continue new_body.append(stmt) block.body = new_body return def dead_code_elimination(func_ir, typemap=None, alias_map=None, arg_aliases=None): """ Performs dead code elimination and leaves the IR in a valid state on exit """ do_post_proc = False while (remove_dead(func_ir.blocks, func_ir.arg_names, func_ir, typemap, alias_map, arg_aliases)): do_post_proc = True if do_post_proc: post_proc = postproc.PostProcessor(func_ir) post_proc.run() def remove_dead(blocks, args, func_ir, typemap=None, alias_map=None, arg_aliases=None): """dead code elimination using liveness and CFG info. Returns True if something has been removed, or False if nothing is removed. 
""" cfg = compute_cfg_from_blocks(blocks) usedefs = compute_use_defs(blocks) live_map = compute_live_map(cfg, blocks, usedefs.usemap, usedefs.defmap) call_table, _ = get_call_table(blocks) if alias_map is None or arg_aliases is None: alias_map, arg_aliases = find_potential_aliases(blocks, args, typemap, func_ir) if config.DEBUG_ARRAY_OPT >= 1: print("args:", args) print("alias map:", alias_map) print("arg_aliases:", arg_aliases) print("live_map:", live_map) print("usemap:", usedefs.usemap) print("defmap:", usedefs.defmap) # keep set for easier search alias_set = set(alias_map.keys()) removed = False for label, block in blocks.items(): # find live variables at each statement to delete dead assignment lives = {v.name for v in block.terminator.list_vars()} if config.DEBUG_ARRAY_OPT >= 2: print("remove_dead processing block", label, lives) # find live variables at the end of block for out_blk, _data in cfg.successors(label): if config.DEBUG_ARRAY_OPT >= 2: print("succ live_map", out_blk, live_map[out_blk]) lives |= live_map[out_blk] removed |= remove_dead_block(block, lives, call_table, arg_aliases, alias_map, alias_set, func_ir, typemap) return removed # other packages that define new nodes add calls to remove dead code in them # format: {type:function} remove_dead_extensions = {} def remove_dead_block(block, lives, call_table, arg_aliases, alias_map, alias_set, func_ir, typemap): """remove dead code using liveness info. Mutable arguments (e.g. arrays) that are not definitely assigned are live after return of function. """ # TODO: find mutable args that are not definitely assigned instead of # assuming all args are live after return removed = False # add statements in reverse order new_body = [block.terminator] # for each statement in reverse order, excluding terminator for stmt in reversed(block.body[:-1]): if config.DEBUG_ARRAY_OPT >= 2: print("remove_dead_block", stmt) # aliases of lives are also live alias_lives = set() init_alias_lives = lives & alias_set for v in init_alias_lives: alias_lives |= alias_map[v] lives_n_aliases = lives | alias_lives | arg_aliases # let external calls handle stmt if type matches if type(stmt) in remove_dead_extensions: f = remove_dead_extensions[type(stmt)] stmt = f(stmt, lives, lives_n_aliases, arg_aliases, alias_map, func_ir, typemap) if stmt is None: if config.DEBUG_ARRAY_OPT >= 2: print("Statement was removed.") removed = True continue # ignore assignments that their lhs is not live or lhs==rhs if isinstance(stmt, ir.Assign): lhs = stmt.target rhs = stmt.value if lhs.name not in lives and has_no_side_effect( rhs, lives_n_aliases, call_table): if config.DEBUG_ARRAY_OPT >= 2: print("Statement was removed.") removed = True continue if isinstance(rhs, ir.Var) and lhs.name == rhs.name: if config.DEBUG_ARRAY_OPT >= 2: print("Statement was removed.") removed = True continue # TODO: remove other nodes like SetItem etc. if isinstance(stmt, ir.Del): if stmt.value not in lives: if config.DEBUG_ARRAY_OPT >= 2: print("Statement was removed.") removed = True continue if isinstance(stmt, ir.SetItem): name = stmt.target.name if name not in lives_n_aliases: if config.DEBUG_ARRAY_OPT >= 2: print("Statement was removed.") continue if type(stmt) in analysis.ir_extension_usedefs: def_func = analysis.ir_extension_usedefs[type(stmt)] uses, defs = def_func(stmt) lives -= defs lives |= uses else: lives |= {v.name for v in stmt.list_vars()} if isinstance(stmt, ir.Assign): # make sure lhs is not used in rhs, e.g. 
a = g(a) if isinstance(stmt.value, ir.Expr): rhs_vars = {v.name for v in stmt.value.list_vars()} if lhs.name not in rhs_vars: lives.remove(lhs.name) else: lives.remove(lhs.name) new_body.append(stmt) new_body.reverse() block.body = new_body return removed # list of functions remove_call_handlers = [] def remove_dead_random_call(rhs, lives, call_list): if len(call_list) == 3 and call_list[1:] == ['random', numpy]: return call_list[0] not in {'seed', 'shuffle'} return False remove_call_handlers.append(remove_dead_random_call) def has_no_side_effect(rhs, lives, call_table): """ Returns True if this expression has no side effects that would prevent re-ordering. """ from numba.parfors import array_analysis, parfor from numba.misc.special import prange if isinstance(rhs, ir.Expr) and rhs.op == 'call': func_name = rhs.func.name if func_name not in call_table or call_table[func_name] == []: return False call_list = call_table[func_name] if (call_list == ['empty', numpy] or call_list == [slice] or call_list == ['stencil', numba] or call_list == ['log', numpy] or call_list == ['dtype', numpy] or call_list == [array_analysis.wrap_index] or call_list == [prange] or call_list == ['prange', numba] or call_list == [parfor.internal_prange]): return True elif (isinstance(call_list[0], _Intrinsic) and (call_list[0]._name == 'empty_inferred' or call_list[0]._name == 'unsafe_empty_inferred')): return True from numba.core.registry import CPUDispatcher from numba.np.linalg import dot_3_mv_check_args if isinstance(call_list[0], CPUDispatcher): py_func = call_list[0].py_func if py_func == dot_3_mv_check_args: return True for f in remove_call_handlers: if f(rhs, lives, call_list): return True return False if isinstance(rhs, ir.Expr) and rhs.op == 'inplace_binop': return rhs.lhs.name not in lives if isinstance(rhs, ir.Yield): return False if isinstance(rhs, ir.Expr) and rhs.op == 'pair_first': # don't remove pair_first since prange looks for it return False return True is_pure_extensions = [] def is_pure(rhs, lives, call_table): """ Returns True if every time this expression is evaluated it returns the same result. This is not the case for things like calls to numpy.random. """ if isinstance(rhs, ir.Expr): if rhs.op == 'call': func_name = rhs.func.name if func_name not in call_table or call_table[func_name] == []: return False call_list = call_table[func_name] if (call_list == [slice] or call_list == ['log', numpy] or call_list == ['empty', numpy]): return True for f in is_pure_extensions: if f(rhs, lives, call_list): return True return False elif rhs.op == 'getiter' or rhs.op == 'iternext': return False if isinstance(rhs, ir.Yield): return False return True def is_const_call(module_name, func_name): # Returns True if there is no state in the given module changed by the given function. 
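# Aside: a downstream package could register its own handler in
# remove_call_handlers, mirroring remove_dead_random_call above. The module
# name `mypkg` and the function `make_buffer` are hypothetical, purely for
# illustration.
def _remove_dead_mypkg_call(rhs, lives, call_list):
    # Treat mypkg.make_buffer(...) as side-effect free so that a dead result
    # can be dropped; call_list is [attr, module] as built by get_call_table.
    return (len(call_list) == 2 and call_list[0] == 'make_buffer'
            and getattr(call_list[1], '__name__', None) == 'mypkg')

remove_call_handlers.append(_remove_dead_mypkg_call)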
if module_name == 'numpy': if func_name in ['empty']: return True return False alias_analysis_extensions = {} alias_func_extensions = {} def get_canonical_alias(v, alias_map): if v not in alias_map: return v v_aliases = sorted(list(alias_map[v])) return v_aliases[0] def find_potential_aliases(blocks, args, typemap, func_ir, alias_map=None, arg_aliases=None): "find all array aliases and argument aliases to avoid remove as dead" if alias_map is None: alias_map = {} if arg_aliases is None: arg_aliases = set(a for a in args if not is_immutable_type(a, typemap)) # update definitions since they are not guaranteed to be up-to-date # FIXME keep definitions up-to-date to avoid the need for rebuilding func_ir._definitions = build_definitions(func_ir.blocks) np_alias_funcs = ['ravel', 'transpose', 'reshape'] for bl in blocks.values(): for instr in bl.body: if type(instr) in alias_analysis_extensions: f = alias_analysis_extensions[type(instr)] f(instr, args, typemap, func_ir, alias_map, arg_aliases) if isinstance(instr, ir.Assign): expr = instr.value lhs = instr.target.name # only mutable types can alias if is_immutable_type(lhs, typemap): continue if isinstance(expr, ir.Var) and lhs!=expr.name: _add_alias(lhs, expr.name, alias_map, arg_aliases) # subarrays like A = B[0] for 2D B if (isinstance(expr, ir.Expr) and (expr.op == 'cast' or expr.op in ['getitem', 'static_getitem'])): _add_alias(lhs, expr.value.name, alias_map, arg_aliases) if isinstance(expr, ir.Expr) and expr.op == 'inplace_binop': _add_alias(lhs, expr.lhs.name, alias_map, arg_aliases) # array attributes like A.T if (isinstance(expr, ir.Expr) and expr.op == 'getattr' and expr.attr in ['T', 'ctypes', 'flat']): _add_alias(lhs, expr.value.name, alias_map, arg_aliases) # a = b.c. a should alias b if (isinstance(expr, ir.Expr) and expr.op == 'getattr' and expr.attr not in ['shape'] and expr.value.name in arg_aliases): _add_alias(lhs, expr.value.name, alias_map, arg_aliases) # calls that can create aliases such as B = A.ravel() if isinstance(expr, ir.Expr) and expr.op == 'call': fdef = guard(find_callname, func_ir, expr, typemap) # TODO: sometimes gufunc backend creates duplicate code # causing find_callname to fail. 
Example: test_argmax # ignored here since those cases don't create aliases # but should be fixed in general if fdef is None: continue fname, fmod = fdef if fdef in alias_func_extensions: alias_func = alias_func_extensions[fdef] alias_func(lhs, expr.args, alias_map, arg_aliases) if fmod == 'numpy' and fname in np_alias_funcs: _add_alias(lhs, expr.args[0].name, alias_map, arg_aliases) if isinstance(fmod, ir.Var) and fname in np_alias_funcs: _add_alias(lhs, fmod.name, alias_map, arg_aliases) # copy to avoid changing size during iteration old_alias_map = copy.deepcopy(alias_map) # combine all aliases transitively for v in old_alias_map: for w in old_alias_map[v]: alias_map[v] |= alias_map[w] for w in old_alias_map[v]: alias_map[w] = alias_map[v] return alias_map, arg_aliases def _add_alias(lhs, rhs, alias_map, arg_aliases): if rhs in arg_aliases: arg_aliases.add(lhs) else: if rhs not in alias_map: alias_map[rhs] = set() if lhs not in alias_map: alias_map[lhs] = set() alias_map[rhs].add(lhs) alias_map[lhs].add(rhs) return def is_immutable_type(var, typemap): # Conservatively, assume mutable if type not available if typemap is None or var not in typemap: return False typ = typemap[var] # TODO: add more immutable types if isinstance(typ, (types.Number, types.scalars._NPDatetimeBase, types.iterators.RangeType)): return True if typ==types.string: return True # conservatively, assume mutable return False def copy_propagate(blocks, typemap): """compute copy propagation information for each block using fixed-point iteration on data flow equations: in_b = intersect(predec(B)) out_b = gen_b | (in_b - kill_b) """ cfg = compute_cfg_from_blocks(blocks) entry = cfg.entry_point() # format: dict of block labels to copies as tuples # label -> (l,r) c_data = init_copy_propagate_data(blocks, entry, typemap) (gen_copies, all_copies, kill_copies, in_copies, out_copies) = c_data old_point = None new_point = copy.deepcopy(out_copies) # comparison works since dictionary of built-in types while old_point != new_point: for label in blocks.keys(): if label == entry: continue predecs = [i for i, _d in cfg.predecessors(label)] # in_b = intersect(predec(B)) in_copies[label] = out_copies[predecs[0]].copy() for p in predecs: in_copies[label] &= out_copies[p] # out_b = gen_b | (in_b - kill_b) out_copies[label] = (gen_copies[label] | (in_copies[label] - kill_copies[label])) old_point = new_point new_point = copy.deepcopy(out_copies) if config.DEBUG_ARRAY_OPT >= 1: print("copy propagate out_copies:", out_copies) return in_copies, out_copies def init_copy_propagate_data(blocks, entry, typemap): """get initial condition of copy propagation data flow for each block. 
""" # gen is all definite copies, extra_kill is additional ones that may hit # for example, parfors can have control flow so they may hit extra copies gen_copies, extra_kill = get_block_copies(blocks, typemap) # set of all program copies all_copies = set() for l, s in gen_copies.items(): all_copies |= gen_copies[l] kill_copies = {} for label, gen_set in gen_copies.items(): kill_copies[label] = set() for lhs, rhs in all_copies: if lhs in extra_kill[label] or rhs in extra_kill[label]: kill_copies[label].add((lhs, rhs)) # a copy is killed if it is not in this block and lhs or rhs are # assigned in this block assigned = {lhs for lhs, rhs in gen_set} if ((lhs, rhs) not in gen_set and (lhs in assigned or rhs in assigned)): kill_copies[label].add((lhs, rhs)) # set initial values # all copies are in for all blocks except entry in_copies = {l: all_copies.copy() for l in blocks.keys()} in_copies[entry] = set() out_copies = {} for label in blocks.keys(): # out_b = gen_b | (in_b - kill_b) out_copies[label] = (gen_copies[label] | (in_copies[label] - kill_copies[label])) out_copies[entry] = gen_copies[entry] return (gen_copies, all_copies, kill_copies, in_copies, out_copies) # other packages that define new nodes add calls to get copies in them # format: {type:function} copy_propagate_extensions = {} def get_block_copies(blocks, typemap): """get copies generated and killed by each block """ block_copies = {} extra_kill = {} for label, block in blocks.items(): assign_dict = {} extra_kill[label] = set() # assignments as dict to replace with latest value for stmt in block.body: for T, f in copy_propagate_extensions.items(): if isinstance(stmt, T): gen_set, kill_set = f(stmt, typemap) for lhs, rhs in gen_set: assign_dict[lhs] = rhs # if a=b is in dict and b is killed, a is also killed new_assign_dict = {} for l, r in assign_dict.items(): if l not in kill_set and r not in kill_set: new_assign_dict[l] = r if r in kill_set: extra_kill[label].add(l) assign_dict = new_assign_dict extra_kill[label] |= kill_set if isinstance(stmt, ir.Assign): lhs = stmt.target.name if isinstance(stmt.value, ir.Var): rhs = stmt.value.name # copy is valid only if same type (see # TestCFunc.test_locals) # Some transformations can produce assignments of the # form A = A. We don't put these mapping in the # copy propagation set because then you get cycles and # infinite loops in the replacement phase. 
if typemap[lhs] == typemap[rhs] and lhs != rhs: assign_dict[lhs] = rhs continue if isinstance(stmt.value, ir.Expr) and stmt.value.op == 'inplace_binop': in1_var = stmt.value.lhs.name in1_typ = typemap[in1_var] # inplace_binop assigns first operand if mutable if not (isinstance(in1_typ, types.Number) or in1_typ == types.string): extra_kill[label].add(in1_var) # if a=b is in dict and b is killed, a is also killed new_assign_dict = {} for l, r in assign_dict.items(): if l != in1_var and r != in1_var: new_assign_dict[l] = r if r == in1_var: extra_kill[label].add(l) assign_dict = new_assign_dict extra_kill[label].add(lhs) block_cps = set(assign_dict.items()) block_copies[label] = block_cps return block_copies, extra_kill # other packages that define new nodes add calls to apply copy propagate in them # format: {type:function} apply_copy_propagate_extensions = {} def apply_copy_propagate(blocks, in_copies, name_var_table, typemap, calltypes, save_copies=None): """apply copy propagation to IR: replace variables when copies available""" # save_copies keeps an approximation of the copies that were applied, so # that the variable names of removed user variables can be recovered to some # extent. if save_copies is None: save_copies = [] for label, block in blocks.items(): var_dict = {l: name_var_table[r] for l, r in in_copies[label]} # assignments as dict to replace with latest value for stmt in block.body: if type(stmt) in apply_copy_propagate_extensions: f = apply_copy_propagate_extensions[type(stmt)] f(stmt, var_dict, name_var_table, typemap, calltypes, save_copies) # only rhs of assignments should be replaced # e.g. if x=y is available, x in x=z shouldn't be replaced elif isinstance(stmt, ir.Assign): stmt.value = replace_vars_inner(stmt.value, var_dict) else: replace_vars_stmt(stmt, var_dict) fix_setitem_type(stmt, typemap, calltypes) for T, f in copy_propagate_extensions.items(): if isinstance(stmt, T): gen_set, kill_set = f(stmt, typemap) for lhs, rhs in gen_set: if rhs in name_var_table: var_dict[lhs] = name_var_table[rhs] for l, r in var_dict.copy().items(): if l in kill_set or r.name in kill_set: var_dict.pop(l) if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Var): lhs = stmt.target.name rhs = stmt.value.name # rhs could be replaced with lhs from previous copies if lhs != rhs: # copy is valid only if same type (see # TestCFunc.test_locals) if typemap[lhs] == typemap[rhs] and rhs in name_var_table: var_dict[lhs] = name_var_table[rhs] else: var_dict.pop(lhs, None) # a=b kills previous t=a lhs_kill = [] for k, v in var_dict.items(): if v.name == lhs: lhs_kill.append(k) for k in lhs_kill: var_dict.pop(k, None) if (isinstance(stmt, ir.Assign) and not isinstance(stmt.value, ir.Var)): lhs = stmt.target.name var_dict.pop(lhs, None) # previous t=a is killed if a is killed lhs_kill = [] for k, v in var_dict.items(): if v.name == lhs: lhs_kill.append(k) for k in lhs_kill: var_dict.pop(k, None) save_copies.extend(var_dict.items()) return save_copies def fix_setitem_type(stmt, typemap, calltypes): """Copy propagation can replace setitem target variable, which can be array with 'A' layout. 
The replaced variable can be 'C' or 'F', so we update setitem call type reflect this (from matrix power test) """ if not isinstance(stmt, (ir.SetItem, ir.StaticSetItem)): return t_typ = typemap[stmt.target.name] s_typ = calltypes[stmt].args[0] # test_optional t_typ can be Optional with array if not isinstance( s_typ, types.npytypes.Array) or not isinstance( t_typ, types.npytypes.Array): return if s_typ.layout == 'A' and t_typ.layout != 'A': new_s_typ = s_typ.copy(layout=t_typ.layout) calltypes[stmt].args = ( new_s_typ, calltypes[stmt].args[1], calltypes[stmt].args[2]) return def dprint_func_ir(func_ir, title, blocks=None): """Debug print function IR, with an optional blocks argument that may differ from the IR's original blocks. """ if config.DEBUG_ARRAY_OPT >= 1: ir_blocks = func_ir.blocks func_ir.blocks = ir_blocks if blocks == None else blocks name = func_ir.func_id.func_qualname print(("IR %s: %s" % (title, name)).center(80, "-")) func_ir.dump() print("-" * 40) func_ir.blocks = ir_blocks def find_topo_order(blocks, cfg = None): """find topological order of blocks such that true branches are visited first (e.g. for_break test in test_dataflow). """ if cfg is None: cfg = compute_cfg_from_blocks(blocks) post_order = [] seen = set() def _dfs_rec(node): if node not in seen: seen.add(node) succs = cfg._succs[node] last_inst = blocks[node].body[-1] if isinstance(last_inst, ir.Branch): succs = [last_inst.falsebr, last_inst.truebr] for dest in succs: if (node, dest) not in cfg._back_edges: _dfs_rec(dest) post_order.append(node) _dfs_rec(cfg.entry_point()) post_order.reverse() return post_order # other packages that define new nodes add calls to get call table # format: {type:function} call_table_extensions = {} def get_call_table(blocks, call_table=None, reverse_call_table=None, topological_ordering=True): """returns a dictionary of call variables and their references. 
""" # call_table example: c = np.zeros becomes c:["zeroes", np] # reverse_call_table example: c = np.zeros becomes np_var:c if call_table is None: call_table = {} if reverse_call_table is None: reverse_call_table = {} if topological_ordering: order = find_topo_order(blocks) else: order = list(blocks.keys()) for label in reversed(order): for inst in reversed(blocks[label].body): if isinstance(inst, ir.Assign): lhs = inst.target.name rhs = inst.value if isinstance(rhs, ir.Expr) and rhs.op == 'call': call_table[rhs.func.name] = [] if isinstance(rhs, ir.Expr) and rhs.op == 'getattr': if lhs in call_table: call_table[lhs].append(rhs.attr) reverse_call_table[rhs.value.name] = lhs if lhs in reverse_call_table: call_var = reverse_call_table[lhs] call_table[call_var].append(rhs.attr) reverse_call_table[rhs.value.name] = call_var if isinstance(rhs, ir.Global): if lhs in call_table: call_table[lhs].append(rhs.value) if lhs in reverse_call_table: call_var = reverse_call_table[lhs] call_table[call_var].append(rhs.value) if isinstance(rhs, ir.FreeVar): if lhs in call_table: call_table[lhs].append(rhs.value) if lhs in reverse_call_table: call_var = reverse_call_table[lhs] call_table[call_var].append(rhs.value) if isinstance(rhs, ir.Var): if lhs in call_table: call_table[lhs].append(rhs.name) reverse_call_table[rhs.name] = lhs if lhs in reverse_call_table: call_var = reverse_call_table[lhs] call_table[call_var].append(rhs.name) for T, f in call_table_extensions.items(): if isinstance(inst, T): f(inst, call_table, reverse_call_table) return call_table, reverse_call_table # other packages that define new nodes add calls to get tuple table # format: {type:function} tuple_table_extensions = {} def get_tuple_table(blocks, tuple_table=None): """returns a dictionary of tuple variables and their values. """ if tuple_table is None: tuple_table = {} for block in blocks.values(): for inst in block.body: if isinstance(inst, ir.Assign): lhs = inst.target.name rhs = inst.value if isinstance(rhs, ir.Expr) and rhs.op == 'build_tuple': tuple_table[lhs] = rhs.items if isinstance(rhs, ir.Const) and isinstance(rhs.value, tuple): tuple_table[lhs] = rhs.value for T, f in tuple_table_extensions.items(): if isinstance(inst, T): f(inst, tuple_table) return tuple_table def get_stmt_writes(stmt): writes = set() if isinstance(stmt, (ir.Assign, ir.SetItem, ir.StaticSetItem)): writes.add(stmt.target.name) return writes def rename_labels(blocks): """rename labels of function body blocks according to topological sort. The set of labels of these blocks will remain unchanged. 
""" topo_order = find_topo_order(blocks) # make a block with return last if available (just for readability) return_label = -1 for l, b in blocks.items(): if isinstance(b.body[-1], ir.Return): return_label = l # some cases like generators can have no return blocks if return_label != -1: topo_order.remove(return_label) topo_order.append(return_label) label_map = {} all_labels = sorted(topo_order, reverse=True) for label in topo_order: label_map[label] = all_labels.pop() # update target labels in jumps/branches for b in blocks.values(): term = b.terminator if isinstance(term, ir.Jump): term.target = label_map[term.target] if isinstance(term, ir.Branch): term.truebr = label_map[term.truebr] term.falsebr = label_map[term.falsebr] # update blocks dictionary keys new_blocks = {} for k, b in blocks.items(): new_label = label_map[k] new_blocks[new_label] = b return new_blocks def simplify_CFG(blocks): """transform chains of blocks that have no loop into a single block""" # first, inline single-branch-block to its predecessors cfg = compute_cfg_from_blocks(blocks) def find_single_branch(label): block = blocks[label] return len(block.body) == 1 and isinstance(block.body[0], ir.Branch) single_branch_blocks = list(filter(find_single_branch, blocks.keys())) marked_for_del = set() for label in single_branch_blocks: inst = blocks[label].body[0] predecessors = cfg.predecessors(label) delete_block = True for (p, q) in predecessors: block = blocks[p] if isinstance(block.body[-1], ir.Jump): block.body[-1] = copy.copy(inst) else: delete_block = False if delete_block: marked_for_del.add(label) # Delete marked labels for label in marked_for_del: del blocks[label] merge_adjacent_blocks(blocks) return rename_labels(blocks) arr_math = ['min', 'max', 'sum', 'prod', 'mean', 'var', 'std', 'cumsum', 'cumprod', 'argmax', 'argmin', 'argsort', 'nonzero', 'ravel'] def canonicalize_array_math(func_ir, typemap, calltypes, typingctx): # save array arg to call # call_varname -> array blocks = func_ir.blocks saved_arr_arg = {} topo_order = find_topo_order(blocks) for label in topo_order: block = blocks[label] new_body = [] for stmt in block.body: if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr): lhs = stmt.target.name rhs = stmt.value # replace A.func with np.func, and save A in saved_arr_arg if (rhs.op == 'getattr' and rhs.attr in arr_math and isinstance( typemap[rhs.value.name], types.npytypes.Array)): rhs = stmt.value arr = rhs.value saved_arr_arg[lhs] = arr scope = arr.scope loc = arr.loc # g_np_var = Global(numpy) g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc) typemap[g_np_var.name] = types.misc.Module(numpy) g_np = ir.Global('np', numpy, loc) g_np_assign = ir.Assign(g_np, g_np_var, loc) rhs.value = g_np_var new_body.append(g_np_assign) func_ir._definitions[g_np_var.name] = [g_np] # update func var type func = getattr(numpy, rhs.attr) func_typ = get_np_ufunc_typ(func) typemap.pop(lhs) typemap[lhs] = func_typ if rhs.op == 'call' and rhs.func.name in saved_arr_arg: # add array as first arg arr = saved_arr_arg[rhs.func.name] # update call type signature to include array arg old_sig = calltypes.pop(rhs) # argsort requires kws for typing so sig.args can't be used # reusing sig.args since some types become Const in sig argtyps = old_sig.args[:len(rhs.args)] kwtyps = {name: typemap[v.name] for name, v in rhs.kws} calltypes[rhs] = typemap[rhs.func.name].get_call_type( typingctx, [typemap[arr.name]] + list(argtyps), kwtyps) rhs.args = [arr] + rhs.args new_body.append(stmt) block.body = new_body return 
# format: {type:function} array_accesses_extensions = {} def get_array_accesses(blocks, accesses=None): """returns a set of arrays accessed and their indices. """ if accesses is None: accesses = set() for block in blocks.values(): for inst in block.body: if isinstance(inst, ir.SetItem): accesses.add((inst.target.name, inst.index.name)) if isinstance(inst, ir.StaticSetItem): accesses.add((inst.target.name, inst.index_var.name)) if isinstance(inst, ir.Assign): lhs = inst.target.name rhs = inst.value if isinstance(rhs, ir.Expr) and rhs.op == 'getitem': accesses.add((rhs.value.name, rhs.index.name)) if isinstance(rhs, ir.Expr) and rhs.op == 'static_getitem': index = rhs.index # slice is unhashable, so just keep the variable if index is None or is_slice_index(index): index = rhs.index_var.name accesses.add((rhs.value.name, index)) for T, f in array_accesses_extensions.items(): if isinstance(inst, T): f(inst, accesses) return accesses def is_slice_index(index): """see if index is a slice index or has slice in it""" if isinstance(index, slice): return True if isinstance(index, tuple): for i in index: if isinstance(i, slice): return True return False def merge_adjacent_blocks(blocks): cfg = compute_cfg_from_blocks(blocks) # merge adjacent blocks removed = set() for label in list(blocks.keys()): if label in removed: continue block = blocks[label] succs = list(cfg.successors(label)) while True: if len(succs) != 1: break next_label = succs[0][0] if next_label in removed: break preds = list(cfg.predecessors(next_label)) succs = list(cfg.successors(next_label)) if len(preds) != 1 or preds[0][0] != label: break next_block = blocks[next_label] # XXX: commented out since scope objects are not consistent # throughout the compiler. for example, pieces of code are compiled # and inlined on the fly without proper scope merge. 
# if block.scope != next_block.scope: # break # merge block.body.pop() # remove Jump block.body += next_block.body del blocks[next_label] removed.add(next_label) label = next_label def restore_copy_var_names(blocks, save_copies, typemap): """ restores variable names of user variables after applying copy propagation """ if not save_copies: return {} rename_dict = {} var_rename_map = {} for (a, b) in save_copies: # a is string name, b is variable # if a is user variable and b is generated temporary and b is not # already renamed if (not a.startswith('$') and b.name.startswith('$') and b.name not in rename_dict): new_name = mk_unique_var('${}'.format(a)) rename_dict[b.name] = new_name var_rename_map[new_name] = a typ = typemap.pop(b.name) typemap[new_name] = typ replace_var_names(blocks, rename_dict) return var_rename_map def simplify(func_ir, typemap, calltypes, metadata): # get copies in to blocks and out from blocks in_cps, _ = copy_propagate(func_ir.blocks, typemap) # table mapping variable names to ir.Var objects to help replacement name_var_table = get_name_var_table(func_ir.blocks) save_copies = apply_copy_propagate( func_ir.blocks, in_cps, name_var_table, typemap, calltypes) var_rename_map = restore_copy_var_names(func_ir.blocks, save_copies, typemap) if "var_rename_map" not in metadata: metadata["var_rename_map"] = {} metadata["var_rename_map"].update(var_rename_map) # remove dead code to enable fusion if config.DEBUG_ARRAY_OPT >= 1: dprint_func_ir(func_ir, "after copy prop") remove_dead(func_ir.blocks, func_ir.arg_names, func_ir, typemap) func_ir.blocks = simplify_CFG(func_ir.blocks) if config.DEBUG_ARRAY_OPT >= 1: dprint_func_ir(func_ir, "after simplify") class GuardException(Exception): pass def require(cond): """ Raise GuardException if the given condition is False. """ if not cond: raise GuardException def guard(func, *args, **kwargs): """ Run a function with a given set of arguments, and guard against any GuardException raised by the function by returning None, or the expected return results if no such exception was raised. """ try: return func(*args, **kwargs) except GuardException: return None def get_definition(func_ir, name, **kwargs): """ Same as func_ir.get_definition(name), but raises GuardException if a KeyError is caught. """ try: return func_ir.get_definition(name, **kwargs) except KeyError: raise GuardException def build_definitions(blocks, definitions=None): """Build the definitions table of the given blocks by scanning through all blocks and instructions, useful when the definitions table is out-of-sync. Will return a new definition table if one is not passed. """ if definitions is None: definitions = collections.defaultdict(list) for block in blocks.values(): for inst in block.body: if isinstance(inst, ir.Assign): name = inst.target.name definition = definitions.get(name, []) if definition == []: definitions[name] = definition definition.append(inst.value) if type(inst) in build_defs_extensions: f = build_defs_extensions[type(inst)] f(inst, definitions) return definitions build_defs_extensions = {} def find_callname(func_ir, expr, typemap=None, definition_finder=get_definition): """Try to find a call expression's function and module names and return them as strings for unbounded calls. If the call is a bounded call, return the self object instead of the module name. Raise GuardException if failed. Providing typemap can make the call matching more accurate in corner cases such as a bounded call on an object which is inside another object. 
""" require(isinstance(expr, ir.Expr) and expr.op == 'call') callee = expr.func callee_def = definition_finder(func_ir, callee) attrs = [] obj = None while True: if isinstance(callee_def, (ir.Global, ir.FreeVar)): # require(callee_def.value == numpy) # these checks support modules like numpy, numpy.random as well as # calls like len() and intrinsics like assertEquiv keys = ['name', '_name', '__name__'] value = None for key in keys: if hasattr(callee_def.value, key): value = getattr(callee_def.value, key) break if not value or not isinstance(value, str): raise GuardException attrs.append(value) def_val = callee_def.value # get the underlying definition of Intrinsic object to be able to # find the module effectively. # Otherwise, it will return numba.extending if isinstance(def_val, _Intrinsic): def_val = def_val._defn if hasattr(def_val, '__module__'): mod_name = def_val.__module__ # The reason for first checking if the function is in NumPy's # top level name space by module is that some functions are # deprecated in NumPy but the functions' names are aliased with # other common names. This prevents deprecation warnings on # e.g. getattr(numpy, 'bool') were a bool the target. # For context see #6175, impacts NumPy>=1.20. mod_not_none = mod_name is not None numpy_toplevel = (mod_not_none and (mod_name == 'numpy' or mod_name.startswith('numpy.'))) # it might be a numpy function imported directly if (numpy_toplevel and hasattr(numpy, value) and def_val == getattr(numpy, value)): attrs += ['numpy'] # it might be a np.random function imported directly elif (hasattr(numpy.random, value) and def_val == getattr(numpy.random, value)): attrs += ['random', 'numpy'] elif mod_not_none: attrs.append(mod_name) else: class_name = def_val.__class__.__name__ if class_name == 'builtin_function_or_method': class_name = 'builtin' if class_name != 'module': attrs.append(class_name) break elif isinstance(callee_def, ir.Expr) and callee_def.op == 'getattr': obj = callee_def.value attrs.append(callee_def.attr) if typemap and obj.name in typemap: typ = typemap[obj.name] if not isinstance(typ, types.Module): return attrs[0], obj callee_def = definition_finder(func_ir, obj) else: # obj.func calls where obj is not np array if obj is not None: return '.'.join(reversed(attrs)), obj raise GuardException return attrs[0], '.'.join(reversed(attrs[1:])) def find_build_sequence(func_ir, var): """Check if a variable is constructed via build_tuple or build_list or build_set, and return the sequence and the operator, or raise GuardException otherwise. Note: only build_tuple is immutable, so use with care. """ require(isinstance(var, ir.Var)) var_def = get_definition(func_ir, var) require(isinstance(var_def, ir.Expr)) build_ops = ['build_tuple', 'build_list', 'build_set'] require(var_def.op in build_ops) return var_def.items, var_def.op def find_const(func_ir, var): """Check if a variable is defined as constant, and return the constant value, or raise GuardException otherwise. """ require(isinstance(var, ir.Var)) var_def = get_definition(func_ir, var) require(isinstance(var_def, (ir.Const, ir.Global, ir.FreeVar))) return var_def.value def compile_to_numba_ir(mk_func, glbls, typingctx=None, targetctx=None, arg_typs=None, typemap=None, calltypes=None): """ Compile a function or a make_function node to Numba IR. Rename variables and labels to avoid conflict if inlined somewhere else. Perform type inference if typingctx and other typing inputs are available and update typemap and calltypes. 
""" from numba.core import typed_passes # mk_func can be actual function or make_function node, or a njit function if hasattr(mk_func, 'code'): code = mk_func.code elif hasattr(mk_func, '__code__'): code = mk_func.__code__ else: raise NotImplementedError("function type not recognized {}".format(mk_func)) f_ir = get_ir_of_code(glbls, code) remove_dels(f_ir.blocks) # relabel by adding an offset f_ir.blocks = add_offset_to_labels(f_ir.blocks, _the_max_label.next()) max_label = max(f_ir.blocks.keys()) _the_max_label.update(max_label) # rename all variables to avoid conflict var_table = get_name_var_table(f_ir.blocks) new_var_dict = {} for name, var in var_table.items(): new_var_dict[name] = mk_unique_var(name) replace_var_names(f_ir.blocks, new_var_dict) # perform type inference if typingctx is available and update type # data structures typemap and calltypes if typingctx: f_typemap, f_return_type, f_calltypes, _ = typed_passes.type_inference_stage( typingctx, targetctx, f_ir, arg_typs, None) # remove argument entries like arg.a from typemap arg_names = [vname for vname in f_typemap if vname.startswith("arg.")] for a in arg_names: f_typemap.pop(a) typemap.update(f_typemap) calltypes.update(f_calltypes) return f_ir def _create_function_from_code_obj(fcode, func_env, func_arg, func_clo, glbls): """ Creates a function from a code object. Args: * fcode - the code object * func_env - string for the freevar placeholders * func_arg - string for the function args (e.g. "a, b, c, d=None") * func_clo - string for the closure args * glbls - the function globals """ sanitized_co_name = fcode.co_name.replace('<', '_').replace('>', '_') func_text = (f"def closure():\n{func_env}\n" f"\tdef {sanitized_co_name}({func_arg}):\n" f"\t\treturn ({func_clo})\n" f"\treturn {sanitized_co_name}") loc = {} exec(func_text, glbls, loc) f = loc['closure']() # replace the code body f.__code__ = fcode f.__name__ = fcode.co_name return f def get_ir_of_code(glbls, fcode): """ Compile a code object to get its IR, ir.Del nodes are emitted """ nfree = len(fcode.co_freevars) func_env = "\n".join(["\tc_%d = None" % i for i in range(nfree)]) func_clo = ",".join(["c_%d" % i for i in range(nfree)]) func_arg = ",".join(["x_%d" % i for i in range(fcode.co_argcount)]) f = _create_function_from_code_obj(fcode, func_env, func_arg, func_clo, glbls) from numba.core import compiler ir = compiler.run_frontend(f) # we need to run the before inference rewrite pass to normalize the IR # XXX: check rewrite pass flag? # for example, Raise nodes need to become StaticRaise before type inference class DummyPipeline(object): def __init__(self, f_ir): self.state = compiler.StateDict() self.state.typingctx = None self.state.targetctx = None self.state.args = None self.state.func_ir = f_ir self.state.typemap = None self.state.return_type = None self.state.calltypes = None state = DummyPipeline(ir).state rewrites.rewrite_registry.apply('before-inference', state) # call inline pass to handle cases like stencils and comprehensions swapped = {} # TODO: get this from diagnostics store import numba.core.inline_closurecall inline_pass = numba.core.inline_closurecall.InlineClosureCallPass( ir, numba.core.cpu.ParallelOptions(False), swapped) inline_pass.run() # TODO: DO NOT ADD MORE THINGS HERE! # If adding more things here is being contemplated, it really is time to # retire this function and work on getting the InlineWorker class from # numba.core.inline_closurecall into sufficient shape as a replacement. 
# The issue with `get_ir_of_code` is that it doesn't run a full compilation # pipeline and as a result various additional things keep needing to be # added to create valid IR. # rebuild IR in SSA form from numba.core.untyped_passes import ReconstructSSA from numba.core.typed_passes import PreLowerStripPhis reconstruct_ssa = ReconstructSSA() phistrip = PreLowerStripPhis() reconstruct_ssa.run_pass(state) phistrip.run_pass(state) post_proc = postproc.PostProcessor(ir) post_proc.run(True) return ir def replace_arg_nodes(block, args): """ Replace ir.Arg(...) with variables """ for stmt in block.body: if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Arg): idx = stmt.value.index assert idx < len(args) stmt.value = args[idx] return def replace_returns(blocks, target, return_label): """ Replace return statements by assigning directly to target, and adding a jump. """ for block in blocks.values(): # some blocks may be empty during transformations if not block.body: continue stmt = block.terminator if isinstance(stmt, ir.Return): block.body.pop() # remove return cast_stmt = block.body.pop() assert (isinstance(cast_stmt, ir.Assign) and isinstance(cast_stmt.value, ir.Expr) and cast_stmt.value.op == 'cast'), "invalid return cast" block.body.append(ir.Assign(cast_stmt.value.value, target, stmt.loc)) block.body.append(ir.Jump(return_label, stmt.loc)) def gen_np_call(func_as_str, func, lhs, args, typingctx, typemap, calltypes): scope = args[0].scope loc = args[0].loc # g_np_var = Global(numpy) g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc) typemap[g_np_var.name] = types.misc.Module(numpy) g_np = ir.Global('np', numpy, loc) g_np_assign = ir.Assign(g_np, g_np_var, loc) # attr call: _attr = getattr(g_np_var, func_as_str) np_attr_call = ir.Expr.getattr(g_np_var, func_as_str, loc) attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc) func_var_typ = get_np_ufunc_typ(func) typemap[attr_var.name] = func_var_typ attr_assign = ir.Assign(np_attr_call, attr_var, loc) # np call: lhs = np_attr(*args) np_call = ir.Expr.call(attr_var, args, (), loc) arg_types = [typemap[x.name] for x in args] func_typ = func_var_typ.get_call_type(typingctx, arg_types, {}) calltypes[np_call] = func_typ np_assign = ir.Assign(np_call, lhs, loc) return [g_np_assign, attr_assign, np_assign] def dump_blocks(blocks): for label, block in blocks.items(): print(label, ":") for stmt in block.body: print(" ", stmt) def is_operator_or_getitem(expr): """true if expr is unary or binary operator or getitem""" return (isinstance(expr, ir.Expr) and getattr(expr, 'op', False) and expr.op in ['unary', 'binop', 'inplace_binop', 'getitem', 'static_getitem']) def is_get_setitem(stmt): """stmt is getitem assignment or setitem (and static cases)""" return is_getitem(stmt) or is_setitem(stmt) def is_getitem(stmt): """true if stmt is a getitem or static_getitem assignment""" return (isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr) and stmt.value.op in ['getitem', 'static_getitem']) def is_setitem(stmt): """true if stmt is a SetItem or StaticSetItem node""" return isinstance(stmt, (ir.SetItem, ir.StaticSetItem)) def index_var_of_get_setitem(stmt): """get index variable for getitem/setitem nodes (and static cases)""" if is_getitem(stmt): if stmt.value.op == 'getitem': return stmt.value.index else: return stmt.value.index_var if is_setitem(stmt): if isinstance(stmt, ir.SetItem): return stmt.index else: return stmt.index_var return None def set_index_var_of_get_setitem(stmt, new_index): if is_getitem(stmt): if stmt.value.op == 
'getitem': stmt.value.index = new_index else: stmt.value.index_var = new_index elif is_setitem(stmt): if isinstance(stmt, ir.SetItem): stmt.index = new_index else: stmt.index_var = new_index else: raise ValueError("getitem or setitem node expected but received {}".format( stmt)) def is_namedtuple_class(c): """check if c is a namedtuple class""" if not isinstance(c, type): return False # should have only tuple as superclass bases = c.__bases__ if len(bases) != 1 or bases[0] != tuple: return False # should have _make method if not hasattr(c, '_make'): return False # should have _fields that is all strings fields = getattr(c, '_fields', None) if not isinstance(fields, tuple): return False return all(isinstance(f, str) for f in fields) def fill_block_with_call(newblock, callee, label_next, inputs, outputs): """Fill *newblock* to call *callee* with arguments listed in *inputs*. The returned values are unwrapped into variables in *outputs*. The block would then jump to *label_next*. """ scope = newblock.scope loc = newblock.loc fn = ir.Const(value=callee, loc=loc) fnvar = scope.make_temp(loc=loc) newblock.append(ir.Assign(target=fnvar, value=fn, loc=loc)) # call args = [scope.get_exact(name) for name in inputs] callexpr = ir.Expr.call(func=fnvar, args=args, kws=(), loc=loc) callres = scope.make_temp(loc=loc) newblock.append(ir.Assign(target=callres, value=callexpr, loc=loc)) # unpack return value for i, out in enumerate(outputs): target = scope.get_exact(out) getitem = ir.Expr.static_getitem(value=callres, index=i, index_var=None, loc=loc) newblock.append(ir.Assign(target=target, value=getitem, loc=loc)) # jump to next block newblock.append(ir.Jump(target=label_next, loc=loc)) return newblock def fill_callee_prologue(block, inputs, label_next): """ Fill a new block *block* that unwraps arguments using names in *inputs* and then jumps to *label_next*. Expected to be used with *fill_block_with_call()* """ scope = block.scope loc = block.loc # load args args = [ir.Arg(name=k, index=i, loc=loc) for i, k in enumerate(inputs)] for aname, aval in zip(inputs, args): tmp = ir.Var(scope=scope, name=aname, loc=loc) block.append(ir.Assign(target=tmp, value=aval, loc=loc)) # jump to loop entry block.append(ir.Jump(target=label_next, loc=loc)) return block def fill_callee_epilogue(block, outputs): """ Fill a new block *block* to prepare the return values. This block is the last block of the function. Expected to be used with *fill_block_with_call()* """ scope = block.scope loc = block.loc # prepare tuples to return vals = [scope.get_exact(name=name) for name in outputs] tupexpr = ir.Expr.build_tuple(items=vals, loc=loc) tup = scope.make_temp(loc=loc) block.append(ir.Assign(target=tup, value=tupexpr, loc=loc)) # return block.append(ir.Return(value=tup, loc=loc)) return block def find_global_value(func_ir, var): """Check if a variable is a global value, and return the value, or raise GuardException otherwise. """ dfn = get_definition(func_ir, var) if isinstance(dfn, ir.Global): return dfn.value if isinstance(dfn, ir.Expr) and dfn.op == 'getattr': prev_val = find_global_value(func_ir, dfn.value) try: val = getattr(prev_val, dfn.attr) return val except AttributeError: raise GuardException raise GuardException def raise_on_unsupported_feature(func_ir, typemap): """ Helper function to walk IR and raise if it finds op codes that are unsupported. Could be extended to cover IR sequences as well as op codes. 
Intended use is to call it as a pipeline stage just prior to lowering to prevent LoweringErrors for known unsupported features. """ gdb_calls = [] # accumulate calls to gdb/gdb_init # issue 2195: check for excessively large tuples for arg_name in func_ir.arg_names: if arg_name in typemap and \ isinstance(typemap[arg_name], types.containers.UniTuple) and \ typemap[arg_name].count > 1000: # Raise an exception when len(tuple) > 1000. The choice of this number (1000) # was entirely arbitrary msg = ("Tuple '{}' length must be smaller than 1000.\n" "Large tuples lead to the generation of a prohibitively large " "LLVM IR which causes excessive memory pressure " "and large compile times.\n" "As an alternative, the use of a 'list' is recommended in " "place of a 'tuple' as lists do not suffer from this problem.".format(arg_name)) raise UnsupportedError(msg, func_ir.loc) for blk in func_ir.blocks.values(): for stmt in blk.find_insts(ir.Assign): # This raises on finding `make_function` if isinstance(stmt.value, ir.Expr): if stmt.value.op == 'make_function': val = stmt.value # See if the construct name can be refined code = getattr(val, 'code', None) if code is not None: # check if this is a closure, the co_name will # be the captured function name which is not # useful so be explicit if getattr(val, 'closure', None) is not None: use = '<creating a function from a closure>' expr = '' else: use = code.co_name expr = '(%s) ' % use else: use = '<could not ascertain use case>' expr = '' msg = ("Numba encountered the use of a language " "feature it does not support in this context: " "%s (op code: make_function not supported). If " "the feature is explicitly supported it is " "likely that the result of the expression %s" "is being used in an unsupported manner.") % \ (use, expr) raise UnsupportedError(msg, stmt.value.loc) # this checks for gdb initialization calls, only one is permitted if isinstance(stmt.value, (ir.Global, ir.FreeVar)): val = stmt.value val = getattr(val, 'value', None) if val is None: continue # check global function found = False if isinstance(val, pytypes.FunctionType): found = val in {numba.gdb, numba.gdb_init} if not found: # freevar bind to intrinsic found = getattr(val, '_name', "") == "gdb_internal" if found: gdb_calls.append(stmt.loc) # report last seen location # this checks that np.<dtype> was called if view is called if isinstance(stmt.value, ir.Expr): if stmt.value.op == 'getattr' and stmt.value.attr == 'view': var = stmt.value.value.name if isinstance(typemap[var], types.Array): continue df = func_ir.get_definition(var) cn = guard(find_callname, func_ir, df) if cn and cn[1] == 'numpy': ty = getattr(numpy, cn[0]) if (numpy.issubdtype(ty, numpy.integer) or numpy.issubdtype(ty, numpy.floating)): continue vardescr = '' if var.startswith('$') else "'{}' ".format(var) raise TypingError( "'view' can only be called on NumPy dtypes, " "try wrapping the variable {}with 'np.<dtype>()'". format(vardescr), loc=stmt.loc) # checks for globals that are also reflected if isinstance(stmt.value, ir.Global): ty = typemap[stmt.target.name] msg = ("The use of a %s type, assigned to variable '%s' in " "globals, is not supported as globals are considered " "compile-time constants and there is no known way to " "compile a %s type as a constant.") if (getattr(ty, 'reflected', False) or isinstance(ty, (types.DictType, types.ListType))): raise TypingError(msg % (ty, stmt.value.name, ty), loc=stmt.loc) # checks for generator expressions (yield in use when func_ir has # not been identified as a generator). 
if isinstance(stmt.value, ir.Yield) and not func_ir.is_generator: msg = "The use of generator expressions is unsupported." raise UnsupportedError(msg, loc=stmt.loc) # There is more than one call to function gdb/gdb_init if len(gdb_calls) > 1: msg = ("Calling either numba.gdb() or numba.gdb_init() more than once " "in a function is unsupported (strange things happen!), use " "numba.gdb_breakpoint() to create additional breakpoints " "instead.\n\nRelevant documentation is available here:\n" "https://numba.readthedocs.io/en/stable/user/troubleshoot.html" "#using-numba-s-direct-gdb-bindings-in-nopython-mode\n\n" "Conflicting calls found at:\n %s") buf = '\n'.join([x.strformat() for x in gdb_calls]) raise UnsupportedError(msg % buf) def warn_deprecated(func_ir, typemap): # first pass, just walk the type map for name, ty in typemap.items(): # the Type Metaclass has a reflected member if ty.reflected: # if it's an arg, report the function call if name.startswith('arg.'): loc = func_ir.loc arg = name.split('.')[1] fname = func_ir.func_id.func_qualname tyname = 'list' if isinstance(ty, types.List) else 'set' url = ("https://numba.readthedocs.io/en/stable/reference/" "deprecation.html#deprecation-of-reflection-for-list-and" "-set-types") msg = ("\nEncountered the use of a type that is scheduled for " "deprecation: type 'reflected %s' found for argument " "'%s' of function '%s'.\n\nFor more information visit " "%s" % (tyname, arg, fname, url)) warnings.warn(NumbaPendingDeprecationWarning(msg, loc=loc)) def resolve_func_from_module(func_ir, node): """ This returns the python function that is being getattr'd from a module in some IR, it resolves import chains/submodules recursively. Should it not be possible to find the python function being called, None will be returned. func_ir - the FunctionIR object node - the IR node from which to start resolving (should be a `getattr`). """ getattr_chain = [] def resolve_mod(mod): if getattr(mod, 'op', False) == 'getattr': getattr_chain.insert(0, mod.attr) try: mod = func_ir.get_definition(mod.value) except KeyError: # multiple definitions return None return resolve_mod(mod) elif isinstance(mod, (ir.Global, ir.FreeVar)): if isinstance(mod.value, pytypes.ModuleType): return mod return None mod = resolve_mod(node) if mod is not None: defn = mod.value for x in getattr_chain: defn = getattr(defn, x, False) if not defn: break else: return defn else: return None def enforce_no_dels(func_ir): """ Enforce there being no ir.Del nodes in the IR. """ for blk in func_ir.blocks.values(): dels = [x for x in blk.find_insts(ir.Del)] if dels: msg = "Illegal IR, del found at: %s" % dels[0] raise CompilerError(msg, loc=dels[0].loc) def enforce_no_phis(func_ir): """ Enforce there being no ir.Expr.phi nodes in the IR. """ for blk in func_ir.blocks.values(): phis = [x for x in blk.find_exprs(op='phi')] if phis: msg = "Illegal IR, phi found at: %s" % phis[0] raise CompilerError(msg, loc=phis[0].loc) def legalize_single_scope(blocks): """Check the given mapping of ir.Block for containing a single scope. 
""" return len({blk.scope for blk in blocks.values()}) == 1 def check_and_legalize_ir(func_ir): """ This checks that the IR presented is legal """ enforce_no_phis(func_ir) enforce_no_dels(func_ir) # postprocess and emit ir.Dels post_proc = postproc.PostProcessor(func_ir) post_proc.run(True, extend_lifetimes=config.EXTEND_VARIABLE_LIFETIMES) def convert_code_obj_to_function(code_obj, caller_ir): """ Converts a code object from a `make_function.code` attr in the IR into a python function, caller_ir is the FunctionIR of the caller and is used for the resolution of freevars. """ fcode = code_obj.code nfree = len(fcode.co_freevars) # try and resolve freevars if they are consts in the caller's IR # these can be baked into the new function freevars = [] for x in fcode.co_freevars: # not using guard here to differentiate between multiple definition and # non-const variable try: freevar_def = caller_ir.get_definition(x) except KeyError: msg = ("Cannot capture a constant value for variable '%s' as there " "are multiple definitions present." % x) raise TypingError(msg, loc=code_obj.loc) if isinstance(freevar_def, ir.Const): freevars.append(freevar_def.value) else: msg = ("Cannot capture the non-constant value associated with " "variable '%s' in a function that will escape." % x) raise TypingError(msg, loc=code_obj.loc) func_env = "\n".join(["\tc_%d = %s" % (i, x) for i, x in enumerate(freevars)]) func_clo = ",".join(["c_%d" % i for i in range(nfree)]) co_varnames = list(fcode.co_varnames) # This is horrible. The code object knows about the number of args present # it also knows the name of the args but these are bundled in with other # vars in `co_varnames`. The make_function IR node knows what the defaults # are, they are defined in the IR as consts. The following finds the total # number of args (args + kwargs with defaults), finds the default values # and infers the number of "kwargs with defaults" from this and then infers # the number of actual arguments from that. n_kwargs = 0 n_allargs = fcode.co_argcount kwarg_defaults = caller_ir.get_definition(code_obj.defaults) if kwarg_defaults is not None: if isinstance(kwarg_defaults, tuple): d = [caller_ir.get_definition(x).value for x in kwarg_defaults] kwarg_defaults_tup = tuple(d) else: d = [caller_ir.get_definition(x).value for x in kwarg_defaults.items] kwarg_defaults_tup = tuple(d) n_kwargs = len(kwarg_defaults_tup) nargs = n_allargs - n_kwargs func_arg = ",".join(["%s" % (co_varnames[i]) for i in range(nargs)]) if n_kwargs: kw_const = ["%s = %s" % (co_varnames[i + nargs], kwarg_defaults_tup[i]) for i in range(n_kwargs)] func_arg += ", " func_arg += ", ".join(kw_const) # globals are the same as those in the caller glbls = caller_ir.func_id.func.__globals__ # create the function and return it return _create_function_from_code_obj(fcode, func_env, func_arg, func_clo, glbls) def fixup_var_define_in_scope(blocks): """Fixes the mapping of ir.Block to ensure all referenced ir.Var are defined in every scope used by the function. Such that looking up a variable from any scope in this function will not fail. Note: This is a workaround. Ideally, all the blocks should refer to the same ir.Scope, but that property is not maintained by all the passes. """ # Scan for all used variables used_var = {} for blk in blocks.values(): scope = blk.scope for inst in blk.body: for var in inst.list_vars(): used_var[var] = inst # Note: not all blocks share a single scope even though they should. # Ensure the scope of each block defines all used variables. 
for blk in blocks.values(): scope = blk.scope for var, inst in used_var.items(): # add this variable if it's not in scope if var.name not in scope.localvars: # Note: using an internal method to reuse the same Var object scope.localvars.define(var.name, var) def transfer_scope(block, scope): """Transfer the ir.Block to use the given ir.Scope. """ old_scope = block.scope if old_scope is scope: # bypass if the block is already using the given scope return block # Ensure variables are defined in the new scope for var in old_scope.localvars._con.values(): if var.name not in scope.localvars: scope.localvars.define(var.name, var) # replace scope block.scope = scope return block def is_setup_with(stmt): return isinstance(stmt, ir.EnterWith) def is_terminator(stmt): return isinstance(stmt, ir.Terminator) def is_raise(stmt): return isinstance(stmt, ir.Raise) def is_return(stmt): return isinstance(stmt, ir.Return) def is_pop_block(stmt): return isinstance(stmt, ir.PopBlock) numba-0.55.1/numba/core/itanium_mangler.py000664 000000 000000 00000015567 14174536160 020470 0ustar00rootroot000000 000000 """ Itanium CXX ABI Mangler Reference: http://mentorembedded.github.io/cxx-abi/abi.html The basics of the mangling scheme. We are hijacking the CXX mangling scheme for our use. We map Python modules into CXX namespace. A `module1.submodule2.foo` is mapped to `module1::submodule2::foo`. For parameterized numba types, we treat them as templated types; for example, `array(int64, 1d, C)` becomes an `array<int64, 1, C>`. All mangled names are prefixed with "_Z". It is followed by the name of the entity. A name contains one or more identifiers. Each identifier is encoded as "<number of bytes in the name><name>". If the name is namespaced and, therefore, has multiple identifiers, the entire name is encoded as "N<encoded identifiers>E". For functions, argument types follow. There are condensed encodings for basic built-in types; e.g. "i" for int, "f" for float. For other types, the previously mentioned name encoding should be used. For templated types, the template parameters are encoded immediately after the name. If it is namespaced, it should be within the 'N' 'E' marker. Template parameters are encoded in "I<encoded parameters>E", where each parameter is encoded using the mentioned name encoding scheme. Template parameters can contain literal values like the '1' in the array type shown earlier. There is a special encoding scheme for them to avoid leading digits. """ import re from numba.core import types # According to the scheme, valid characters for mangled names are [a-zA-Z0-9_]. # We borrow the '_' as the escape character to encode invalid char into # '_xx' where 'xx' is the hex codepoint. 
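# --- Illustrative sketch (not part of the original module) ---
# How the '_xx' escape described above behaves in practice. `_demo_escape`
# is a hypothetical helper that mirrors the _escape_string function below;
# it reuses the `re` module already imported by this file.
def _demo_escape(text):
    pattern = re.compile(r'[^a-z0-9_]', re.I)
    def repl(m):
        # one '_xx' per UTF-8 byte of each invalid character
        return ''.join('_%02x' % ch for ch in m.group(0).encode('utf8'))
    return pattern.sub(repl, text)

# '.' has codepoint 0x2e, so dotted module paths mangle each '.' to '_2e'
assert _demo_escape('module1.foo') == 'module1_2efoo'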
_re_invalid_char = re.compile(r'[^a-z0-9_]', re.I) PREFIX = "_Z" # C names to mangled type code C2CODE = { 'void': 'v', 'wchar_t': 'w', 'bool': 'b', 'char': 'c', 'signed char': 'a', 'unsigned char': 'h', 'short': 's', 'unsigned short': 't', 'int': 'i', 'unsigned int': 'j', 'long': 'l', 'unsigned long': 'm', 'long long': 'x', # __int64 'unsigned long long': 'y', # unsigned __int64 '__int128': 'n', 'unsigned __int128': 'o', 'half': 'Dh', 'float': 'f', 'double': 'd', 'long double': 'e', # __float80 '__float128': 'g', 'ellipsis': 'z', } # Numba types to C names N2C = { types.void: 'void', types.boolean: 'bool', types.uint8: 'unsigned char', types.int8: 'signed char', types.uint16: 'unsigned short', types.int16: 'short', types.uint32: 'unsigned int', types.int32: 'int', types.uint64: 'unsigned long long', types.int64: 'long long', types.float16: 'half', types.float32: 'float', types.float64: 'double', } def _escape_string(text): """Escape the given string so that it only contains ASCII characters of [a-zA-Z0-9_]. Invalid characters are escaped into the string sequence of "_xx" where "xx" is the hex codepoint of the char. Multibyte characters are encoded into utf8 and converted into the above hex format. """ def repl(m): return ''.join(('_%02x' % ch) for ch in m.group(0).encode('utf8')) ret = re.sub(_re_invalid_char, repl, text) # Return str if we got a unicode (for py2) if not isinstance(ret, str): return ret.encode('ascii') return ret def _fix_lead_digit(text): """ Fix text with leading digit """ if text and text[0].isdigit(): return '_' + text else: return text def _len_encoded(string): """ Prefix string with digit indicating the length. Add underscore if string is prefixed with digits. """ string = _fix_lead_digit(string) return '%u%s' % (len(string), string) def mangle_abi_tag(abi_tag: str) -> str: return "B" + _len_encoded(_escape_string(abi_tag)) def mangle_identifier(ident, template_params='', *, abi_tags=()): """ Mangle the identifier with optional template parameters and abi_tags. Note: This treats '.' as '::' in C++. """ parts = [_len_encoded(_escape_string(x)) for x in ident.split('.')] enc_abi_tags = list(map(mangle_abi_tag, abi_tags)) extras = template_params + ''.join(enc_abi_tags) if len(parts) > 1: return 'N%s%sE' % (''.join(parts), extras) else: return '%s%s' % (parts[0], extras) def mangle_type_c(typ): """ Mangle C type name Args ---- typ: str C type name """ if typ in C2CODE: return C2CODE[typ] else: return mangle_identifier(typ) def mangle_type_or_value(typ): """ Mangle type parameter and arbitrary value. """ # Handle numba types if isinstance(typ, types.Type): if typ in N2C: return mangle_type_c(N2C[typ]) else: return mangle_templated_ident(*typ.mangling_args) # Handle integer literal elif isinstance(typ, int): return 'Li%dE' % typ # Handle str as identifier elif isinstance(typ, str): return mangle_identifier(typ) # Otherwise else: enc = _escape_string(str(typ)) return _len_encoded(enc) # Alias mangle_type = mangle_type_or_value mangle_value = mangle_type_or_value def mangle_templated_ident(identifier, parameters): """ Mangle templated identifier. """ template_params = ('I%sE' % ''.join(map(mangle_type_or_value, parameters)) if parameters else '') return mangle_identifier(identifier, template_params) def mangle_args_c(argtys): """ Mangle sequence of C type names """ return ''.join([mangle_type_c(t) for t in argtys]) def mangle_args(argtys): """ Mangle sequence of Numba type objects and arbitrary values. 
""" return ''.join([mangle_type_or_value(t) for t in argtys]) def mangle_c(ident, argtys): """ Mangle identifier with C type names """ return PREFIX + mangle_identifier(ident) + mangle_args_c(argtys) def mangle(ident, argtys, *, abi_tags=()): """ Mangle identifier with Numba type objects and abi-tags. """ return ''.join([PREFIX, mangle_identifier(ident, abi_tags=abi_tags), mangle_args(argtys)]) def prepend_namespace(mangled, ns): """ Prepend namespace to mangled name. """ if not mangled.startswith(PREFIX): raise ValueError('input is not a mangled name') elif mangled.startswith(PREFIX + 'N'): # nested remaining = mangled[3:] ret = PREFIX + 'N' + mangle_identifier(ns) + remaining else: # non-nested remaining = mangled[2:] head, tail = _split_mangled_ident(remaining) ret = PREFIX + 'N' + mangle_identifier(ns) + head + 'E' + tail return ret def _split_mangled_ident(mangled): """ Returns `(head, tail)` where `head` is the ` + ` encoded identifier and `tail` is the remaining. """ ct = int(mangled) ctlen = len(str(ct)) at = ctlen + ct return mangled[:at], mangled[at:] numba-0.55.1/numba/core/lowering.py000664 000000 000000 00000173225 14174536160 017137 0ustar00rootroot000000 000000 from collections import namedtuple, defaultdict import ast import inspect import textwrap import operator import warnings from functools import partial from llvmlite.llvmpy.core import Constant, Type, Builder from numba.core import (typing, utils, types, ir, debuginfo, funcdesc, generators, config, ir_utils, cgutils, removerefctpass, targetconfig) from numba.core.errors import (LoweringError, new_error_context, TypingError, LiteralTypingError, UnsupportedError, NumbaDebugInfoWarning) from numba.core.funcdesc import default_mangler from numba.core.environment import Environment from numba.core.analysis import compute_use_defs _VarArgItem = namedtuple("_VarArgItem", ("vararg", "index")) class BaseLower(object): """ Lower IR to LLVM """ def __init__(self, context, library, fndesc, func_ir, metadata=None): self.library = library self.fndesc = fndesc self.blocks = utils.SortedMap(func_ir.blocks.items()) self.func_ir = func_ir self.call_conv = context.call_conv self.generator_info = func_ir.generator_info self.metadata = metadata self.flags = targetconfig.ConfigStack.top_or_none() # Initialize LLVM self.module = self.library.create_ir_module(self.fndesc.unique_name) # Python execution environment (will be available to the compiled # function). self.env = Environment.from_fndesc(self.fndesc) # Internal states self.blkmap = {} self.pending_phis = {} self.varmap = {} self.firstblk = min(self.blocks.keys()) self.loc = -1 # Specializes the target context as seen inside the Lowerer # This adds: # - environment: the python execution environment self.context = context.subtarget(environment=self.env, fndesc=self.fndesc) # Debuginfo dibuildercls = (self.context.DIBuilder if self.context.enable_debuginfo else debuginfo.DummyDIBuilder) # debuginfo def location self.defn_loc = self._compute_def_location() self.debuginfo = dibuildercls(module=self.module, filepath=func_ir.loc.filename, cgctx=context) # Subclass initialization self.init() def init(self): pass def init_pyapi(self): """ Init the Python API and Environment Manager for the function being lowered. 
""" if self.pyapi is not None: return self.pyapi = self.context.get_python_api(self.builder) # Store environment argument for later use self.env_manager = self.context.get_env_manager(self.builder) self.env_body = self.env_manager.env_body self.envarg = self.env_manager.env_ptr def _compute_def_location(self): # Debuginfo requires source to be accurate. Find it and warn if not # found. If it's not found, use the func_ir line + 1, this assumes that # the function definition is decorated with a 1 line jit decorator. defn_loc = self.func_ir.loc.with_lineno(self.func_ir.loc.line + 1) if self.context.enable_debuginfo: fn = self.func_ir.func_id.func try: raw_source_str, _ = inspect.getsourcelines(fn) except OSError: msg = ("Could not find source for function: " f"{self.func_ir.func_id.func}. Debug line information " "may be inaccurate.") warnings.warn(NumbaDebugInfoWarning(msg)) else: # Parse the source and find the line with `def ` in it, it # is assumed that if the compilation has made it this far that # the source is at least legal and has valid syntax. # Join the source as a block and dedent it. source_str = textwrap.dedent(''.join(raw_source_str)) # Deal with unparsable source (see #7730), this can be caused # by continuation lines/comments at indent levels that are # invalid when the just function source is parsed in isolation. src_ast = None try: src_ast = ast.parse(source_str) except IndentationError: msg = ("Could not parse the source for function: " f"{self.func_ir.func_id.func}. Debug line " "information may be inaccurate. This is often " "caused by comments/docstrings/line continuation " "that is at a lesser indent level than the source.") warnings.warn(NumbaDebugInfoWarning(msg)) # pull the definition out of the AST, only if it seems valid # i.e. one thing in the body if src_ast is not None and len(src_ast.body) == 1: pydef = src_ast.body.pop() # -1 as lines start at 1 and this is an offset. pydef_offset = pydef.lineno - 1 func_ir_loc = self.func_ir.loc defn_line = func_ir_loc.line + pydef_offset defn_loc = func_ir_loc.with_lineno(defn_line) return defn_loc def pre_lower(self): """ Called before lowering all blocks. """ # A given Lower object can be used for several LL functions # (for generators) and it's important to use a new API and # EnvironmentManager. self.pyapi = None self.debuginfo.mark_subprogram(function=self.builder.function, qualname=self.fndesc.qualname, argnames=self.fndesc.args, argtypes=self.fndesc.argtypes, line=self.defn_loc.line) def post_lower(self): """ Called after all blocks are lowered """ self.debuginfo.finalize() def pre_block(self, block): """ Called before lowering a block. """ def post_block(self, block): """ Called after lowering a block. """ def return_exception(self, exc_class, exc_args=None, loc=None): """Propagate exception to the caller. """ self.call_conv.return_user_exc( self.builder, exc_class, exc_args, loc=loc, func_name=self.func_ir.func_id.func_name, ) def set_exception(self, exc_class, exc_args=None, loc=None): """Set exception state in the current function. """ self.call_conv.set_static_user_exc( self.builder, exc_class, exc_args, loc=loc, func_name=self.func_ir.func_id.func_name, ) def emit_environment_object(self): """Emit a pointer to hold the Environment object. 
""" # Define global for the environment and initialize it to NULL envname = self.context.get_env_name(self.fndesc) self.context.declare_env_global(self.module, envname) def lower(self): # Emit the Env into the module self.emit_environment_object() if self.generator_info is None: self.genlower = None self.lower_normal_function(self.fndesc) else: self.genlower = self.GeneratorLower(self) self.gentype = self.genlower.gentype self.genlower.lower_init_func(self) self.genlower.lower_next_func(self) if self.gentype.has_finalizer: self.genlower.lower_finalize_func(self) if config.DUMP_LLVM: print(("LLVM DUMP %s" % self.fndesc).center(80, '-')) if config.HIGHLIGHT_DUMPS: try: from pygments import highlight from pygments.lexers import LlvmLexer as lexer from pygments.formatters import Terminal256Formatter from numba.misc.dump_style import by_colorscheme print(highlight(self.module.__repr__(), lexer(), Terminal256Formatter( style=by_colorscheme()))) except ImportError: msg = "Please install pygments to see highlighted dumps" raise ValueError(msg) else: print(self.module) print('=' * 80) # Special optimization to remove NRT on functions that do not need it. if self.context.enable_nrt and self.generator_info is None: removerefctpass.remove_unnecessary_nrt_usage(self.function, context=self.context, fndesc=self.fndesc) # Run target specific post lowering transformation self.context.post_lowering(self.module, self.library) # Materialize LLVM Module self.library.add_ir_module(self.module) def extract_function_arguments(self): self.fnargs = self.call_conv.decode_arguments(self.builder, self.fndesc.argtypes, self.function) return self.fnargs def lower_normal_function(self, fndesc): """ Lower non-generator *fndesc*. """ self.setup_function(fndesc) # Init argument values self.extract_function_arguments() entry_block_tail = self.lower_function_body() # Close tail of entry block, do not emit debug metadata else the # unconditional jump gets associated with the metadata from the function # body end. with debuginfo.suspend_emission(self.builder): self.builder.position_at_end(entry_block_tail) self.builder.branch(self.blkmap[self.firstblk]) def lower_function_body(self): """ Lower the current function's body, and return the entry block. """ # Init Python blocks for offset in self.blocks: bname = "B%s" % offset self.blkmap[offset] = self.function.append_basic_block(bname) self.pre_lower() # pre_lower() may have changed the current basic block entry_block_tail = self.builder.basic_block self.debug_print("# function begin: {0}".format( self.fndesc.unique_name)) # Lower all blocks for offset, block in sorted(self.blocks.items()): bb = self.blkmap[offset] self.builder.position_at_end(bb) self.lower_block(block) self.post_lower() return entry_block_tail def lower_block(self, block): """ Lower the given block. """ self.pre_block(block) for inst in block.body: self.loc = inst.loc defaulterrcls = partial(LoweringError, loc=self.loc) with new_error_context('lowering "{inst}" at {loc}', inst=inst, loc=self.loc, errcls_=defaulterrcls): self.lower_inst(inst) self.post_block(block) def create_cpython_wrapper(self, release_gil=False): """ Create CPython wrapper(s) around this function (or generator). 
""" if self.genlower: self.context.create_cpython_wrapper(self.library, self.genlower.gendesc, self.env, self.call_helper, release_gil=release_gil) self.context.create_cpython_wrapper(self.library, self.fndesc, self.env, self.call_helper, release_gil=release_gil) def create_cfunc_wrapper(self): """ Create C wrapper around this function. """ if self.genlower: raise UnsupportedError('generator as a first-class function type') self.context.create_cfunc_wrapper(self.library, self.fndesc, self.env, self.call_helper) def setup_function(self, fndesc): # Setup function self.function = self.context.declare_function(self.module, fndesc) self.entry_block = self.function.append_basic_block('entry') self.builder = Builder(self.entry_block) self.call_helper = self.call_conv.init_call_helper(self.builder) def typeof(self, varname): return self.fndesc.typemap[varname] def debug_print(self, msg): if config.DEBUG_JIT: self.context.debug_print(self.builder, "DEBUGJIT: {0}".format(msg)) class Lower(BaseLower): GeneratorLower = generators.GeneratorLower def init(self): super().init() # find all singly assigned variables self._find_singly_assigned_variable() @property def _disable_sroa_like_opt(self): """Flags that the SROA like optimisation that Numba performs (which prevent alloca and subsequent load/store for locals) should be disabled. Currently, this is conditional solely on the presence of a request for the emission of debug information.""" return False if self.flags is None else self.flags.debuginfo def _find_singly_assigned_variable(self): func_ir = self.func_ir blocks = func_ir.blocks sav = set() if not self.func_ir.func_id.is_generator: use_defs = compute_use_defs(blocks) # Compute where variables are defined var_assign_map = defaultdict(set) for blk, vl in use_defs.defmap.items(): for var in vl: var_assign_map[var].add(blk) # Compute where variables are used var_use_map = defaultdict(set) for blk, vl in use_defs.usemap.items(): for var in vl: var_use_map[var].add(blk) # Keep only variables that are defined locally and used locally for var in var_assign_map: if len(var_assign_map[var]) == 1: # Usemap does not keep locally defined variables. if len(var_use_map[var]) == 0: # Ensure that the variable is not defined multiple times # the the block [defblk] = var_assign_map[var] assign_stmts = self.blocks[defblk].find_insts(ir.Assign) assigns = [stmt for stmt in assign_stmts if stmt.target.name == var] if len(assigns) == 1: sav.add(var) self._singly_assigned_vars = sav self._blk_local_varmap = {} def pre_block(self, block): from numba.core.unsafe import eh super(Lower, self).pre_block(block) self._cur_ir_block = block if block == self.firstblk: # create slots for all the vars, irrespective of whether they are # initialized, SSA will pick this up and warn users about using # uninitialized variables. Slots are added as alloca in the first # block bb = self.blkmap[self.firstblk] self.builder.position_at_end(bb) all_names = set() for block in self.blocks.values(): for x in block.find_insts(ir.Del): if x.value not in all_names: all_names.add(x.value) for name in all_names: fetype = self.typeof(name) self._alloca_var(name, fetype) # Detect if we are in a TRY block by looking for a call to # `eh.exception_check`. 
for call in block.find_exprs(op='call'): defn = ir_utils.guard( ir_utils.get_definition, self.func_ir, call.func, ) if defn is not None and isinstance(defn, ir.Global): if defn.value is eh.exception_check: if isinstance(block.terminator, ir.Branch): targetblk = self.blkmap[block.terminator.truebr] # NOTE: This hacks in an attribute for call_conv to # pick up. This hack is no longer needed when # all old-style implementations are gone. self.builder._in_try_block = {'target': targetblk} break def post_block(self, block): # Clean-up try: del self.builder._in_try_block except AttributeError: pass def lower_inst(self, inst): # Set debug location for all subsequent LL instructions self.debuginfo.mark_location(self.builder, self.loc.line) self.debug_print(str(inst)) if isinstance(inst, ir.Assign): ty = self.typeof(inst.target.name) val = self.lower_assign(ty, inst) argidx = None # If this is a store from an arg, like x = arg.x then tell debuginfo # that this is the arg if isinstance(inst.value, ir.Arg): # NOTE: debug location is the `def ` line self.debuginfo.mark_location(self.builder, self.defn_loc.line) argidx = inst.value.index + 1 # args start at 1 self.storevar(val, inst.target.name, argidx=argidx) elif isinstance(inst, ir.Branch): cond = self.loadvar(inst.cond.name) tr = self.blkmap[inst.truebr] fl = self.blkmap[inst.falsebr] condty = self.typeof(inst.cond.name) pred = self.context.cast(self.builder, cond, condty, types.boolean) assert pred.type == Type.int(1), ("cond is not i1: %s" % pred.type) self.builder.cbranch(pred, tr, fl) elif isinstance(inst, ir.Jump): target = self.blkmap[inst.target] self.builder.branch(target) elif isinstance(inst, ir.Return): if self.generator_info: # StopIteration self.genlower.return_from_generator(self) return val = self.loadvar(inst.value.name) oty = self.typeof(inst.value.name) ty = self.fndesc.restype if isinstance(ty, types.Optional): # If returning an optional type self.call_conv.return_optional_value(self.builder, ty, oty, val) return assert ty == oty, ( "type '{}' does not match return type '{}'".format(oty, ty)) retval = self.context.get_return_value(self.builder, ty, val) self.call_conv.return_value(self.builder, retval) elif isinstance(inst, ir.PopBlock): pass # this is just a marker elif isinstance(inst, ir.StaticSetItem): signature = self.fndesc.calltypes[inst] assert signature is not None try: impl = self.context.get_function('static_setitem', signature) except NotImplementedError: return self.lower_setitem(inst.target, inst.index_var, inst.value, signature) else: target = self.loadvar(inst.target.name) value = self.loadvar(inst.value.name) valuety = self.typeof(inst.value.name) value = self.context.cast(self.builder, value, valuety, signature.args[2]) return impl(self.builder, (target, inst.index, value)) elif isinstance(inst, ir.Print): self.lower_print(inst) elif isinstance(inst, ir.SetItem): signature = self.fndesc.calltypes[inst] assert signature is not None return self.lower_setitem(inst.target, inst.index, inst.value, signature) elif isinstance(inst, ir.StoreMap): signature = self.fndesc.calltypes[inst] assert signature is not None return self.lower_setitem(inst.dct, inst.key, inst.value, signature) elif isinstance(inst, ir.DelItem): target = self.loadvar(inst.target.name) index = self.loadvar(inst.index.name) targetty = self.typeof(inst.target.name) indexty = self.typeof(inst.index.name) signature = self.fndesc.calltypes[inst] assert signature is not None op = operator.delitem fnop = self.context.typing_context.resolve_value_type(op) 
callsig = fnop.get_call_type( self.context.typing_context, signature.args, {}, ) impl = self.context.get_function(fnop, callsig) assert targetty == signature.args[0] index = self.context.cast(self.builder, index, indexty, signature.args[1]) return impl(self.builder, (target, index)) elif isinstance(inst, ir.Del): self.delvar(inst.value) elif isinstance(inst, ir.SetAttr): target = self.loadvar(inst.target.name) value = self.loadvar(inst.value.name) signature = self.fndesc.calltypes[inst] targetty = self.typeof(inst.target.name) valuety = self.typeof(inst.value.name) assert signature is not None assert signature.args[0] == targetty impl = self.context.get_setattr(inst.attr, signature) # Convert argument to match value = self.context.cast(self.builder, value, valuety, signature.args[1]) return impl(self.builder, (target, value)) elif isinstance(inst, ir.StaticRaise): self.lower_static_raise(inst) elif isinstance(inst, ir.StaticTryRaise): self.lower_static_try_raise(inst) else: if hasattr(self.context, "lower_extensions"): for _class, func in self.context.lower_extensions.items(): if isinstance(inst, _class): func(self, inst) return raise NotImplementedError(type(inst)) def lower_setitem(self, target_var, index_var, value_var, signature): target = self.loadvar(target_var.name) value = self.loadvar(value_var.name) index = self.loadvar(index_var.name) targetty = self.typeof(target_var.name) valuety = self.typeof(value_var.name) indexty = self.typeof(index_var.name) op = operator.setitem fnop = self.context.typing_context.resolve_value_type(op) callsig = fnop.get_call_type( self.context.typing_context, signature.args, {}, ) impl = self.context.get_function(fnop, callsig) # Convert argument to match if isinstance(targetty, types.Optional): target = self.context.cast(self.builder, target, targetty, targetty.type) else: ul = types.unliteral assert ul(targetty) == ul(signature.args[0]) index = self.context.cast(self.builder, index, indexty, signature.args[1]) value = self.context.cast(self.builder, value, valuety, signature.args[2]) return impl(self.builder, (target, index, value)) def lower_static_raise(self, inst): if inst.exc_class is None: # Reraise self.return_exception(None, loc=self.loc) else: self.return_exception(inst.exc_class, inst.exc_args, loc=self.loc) def lower_static_try_raise(self, inst): if inst.exc_class is None: # Reraise self.set_exception(None, loc=self.loc) else: self.set_exception(inst.exc_class, inst.exc_args, loc=self.loc) def lower_assign(self, ty, inst): value = inst.value # In nopython mode, closure vars are frozen like globals if isinstance(value, (ir.Const, ir.Global, ir.FreeVar)): res = self.context.get_constant_generic(self.builder, ty, value.value) self.incref(ty, res) return res elif isinstance(value, ir.Expr): return self.lower_expr(ty, value) elif isinstance(value, ir.Var): val = self.loadvar(value.name) oty = self.typeof(value.name) res = self.context.cast(self.builder, val, oty, ty) self.incref(ty, res) return res elif isinstance(value, ir.Arg): # Suspend debug info else all the arg repacking ends up being # associated with some line or other and it's actually just a detail # of Numba's CC. with debuginfo.suspend_emission(self.builder): # Cast from the argument type to the local variable type # (note the "arg.FOO" convention as used in typeinfer) argty = self.typeof("arg." 
+ value.name) if isinstance(argty, types.Omitted): pyval = argty.value tyctx = self.context.typing_context valty = tyctx.resolve_value_type_prefer_literal(pyval) # use the type of the constant value const = self.context.get_constant_generic( self.builder, valty, pyval, ) # cast it to the variable type res = self.context.cast(self.builder, const, valty, ty) else: val = self.fnargs[value.index] res = self.context.cast(self.builder, val, argty, ty) self.incref(ty, res) return res elif isinstance(value, ir.Yield): res = self.lower_yield(ty, value) self.incref(ty, res) return res raise NotImplementedError(type(value), value) def lower_yield(self, retty, inst): yp = self.generator_info.yield_points[inst.index] assert yp.inst is inst y = generators.LowerYield(self, yp, yp.live_vars) y.lower_yield_suspend() # Yield to caller val = self.loadvar(inst.value.name) typ = self.typeof(inst.value.name) actual_rettyp = self.gentype.yield_type # cast the local val to the type yielded yret = self.context.cast(self.builder, val, typ, actual_rettyp) # get the return repr of yielded value retval = self.context.get_return_value( self.builder, actual_rettyp, yret, ) # return self.call_conv.return_value(self.builder, retval) # Resumption point y.lower_yield_resume() # None is returned by the yield expression return self.context.get_constant_generic(self.builder, retty, None) def lower_binop(self, resty, expr, op): # if op in utils.OPERATORS_TO_BUILTINS: # map operator.the_op => the corresponding types.Function() # TODO: this looks dodgy ... op = self.context.typing_context.resolve_value_type(op) lhs = expr.lhs rhs = expr.rhs static_lhs = expr.static_lhs static_rhs = expr.static_rhs lty = self.typeof(lhs.name) rty = self.typeof(rhs.name) lhs = self.loadvar(lhs.name) rhs = self.loadvar(rhs.name) # Convert argument to match signature = self.fndesc.calltypes[expr] lhs = self.context.cast(self.builder, lhs, lty, signature.args[0]) rhs = self.context.cast(self.builder, rhs, rty, signature.args[1]) def cast_result(res): return self.context.cast(self.builder, res, signature.return_type, resty) # First try with static operands, if known def try_static_impl(tys, args): if any(a is ir.UNDEFINED for a in args): return None try: if isinstance(op, types.Function): static_sig = op.get_call_type(self.context.typing_context, tys, {}) else: static_sig = typing.signature(signature.return_type, *tys) except TypingError: return None try: static_impl = self.context.get_function(op, static_sig) return static_impl(self.builder, args) except NotImplementedError: return None res = try_static_impl( (_lit_or_omitted(static_lhs), _lit_or_omitted(static_rhs)), (static_lhs, static_rhs), ) if res is not None: return cast_result(res) res = try_static_impl( (_lit_or_omitted(static_lhs), rty), (static_lhs, rhs), ) if res is not None: return cast_result(res) res = try_static_impl( (lty, _lit_or_omitted(static_rhs)), (lhs, static_rhs), ) if res is not None: return cast_result(res) # Normal implementation for generic arguments sig = op.get_call_type(self.context.typing_context, signature.args, {}) impl = self.context.get_function(op, sig) res = impl(self.builder, (lhs, rhs)) return cast_result(res) def lower_getitem(self, resty, expr, value, index, signature): baseval = self.loadvar(value.name) indexval = self.loadvar(index.name) # Get implementation of getitem op = operator.getitem fnop = self.context.typing_context.resolve_value_type(op) callsig = fnop.get_call_type( self.context.typing_context, signature.args, {}, ) impl =
self.context.get_function(fnop, callsig) argvals = (baseval, indexval) argtyps = (self.typeof(value.name), self.typeof(index.name)) castvals = [self.context.cast(self.builder, av, at, ft) for av, at, ft in zip(argvals, argtyps, signature.args)] res = impl(self.builder, castvals) return self.context.cast(self.builder, res, signature.return_type, resty) def _cast_var(self, var, ty): """ Cast a Numba IR variable to the given Numba type, returning a low-level value. """ if isinstance(var, _VarArgItem): varty = self.typeof(var.vararg.name)[var.index] val = self.builder.extract_value(self.loadvar(var.vararg.name), var.index) else: varty = self.typeof(var.name) val = self.loadvar(var.name) return self.context.cast(self.builder, val, varty, ty) def fold_call_args(self, fnty, signature, pos_args, vararg, kw_args): if vararg: # Inject *args from function call # The lowering will be done in _cast_var() above. tp_vararg = self.typeof(vararg.name) assert isinstance(tp_vararg, types.BaseTuple) pos_args = pos_args + [_VarArgItem(vararg, i) for i in range(len(tp_vararg))] # Fold keyword arguments and resolve default argument values pysig = signature.pysig if pysig is None: if kw_args: raise NotImplementedError("unsupported keyword arguments " "when calling %s" % (fnty,)) argvals = [self._cast_var(var, sigty) for var, sigty in zip(pos_args, signature.args)] else: def normal_handler(index, param, var): return self._cast_var(var, signature.args[index]) def default_handler(index, param, default): return self.context.get_constant_generic( self.builder, signature.args[index], default) def stararg_handler(index, param, vars): stararg_ty = signature.args[index] assert isinstance(stararg_ty, types.BaseTuple), stararg_ty values = [self._cast_var(var, sigty) for var, sigty in zip(vars, stararg_ty)] return cgutils.make_anonymous_struct(self.builder, values) argvals = typing.fold_arguments(pysig, pos_args, dict(kw_args), normal_handler, default_handler, stararg_handler) return argvals def lower_print(self, inst): """ Lower a ir.Print() """ # We handle this, as far as possible, as a normal call to built-in # print(). This will make it easy to undo the special ir.Print # rewrite when it becomes unnecessary (e.g. when we have native # strings). 
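# Sketch of the fix-up performed below, with a hypothetical example: for
#     print("iter", i)
# inst.consts maps argument 0 to the constant "iter", so the resolved
# signature none(unicode_type, int64) is rewritten to
# none(Literal[str]("iter"), int64) before the print() implementation is
# looked up.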
sig = self.fndesc.calltypes[inst] assert sig.return_type == types.none fnty = self.context.typing_context.resolve_value_type(print) # Fix the call signature to inject any constant-inferred # string argument pos_tys = list(sig.args) pos_args = list(inst.args) for i in range(len(pos_args)): if i in inst.consts: pyval = inst.consts[i] if isinstance(pyval, str): pos_tys[i] = types.literal(pyval) fixed_sig = typing.signature(sig.return_type, *pos_tys) fixed_sig = fixed_sig.replace(pysig=sig.pysig) argvals = self.fold_call_args(fnty, sig, pos_args, inst.vararg, {}) impl = self.context.get_function(print, fixed_sig) impl(self.builder, argvals) def lower_call(self, resty, expr): signature = self.fndesc.calltypes[expr] self.debug_print("# lower_call: expr = {0}".format(expr)) if isinstance(signature.return_type, types.Phantom): return self.context.get_dummy_value() fnty = self.typeof(expr.func.name) if isinstance(fnty, types.ObjModeDispatcher): res = self._lower_call_ObjModeDispatcher(fnty, expr, signature) elif isinstance(fnty, types.ExternalFunction): res = self._lower_call_ExternalFunction(fnty, expr, signature) elif isinstance(fnty, types.ExternalFunctionPointer): res = self._lower_call_ExternalFunctionPointer( fnty, expr, signature) elif isinstance(fnty, types.RecursiveCall): res = self._lower_call_RecursiveCall(fnty, expr, signature) elif isinstance(fnty, types.FunctionType): res = self._lower_call_FunctionType(fnty, expr, signature) else: res = self._lower_call_normal(fnty, expr, signature) # If lowering the call returned None, interpret that as returning a # dummy value if the return type of the function is void; otherwise # there is a problem if res is None: if signature.return_type == types.void: res = self.context.get_dummy_value() else: raise LoweringError( msg="non-void function returns None from implementation", loc=self.loc ) return self.context.cast(self.builder, res, signature.return_type, resty) def _lower_call_ObjModeDispatcher(self, fnty, expr, signature): from numba.core.pythonapi import ObjModeUtils self.init_pyapi() # Acquire the GIL gil_state = self.pyapi.gil_ensure() # Fix types argnames = [a.name for a in expr.args] argtypes = [self.typeof(a) for a in argnames] argvalues = [self.loadvar(a) for a in argnames] for v, ty in zip(argvalues, argtypes): # Because .from_native_value steals the reference self.incref(ty, v) argobjs = [self.pyapi.from_native_value(atyp, aval, self.env_manager) for atyp, aval in zip(argtypes, argvalues)] # Load objmode dispatcher callee = ObjModeUtils(self.pyapi).load_dispatcher(fnty, argtypes) # Make Call ret_obj = self.pyapi.call_function_objargs(callee, argobjs) has_exception = cgutils.is_null(self.builder, ret_obj) with self.
builder.if_else(has_exception) as (then, orelse): # Handles exception # This branch must exit the function with then: # Clean arg for obj in argobjs: self.pyapi.decref(obj) # Release the GIL self.pyapi.gil_release(gil_state) # Return and signal exception self.call_conv.return_exc(self.builder) # Handles normal return with orelse: # Fix output value native = self.pyapi.to_native_value( fnty.dispatcher.output_types, ret_obj, ) output = native.value # Release objs self.pyapi.decref(ret_obj) for obj in argobjs: self.pyapi.decref(obj) # cleanup output if callable(native.cleanup): native.cleanup() # Release the GIL self.pyapi.gil_release(gil_state) # Error during unboxing with self.builder.if_then(native.is_error): self.call_conv.return_exc(self.builder) return output def _lower_call_ExternalFunction(self, fnty, expr, signature): # Handle a named external function self.debug_print("# external function") argvals = self.fold_call_args( fnty, signature, expr.args, expr.vararg, expr.kws, ) fndesc = funcdesc.ExternalFunctionDescriptor( fnty.symbol, fnty.sig.return_type, fnty.sig.args) func = self.context.declare_external_function( self.builder.module, fndesc) return self.context.call_external_function( self.builder, func, fndesc.argtypes, argvals, ) def _lower_call_ExternalFunctionPointer(self, fnty, expr, signature): # Handle a C function pointer self.debug_print("# calling external function pointer") argvals = self.fold_call_args( fnty, signature, expr.args, expr.vararg, expr.kws, ) pointer = self.loadvar(expr.func.name) # If the external function pointer uses libpython if fnty.requires_gil: self.init_pyapi() # Acquire the GIL gil_state = self.pyapi.gil_ensure() # Make PyObjects newargvals = [] pyvals = [] for exptyp, gottyp, aval in zip(fnty.sig.args, signature.args, argvals): # Adjust argument values to pyobjects if exptyp == types.ffi_forced_object: self.incref(gottyp, aval) obj = self.pyapi.from_native_value( gottyp, aval, self.env_manager, ) newargvals.append(obj) pyvals.append(obj) else: newargvals.append(aval) # Call external function res = self.context.call_function_pointer( self.builder, pointer, newargvals, fnty.cconv, ) # Release PyObjects for obj in pyvals: self.pyapi.decref(obj) # Release the GIL self.pyapi.gil_release(gil_state) # If the external function pointer does NOT use libpython else: res = self.context.call_function_pointer( self.builder, pointer, argvals, fnty.cconv, ) return res def _lower_call_RecursiveCall(self, fnty, expr, signature): # Recursive call argvals = self.fold_call_args( fnty, signature, expr.args, expr.vararg, expr.kws, ) qualprefix = fnty.overloads[signature.args] mangler = self.context.mangler or default_mangler abi_tags = self.fndesc.abi_tags mangled_name = mangler(qualprefix, signature.args, abi_tags=abi_tags) # special case self recursion if self.builder.function.name.startswith(mangled_name): res = self.context.call_internal( self.builder, self.fndesc, signature, argvals, ) else: res = self.context.call_unresolved( self.builder, mangled_name, signature, argvals, ) return res def _lower_call_FunctionType(self, fnty, expr, signature): self.debug_print("# calling first-class function type") sig = types.unliteral(signature) if not fnty.check_signature(signature): # value dependent polymorphism? 
raise UnsupportedError( f'mismatch of function types:' f' expected {fnty} but got {types.FunctionType(sig)}') ftype = fnty.ftype argvals = self.fold_call_args( fnty, sig, expr.args, expr.vararg, expr.kws, ) func_ptr = self.__get_function_pointer(ftype, expr.func.name, sig=sig) res = self.builder.call(func_ptr, argvals, cconv=fnty.cconv) return res def __get_function_pointer(self, ftype, fname, sig=None): from numba.experimental.function_type import lower_get_wrapper_address llty = self.context.get_value_type(ftype) fstruct = self.loadvar(fname) addr = self.builder.extract_value(fstruct, 0, name='addr_of_%s' % (fname)) fptr = cgutils.alloca_once(self.builder, llty, name="fptr_of_%s" % (fname)) with self.builder.if_else( cgutils.is_null(self.builder, addr), likely=False) as (then, orelse): with then: self.init_pyapi() # Acquire the GIL gil_state = self.pyapi.gil_ensure() pyaddr = self.builder.extract_value( fstruct, 1, name='pyaddr_of_%s' % (fname)) # try to recover the function address, see # test_zero_address BadToGood example in # test_function_type.py addr1 = lower_get_wrapper_address( self.context, self.builder, pyaddr, sig, failure_mode='ignore') with self.builder.if_then( cgutils.is_null(self.builder, addr1), likely=False): self.return_exception( RuntimeError, exc_args=(f"{ftype} function address is null",), loc=self.loc) addr2 = self.pyapi.long_as_voidptr(addr1) self.builder.store(self.builder.bitcast(addr2, llty), fptr) self.pyapi.decref(addr1) self.pyapi.gil_release(gil_state) with orelse: self.builder.store(self.builder.bitcast(addr, llty), fptr) return self.builder.load(fptr) def _lower_call_normal(self, fnty, expr, signature): # Normal function resolution self.debug_print("# calling normal function: {0}".format(fnty)) self.debug_print("# signature: {0}".format(signature)) if isinstance(fnty, types.ObjModeDispatcher): argvals = expr.func.args else: argvals = self.fold_call_args( fnty, signature, expr.args, expr.vararg, expr.kws, ) tname = expr.target if tname is not None: from numba.core.target_extension import resolve_dispatcher_from_str disp = resolve_dispatcher_from_str(tname) hw_ctx = disp.targetdescr.target_context impl = hw_ctx.get_function(fnty, signature) else: impl = self.context.get_function(fnty, signature) if signature.recvr: # The "self" object is passed as the function object # for bound functions the_self = self.loadvar(expr.func.name) # Prepend the self reference argvals = [the_self] + list(argvals) res = impl(self.builder, argvals, self.loc) return res def lower_expr(self, resty, expr): if expr.op == 'binop': return self.lower_binop(resty, expr, expr.fn) elif expr.op == 'inplace_binop': lty = self.typeof(expr.lhs.name) if lty.mutable: return self.lower_binop(resty, expr, expr.fn) else: # inplace operators on non-mutable types reuse the same # definition as the corresponding copying operators.
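# e.g. for `s += "!"` on a unicode string (immutable), expr.fn is
# operator.iadd but the lowering reuses operator.add (expr.immutable_fn)
# and the result is simply rebound to `s`.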
return self.lower_binop(resty, expr, expr.immutable_fn) elif expr.op == 'unary': val = self.loadvar(expr.value.name) typ = self.typeof(expr.value.name) func_ty = self.context.typing_context.resolve_value_type(expr.fn) # Get function signature = self.fndesc.calltypes[expr] impl = self.context.get_function(func_ty, signature) # Convert argument to match val = self.context.cast(self.builder, val, typ, signature.args[0]) res = impl(self.builder, [val]) res = self.context.cast(self.builder, res, signature.return_type, resty) return res elif expr.op == 'call': res = self.lower_call(resty, expr) return res elif expr.op == 'pair_first': val = self.loadvar(expr.value.name) ty = self.typeof(expr.value.name) res = self.context.pair_first(self.builder, val, ty) self.incref(resty, res) return res elif expr.op == 'pair_second': val = self.loadvar(expr.value.name) ty = self.typeof(expr.value.name) res = self.context.pair_second(self.builder, val, ty) self.incref(resty, res) return res elif expr.op in ('getiter', 'iternext'): val = self.loadvar(expr.value.name) ty = self.typeof(expr.value.name) signature = self.fndesc.calltypes[expr] impl = self.context.get_function(expr.op, signature) [fty] = signature.args castval = self.context.cast(self.builder, val, ty, fty) res = impl(self.builder, (castval,)) res = self.context.cast(self.builder, res, signature.return_type, resty) return res elif expr.op == 'exhaust_iter': val = self.loadvar(expr.value.name) ty = self.typeof(expr.value.name) # Unpack optional if isinstance(ty, types.Optional): val = self.context.cast(self.builder, val, ty, ty.type) ty = ty.type # If we have a tuple, we needn't do anything # (and we can't iterate over the heterogeneous ones). if isinstance(ty, types.BaseTuple): assert ty == resty self.incref(ty, val) return val itemty = ty.iterator_type.yield_type tup = self.context.get_constant_undef(resty) pairty = types.Pair(itemty, types.boolean) getiter_sig = typing.signature(ty.iterator_type, ty) getiter_impl = self.context.get_function('getiter', getiter_sig) iternext_sig = typing.signature(pairty, ty.iterator_type) iternext_impl = self.context.get_function('iternext', iternext_sig) iterobj = getiter_impl(self.builder, (val,)) # We call iternext() as many times as desired (`expr.count`). for i in range(expr.count): pair = iternext_impl(self.builder, (iterobj,)) is_valid = self.context.pair_second(self.builder, pair, pairty) with cgutils.if_unlikely(self.builder, self.builder.not_(is_valid)): self.return_exception(ValueError, loc=self.loc) item = self.context.pair_first(self.builder, pair, pairty) tup = self.builder.insert_value(tup, item, i) # Call iternext() once more to check that the iterator # is exhausted. 
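# Sketch: for `a, b = v` with expr.count == 2, the loop above emits two
# checked iternext() calls to fill the tuple; the extra call below must
# then report an invalid pair, otherwise the iterable held more than two
# items and ValueError is raised, mirroring CPython's "too many values
# to unpack" error.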
pair = iternext_impl(self.builder, (iterobj,)) is_valid = self.context.pair_second(self.builder, pair, pairty) with cgutils.if_unlikely(self.builder, is_valid): self.return_exception(ValueError, loc=self.loc) self.decref(ty.iterator_type, iterobj) return tup elif expr.op == "getattr": val = self.loadvar(expr.value.name) ty = self.typeof(expr.value.name) if isinstance(resty, types.BoundFunction): # if we are getting out a method, assume we have typed this # properly and just build a bound function object casted = self.context.cast(self.builder, val, ty, resty.this) res = self.context.get_bound_function(self.builder, casted, resty.this) self.incref(resty, res) return res else: impl = self.context.get_getattr(ty, expr.attr) attrty = self.context.typing_context.resolve_getattr(ty, expr.attr) if impl is None: # ignore the attribute return self.context.get_dummy_value() else: res = impl(self.context, self.builder, ty, val, expr.attr) # Cast the attribute type to the expected output type res = self.context.cast(self.builder, res, attrty, resty) return res elif expr.op == "static_getitem": signature = typing.signature( resty, self.typeof(expr.value.name), _lit_or_omitted(expr.index), ) try: # Both get_function() and the returned implementation can # raise NotImplementedError if the types aren't supported impl = self.context.get_function("static_getitem", signature) return impl(self.builder, (self.loadvar(expr.value.name), expr.index)) except NotImplementedError: if expr.index_var is None: raise # Fall back on the generic getitem() implementation # for this type. signature = self.fndesc.calltypes[expr] return self.lower_getitem(resty, expr, expr.value, expr.index_var, signature) elif expr.op == "typed_getitem": signature = typing.signature( resty, self.typeof(expr.value.name), self.typeof(expr.index.name), ) impl = self.context.get_function("typed_getitem", signature) return impl(self.builder, (self.loadvar(expr.value.name), self.loadvar(expr.index.name))) elif expr.op == "getitem": signature = self.fndesc.calltypes[expr] return self.lower_getitem(resty, expr, expr.value, expr.index, signature) elif expr.op == "build_tuple": itemvals = [self.loadvar(i.name) for i in expr.items] itemtys = [self.typeof(i.name) for i in expr.items] castvals = [self.context.cast(self.builder, val, fromty, toty) for val, toty, fromty in zip(itemvals, resty, itemtys)] tup = self.context.make_tuple(self.builder, resty, castvals) self.incref(resty, tup) return tup elif expr.op == "build_list": itemvals = [self.loadvar(i.name) for i in expr.items] itemtys = [self.typeof(i.name) for i in expr.items] if isinstance(resty, types.LiteralList): castvals = [self.context.cast(self.builder, val, fromty, toty) for val, toty, fromty in zip(itemvals, resty.types, itemtys)] tup = self.context.make_tuple(self.builder, types.Tuple(resty.types), castvals) self.incref(resty, tup) return tup else: castvals = [self.context.cast(self.builder, val, fromty, resty.dtype) for val, fromty in zip(itemvals, itemtys)] return self.context.build_list(self.builder, resty, castvals) elif expr.op == "build_set": # Insert in reverse order, as Python does items = expr.items[::-1] itemvals = [self.loadvar(i.name) for i in items] itemtys = [self.typeof(i.name) for i in items] castvals = [self.context.cast(self.builder, val, fromty, resty.dtype) for val, fromty in zip(itemvals, itemtys)] return self.context.build_set(self.builder, resty, castvals) elif expr.op == "build_map": items = expr.items keys, values = [], [] key_types, value_types = [], [] for k, v in 
items: key = self.loadvar(k.name) keytype = self.typeof(k.name) val = self.loadvar(v.name) valtype = self.typeof(v.name) keys.append(key) values.append(val) key_types.append(keytype) value_types.append(valtype) return self.context.build_map(self.builder, resty, list(zip(key_types, value_types)), list(zip(keys, values))) elif expr.op == "cast": val = self.loadvar(expr.value.name) ty = self.typeof(expr.value.name) castval = self.context.cast(self.builder, val, ty, resty) self.incref(resty, castval) return castval elif expr.op == "phi": raise LoweringError("PHI not stripped") elif expr.op == 'null': return self.context.get_constant_null(resty) elif expr.op in self.context.special_ops: res = self.context.special_ops[expr.op](self, expr) return res raise NotImplementedError(expr) def _alloca_var(self, name, fetype): """ Ensure the given variable has an allocated stack slot (if needed). """ if name in self.varmap: # quit early return # If the name is used in multiple blocks or lowering with debuginfo... if ((name not in self._singly_assigned_vars) or self._disable_sroa_like_opt): # If not already defined, allocate it ptr = self.alloca(name, fetype) # Remember the pointer self.varmap[name] = ptr def getvar(self, name): """ Get a pointer to the given variable's slot. """ if not self._disable_sroa_like_opt: assert name not in self._blk_local_varmap assert name not in self._singly_assigned_vars return self.varmap[name] def loadvar(self, name): """ Load the given variable's value. """ if name in self._blk_local_varmap and not self._disable_sroa_like_opt: return self._blk_local_varmap[name] ptr = self.getvar(name) # Don't associate debuginfo with the load for a function arg, otherwise # it creates instructions ahead of the first source line of the # function, which then causes problems with breaking on the function # symbol (it hits the symbol, not the first line). if name in self.func_ir.arg_names: with debuginfo.suspend_emission(self.builder): return self.builder.load(ptr) else: return self.builder.load(ptr) def storevar(self, value, name, argidx=None): """ Store the value into the given variable. """ fetype = self.typeof(name) # Define if not already self._alloca_var(name, fetype) # Store variable if (name in self._singly_assigned_vars and not self._disable_sroa_like_opt): self._blk_local_varmap[name] = value else: if argidx is None: # Clean up existing value stored in the variable, not needed # if it's an arg old = self.loadvar(name) self.decref(fetype, old) # stack stored variable ptr = self.getvar(name) if value.type != ptr.type.pointee: msg = ("Storing {value.type} to ptr of {ptr.type.pointee} " "('{name}'). FE type {fetype}").format(value=value, ptr=ptr, fetype=fetype, name=name) raise AssertionError(msg) # If this store is associated with an argument to the function (i.e. a # store following reassembly of a structure that the calling convention # splatted into multiple args to the function) then mark this variable # as such. if argidx is not None: with debuginfo.suspend_emission(self.builder): self.builder.store(value, ptr) loc = self.defn_loc # the line with `def ` lltype = self.context.get_value_type(fetype) sizeof = self.context.get_abi_sizeof(lltype) datamodel = self.context.data_model_manager[fetype] self.debuginfo.mark_variable(self.builder, ptr, name=name, lltype=lltype, size=sizeof, line=loc.line, datamodel=datamodel, argidx=argidx) else: self.builder.store(value, ptr) def delvar(self, name): """ Delete the given variable.
""" fetype = self.typeof(name) # Out-of-order if (name not in self._blk_local_varmap and not self._disable_sroa_like_opt): if name in self._singly_assigned_vars: self._singly_assigned_vars.discard(name) # Define if not already (may happen if the variable is deleted # at the beginning of a loop, but only set later in the loop) self._alloca_var(name, fetype) if name in self._blk_local_varmap and not self._disable_sroa_like_opt: llval = self._blk_local_varmap[name] self.decref(fetype, llval) else: ptr = self.getvar(name) self.decref(fetype, self.builder.load(ptr)) # Zero-fill variable to avoid double frees on subsequent dels self.builder.store(Constant.null(ptr.type.pointee), ptr) def alloca(self, name, type): lltype = self.context.get_value_type(type) datamodel = self.context.data_model_manager[type] return self.alloca_lltype(name, lltype, datamodel=datamodel) def alloca_lltype(self, name, lltype, datamodel=None): # Is user variable? is_uservar = not name.startswith('$') # Allocate space for variable aptr = cgutils.alloca_once(self.builder, lltype, name=name, zfill=False) # Emit debug info for user variable if is_uservar: # Don't associate debuginfo with the alloca for a function arg, this # is handled by the first store to the alloca so that repacking the # splatted args from the CC is dealt with. if name not in self.func_ir.arg_names: sizeof = self.context.get_abi_sizeof(lltype) self.debuginfo.mark_variable(self.builder, aptr, name=name, lltype=lltype, size=sizeof, line=self.loc.line, datamodel=datamodel,) return aptr def incref(self, typ, val): if not self.context.enable_nrt: return self.context.nrt.incref(self.builder, typ, val) def decref(self, typ, val): if not self.context.enable_nrt: return # do not associate decref with "use", it creates "jumpy" line info as # the decrefs are usually where the ir.Del nodes are, which is at the # end of the block. with debuginfo.suspend_emission(self.builder): self.context.nrt.decref(self.builder, typ, val) def _lit_or_omitted(value): """Returns a Literal instance if the type of value is supported; otherwise, return `Omitted(value)`. 
""" try: return types.literal(value) except LiteralTypingError: return types.Omitted(value) numba-0.55.1/numba/core/object_mode_passes.py000664 000000 000000 00000016145 14174536160 021136 0ustar00rootroot000000 000000 import warnings from numba.core import (errors, types, typing, funcdesc, config, pylowering, transforms) from numba.core.compiler_machinery import (FunctionPass, LoweringPass, register_pass) from collections import defaultdict @register_pass(mutates_CFG=True, analysis_only=False) class ObjectModeFrontEnd(FunctionPass): _name = "object_mode_front_end" def __init__(self): FunctionPass.__init__(self) def _frontend_looplift(self, state): """ Loop lifting analysis and transformation """ loop_flags = state.flags.copy() outer_flags = state.flags.copy() # Do not recursively loop lift outer_flags.enable_looplift = False loop_flags.enable_looplift = False if not state.flags.enable_pyobject_looplift: loop_flags.enable_pyobject = False loop_flags.enable_ssa = False main, loops = transforms.loop_lifting(state.func_ir, typingctx=state.typingctx, targetctx=state.targetctx, locals=state.locals, flags=loop_flags) if loops: # Some loops were extracted if config.DEBUG_FRONTEND or config.DEBUG: for loop in loops: print("Lifting loop", loop.get_source_location()) from numba.core.compiler import compile_ir cres = compile_ir(state.typingctx, state.targetctx, main, state.args, state.return_type, outer_flags, state.locals, lifted=tuple(loops), lifted_from=None, is_lifted_loop=True) return cres def run_pass(self, state): from numba.core.compiler import _EarlyPipelineCompletion # NOTE: That so much stuff, including going back into the compiler, is # captured in a single pass is not ideal. if state.flags.enable_looplift: assert not state.lifted cres = self._frontend_looplift(state) if cres is not None: raise _EarlyPipelineCompletion(cres) # Fallback typing: everything is a python object state.typemap = defaultdict(lambda: types.pyobject) state.calltypes = defaultdict(lambda: types.pyobject) state.return_type = types.pyobject return True @register_pass(mutates_CFG=True, analysis_only=False) class ObjectModeBackEnd(LoweringPass): _name = "object_mode_back_end" def __init__(self): LoweringPass.__init__(self) def _py_lowering_stage(self, targetctx, library, interp, flags): fndesc = funcdesc.PythonFunctionDescriptor.from_object_mode_function( interp ) with targetctx.push_code_library(library): lower = pylowering.PyLower(targetctx, library, fndesc, interp) lower.lower() if not flags.no_cpython_wrapper: lower.create_cpython_wrapper() env = lower.env call_helper = lower.call_helper del lower from numba.core.compiler import _LowerResult # TODO: move this if flags.no_compile: return _LowerResult(fndesc, call_helper, cfunc=None, env=env) else: # Prepare for execution cfunc = targetctx.get_executable(library, fndesc, env) return _LowerResult(fndesc, call_helper, cfunc=cfunc, env=env) def run_pass(self, state): """ Lowering for object mode """ if state.library is None: codegen = state.targetctx.codegen() state.library = codegen.create_library(state.func_id.func_qualname) # Enable object caching upfront, so that the library can # be later serialized. state.library.enable_object_caching() def backend_object_mode(): """ Object mode compilation """ if len(state.args) != state.nargs: # append missing # BUG?: What's going on with nargs here? 
# check state.nargs vs self.nargs on original code state.args = (tuple(state.args) + (types.pyobject,) * (state.nargs - len(state.args))) return self._py_lowering_stage(state.targetctx, state.library, state.func_ir, state.flags) lowered = backend_object_mode() signature = typing.signature(state.return_type, *state.args) from numba.core.compiler import compile_result state.cr = compile_result( typing_context=state.typingctx, target_context=state.targetctx, entry_point=lowered.cfunc, typing_error=state.status.fail_reason, type_annotation=state.type_annotation, library=state.library, call_helper=lowered.call_helper, signature=signature, objectmode=True, lifted=state.lifted, fndesc=lowered.fndesc, environment=lowered.env, metadata=state.metadata, reload_init=state.reload_init, ) # Warn, deprecated behaviour, code compiled in objmode without # force_pyobject indicates fallback from nopython mode if not state.flags.force_pyobject: # first warn about object mode and yes/no to lifted loops if len(state.lifted) > 0: warn_msg = ('Function "%s" was compiled in object mode without' ' forceobj=True, but has lifted loops.' % (state.func_id.func_name,)) else: warn_msg = ('Function "%s" was compiled in object mode without' ' forceobj=True.' % (state.func_id.func_name,)) warnings.warn(errors.NumbaWarning(warn_msg, state.func_ir.loc)) url = ("https://numba.readthedocs.io/en/stable/reference/" "deprecation.html#deprecation-of-object-mode-fall-" "back-behaviour-when-using-jit") msg = ("\nFall-back from the nopython compilation path to the " "object mode compilation path has been detected, this is " "deprecated behaviour.\n\nFor more information visit %s" % url) warnings.warn(errors.NumbaDeprecationWarning(msg, state.func_ir.loc)) if state.flags.release_gil: warn_msg = ("Code running in object mode won't allow parallel" " execution despite nogil=True.") warnings.warn_explicit(warn_msg, errors.NumbaWarning, state.func_id.filename, state.func_id.firstlineno) return True numba-0.55.1/numba/core/optional.py000664 000000 000000 00000010126 14174536160 017124 0ustar00rootroot000000 000000 import operator from numba.core import types, typing, cgutils from numba.core.imputils import (lower_cast, lower_builtin, lower_getattr_generic, impl_ret_untracked, lower_setattr_generic) def always_return_true_impl(context, builder, sig, args): return cgutils.true_bit def always_return_false_impl(context, builder, sig, args): return cgutils.false_bit def optional_is_none(context, builder, sig, args): """ Check if an Optional value is invalid """ [lty, rty] = sig.args [lval, rval] = args # Make sure None is on the right if lty == types.none: lty, rty = rty, lty lval, rval = rval, lval opt_type = lty opt_val = lval opt = context.make_helper(builder, opt_type, opt_val) res = builder.not_(cgutils.as_bool_bit(builder, opt.valid)) return impl_ret_untracked(context, builder, sig.return_type, res) # None is/not None lower_builtin(operator.is_, types.none, types.none)(always_return_true_impl) # Optional is None lower_builtin(operator.is_, types.Optional, types.none)(optional_is_none) lower_builtin(operator.is_, types.none, types.Optional)(optional_is_none) @lower_getattr_generic(types.Optional) def optional_getattr(context, builder, typ, value, attr): """ Optional.__getattr__ => redirect to the wrapped type. 
""" inner_type = typ.type val = context.cast(builder, value, typ, inner_type) imp = context.get_getattr(inner_type, attr) return imp(context, builder, inner_type, val, attr) @lower_setattr_generic(types.Optional) def optional_setattr(context, builder, sig, args, attr): """ Optional.__setattr__ => redirect to the wrapped type. """ basety, valty = sig.args target, val = args target_type = basety.type target = context.cast(builder, target, basety, target_type) newsig = typing.signature(sig.return_type, target_type, valty) imp = context.get_setattr(attr, newsig) return imp(builder, (target, val)) @lower_cast(types.Optional, types.Optional) def optional_to_optional(context, builder, fromty, toty, val): """ The handling of optional->optional cast must be special cased for correct propagation of None value. Given type T and U. casting of T? to U? (? denotes optional) should always succeed. If the from-value is None, the None value the casted value (U?) should be None; otherwise, the from-value is casted to U. This is different from casting T? to U, which requires the from-value must not be None. """ optval = context.make_helper(builder, fromty, value=val) validbit = cgutils.as_bool_bit(builder, optval.valid) # Create uninitialized optional value outoptval = context.make_helper(builder, toty) with builder.if_else(validbit) as (is_valid, is_not_valid): with is_valid: # Cast internal value outoptval.valid = cgutils.true_bit outoptval.data = context.cast(builder, optval.data, fromty.type, toty.type) with is_not_valid: # Store None to result outoptval.valid = cgutils.false_bit outoptval.data = cgutils.get_null_value( outoptval.data.type) return outoptval._getvalue() @lower_cast(types.Any, types.Optional) def any_to_optional(context, builder, fromty, toty, val): if fromty == types.none: return context.make_optional_none(builder, toty.type) else: val = context.cast(builder, val, fromty, toty.type) return context.make_optional_value(builder, toty.type, val) @lower_cast(types.Optional, types.Any) @lower_cast(types.Optional, types.Boolean) def optional_to_any(context, builder, fromty, toty, val): optval = context.make_helper(builder, fromty, value=val) validbit = cgutils.as_bool_bit(builder, optval.valid) with builder.if_then(builder.not_(validbit), likely=False): msg = "expected %s, got None" % (fromty.type,) context.call_conv.return_user_exc(builder, TypeError, (msg,)) return context.cast(builder, optval.data, fromty.type, toty) numba-0.55.1/numba/core/options.py000664 000000 000000 00000005521 14174536160 016775 0ustar00rootroot000000 000000 """ Target Options """ import operator from numba.core import config, utils from numba.core.targetconfig import TargetConfig, Option class TargetOptions: """Target options maps user options from decorators to the ``numba.core.compiler.Flags`` used by lowering and target context. """ class Mapping: def __init__(self, flag_name, apply=lambda x: x): self.flag_name = flag_name self.apply = apply def finalize(self, flags, options): """Subclasses can override this method to make target specific customizations of default flags. Parameters ---------- flags : Flags options : dict """ pass @classmethod def parse_as_flags(cls, flags, options): """Parse target options defined in ``options`` and set ``flags`` accordingly. 
Parameters ---------- flags : Flags options : dict """ opt = cls() opt._apply(flags, options) opt.finalize(flags, options) return flags def _apply(self, flags, options): # Find all Mapping instances in the class mappings = {} cls = type(self) for k in dir(cls): v = getattr(cls, k) if isinstance(v, cls.Mapping): mappings[k] = v used = set() for k, mapping in mappings.items(): if k in options: v = mapping.apply(options[k]) setattr(flags, mapping.flag_name, v) used.add(k) unused = set(options) - used if unused: # Unread options? m = (f"Unrecognized options: {unused}. " f"Known options are {mappings.keys()}") raise KeyError(m) _mapping = TargetOptions.Mapping class DefaultOptions: """Defines how user-level target options are mapped to the target flags. """ nopython = _mapping("enable_pyobject", operator.not_) forceobj = _mapping("force_pyobject") looplift = _mapping("enable_looplift") _nrt = _mapping("nrt") debug = _mapping("debuginfo") boundscheck = _mapping("boundscheck") nogil = _mapping("release_gil") no_rewrites = _mapping("no_rewrites") no_cpython_wrapper = _mapping("no_cpython_wrapper") no_cfunc_wrapper = _mapping("no_cfunc_wrapper") parallel = _mapping("auto_parallel") fastmath = _mapping("fastmath") error_model = _mapping("error_model") inline = _mapping("inline") forceinline = _mapping("forceinline") target_backend = _mapping("target_backend") def include_default_options(*args): """Returns a mixin class with a subset of the options Parameters ---------- *args : str Option names to include. """ glbs = {k: getattr(DefaultOptions, k) for k in args} return type("OptionMixins", (), glbs) numba-0.55.1/numba/core/overload_glue.py000664 000000 000000 00000023302 14174536160 020126 0ustar00rootroot000000 000000 """ Provides wrapper functions for "glueing" together Numba implementations that are written in the "old" style of a separate typing and lowering implementation. """ import types as pytypes import textwrap from threading import RLock from collections import defaultdict from numba.core import errors class _OverloadWrapper(object): """This class does all the work of assembling and registering wrapped split implementations. """ def __init__(self, function, typing_key=None): assert function is not None self._function = function self._typing_key = typing_key self._BIND_TYPES = dict() self._selector = None self._TYPER = None # run to register overload, the intrinsic sorts out the binding to the # registered impls at the point the overload is evaluated, i.e. this # is all lazy. 
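# Illustrative use of the glue, for a hypothetical function `myfunc`:
#
#     @glue_typing(myfunc)
#     class MyFuncTyping(AbstractTemplate):
#         def generic(self, args, kws):
#             ...
#
#     @glue_lowering(myfunc, types.intp)
#     def impl_myfunc_intp(context, builder, sig, args):
#         ...
#
# wrap_typing() records the (cloned) template, wrap_impl() records the
# lowering keyed by its argument types, and the @overload assembled in
# _build() binds the two together lazily.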
self._build() def _stub_generator(self, nargs, body_func, kwargs=None): """This generates a function that takes "nargs" count of arguments and the presented kwargs; "body_func" is the function that will type the overloaded function and then work out which lowering to return""" def stub(tyctx): # body is supplied when the function is magic'd into life via glbls return body(tyctx) # noqa: F821 if kwargs is None: kwargs = {} # create new code parts stub_code = stub.__code__ co_args = [stub_code.co_argcount + nargs + len(kwargs)] new_varnames = [*stub_code.co_varnames] new_varnames.extend([f'tmp{x}' for x in range(nargs)]) new_varnames.extend([x for x, _ in kwargs.items()]) from numba.core import utils if utils.PYVERSION >= (3, 8): co_args.append(stub_code.co_posonlyargcount) co_args.append(stub_code.co_kwonlyargcount) co_args.extend([stub_code.co_nlocals + nargs + len(kwargs), stub_code.co_stacksize, stub_code.co_flags, stub_code.co_code, stub_code.co_consts, stub_code.co_names, tuple(new_varnames), stub_code.co_filename, stub_code.co_name, stub_code.co_firstlineno, stub_code.co_lnotab, stub_code.co_freevars, stub_code.co_cellvars ]) new_code = pytypes.CodeType(*co_args) # get function new_func = pytypes.FunctionType(new_code, {'body': body_func}) return new_func def wrap_typing(self): """ Use this to replace @infer_global; it records the decorated function as a typer for the argument `concrete_function`. """ if self._typing_key is None: key = self._function else: key = self._typing_key def inner(typing_class): # Note that two templates could be used for the same function; to # avoid @infer_global etc. the typing template is copied. This is to # ensure there's a 1:1 relationship between the typing templates and # their keys. clazz_dict = dict(typing_class.__dict__) clazz_dict['key'] = key cloned = type(f"cloned_template_for_{key}", typing_class.__bases__, clazz_dict) self._TYPER = cloned _overload_glue.add_no_defer(key) self._build() return typing_class return inner def wrap_impl(self, *args): """ Use this to replace @lower*; it records the decorated function as the lowering implementation """ assert self._TYPER is not None def inner(lowerer): self._BIND_TYPES[args] = lowerer return lowerer return inner def _assemble(self): """Assembles the OverloadSelector definitions from the registered typing to lowering map.
""" from numba.core.base import OverloadSelector if self._typing_key is None: key = self._function else: key = self._typing_key _overload_glue.flush_deferred_lowering(key) self._selector = OverloadSelector() msg = f"No entries in the typing->lowering map for {self._function}" assert self._BIND_TYPES, msg for sig, impl in self._BIND_TYPES.items(): self._selector.append(impl, sig) def _build(self): from numba.core.extending import overload, intrinsic @overload(self._function, strict=False, jit_options={'forceinline': True}) def ol_generated(*ol_args, **ol_kwargs): def body(tyctx): msg = f"No typer registered for {self._function}" if self._TYPER is None: raise errors.InternalError(msg) typing = self._TYPER(tyctx) sig = typing.apply(ol_args, ol_kwargs) if sig is None: # this follows convention of something not typeable # returning None return None if self._selector is None: self._assemble() lowering = self._selector.find(sig.args) msg = (f"Could not find implementation to lower {sig} for ", f"{self._function}") if lowering is None: raise errors.InternalError(msg) return sig, lowering stub = self._stub_generator(len(ol_args), body, ol_kwargs) intrin = intrinsic(stub) # This is horrible, need to generate a jit wrapper function that # walks the ol_kwargs into the intrin with a signature that # matches the lowering sig. The actual kwarg var names matter, # they have to match exactly. arg_str = ','.join([f'tmp{x}' for x in range(len(ol_args))]) kws_str = ','.join(ol_kwargs.keys()) call_str = ','.join([x for x in (arg_str, kws_str) if x]) # NOTE: The jit_wrapper functions cannot take `*args` # albeit this an obvious choice for accepting an unknown number # of arguments. If this is done, `*args` ends up as a cascade of # Tuple assembling in the IR which ends up with literal # information being lost. As a result the _exact_ argument list # is generated to match the number of arguments and kwargs. name = str(self._function) # This is to name the function with something vaguely identifiable name = ''.join([x if x not in {'>','<',' ','-','.'} else '_' for x in name]) gen = textwrap.dedent((""" def jit_wrapper_{}({}): return intrin({}) """)).format(name, call_str, call_str) l = {} g = {'intrin': intrin} exec(gen, g, l) return l['jit_wrapper_{}'.format(name)] class _Gluer: """This is a helper class to make sure that each concrete overload has only one wrapper as the code relies on the wrapper being a singleton.""" def __init__(self): self._registered = dict() self._lock = RLock() # `_no_defer` stores keys that should not defer lowering because typing # is already provided. self._no_defer = set() # `_deferred` stores lowering that must be deferred because the typing # has not been provided. self._deferred = defaultdict(list) def __call__(self, func, typing_key=None): with self._lock: if typing_key is None: key = func else: key = typing_key if key in self._registered: return self._registered[key] else: wrapper = _OverloadWrapper(func, typing_key=typing_key) self._registered[key] = wrapper return wrapper def defer_lowering(self, key, lower_fn): """Defer lowering of the given key and lowering function. """ with self._lock: if key in self._no_defer: # Key is marked as no defer, register lowering now lower_fn() else: # Defer self._deferred[key].append(lower_fn) def add_no_defer(self, key): """Stop lowering to be deferred for the given key. """ with self._lock: self._no_defer.add(key) def flush_deferred_lowering(self, key): """Flush the deferred lowering for the given key. 
""" with self._lock: deferred = self._deferred.pop(key, []) for cb in deferred: cb() _overload_glue = _Gluer() del _Gluer def glue_typing(concrete_function, typing_key=None): """This is a decorator for wrapping the typing part for a concrete function 'concrete_function', it's a text-only replacement for '@infer_global'""" return _overload_glue(concrete_function, typing_key=typing_key).wrap_typing() def glue_lowering(*args): """This is a decorator for wrapping the implementation (lowering) part for a concrete function. 'args[0]' is the concrete_function, 'args[1:]' are the types the lowering will accept. This acts as a text-only replacement for '@lower/@lower_builtin'""" def wrap(fn): key = args[0] def real_call(): glue = _overload_glue(args[0], typing_key=key) return glue.wrap_impl(*args[1:])(fn) _overload_glue.defer_lowering(key, real_call) return fn return wrap numba-0.55.1/numba/core/postproc.py000664 000000 000000 00000022117 14174536160 017153 0ustar00rootroot000000 000000 from numba.core import utils, ir, analysis, transforms, ir_utils class YieldPoint(object): def __init__(self, block, inst): assert isinstance(block, ir.Block) assert isinstance(inst, ir.Yield) self.block = block self.inst = inst self.live_vars = None self.weak_live_vars = None class GeneratorInfo(object): def __init__(self): # { index: YieldPoint } self.yield_points = {} # Ordered list of variable names self.state_vars = [] def get_yield_points(self): """ Return an iterable of YieldPoint instances. """ return self.yield_points.values() class VariableLifetime(object): """ For lazily building information of variable lifetime """ def __init__(self, blocks): self._blocks = blocks @utils.cached_property def cfg(self): return analysis.compute_cfg_from_blocks(self._blocks) @utils.cached_property def usedefs(self): return analysis.compute_use_defs(self._blocks) @utils.cached_property def livemap(self): return analysis.compute_live_map(self.cfg, self._blocks, self.usedefs.usemap, self.usedefs.defmap) @utils.cached_property def deadmaps(self): return analysis.compute_dead_maps(self.cfg, self._blocks, self.livemap, self.usedefs.defmap) # other packages that define new nodes add calls for inserting dels # format: {type:function} ir_extension_insert_dels = {} class PostProcessor(object): """ A post-processor for Numba IR. """ def __init__(self, func_ir): self.func_ir = func_ir def run(self, emit_dels=False, extend_lifetimes=False): """ Run the following passes over Numba IR: - canonicalize the CFG - emit explicit `del` instructions for variables - compute lifetime of variables - compute generator info (if function is a generator function) """ self.func_ir.blocks = transforms.canonicalize_cfg(self.func_ir.blocks) vlt = VariableLifetime(self.func_ir.blocks) self.func_ir.variable_lifetime = vlt bev = analysis.compute_live_variables(vlt.cfg, self.func_ir.blocks, vlt.usedefs.defmap, vlt.deadmaps.combined) for offset, ir_block in self.func_ir.blocks.items(): self.func_ir.block_entry_vars[ir_block] = bev[offset] if self.func_ir.is_generator: self.func_ir.generator_info = GeneratorInfo() self._compute_generator_info() else: self.func_ir.generator_info = None # Emit del nodes, do this last as the generator info parsing generates # and then strips dels as part of its analysis. if emit_dels: self._insert_var_dels(extend_lifetimes=extend_lifetimes) def _populate_generator_info(self): """ Fill `index` for the Yield instruction and create YieldPoints. 
""" dct = self.func_ir.generator_info.yield_points assert not dct, 'rerunning _populate_generator_info' for block in self.func_ir.blocks.values(): for inst in block.body: if isinstance(inst, ir.Assign): yieldinst = inst.value if isinstance(yieldinst, ir.Yield): index = len(dct) + 1 yieldinst.index = index yp = YieldPoint(block, yieldinst) dct[yieldinst.index] = yp def _compute_generator_info(self): """ Compute the generator's state variables as the union of live variables at all yield points. """ # generate del info, it's used in analysis here, strip it out at the end self._insert_var_dels() self._populate_generator_info() gi = self.func_ir.generator_info for yp in gi.get_yield_points(): live_vars = set(self.func_ir.get_block_entry_vars(yp.block)) weak_live_vars = set() stmts = iter(yp.block.body) for stmt in stmts: if isinstance(stmt, ir.Assign): if stmt.value is yp.inst: break live_vars.add(stmt.target.name) elif isinstance(stmt, ir.Del): live_vars.remove(stmt.value) else: assert 0, "couldn't find yield point" # Try to optimize out any live vars that are deleted immediately # after the yield point. for stmt in stmts: if isinstance(stmt, ir.Del): name = stmt.value if name in live_vars: live_vars.remove(name) weak_live_vars.add(name) else: break yp.live_vars = live_vars yp.weak_live_vars = weak_live_vars st = set() for yp in gi.get_yield_points(): st |= yp.live_vars st |= yp.weak_live_vars gi.state_vars = sorted(st) self.remove_dels() def _insert_var_dels(self, extend_lifetimes=False): """ Insert del statements for each variable. Returns a 2-tuple of (variable definition map, variable deletion map) which indicates variables defined and deleted in each block. The algorithm avoids relying on explicit knowledge on loops and distinguish between variables that are defined locally vs variables that come from incoming blocks. We start with simple usage (variable reference) and definition (variable creation) maps on each block. Propagate the liveness info to predecessor blocks until it stabilize, at which point we know which variables must exist before entering each block. Then, we compute the end of variable lives and insert del statements accordingly. Variables are deleted after the last use. Variable referenced by terminators (e.g. conditional branch and return) are deleted by the successors or the caller. """ vlt = self.func_ir.variable_lifetime self._patch_var_dels(vlt.deadmaps.internal, vlt.deadmaps.escaping, extend_lifetimes=extend_lifetimes) def _patch_var_dels(self, internal_dead_map, escaping_dead_map, extend_lifetimes=False): """ Insert delete in each block """ for offset, ir_block in self.func_ir.blocks.items(): # for each internal var, insert delete after the last use internal_dead_set = internal_dead_map[offset].copy() delete_pts = [] # for each statement in reverse order for stmt in reversed(ir_block.body[:-1]): # internal vars that are used here live_set = set(v.name for v in stmt.list_vars()) dead_set = live_set & internal_dead_set for T, def_func in ir_extension_insert_dels.items(): if isinstance(stmt, T): done_dels = def_func(stmt, dead_set) dead_set -= done_dels internal_dead_set -= done_dels # used here but not afterwards delete_pts.append((stmt, dead_set)) internal_dead_set -= dead_set # rewrite body and insert dels body = [] lastloc = ir_block.loc del_store = [] for stmt, delete_set in reversed(delete_pts): # If using extended lifetimes then the Dels are all put at the # block end just ahead of the terminator, so associate their # location with the terminator. 
if extend_lifetimes: lastloc = ir_block.body[-1].loc else: lastloc = stmt.loc # Ignore dels (assuming no user inserted deletes) if not isinstance(stmt, ir.Del): body.append(stmt) # note: the reverse sort is not necessary for correctness # it is just to minimize changes to test for now for var_name in sorted(delete_set, reverse=True): delnode = ir.Del(var_name, loc=lastloc) if extend_lifetimes: del_store.append(delnode) else: body.append(delnode) if extend_lifetimes: body.extend(del_store) body.append(ir_block.body[-1]) # terminator ir_block.body = body # vars to delete at the start escape_dead_set = escaping_dead_map[offset] for var_name in sorted(escape_dead_set): ir_block.prepend(ir.Del(var_name, loc=ir_block.body[0].loc)) def remove_dels(self): """ Strips the IR of Del nodes """ ir_utils.remove_dels(self.func_ir.blocks) numba-0.55.1/numba/core/pylowering.py000664 000000 000000 00000057637 14174536160 017520 0ustar00rootroot000000 000000 """ Lowering implementation for object mode. """ import builtins import operator import inspect from llvmlite.llvmpy.core import Type, Constant import llvmlite.llvmpy.core as lc from numba.core import types, utils, ir, generators, cgutils from numba.core.errors import (ForbiddenConstruct, LoweringError, NumbaNotImplementedError) from numba.core.lowering import BaseLower # Issue #475: locals() is unsupported as calling it naively would give # out wrong results. _unsupported_builtins = set([locals]) # Map operators to methods on the PythonAPI class PYTHON_BINOPMAP = { operator.add: ("number_add", False), operator.sub: ("number_subtract", False), operator.mul: ("number_multiply", False), operator.truediv: ("number_truedivide", False), operator.floordiv: ("number_floordivide", False), operator.mod: ("number_remainder", False), operator.pow: ("number_power", False), operator.lshift: ("number_lshift", False), operator.rshift: ("number_rshift", False), operator.and_: ("number_and", False), operator.or_: ("number_or", False), operator.xor: ("number_xor", False), # inplace operators operator.iadd: ("number_add", True), operator.isub: ("number_subtract", True), operator.imul: ("number_multiply", True), operator.itruediv: ("number_truedivide", True), operator.ifloordiv: ("number_floordivide", True), operator.imod: ("number_remainder", True), operator.ipow: ("number_power", True), operator.ilshift: ("number_lshift", True), operator.irshift: ("number_rshift", True), operator.iand: ("number_and", True), operator.ior: ("number_or", True), operator.ixor: ("number_xor", True), } PYTHON_BINOPMAP[operator.matmul] = ("number_matrix_multiply", False) PYTHON_BINOPMAP[operator.imatmul] = ("number_matrix_multiply", True) PYTHON_COMPAREOPMAP = { operator.eq: '==', operator.ne: '!=', operator.lt: '<', operator.le: '<=', operator.gt: '>', operator.ge: '>=', operator.is_: 'is', operator.is_not: 'is not', operator.contains: 'in' } class PyLower(BaseLower): GeneratorLower = generators.PyGeneratorLower def init(self): # Strings to be frozen into the Environment object self._frozen_strings = set() self._live_vars = set() def pre_lower(self): super(PyLower, self).pre_lower() self.init_pyapi() def post_lower(self): pass def pre_block(self, block): self.init_vars(block) def lower_inst(self, inst): if isinstance(inst, ir.Assign): value = self.lower_assign(inst) self.storevar(value, inst.target.name) elif isinstance(inst, ir.SetItem): target = self.loadvar(inst.target.name) index = self.loadvar(inst.index.name) value = self.loadvar(inst.value.name) ok = self.pyapi.object_setitem(target, 
index, value) self.check_int_status(ok) elif isinstance(inst, ir.DelItem): target = self.loadvar(inst.target.name) index = self.loadvar(inst.index.name) ok = self.pyapi.object_delitem(target, index) self.check_int_status(ok) elif isinstance(inst, ir.SetAttr): target = self.loadvar(inst.target.name) value = self.loadvar(inst.value.name) ok = self.pyapi.object_setattr(target, self._freeze_string(inst.attr), value) self.check_int_status(ok) elif isinstance(inst, ir.DelAttr): target = self.loadvar(inst.target.name) ok = self.pyapi.object_delattr(target, self._freeze_string(inst.attr)) self.check_int_status(ok) elif isinstance(inst, ir.StoreMap): dct = self.loadvar(inst.dct.name) key = self.loadvar(inst.key.name) value = self.loadvar(inst.value.name) ok = self.pyapi.dict_setitem(dct, key, value) self.check_int_status(ok) elif isinstance(inst, ir.Return): retval = self.loadvar(inst.value.name) if self.generator_info: # StopIteration # We own a reference to the "return value", but we # don't return it. self.pyapi.decref(retval) self.genlower.return_from_generator(self) return # No need to incref() as the reference is already owned. self.call_conv.return_value(self.builder, retval) elif isinstance(inst, ir.Branch): cond = self.loadvar(inst.cond.name) if cond.type == Type.int(1): istrue = cond else: istrue = self.pyapi.object_istrue(cond) zero = lc.Constant.null(istrue.type) pred = self.builder.icmp(lc.ICMP_NE, istrue, zero) tr = self.blkmap[inst.truebr] fl = self.blkmap[inst.falsebr] self.builder.cbranch(pred, tr, fl) elif isinstance(inst, ir.Jump): target = self.blkmap[inst.target] self.builder.branch(target) elif isinstance(inst, ir.Del): self.delvar(inst.value) elif isinstance(inst, ir.PopBlock): pass # this is just a marker elif isinstance(inst, ir.Raise): if inst.exception is not None: exc = self.loadvar(inst.exception.name) # A reference will be stolen by raise_object() and another # by return_exception_raised(). self.incref(exc) else: exc = None self.pyapi.raise_object(exc) self.return_exception_raised() else: msg = f"{type(inst)}, {inst}" raise NumbaNotImplementedError(msg) @utils.cached_property def _omitted_typobj(self): """Return a `OmittedArg` type instance as a LLVM value suitable for testing at runtime. 
""" from numba.core.dispatcher import OmittedArg return self.pyapi.unserialize( self.pyapi.serialize_object(OmittedArg)) def lower_assign(self, inst): """ The returned object must have a new reference """ value = inst.value if isinstance(value, (ir.Const, ir.FreeVar)): return self.lower_const(value.value) elif isinstance(value, ir.Var): val = self.loadvar(value.name) self.incref(val) return val elif isinstance(value, ir.Expr): return self.lower_expr(value) elif isinstance(value, ir.Global): return self.lower_global(value.name, value.value) elif isinstance(value, ir.Yield): return self.lower_yield(value) elif isinstance(value, ir.Arg): param = self.func_ir.func_id.pysig.parameters.get(value.name) obj = self.fnargs[value.index] slot = cgutils.alloca_once_value(self.builder, obj) # Don't check for OmittedArg unless the argument has a default if param is not None and param.default is inspect.Parameter.empty: self.incref(obj) self.builder.store(obj, slot) else: # When an argument is omitted, the dispatcher hands it as # _OmittedArg() typobj = self.pyapi.get_type(obj) is_omitted = self.builder.icmp_unsigned('==', typobj, self._omitted_typobj) with self.builder.if_else(is_omitted, likely=False) as (omitted, present): with present: self.incref(obj) self.builder.store(obj, slot) with omitted: # The argument is omitted => get the default value obj = self.pyapi.object_getattr_string(obj, 'value') self.builder.store(obj, slot) return self.builder.load(slot) else: raise NotImplementedError(type(value), value) def lower_yield(self, inst): yp = self.generator_info.yield_points[inst.index] assert yp.inst is inst self.genlower.init_generator_state(self) # Save live vars in state # We also need to save live vars that are del'ed afterwards. y = generators.LowerYield(self, yp, yp.live_vars | yp.weak_live_vars) y.lower_yield_suspend() # Yield to caller val = self.loadvar(inst.value.name) # Let caller own the reference self.pyapi.incref(val) self.call_conv.return_value(self.builder, val) # Resumption point y.lower_yield_resume() # None is returned by the yield expression return self.pyapi.make_none() def lower_binop(self, expr, op, inplace=False): lhs = self.loadvar(expr.lhs.name) rhs = self.loadvar(expr.rhs.name) assert not isinstance(op, str) if op in PYTHON_BINOPMAP: fname, inplace = PYTHON_BINOPMAP[op] fn = getattr(self.pyapi, fname) res = fn(lhs, rhs, inplace=inplace) else: # Assumed to be rich comparison fn = PYTHON_COMPAREOPMAP.get(expr.fn, expr.fn) if fn == 'in': # 'in' and operator.contains have args reversed lhs, rhs = rhs, lhs res = self.pyapi.object_richcompare(lhs, rhs, fn) self.check_error(res) return res def lower_expr(self, expr): if expr.op == 'binop': return self.lower_binop(expr, expr.fn, inplace=False) elif expr.op == 'inplace_binop': return self.lower_binop(expr, expr.fn, inplace=True) elif expr.op == 'unary': value = self.loadvar(expr.value.name) if expr.fn == operator.neg: res = self.pyapi.number_negative(value) elif expr.fn == operator.pos: res = self.pyapi.number_positive(value) elif expr.fn == operator.not_: res = self.pyapi.object_not(value) self.check_int_status(res) res = self.pyapi.bool_from_bool(res) elif expr.fn == operator.invert: res = self.pyapi.number_invert(value) else: raise NotImplementedError(expr) self.check_error(res) return res elif expr.op == 'call': argvals = [self.loadvar(a.name) for a in expr.args] fn = self.loadvar(expr.func.name) args = self.pyapi.tuple_pack(argvals) if expr.vararg: # Expand *args new_args = self.pyapi.number_add(args, 
self.loadvar(expr.vararg.name)) self.decref(args) args = new_args if not expr.kws: # No named arguments ret = self.pyapi.call(fn, args, None) else: # Named arguments keyvalues = [(k, self.loadvar(v.name)) for k, v in expr.kws] kws = self.pyapi.dict_pack(keyvalues) ret = self.pyapi.call(fn, args, kws) self.decref(kws) self.decref(args) self.check_error(ret) return ret elif expr.op == 'getattr': obj = self.loadvar(expr.value.name) res = self.pyapi.object_getattr(obj, self._freeze_string(expr.attr)) self.check_error(res) return res elif expr.op == 'build_tuple': items = [self.loadvar(it.name) for it in expr.items] res = self.pyapi.tuple_pack(items) self.check_error(res) return res elif expr.op == 'build_list': items = [self.loadvar(it.name) for it in expr.items] res = self.pyapi.list_pack(items) self.check_error(res) return res elif expr.op == 'build_map': res = self.pyapi.dict_new(expr.size) self.check_error(res) for k, v in expr.items: key = self.loadvar(k.name) value = self.loadvar(v.name) ok = self.pyapi.dict_setitem(res, key, value) self.check_int_status(ok) return res elif expr.op == 'build_set': items = [self.loadvar(it.name) for it in expr.items] res = self.pyapi.set_new() self.check_error(res) for it in items: ok = self.pyapi.set_add(res, it) self.check_int_status(ok) return res elif expr.op == 'getiter': obj = self.loadvar(expr.value.name) res = self.pyapi.object_getiter(obj) self.check_error(res) return res elif expr.op == 'iternext': iterobj = self.loadvar(expr.value.name) item = self.pyapi.iter_next(iterobj) is_valid = cgutils.is_not_null(self.builder, item) pair = self.pyapi.tuple_new(2) with self.builder.if_else(is_valid) as (then, otherwise): with then: self.pyapi.tuple_setitem(pair, 0, item) with otherwise: self.check_occurred() # Make the tuple valid by inserting None as dummy # iteration "result" (it will be ignored). 
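                # Added commentary (not in the original source): the
                # (item, is_valid) pair built here corresponds roughly to
                #     item = next(iterobj, None)    # PyIter_Next
                #     pair = (item, item is not None)
                # so the later 'pair_first' / 'pair_second' exprs can read
                # the result without invoking the iterator again.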
self.pyapi.tuple_setitem(pair, 0, self.pyapi.make_none()) self.pyapi.tuple_setitem(pair, 1, self.pyapi.bool_from_bool(is_valid)) return pair elif expr.op == 'pair_first': pair = self.loadvar(expr.value.name) first = self.pyapi.tuple_getitem(pair, 0) self.incref(first) return first elif expr.op == 'pair_second': pair = self.loadvar(expr.value.name) second = self.pyapi.tuple_getitem(pair, 1) self.incref(second) return second elif expr.op == 'exhaust_iter': iterobj = self.loadvar(expr.value.name) tup = self.pyapi.sequence_tuple(iterobj) self.check_error(tup) # Check tuple size is as expected tup_size = self.pyapi.tuple_size(tup) expected_size = self.context.get_constant(types.intp, expr.count) has_wrong_size = self.builder.icmp(lc.ICMP_NE, tup_size, expected_size) with cgutils.if_unlikely(self.builder, has_wrong_size): self.return_exception(ValueError) return tup elif expr.op == 'getitem': value = self.loadvar(expr.value.name) index = self.loadvar(expr.index.name) res = self.pyapi.object_getitem(value, index) self.check_error(res) return res elif expr.op == 'static_getitem': value = self.loadvar(expr.value.name) index = self.context.get_constant(types.intp, expr.index) indexobj = self.pyapi.long_from_ssize_t(index) self.check_error(indexobj) res = self.pyapi.object_getitem(value, indexobj) self.decref(indexobj) self.check_error(res) return res elif expr.op == 'getslice': target = self.loadvar(expr.target.name) start = self.loadvar(expr.start.name) stop = self.loadvar(expr.stop.name) slicefn = self.get_builtin_obj("slice") sliceobj = self.pyapi.call_function_objargs(slicefn, (start, stop)) self.decref(slicefn) self.check_error(sliceobj) res = self.pyapi.object_getitem(target, sliceobj) self.check_error(res) return res elif expr.op == 'cast': val = self.loadvar(expr.value.name) self.incref(val) return val elif expr.op == 'phi': raise LoweringError("PHI not stripped") elif expr.op == 'null': # Make null value return cgutils.get_null_value(self.pyapi.pyobj) else: raise NotImplementedError(expr) def lower_const(self, const): # All constants are frozen inside the environment index = self.env_manager.add_const(const) ret = self.env_manager.read_const(index) self.check_error(ret) self.incref(ret) return ret def lower_global(self, name, value): """ 1) Check global scope dictionary. 2) Check __builtins__. 
        2a) is it a dictionary (for a non-__main__ module)
        2b) is it a module (for the __main__ module)
        """
        moddict = self.get_module_dict()
        obj = self.pyapi.dict_getitem(moddict, self._freeze_string(name))
        self.incref(obj)  # obj is borrowed

        try:
            if value in _unsupported_builtins:
                raise ForbiddenConstruct("builtins %s() is not supported"
                                         % name, loc=self.loc)
        except TypeError:
            # `value` is unhashable, ignore
            pass

        if hasattr(builtins, name):
            obj_is_null = self.is_null(obj)
            bbelse = self.builder.basic_block

            with self.builder.if_then(obj_is_null):
                mod = self.pyapi.dict_getitem(
                    moddict, self._freeze_string("__builtins__"))
                builtin = self.builtin_lookup(mod, name)
                bbif = self.builder.basic_block

            retval = self.builder.phi(self.pyapi.pyobj)
            retval.add_incoming(obj, bbelse)
            retval.add_incoming(builtin, bbif)
        else:
            retval = obj
            with cgutils.if_unlikely(self.builder, self.is_null(retval)):
                self.pyapi.raise_missing_global_error(name)
                self.return_exception_raised()

        return retval

    # -------------------------------------------------------------------------

    def get_module_dict(self):
        return self.env_body.globals

    def get_builtin_obj(self, name):
        # XXX The builtins dict could be bound into the environment
        moddict = self.get_module_dict()
        mod = self.pyapi.dict_getitem(moddict,
                                      self._freeze_string("__builtins__"))
        return self.builtin_lookup(mod, name)

    def builtin_lookup(self, mod, name):
        """
        Args
        ----
        mod:
            The __builtins__ dictionary or module, as looked up in
            a module's globals.
        name: str
            The name of the object to look up.
        """
        fromdict = self.pyapi.dict_getitem(mod, self._freeze_string(name))
        self.incref(fromdict)  # fromdict is borrowed
        bbifdict = self.builder.basic_block

        with cgutils.if_unlikely(self.builder, self.is_null(fromdict)):
            # This happens when running inside the __main__ module
            frommod = self.pyapi.object_getattr(mod,
                                                self._freeze_string(name))

            with cgutils.if_unlikely(self.builder, self.is_null(frommod)):
                self.pyapi.raise_missing_global_error(name)
                self.return_exception_raised()

            bbifmod = self.builder.basic_block

        builtin = self.builder.phi(self.pyapi.pyobj)
        builtin.add_incoming(fromdict, bbifdict)
        builtin.add_incoming(frommod, bbifmod)

        return builtin

    def check_occurred(self):
        """
        Return from the function if a Python exception has occurred.
        """
        err_occurred = cgutils.is_not_null(self.builder,
                                           self.pyapi.err_occurred())

        with cgutils.if_unlikely(self.builder, err_occurred):
            self.return_exception_raised()

    def check_error(self, obj):
        """
        Return from the function if *obj* is NULL.
        """
        with cgutils.if_unlikely(self.builder, self.is_null(obj)):
            self.return_exception_raised()

        return obj

    def check_int_status(self, num, ok_value=0):
        """
        Raise an exception if *num* is smaller than *ok_value*.
        """
        ok = lc.Constant.int(num.type, ok_value)
        pred = self.builder.icmp(lc.ICMP_SLT, num, ok)

        with cgutils.if_unlikely(self.builder, pred):
            self.return_exception_raised()

    def is_null(self, obj):
        return cgutils.is_null(self.builder, obj)

    def return_exception_raised(self):
        """
        Return with the currently raised exception.
        """
        self.cleanup_vars()
        self.call_conv.return_exc(self.builder)

    def init_vars(self, block):
        """
        Initialize live variables for *block*.
        """
        self._live_vars = set(self.func_ir.get_block_entry_vars(block))

    def _getvar(self, name, ltype=None):
        if name not in self.varmap:
            self.varmap[name] = self.alloca(name, ltype=ltype)
        return self.varmap[name]

    def loadvar(self, name):
        """
        Load the LLVM value of the variable named *name*.
        """
        # If this raises then the live variables analysis is wrong
        assert name in self._live_vars, name
        ptr = self.varmap[name]
        val = self.builder.load(ptr)

        with cgutils.if_unlikely(self.builder, self.is_null(val)):
            self.pyapi.raise_missing_name_error(name)
            self.return_exception_raised()

        return val

    def delvar(self, name):
        """
        Delete the variable slot with the given name. This will decref
        the corresponding Python object.
        """
        # If this raises then the live variables analysis is wrong
        self._live_vars.remove(name)

        ptr = self._getvar(name)  # initializes `name` if not already
        self.decref(self.builder.load(ptr))
        # This is a safety guard against double decref's, but really
        # the IR should be correct and have only one Del per variable
        # and code path.
        self.builder.store(cgutils.get_null_value(ptr.type.pointee), ptr)

    def storevar(self, value, name, clobber=False):
        """
        Store an LLVM value, allocating a stack slot if necessary.
        The LLVM value can be of arbitrary type.
        """
        is_redefine = name in self._live_vars and not clobber
        ptr = self._getvar(name, ltype=value.type)
        if is_redefine:
            old = self.builder.load(ptr)
        else:
            self._live_vars.add(name)
        assert value.type == ptr.type.pointee, (str(value.type),
                                                str(ptr.type.pointee))
        self.builder.store(value, ptr)
        # Safe to call decref even on a non-Python object
        if is_redefine:
            self.decref(old)

    def cleanup_vars(self):
        """
        Clean up live variables.
        """
        for name in self._live_vars:
            ptr = self._getvar(name)
            self.decref(self.builder.load(ptr))

    def alloca(self, name, ltype=None):
        """
        Allocate a stack slot and initialize it to NULL.
        The default is to allocate a pyobject pointer.
        Use ``ltype`` to override.
        """
        if ltype is None:
            ltype = self.context.get_value_type(types.pyobject)
        with self.builder.goto_block(self.entry_block):
            ptr = self.builder.alloca(ltype, name=name)
            self.builder.store(cgutils.get_null_value(ltype), ptr)
        return ptr

    def _alloca_var(self, name, fetype):
        # This is here for API compatibility with lowering.py::Lower.
        # NOTE: fetype is unused
        return self.alloca(name)

    def incref(self, value):
        self.pyapi.incref(value)

    def decref(self, value):
        """
        This is allowed to be called on a non-pyobject pointer, in which
        case no code is inserted.
        """
        lpyobj = self.context.get_value_type(types.pyobject)

        if value.type == lpyobj:
            self.pyapi.decref(value)

    def _freeze_string(self, string):
        """
        Freeze a Python string object into the code.
""" return self.lower_const(string) numba-0.55.1/numba/core/pythonapi.py000664 000000 000000 00000203507 14174536160 017321 0ustar00rootroot000000 000000 from collections import namedtuple import contextlib import pickle import hashlib import sys from llvmlite import ir from llvmlite.llvmpy.core import Type, Constant import llvmlite.llvmpy.core as lc import ctypes from numba import _helperlib from numba.core import ( types, utils, config, lowering, cgutils, imputils, serialize, ) PY_UNICODE_1BYTE_KIND = _helperlib.py_unicode_1byte_kind PY_UNICODE_2BYTE_KIND = _helperlib.py_unicode_2byte_kind PY_UNICODE_4BYTE_KIND = _helperlib.py_unicode_4byte_kind PY_UNICODE_WCHAR_KIND = _helperlib.py_unicode_wchar_kind class _Registry(object): def __init__(self): self.functions = {} def register(self, typeclass): assert issubclass(typeclass, types.Type) def decorator(func): if typeclass in self.functions: raise KeyError("duplicate registration for %s" % (typeclass,)) self.functions[typeclass] = func return func return decorator def lookup(self, typeclass, default=None): assert issubclass(typeclass, types.Type) for cls in typeclass.__mro__: func = self.functions.get(cls) if func is not None: return func return default # Registries of boxing / unboxing implementations _boxers = _Registry() _unboxers = _Registry() _reflectors = _Registry() box = _boxers.register unbox = _unboxers.register reflect = _reflectors.register class _BoxContext(namedtuple("_BoxContext", ("context", "builder", "pyapi", "env_manager"))): """ The facilities required by boxing implementations. """ __slots__ = () def box(self, typ, val): return self.pyapi.from_native_value(typ, val, self.env_manager) class _UnboxContext(namedtuple("_UnboxContext", ("context", "builder", "pyapi"))): """ The facilities required by unboxing implementations. """ __slots__ = () def unbox(self, typ, obj): return self.pyapi.to_native_value(typ, obj) class _ReflectContext(namedtuple("_ReflectContext", ("context", "builder", "pyapi", "env_manager", "is_error"))): """ The facilities required by reflection implementations. """ __slots__ = () # XXX the error bit is currently unused by consumers (e.g. PyCallWrapper) def set_error(self): self.builder.store(self.is_error, cgutils.true_bit) def box(self, typ, val): return self.pyapi.from_native_value(typ, val, self.env_manager) def reflect(self, typ, val): return self.pyapi.reflect_native_value(typ, val, self.env_manager) class NativeValue(object): """ Encapsulate the result of converting a Python object to a native value, recording whether the conversion was successful and how to cleanup. """ def __init__(self, value, is_error=None, cleanup=None): self.value = value self.is_error = is_error if is_error is not None else cgutils.false_bit self.cleanup = cleanup class EnvironmentManager(object): def __init__(self, pyapi, env, env_body, env_ptr): assert isinstance(env, lowering.Environment) self.pyapi = pyapi self.env = env self.env_body = env_body self.env_ptr = env_ptr def add_const(self, const): """ Add a constant to the environment, return its index. """ # All constants are frozen inside the environment if isinstance(const, str): const = sys.intern(const) for index, val in enumerate(self.env.consts): if val is const: break else: index = len(self.env.consts) self.env.consts.append(const) return index def read_const(self, index): """ Look up constant number *index* inside the environment body. A borrowed reference is returned. The returned LLVM value may have NULL value at runtime which indicates an error at runtime. 
""" assert index < len(self.env.consts) builder = self.pyapi.builder consts = self.env_body.consts ret = cgutils.alloca_once(builder, self.pyapi.pyobj, zfill=True) with builder.if_else(cgutils.is_not_null(builder, consts)) as \ (br_not_null, br_null): with br_not_null: getitem = self.pyapi.list_getitem(consts, index) builder.store(getitem, ret) with br_null: # This can happen when the Environment is accidentally released # and has subsequently been garbage collected. self.pyapi.err_set_string( "PyExc_RuntimeError", "`env.consts` is NULL in `read_const`", ) return builder.load(ret) _IteratorLoop = namedtuple('_IteratorLoop', ('value', 'do_break')) class PythonAPI(object): """ Code generation facilities to call into the CPython C API (and related helpers). """ def __init__(self, context, builder): """ Note: Maybe called multiple times when lowering a function """ self.context = context self.builder = builder self.module = builder.basic_block.function.module # A unique mapping of serialized objects in this module try: self.module.__serialized except AttributeError: self.module.__serialized = {} # Initialize types self.pyobj = self.context.get_argument_type(types.pyobject) self.pyobjptr = self.pyobj.as_pointer() self.voidptr = Type.pointer(Type.int(8)) self.long = Type.int(ctypes.sizeof(ctypes.c_long) * 8) self.ulong = self.long self.longlong = Type.int(ctypes.sizeof(ctypes.c_ulonglong) * 8) self.ulonglong = self.longlong self.double = Type.double() self.py_ssize_t = self.context.get_value_type(types.intp) self.cstring = Type.pointer(Type.int(8)) self.gil_state = Type.int(_helperlib.py_gil_state_size * 8) self.py_buffer_t = ir.ArrayType(ir.IntType(8), _helperlib.py_buffer_size) self.py_hash_t = self.py_ssize_t self.py_unicode_1byte_kind = _helperlib.py_unicode_1byte_kind self.py_unicode_2byte_kind = _helperlib.py_unicode_2byte_kind self.py_unicode_4byte_kind = _helperlib.py_unicode_4byte_kind self.py_unicode_wchar_kind = _helperlib.py_unicode_wchar_kind def get_env_manager(self, env, env_body, env_ptr): return EnvironmentManager(self, env, env_body, env_ptr) def emit_environment_sentry(self, envptr, return_pyobject=False, debug_msg=''): """Emits LLVM code to ensure the `envptr` is not NULL """ is_null = cgutils.is_null(self.builder, envptr) with cgutils.if_unlikely(self.builder, is_null): if return_pyobject: fnty = self.builder.function.type.pointee assert fnty.return_type == self.pyobj self.err_set_string( "PyExc_RuntimeError", f"missing Environment: {debug_msg}", ) self.builder.ret(self.get_null_object()) else: self.context.call_conv.return_user_exc( self.builder, RuntimeError, (f"missing Environment: {debug_msg}",), ) # ------ Python API ----- # # Basic object API # def incref(self, obj): fnty = Type.function(Type.void(), [self.pyobj]) fn = self._get_function(fnty, name="Py_IncRef") self.builder.call(fn, [obj]) def decref(self, obj): fnty = Type.function(Type.void(), [self.pyobj]) fn = self._get_function(fnty, name="Py_DecRef") self.builder.call(fn, [obj]) def get_type(self, obj): fnty = Type.function(self.pyobj, [self.pyobj]) fn = self._get_function(fnty, name="numba_py_type") return self.builder.call(fn, [obj]) # # Argument unpacking # def parse_tuple_and_keywords(self, args, kws, fmt, keywords, *objs): charptr = Type.pointer(Type.int(8)) charptrary = Type.pointer(charptr) argtypes = [self.pyobj, self.pyobj, charptr, charptrary] fnty = Type.function(Type.int(), argtypes, var_arg=True) fn = self._get_function(fnty, name="PyArg_ParseTupleAndKeywords") return self.builder.call(fn, [args, 
kws, fmt, keywords] + list(objs)) def parse_tuple(self, args, fmt, *objs): charptr = Type.pointer(Type.int(8)) argtypes = [self.pyobj, charptr] fnty = Type.function(Type.int(), argtypes, var_arg=True) fn = self._get_function(fnty, name="PyArg_ParseTuple") return self.builder.call(fn, [args, fmt] + list(objs)) def unpack_tuple(self, args, name, n_min, n_max, *objs): charptr = Type.pointer(Type.int(8)) argtypes = [self.pyobj, charptr, self.py_ssize_t, self.py_ssize_t] fnty = Type.function(Type.int(), argtypes, var_arg=True) fn = self._get_function(fnty, name="PyArg_UnpackTuple") n_min = Constant.int(self.py_ssize_t, n_min) n_max = Constant.int(self.py_ssize_t, n_max) if isinstance(name, str): name = self.context.insert_const_string(self.builder.module, name) return self.builder.call(fn, [args, name, n_min, n_max] + list(objs)) # # Exception and errors # def err_occurred(self): fnty = Type.function(self.pyobj, ()) fn = self._get_function(fnty, name="PyErr_Occurred") return self.builder.call(fn, ()) def err_clear(self): fnty = Type.function(Type.void(), ()) fn = self._get_function(fnty, name="PyErr_Clear") return self.builder.call(fn, ()) def err_set_string(self, exctype, msg): fnty = Type.function(Type.void(), [self.pyobj, self.cstring]) fn = self._get_function(fnty, name="PyErr_SetString") if isinstance(exctype, str): exctype = self.get_c_object(exctype) if isinstance(msg, str): msg = self.context.insert_const_string(self.module, msg) return self.builder.call(fn, (exctype, msg)) def err_format(self, exctype, msg, *format_args): fnty = Type.function(Type.void(), [self.pyobj, self.cstring], var_arg=True) fn = self._get_function(fnty, name="PyErr_Format") if isinstance(exctype, str): exctype = self.get_c_object(exctype) if isinstance(msg, str): msg = self.context.insert_const_string(self.module, msg) return self.builder.call(fn, (exctype, msg) + tuple(format_args)) def raise_object(self, exc=None): """ Raise an arbitrary exception (type or value or (type, args) or None - if reraising). A reference to the argument is consumed. """ fnty = Type.function(Type.void(), [self.pyobj]) fn = self._get_function(fnty, name="numba_do_raise") if exc is None: exc = self.make_none() return self.builder.call(fn, (exc,)) def err_set_object(self, exctype, excval): fnty = Type.function(Type.void(), [self.pyobj, self.pyobj]) fn = self._get_function(fnty, name="PyErr_SetObject") if isinstance(exctype, str): exctype = self.get_c_object(exctype) return self.builder.call(fn, (exctype, excval)) def err_set_none(self, exctype): fnty = Type.function(Type.void(), [self.pyobj]) fn = self._get_function(fnty, name="PyErr_SetNone") if isinstance(exctype, str): exctype = self.get_c_object(exctype) return self.builder.call(fn, (exctype,)) def err_write_unraisable(self, obj): fnty = Type.function(Type.void(), [self.pyobj]) fn = self._get_function(fnty, name="PyErr_WriteUnraisable") return self.builder.call(fn, (obj,)) def err_fetch(self, pty, pval, ptb): fnty = Type.function(Type.void(), [self.pyobjptr] * 3) fn = self._get_function(fnty, name="PyErr_Fetch") return self.builder.call(fn, (pty, pval, ptb)) def err_restore(self, ty, val, tb): fnty = Type.function(Type.void(), [self.pyobj] * 3) fn = self._get_function(fnty, name="PyErr_Restore") return self.builder.call(fn, (ty, val, tb)) @contextlib.contextmanager def err_push(self, keep_new=False): """ Temporarily push the current error indicator while the code block is executed. 
If *keep_new* is True and the code block raises a new error, the new error is kept, otherwise the old error indicator is restored at the end of the block. """ pty, pval, ptb = [cgutils.alloca_once(self.builder, self.pyobj) for i in range(3)] self.err_fetch(pty, pval, ptb) yield ty = self.builder.load(pty) val = self.builder.load(pval) tb = self.builder.load(ptb) if keep_new: new_error = cgutils.is_not_null(self.builder, self.err_occurred()) with self.builder.if_else(new_error, likely=False) as (if_error, if_ok): with if_error: # Code block raised an error, keep it self.decref(ty) self.decref(val) self.decref(tb) with if_ok: # Restore previous error self.err_restore(ty, val, tb) else: self.err_restore(ty, val, tb) def get_c_object(self, name): """ Get a Python object through its C-accessible *name* (e.g. "PyExc_ValueError"). The underlying variable must be a `PyObject *`, and the value of that pointer is returned. """ # A LLVM global variable is implicitly a pointer to the declared # type, so fix up by using pyobj.pointee. return self.context.get_c_value(self.builder, self.pyobj.pointee, name, dllimport=True) def raise_missing_global_error(self, name): msg = "global name '%s' is not defined" % name cstr = self.context.insert_const_string(self.module, msg) self.err_set_string("PyExc_NameError", cstr) def raise_missing_name_error(self, name): msg = "name '%s' is not defined" % name cstr = self.context.insert_const_string(self.module, msg) self.err_set_string("PyExc_NameError", cstr) def fatal_error(self, msg): fnty = Type.function(Type.void(), [self.cstring]) fn = self._get_function(fnty, name="Py_FatalError") fn.attributes.add("noreturn") cstr = self.context.insert_const_string(self.module, msg) self.builder.call(fn, (cstr,)) # # Concrete dict API # def dict_getitem_string(self, dic, name): """Lookup name inside dict Returns a borrowed reference """ fnty = Type.function(self.pyobj, [self.pyobj, self.cstring]) fn = self._get_function(fnty, name="PyDict_GetItemString") cstr = self.context.insert_const_string(self.module, name) return self.builder.call(fn, [dic, cstr]) def dict_getitem(self, dic, name): """Lookup name inside dict Returns a borrowed reference """ fnty = Type.function(self.pyobj, [self.pyobj, self.pyobj]) fn = self._get_function(fnty, name="PyDict_GetItem") return self.builder.call(fn, [dic, name]) def dict_new(self, presize=0): if presize == 0: fnty = Type.function(self.pyobj, ()) fn = self._get_function(fnty, name="PyDict_New") return self.builder.call(fn, ()) else: fnty = Type.function(self.pyobj, [self.py_ssize_t]) fn = self._get_function(fnty, name="_PyDict_NewPresized") return self.builder.call(fn, [Constant.int(self.py_ssize_t, presize)]) def dict_setitem(self, dictobj, nameobj, valobj): fnty = Type.function(Type.int(), (self.pyobj, self.pyobj, self.pyobj)) fn = self._get_function(fnty, name="PyDict_SetItem") return self.builder.call(fn, (dictobj, nameobj, valobj)) def dict_setitem_string(self, dictobj, name, valobj): fnty = Type.function(Type.int(), (self.pyobj, self.cstring, self.pyobj)) fn = self._get_function(fnty, name="PyDict_SetItemString") cstr = self.context.insert_const_string(self.module, name) return self.builder.call(fn, (dictobj, cstr, valobj)) def dict_pack(self, keyvalues): """ Args ----- keyvalues: iterable of (str, llvm.Value of PyObject*) """ dictobj = self.dict_new() with self.if_object_ok(dictobj): for k, v in keyvalues: self.dict_setitem_string(dictobj, k, v) return dictobj # # Concrete number APIs # def float_from_double(self, fval): fnty = 
Type.function(self.pyobj, [self.double])
        fn = self._get_function(fnty, name="PyFloat_FromDouble")
        return self.builder.call(fn, [fval])

    def number_as_ssize_t(self, numobj):
        fnty = Type.function(self.py_ssize_t, [self.pyobj, self.pyobj])
        fn = self._get_function(fnty, name="PyNumber_AsSsize_t")
        # We don't want any clipping, so pass OverflowError as the 2nd arg
        exc_class = self.get_c_object("PyExc_OverflowError")
        return self.builder.call(fn, [numobj, exc_class])

    def number_long(self, numobj):
        fnty = Type.function(self.pyobj, [self.pyobj])
        fn = self._get_function(fnty, name="PyNumber_Long")
        return self.builder.call(fn, [numobj])

    def long_as_ulonglong(self, numobj):
        fnty = Type.function(self.ulonglong, [self.pyobj])
        fn = self._get_function(fnty, name="PyLong_AsUnsignedLongLong")
        return self.builder.call(fn, [numobj])

    def long_as_longlong(self, numobj):
        # Note: longlong and ulonglong are the same LLVM integer type;
        # the signed alias is used here for clarity.
        fnty = Type.function(self.longlong, [self.pyobj])
        fn = self._get_function(fnty, name="PyLong_AsLongLong")
        return self.builder.call(fn, [numobj])

    def long_as_voidptr(self, numobj):
        """
        Convert the given Python integer to a void*.
        This is recommended over number_as_ssize_t as it isn't affected
        by signedness.
        """
        fnty = Type.function(self.voidptr, [self.pyobj])
        fn = self._get_function(fnty, name="PyLong_AsVoidPtr")
        return self.builder.call(fn, [numobj])

    def _long_from_native_int(self, ival, func_name, native_int_type,
                              signed):
        fnty = Type.function(self.pyobj, [native_int_type])
        fn = self._get_function(fnty, name=func_name)
        resptr = cgutils.alloca_once(self.builder, self.pyobj)
        self.builder.store(self.builder.call(fn, [ival]), resptr)
        return self.builder.load(resptr)

    def long_from_long(self, ival):
        func_name = "PyLong_FromLong"
        fnty = Type.function(self.pyobj, [self.long])
        fn = self._get_function(fnty, name=func_name)
        return self.builder.call(fn, [ival])

    def long_from_ulong(self, ival):
        return self._long_from_native_int(ival, "PyLong_FromUnsignedLong",
                                          self.long, signed=False)

    def long_from_ssize_t(self, ival):
        return self._long_from_native_int(ival, "PyLong_FromSsize_t",
                                          self.py_ssize_t, signed=True)

    def long_from_longlong(self, ival):
        return self._long_from_native_int(ival, "PyLong_FromLongLong",
                                          self.longlong, signed=True)

    def long_from_ulonglong(self, ival):
        return self._long_from_native_int(ival, "PyLong_FromUnsignedLongLong",
                                          self.ulonglong, signed=False)

    def long_from_signed_int(self, ival):
        """
        Return a Python integer from any native integer value.
        """
        bits = ival.type.width
        if bits <= self.long.width:
            return self.long_from_long(self.builder.sext(ival, self.long))
        elif bits <= self.longlong.width:
            return self.long_from_longlong(self.builder.sext(ival,
                                                             self.longlong))
        else:
            raise OverflowError("integer too big (%d bits)" % (bits))

    def long_from_unsigned_int(self, ival):
        """
        Same as long_from_signed_int, but for unsigned values.
""" bits = ival.type.width if bits <= self.ulong.width: return self.long_from_ulong(self.builder.zext(ival, self.ulong)) elif bits <= self.ulonglong.width: return self.long_from_ulonglong(self.builder.zext(ival, self.ulonglong)) else: raise OverflowError("integer too big (%d bits)" % (bits)) def _get_number_operator(self, name): fnty = Type.function(self.pyobj, [self.pyobj, self.pyobj]) fn = self._get_function(fnty, name="PyNumber_%s" % name) return fn def _call_number_operator(self, name, lhs, rhs, inplace=False): if inplace: name = "InPlace" + name fn = self._get_number_operator(name) return self.builder.call(fn, [lhs, rhs]) def number_add(self, lhs, rhs, inplace=False): return self._call_number_operator("Add", lhs, rhs, inplace=inplace) def number_subtract(self, lhs, rhs, inplace=False): return self._call_number_operator("Subtract", lhs, rhs, inplace=inplace) def number_multiply(self, lhs, rhs, inplace=False): return self._call_number_operator("Multiply", lhs, rhs, inplace=inplace) def number_truedivide(self, lhs, rhs, inplace=False): return self._call_number_operator("TrueDivide", lhs, rhs, inplace=inplace) def number_floordivide(self, lhs, rhs, inplace=False): return self._call_number_operator("FloorDivide", lhs, rhs, inplace=inplace) def number_remainder(self, lhs, rhs, inplace=False): return self._call_number_operator("Remainder", lhs, rhs, inplace=inplace) def number_matrix_multiply(self, lhs, rhs, inplace=False): return self._call_number_operator("MatrixMultiply", lhs, rhs, inplace=inplace) def number_lshift(self, lhs, rhs, inplace=False): return self._call_number_operator("Lshift", lhs, rhs, inplace=inplace) def number_rshift(self, lhs, rhs, inplace=False): return self._call_number_operator("Rshift", lhs, rhs, inplace=inplace) def number_and(self, lhs, rhs, inplace=False): return self._call_number_operator("And", lhs, rhs, inplace=inplace) def number_or(self, lhs, rhs, inplace=False): return self._call_number_operator("Or", lhs, rhs, inplace=inplace) def number_xor(self, lhs, rhs, inplace=False): return self._call_number_operator("Xor", lhs, rhs, inplace=inplace) def number_power(self, lhs, rhs, inplace=False): fnty = Type.function(self.pyobj, [self.pyobj] * 3) fname = "PyNumber_InPlacePower" if inplace else "PyNumber_Power" fn = self._get_function(fnty, fname) return self.builder.call(fn, [lhs, rhs, self.borrow_none()]) def number_negative(self, obj): fnty = Type.function(self.pyobj, [self.pyobj]) fn = self._get_function(fnty, name="PyNumber_Negative") return self.builder.call(fn, (obj,)) def number_positive(self, obj): fnty = Type.function(self.pyobj, [self.pyobj]) fn = self._get_function(fnty, name="PyNumber_Positive") return self.builder.call(fn, (obj,)) def number_float(self, val): fnty = Type.function(self.pyobj, [self.pyobj]) fn = self._get_function(fnty, name="PyNumber_Float") return self.builder.call(fn, [val]) def number_invert(self, obj): fnty = Type.function(self.pyobj, [self.pyobj]) fn = self._get_function(fnty, name="PyNumber_Invert") return self.builder.call(fn, (obj,)) def float_as_double(self, fobj): fnty = Type.function(self.double, [self.pyobj]) fn = self._get_function(fnty, name="PyFloat_AsDouble") return self.builder.call(fn, [fobj]) def bool_from_bool(self, bval): """ Get a Python bool from a LLVM boolean. 
""" longval = self.builder.zext(bval, self.long) return self.bool_from_long(longval) def bool_from_long(self, ival): fnty = Type.function(self.pyobj, [self.long]) fn = self._get_function(fnty, name="PyBool_FromLong") return self.builder.call(fn, [ival]) def complex_from_doubles(self, realval, imagval): fnty = Type.function(self.pyobj, [Type.double(), Type.double()]) fn = self._get_function(fnty, name="PyComplex_FromDoubles") return self.builder.call(fn, [realval, imagval]) def complex_real_as_double(self, cobj): fnty = Type.function(Type.double(), [self.pyobj]) fn = self._get_function(fnty, name="PyComplex_RealAsDouble") return self.builder.call(fn, [cobj]) def complex_imag_as_double(self, cobj): fnty = Type.function(Type.double(), [self.pyobj]) fn = self._get_function(fnty, name="PyComplex_ImagAsDouble") return self.builder.call(fn, [cobj]) # # Concrete slice API # def slice_as_ints(self, obj): """ Read the members of a slice of integers. Returns a (ok, start, stop, step) tuple where ok is a boolean and the following members are pointer-sized ints. """ pstart = cgutils.alloca_once(self.builder, self.py_ssize_t) pstop = cgutils.alloca_once(self.builder, self.py_ssize_t) pstep = cgutils.alloca_once(self.builder, self.py_ssize_t) fnty = Type.function(Type.int(), [self.pyobj] + [self.py_ssize_t.as_pointer()] * 3) fn = self._get_function(fnty, name="numba_unpack_slice") res = self.builder.call(fn, (obj, pstart, pstop, pstep)) start = self.builder.load(pstart) stop = self.builder.load(pstop) step = self.builder.load(pstep) return cgutils.is_null(self.builder, res), start, stop, step # # List and sequence APIs # def sequence_getslice(self, obj, start, stop): fnty = Type.function(self.pyobj, [self.pyobj, self.py_ssize_t, self.py_ssize_t]) fn = self._get_function(fnty, name="PySequence_GetSlice") return self.builder.call(fn, (obj, start, stop)) def sequence_tuple(self, obj): fnty = Type.function(self.pyobj, [self.pyobj]) fn = self._get_function(fnty, name="PySequence_Tuple") return self.builder.call(fn, [obj]) def list_new(self, szval): fnty = Type.function(self.pyobj, [self.py_ssize_t]) fn = self._get_function(fnty, name="PyList_New") return self.builder.call(fn, [szval]) def list_size(self, lst): fnty = Type.function(self.py_ssize_t, [self.pyobj]) fn = self._get_function(fnty, name="PyList_Size") return self.builder.call(fn, [lst]) def list_append(self, lst, val): fnty = Type.function(Type.int(), [self.pyobj, self.pyobj]) fn = self._get_function(fnty, name="PyList_Append") return self.builder.call(fn, [lst, val]) def list_setitem(self, lst, idx, val): """ Warning: Steals reference to ``val`` """ fnty = Type.function(Type.int(), [self.pyobj, self.py_ssize_t, self.pyobj]) fn = self._get_function(fnty, name="PyList_SetItem") return self.builder.call(fn, [lst, idx, val]) def list_getitem(self, lst, idx): """ Returns a borrowed reference. 
""" fnty = Type.function(self.pyobj, [self.pyobj, self.py_ssize_t]) fn = self._get_function(fnty, name="PyList_GetItem") if isinstance(idx, int): idx = self.context.get_constant(types.intp, idx) return self.builder.call(fn, [lst, idx]) def list_setslice(self, lst, start, stop, obj): if obj is None: obj = self.get_null_object() fnty = Type.function(Type.int(), [self.pyobj, self.py_ssize_t, self.py_ssize_t, self.pyobj]) fn = self._get_function(fnty, name="PyList_SetSlice") return self.builder.call(fn, (lst, start, stop, obj)) # # Concrete tuple API # def tuple_getitem(self, tup, idx): """ Borrow reference """ fnty = Type.function(self.pyobj, [self.pyobj, self.py_ssize_t]) fn = self._get_function(fnty, name="PyTuple_GetItem") idx = self.context.get_constant(types.intp, idx) return self.builder.call(fn, [tup, idx]) def tuple_pack(self, items): fnty = Type.function(self.pyobj, [self.py_ssize_t], var_arg=True) fn = self._get_function(fnty, name="PyTuple_Pack") n = self.context.get_constant(types.intp, len(items)) args = [n] args.extend(items) return self.builder.call(fn, args) def tuple_size(self, tup): fnty = Type.function(self.py_ssize_t, [self.pyobj]) fn = self._get_function(fnty, name="PyTuple_Size") return self.builder.call(fn, [tup]) def tuple_new(self, count): fnty = Type.function(self.pyobj, [Type.int()]) fn = self._get_function(fnty, name='PyTuple_New') return self.builder.call(fn, [self.context.get_constant(types.int32, count)]) def tuple_setitem(self, tuple_val, index, item): """ Steals a reference to `item`. """ fnty = Type.function(Type.int(), [self.pyobj, Type.int(), self.pyobj]) setitem_fn = self._get_function(fnty, name='PyTuple_SetItem') index = self.context.get_constant(types.int32, index) self.builder.call(setitem_fn, [tuple_val, index, item]) # # Concrete set API # def set_new(self, iterable=None): if iterable is None: iterable = self.get_null_object() fnty = Type.function(self.pyobj, [self.pyobj]) fn = self._get_function(fnty, name="PySet_New") return self.builder.call(fn, [iterable]) def set_add(self, set, value): fnty = Type.function(Type.int(), [self.pyobj, self.pyobj]) fn = self._get_function(fnty, name="PySet_Add") return self.builder.call(fn, [set, value]) def set_clear(self, set): fnty = Type.function(Type.int(), [self.pyobj]) fn = self._get_function(fnty, name="PySet_Clear") return self.builder.call(fn, [set]) def set_size(self, set): fnty = Type.function(self.py_ssize_t, [self.pyobj]) fn = self._get_function(fnty, name="PySet_Size") return self.builder.call(fn, [set]) def set_update(self, set, iterable): fnty = Type.function(Type.int(), [self.pyobj, self.pyobj]) fn = self._get_function(fnty, name="_PySet_Update") return self.builder.call(fn, [set, iterable]) def set_next_entry(self, set, posptr, keyptr, hashptr): fnty = Type.function(Type.int(), [self.pyobj, self.py_ssize_t.as_pointer(), self.pyobj.as_pointer(), self.py_hash_t.as_pointer()]) fn = self._get_function(fnty, name="_PySet_NextEntry") return self.builder.call(fn, (set, posptr, keyptr, hashptr)) @contextlib.contextmanager def set_iterate(self, set): builder = self.builder hashptr = cgutils.alloca_once(builder, self.py_hash_t, name="hashptr") keyptr = cgutils.alloca_once(builder, self.pyobj, name="keyptr") posptr = cgutils.alloca_once_value(builder, ir.Constant(self.py_ssize_t, 0), name="posptr") bb_body = builder.append_basic_block("bb_body") bb_end = builder.append_basic_block("bb_end") builder.branch(bb_body) def do_break(): builder.branch(bb_end) with builder.goto_block(bb_body): r = 
self.set_next_entry(set, posptr, keyptr, hashptr) finished = cgutils.is_null(builder, r) with builder.if_then(finished, likely=False): builder.branch(bb_end) yield _IteratorLoop(builder.load(keyptr), do_break) builder.branch(bb_body) builder.position_at_end(bb_end) # # GIL APIs # def gil_ensure(self): """ Ensure the GIL is acquired. The returned value must be consumed by gil_release(). """ gilptrty = Type.pointer(self.gil_state) fnty = Type.function(Type.void(), [gilptrty]) fn = self._get_function(fnty, "numba_gil_ensure") gilptr = cgutils.alloca_once(self.builder, self.gil_state) self.builder.call(fn, [gilptr]) return gilptr def gil_release(self, gil): """ Release the acquired GIL by gil_ensure(). Must be paired with a gil_ensure(). """ gilptrty = Type.pointer(self.gil_state) fnty = Type.function(Type.void(), [gilptrty]) fn = self._get_function(fnty, "numba_gil_release") return self.builder.call(fn, [gil]) def save_thread(self): """ Release the GIL and return the former thread state (an opaque non-NULL pointer). """ fnty = Type.function(self.voidptr, []) fn = self._get_function(fnty, name="PyEval_SaveThread") return self.builder.call(fn, []) def restore_thread(self, thread_state): """ Restore the given thread state by reacquiring the GIL. """ fnty = Type.function(Type.void(), [self.voidptr]) fn = self._get_function(fnty, name="PyEval_RestoreThread") self.builder.call(fn, [thread_state]) # # Generic object private data (a way of associating an arbitrary void * # pointer to an arbitrary Python object). # def object_get_private_data(self, obj): fnty = Type.function(self.voidptr, [self.pyobj]) fn = self._get_function(fnty, name="numba_get_pyobject_private_data") return self.builder.call(fn, (obj,)) def object_set_private_data(self, obj, ptr): fnty = Type.function(Type.void(), [self.pyobj, self.voidptr]) fn = self._get_function(fnty, name="numba_set_pyobject_private_data") return self.builder.call(fn, (obj, ptr)) def object_reset_private_data(self, obj): fnty = Type.function(Type.void(), [self.pyobj]) fn = self._get_function(fnty, name="numba_reset_pyobject_private_data") return self.builder.call(fn, (obj,)) # # Other APIs (organize them better!) 
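    # Added note (not in the original source): the GIL helpers above are
    # emitted in pairs; a hypothetical lowering sequence looks like
    #     gil = pyapi.gil_ensure()    # saves state via numba_gil_ensure
    #     ... calls into the CPython C API ...
    #     pyapi.gil_release(gil)      # must consume the saved state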
# def import_module_noblock(self, modname): fnty = Type.function(self.pyobj, [self.cstring]) fn = self._get_function(fnty, name="PyImport_ImportModuleNoBlock") return self.builder.call(fn, [modname]) def call_function_objargs(self, callee, objargs): fnty = Type.function(self.pyobj, [self.pyobj], var_arg=True) fn = self._get_function(fnty, name="PyObject_CallFunctionObjArgs") args = [callee] + list(objargs) args.append(self.context.get_constant_null(types.pyobject)) return self.builder.call(fn, args) def call_method(self, callee, method, objargs=()): cname = self.context.insert_const_string(self.module, method) fnty = Type.function(self.pyobj, [self.pyobj, self.cstring, self.cstring], var_arg=True) fn = self._get_function(fnty, name="PyObject_CallMethod") fmt = 'O' * len(objargs) cfmt = self.context.insert_const_string(self.module, fmt) args = [callee, cname, cfmt] if objargs: args.extend(objargs) args.append(self.context.get_constant_null(types.pyobject)) return self.builder.call(fn, args) def call(self, callee, args=None, kws=None): if args is None: args = self.get_null_object() if kws is None: kws = self.get_null_object() fnty = Type.function(self.pyobj, [self.pyobj] * 3) fn = self._get_function(fnty, name="PyObject_Call") return self.builder.call(fn, (callee, args, kws)) def object_type(self, obj): """Emit a call to ``PyObject_Type(obj)`` to get the type of ``obj``. """ fnty = Type.function(self.pyobj, [self.pyobj]) fn = self._get_function(fnty, name="PyObject_Type") return self.builder.call(fn, (obj,)) def object_istrue(self, obj): fnty = Type.function(Type.int(), [self.pyobj]) fn = self._get_function(fnty, name="PyObject_IsTrue") return self.builder.call(fn, [obj]) def object_not(self, obj): fnty = Type.function(Type.int(), [self.pyobj]) fn = self._get_function(fnty, name="PyObject_Not") return self.builder.call(fn, [obj]) def object_richcompare(self, lhs, rhs, opstr): """ Refer to Python source Include/object.h for macros definition of the opid. 
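        For reference (added note): the opid passed to PyObject_RichCompare
        is the index of the operator in ['<', '<=', '==', '!=', '>', '>='],
        matching CPython's Py_LT .. Py_GE constants.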
""" ops = ['<', '<=', '==', '!=', '>', '>='] if opstr in ops: opid = ops.index(opstr) fnty = Type.function(self.pyobj, [self.pyobj, self.pyobj, Type.int()]) fn = self._get_function(fnty, name="PyObject_RichCompare") lopid = self.context.get_constant(types.int32, opid) return self.builder.call(fn, (lhs, rhs, lopid)) elif opstr == 'is': bitflag = self.builder.icmp(lc.ICMP_EQ, lhs, rhs) return self.bool_from_bool(bitflag) elif opstr == 'is not': bitflag = self.builder.icmp(lc.ICMP_NE, lhs, rhs) return self.bool_from_bool(bitflag) elif opstr in ('in', 'not in'): fnty = Type.function(Type.int(), [self.pyobj, self.pyobj]) fn = self._get_function(fnty, name="PySequence_Contains") status = self.builder.call(fn, (rhs, lhs)) negone = self.context.get_constant(types.int32, -1) is_good = self.builder.icmp(lc.ICMP_NE, status, negone) # Stack allocate output and initialize to Null outptr = cgutils.alloca_once_value(self.builder, Constant.null(self.pyobj)) # If PySequence_Contains returns non-error value with cgutils.if_likely(self.builder, is_good): if opstr == 'not in': status = self.builder.not_(status) # Store the status as a boolean object truncated = self.builder.trunc(status, Type.int(1)) self.builder.store(self.bool_from_bool(truncated), outptr) return self.builder.load(outptr) else: raise NotImplementedError("Unknown operator {op!r}".format( op=opstr)) def iter_next(self, iterobj): fnty = Type.function(self.pyobj, [self.pyobj]) fn = self._get_function(fnty, name="PyIter_Next") return self.builder.call(fn, [iterobj]) def object_getiter(self, obj): fnty = Type.function(self.pyobj, [self.pyobj]) fn = self._get_function(fnty, name="PyObject_GetIter") return self.builder.call(fn, [obj]) def object_getattr_string(self, obj, attr): cstr = self.context.insert_const_string(self.module, attr) fnty = Type.function(self.pyobj, [self.pyobj, self.cstring]) fn = self._get_function(fnty, name="PyObject_GetAttrString") return self.builder.call(fn, [obj, cstr]) def object_getattr(self, obj, attr): fnty = Type.function(self.pyobj, [self.pyobj, self.pyobj]) fn = self._get_function(fnty, name="PyObject_GetAttr") return self.builder.call(fn, [obj, attr]) def object_setattr_string(self, obj, attr, val): cstr = self.context.insert_const_string(self.module, attr) fnty = Type.function(Type.int(), [self.pyobj, self.cstring, self.pyobj]) fn = self._get_function(fnty, name="PyObject_SetAttrString") return self.builder.call(fn, [obj, cstr, val]) def object_setattr(self, obj, attr, val): fnty = Type.function(Type.int(), [self.pyobj, self.pyobj, self.pyobj]) fn = self._get_function(fnty, name="PyObject_SetAttr") return self.builder.call(fn, [obj, attr, val]) def object_delattr_string(self, obj, attr): # PyObject_DelAttrString() is actually a C macro calling # PyObject_SetAttrString() with value == NULL. return self.object_setattr_string(obj, attr, self.get_null_object()) def object_delattr(self, obj, attr): # PyObject_DelAttr() is actually a C macro calling # PyObject_SetAttr() with value == NULL. 
return self.object_setattr(obj, attr, self.get_null_object()) def object_getitem(self, obj, key): """ Return obj[key] """ fnty = Type.function(self.pyobj, [self.pyobj, self.pyobj]) fn = self._get_function(fnty, name="PyObject_GetItem") return self.builder.call(fn, (obj, key)) def object_setitem(self, obj, key, val): """ obj[key] = val """ fnty = Type.function(Type.int(), [self.pyobj, self.pyobj, self.pyobj]) fn = self._get_function(fnty, name="PyObject_SetItem") return self.builder.call(fn, (obj, key, val)) def object_delitem(self, obj, key): """ del obj[key] """ fnty = Type.function(Type.int(), [self.pyobj, self.pyobj]) fn = self._get_function(fnty, name="PyObject_DelItem") return self.builder.call(fn, (obj, key)) def string_as_string(self, strobj): fnty = Type.function(self.cstring, [self.pyobj]) fname = "PyUnicode_AsUTF8" fn = self._get_function(fnty, name=fname) return self.builder.call(fn, [strobj]) def string_as_string_and_size(self, strobj): """ Returns a tuple of ``(ok, buffer, length)``. The ``ok`` is i1 value that is set if ok. The ``buffer`` is a i8* of the output buffer. The ``length`` is a i32/i64 (py_ssize_t) of the length of the buffer. """ p_length = cgutils.alloca_once(self.builder, self.py_ssize_t) fnty = Type.function(self.cstring, [self.pyobj, self.py_ssize_t.as_pointer()]) fname = "PyUnicode_AsUTF8AndSize" fn = self._get_function(fnty, name=fname) buffer = self.builder.call(fn, [strobj, p_length]) ok = self.builder.icmp_unsigned('!=', ir.Constant(buffer.type, None), buffer) return (ok, buffer, self.builder.load(p_length)) def string_as_string_size_and_kind(self, strobj): """ Returns a tuple of ``(ok, buffer, length, kind)``. The ``ok`` is i1 value that is set if ok. The ``buffer`` is a i8* of the output buffer. The ``length`` is a i32/i64 (py_ssize_t) of the length of the buffer. 
        The ``kind`` is an i32 (int32) holding the Unicode kind constant.
        The ``hash`` is a long/uint64_t (py_hash_t) holding the string's
        precomputed hash.
        Note that, in addition to the values named above, an ``is_ascii``
        flag (i32) is also returned; the full return value is
        ``(ok, buffer, length, kind, is_ascii, hash)``.
        """
        p_length = cgutils.alloca_once(self.builder, self.py_ssize_t)
        p_kind = cgutils.alloca_once(self.builder, Type.int())
        p_ascii = cgutils.alloca_once(self.builder, Type.int())
        p_hash = cgutils.alloca_once(self.builder, self.py_hash_t)
        fnty = Type.function(self.cstring,
                             [self.pyobj,
                              self.py_ssize_t.as_pointer(),
                              Type.int().as_pointer(),
                              Type.int().as_pointer(),
                              self.py_hash_t.as_pointer()])
        fname = "numba_extract_unicode"
        fn = self._get_function(fnty, name=fname)

        buffer = self.builder.call(
            fn, [strobj, p_length, p_kind, p_ascii, p_hash])
        ok = self.builder.icmp_unsigned('!=',
                                        ir.Constant(buffer.type, None),
                                        buffer)
        return (ok, buffer,
                self.builder.load(p_length),
                self.builder.load(p_kind),
                self.builder.load(p_ascii),
                self.builder.load(p_hash))

    def string_from_string_and_size(self, string, size):
        # NOTE: PyString_* is the Python 2 naming; this symbol does not
        # exist on Python 3.
        fnty = Type.function(self.pyobj, [self.cstring, self.py_ssize_t])
        fname = "PyString_FromStringAndSize"
        fn = self._get_function(fnty, name=fname)
        return self.builder.call(fn, [string, size])

    def string_from_string(self, string):
        fnty = Type.function(self.pyobj, [self.cstring])
        fname = "PyUnicode_FromString"
        fn = self._get_function(fnty, name=fname)
        return self.builder.call(fn, [string])

    def string_from_kind_and_data(self, kind, string, size):
        fnty = Type.function(self.pyobj,
                             [Type.int(), self.cstring, self.py_ssize_t])
        fname = "PyUnicode_FromKindAndData"
        fn = self._get_function(fnty, name=fname)
        return self.builder.call(fn, [kind, string, size])

    def bytes_from_string_and_size(self, string, size):
        fnty = Type.function(self.pyobj, [self.cstring, self.py_ssize_t])
        fname = "PyBytes_FromStringAndSize"
        fn = self._get_function(fnty, name=fname)
        return self.builder.call(fn, [string, size])

    def object_hash(self, obj):
        fnty = Type.function(self.py_hash_t, [self.pyobj,])
        fname = "PyObject_Hash"
        fn = self._get_function(fnty, name=fname)
        return self.builder.call(fn, [obj,])

    def object_str(self, obj):
        fnty = Type.function(self.pyobj, [self.pyobj])
        fn = self._get_function(fnty, name="PyObject_Str")
        return self.builder.call(fn, [obj])

    def make_none(self):
        obj = self.borrow_none()
        self.incref(obj)
        return obj

    def borrow_none(self):
        return self.get_c_object("_Py_NoneStruct")

    def sys_write_stdout(self, fmt, *args):
        fnty = Type.function(Type.void(), [self.cstring], var_arg=True)
        fn = self._get_function(fnty, name="PySys_FormatStdout")
        return self.builder.call(fn, (fmt,) + args)

    def object_dump(self, obj):
        """
        Dump a Python object on C stderr.  For debugging purposes.
        """
        fnty = Type.function(Type.void(), [self.pyobj])
        fn = self._get_function(fnty, name="_PyObject_Dump")
        return self.builder.call(fn, (obj,))

    #
    # NRT (Numba runtime) APIs
    #

    def nrt_adapt_ndarray_to_python(self, aryty, ary, dtypeptr):
        assert self.context.enable_nrt, "NRT required"

        intty = ir.IntType(32)
        # Embed the Python type of the array (maybe subclass) in the LLVM IR.
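        # Added commentary (not in the original source): serialize_object()
        # pickles aryty.box_type into a module-level constant, and
        # unserialize() emits a numba_unpickle() call that rebuilds the
        # type object at run time -- conceptually:
        #     blob = pickle.dumps(aryty.box_type)    # at compile time
        #     pytype = pickle.loads(blob)            # at run time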
serial_aryty_pytype = self.unserialize(self.serialize_object(aryty.box_type)) fnty = Type.function(self.pyobj, [self.voidptr, self.pyobj, intty, intty, self.pyobj]) fn = self._get_function(fnty, name="NRT_adapt_ndarray_to_python_acqref") fn.args[0].add_attribute(lc.ATTR_NO_CAPTURE) ndim = self.context.get_constant(types.int32, aryty.ndim) writable = self.context.get_constant(types.int32, int(aryty.mutable)) aryptr = cgutils.alloca_once_value(self.builder, ary) return self.builder.call(fn, [self.builder.bitcast(aryptr, self.voidptr), serial_aryty_pytype, ndim, writable, dtypeptr]) def nrt_meminfo_new_from_pyobject(self, data, pyobj): """ Allocate a new MemInfo with data payload borrowed from a python object. """ mod = self.builder.module fnty = ir.FunctionType( cgutils.voidptr_t, [cgutils.voidptr_t, cgutils.voidptr_t], ) fn = cgutils.get_or_insert_function( mod, fnty, "NRT_meminfo_new_from_pyobject", ) fn.args[0].add_attribute(lc.ATTR_NO_CAPTURE) fn.args[1].add_attribute(lc.ATTR_NO_CAPTURE) fn.return_value.add_attribute("noalias") return self.builder.call(fn, [data, pyobj]) def nrt_meminfo_as_pyobject(self, miptr): mod = self.builder.module fnty = ir.FunctionType( self.pyobj, [cgutils.voidptr_t] ) fn = cgutils.get_or_insert_function( mod, fnty, 'NRT_meminfo_as_pyobject', ) fn.return_value.add_attribute("noalias") return self.builder.call(fn, [miptr]) def nrt_meminfo_from_pyobject(self, miobj): mod = self.builder.module fnty = ir.FunctionType( cgutils.voidptr_t, [self.pyobj] ) fn = cgutils.get_or_insert_function( mod, fnty, 'NRT_meminfo_from_pyobject', ) fn.return_value.add_attribute("noalias") return self.builder.call(fn, [miobj]) def nrt_adapt_ndarray_from_python(self, ary, ptr): assert self.context.enable_nrt fnty = Type.function(Type.int(), [self.pyobj, self.voidptr]) fn = self._get_function(fnty, name="NRT_adapt_ndarray_from_python") fn.args[0].add_attribute(lc.ATTR_NO_CAPTURE) fn.args[1].add_attribute(lc.ATTR_NO_CAPTURE) return self.builder.call(fn, (ary, ptr)) def nrt_adapt_buffer_from_python(self, buf, ptr): assert self.context.enable_nrt fnty = Type.function(Type.void(), [Type.pointer(self.py_buffer_t), self.voidptr]) fn = self._get_function(fnty, name="NRT_adapt_buffer_from_python") fn.args[0].add_attribute(lc.ATTR_NO_CAPTURE) fn.args[1].add_attribute(lc.ATTR_NO_CAPTURE) return self.builder.call(fn, (buf, ptr)) # ------ utils ----- def _get_function(self, fnty, name): return cgutils.get_or_insert_function(self.module, fnty, name) def alloca_obj(self): return self.builder.alloca(self.pyobj) def alloca_buffer(self): """ Return a pointer to a stack-allocated, zero-initialized Py_buffer. 
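        Typical pairing (an illustrative sketch, not original docstring
        text):

            pbuf = pyapi.alloca_buffer()
            ok = pyapi.get_buffer(obj, pbuf)   # numba_get_buffer
            ...
            pyapi.release_buffer(pbuf)         # numba_release_buffer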
""" # Treat the buffer as an opaque array of bytes ptr = cgutils.alloca_once_value(self.builder, lc.Constant.null(self.py_buffer_t)) return ptr @contextlib.contextmanager def if_object_ok(self, obj): with cgutils.if_likely(self.builder, cgutils.is_not_null(self.builder, obj)): yield def print_object(self, obj): strobj = self.object_str(obj) cstr = self.string_as_string(strobj) fmt = self.context.insert_const_string(self.module, "%s") self.sys_write_stdout(fmt, cstr) self.decref(strobj) def print_string(self, text): fmt = self.context.insert_const_string(self.module, text) self.sys_write_stdout(fmt) def get_null_object(self): return Constant.null(self.pyobj) def return_none(self): none = self.make_none() self.builder.ret(none) def list_pack(self, items): n = len(items) seq = self.list_new(self.context.get_constant(types.intp, n)) with self.if_object_ok(seq): for i in range(n): idx = self.context.get_constant(types.intp, i) self.incref(items[i]) self.list_setitem(seq, idx, items[i]) return seq def unserialize(self, structptr): """ Unserialize some data. *structptr* should be a pointer to a {i8* data, i32 length} structure. """ fnty = Type.function(self.pyobj, (self.voidptr, ir.IntType(32), self.voidptr)) fn = self._get_function(fnty, name="numba_unpickle") ptr = self.builder.extract_value(self.builder.load(structptr), 0) n = self.builder.extract_value(self.builder.load(structptr), 1) hashed = self.builder.extract_value(self.builder.load(structptr), 2) return self.builder.call(fn, (ptr, n, hashed)) def serialize_uncached(self, obj): """ Same as serialize_object(), but don't create a global variable, simply return a literal {i8* data, i32 length, i8* hashbuf} structure. """ # First make the array constant data = serialize.dumps(obj) assert len(data) < 2**31 name = ".const.pickledata.%s" % (id(obj) if config.DIFF_IR == 0 else "DIFF_IR") bdata = cgutils.make_bytearray(data) # Make SHA1 hash on the pickled content # NOTE: update buffer size in numba_unpickle() when changing the # hash algorithm. hashed = cgutils.make_bytearray(hashlib.sha1(data).digest()) arr = self.context.insert_unique_const(self.module, name, bdata) hasharr = self.context.insert_unique_const( self.module, f"{name}.sha1", hashed, ) # Then populate the structure constant struct = ir.Constant.literal_struct([ arr.bitcast(self.voidptr), ir.Constant(ir.IntType(32), arr.type.pointee.count), hasharr.bitcast(self.voidptr), ]) return struct def serialize_object(self, obj): """ Serialize the given object in the bitcode, and return it as a pointer to a {i8* data, i32 length}, structure constant (suitable for passing to unserialize()). """ try: gv = self.module.__serialized[obj] except KeyError: struct = self.serialize_uncached(obj) name = ".const.picklebuf.%s" % (id(obj) if config.DIFF_IR == 0 else "DIFF_IR") gv = self.context.insert_unique_const(self.module, name, struct) # Make the id() (and hence the name) unique while populating the module. self.module.__serialized[obj] = gv return gv def c_api_error(self): return cgutils.is_not_null(self.builder, self.err_occurred()) def to_native_value(self, typ, obj): """ Unbox the Python object as the given Numba type. A NativeValue instance is returned. 
""" from numba.core.boxing import unbox_unsupported impl = _unboxers.lookup(typ.__class__, unbox_unsupported) c = _UnboxContext(self.context, self.builder, self) return impl(typ, obj, c) def from_native_return(self, typ, val, env_manager): assert not isinstance(typ, types.Optional), "callconv should have " \ "prevented the return of " \ "optional value" out = self.from_native_value(typ, val, env_manager) return out def from_native_value(self, typ, val, env_manager=None): """ Box the native value of the given Numba type. A Python object pointer is returned (NULL if an error occurred). This method steals any native (NRT) reference embedded in *val*. """ from numba.core.boxing import box_unsupported impl = _boxers.lookup(typ.__class__, box_unsupported) c = _BoxContext(self.context, self.builder, self, env_manager) return impl(typ, val, c) def reflect_native_value(self, typ, val, env_manager=None): """ Reflect the native value onto its Python original, if any. An error bit (as an LLVM value) is returned. """ impl = _reflectors.lookup(typ.__class__) if impl is None: # Reflection isn't needed for most types return cgutils.false_bit is_error = cgutils.alloca_once_value(self.builder, cgutils.false_bit) c = _ReflectContext(self.context, self.builder, self, env_manager, is_error) impl(typ, val, c) return self.builder.load(c.is_error) def to_native_generator(self, obj, typ): """ Extract the generator structure pointer from a generator *obj* (a _dynfunc.Generator instance). """ gen_ptr_ty = Type.pointer(self.context.get_data_type(typ)) value = self.context.get_generator_state(self.builder, obj, gen_ptr_ty) return NativeValue(value) def from_native_generator(self, val, typ, env=None): """ Make a Numba generator (a _dynfunc.Generator instance) from a generator structure pointer *val*. *env* is an optional _dynfunc.Environment instance to be wrapped in the generator. 
""" llty = self.context.get_data_type(typ) assert not llty.is_pointer gen_struct_size = self.context.get_abi_sizeof(llty) gendesc = self.context.get_generator_desc(typ) # This is the PyCFunctionWithKeywords generated by PyCallWrapper genfnty = Type.function(self.pyobj, [self.pyobj, self.pyobj, self.pyobj]) genfn = self._get_function(genfnty, name=gendesc.llvm_cpython_wrapper_name) # This is the raw finalizer generated by _lower_generator_finalize_func() finalizerty = Type.function(Type.void(), [self.voidptr]) if typ.has_finalizer: finalizer = self._get_function(finalizerty, name=gendesc.llvm_finalizer_name) else: finalizer = Constant.null(Type.pointer(finalizerty)) # PyObject *numba_make_generator(state_size, initial_state, nextfunc, finalizer, env) fnty = Type.function(self.pyobj, [self.py_ssize_t, self.voidptr, Type.pointer(genfnty), Type.pointer(finalizerty), self.voidptr]) fn = self._get_function(fnty, name="numba_make_generator") state_size = ir.Constant(self.py_ssize_t, gen_struct_size) initial_state = self.builder.bitcast(val, self.voidptr) if env is None: env = self.get_null_object() env = self.builder.bitcast(env, self.voidptr) return self.builder.call(fn, (state_size, initial_state, genfn, finalizer, env)) def numba_array_adaptor(self, ary, ptr): assert not self.context.enable_nrt fnty = Type.function(Type.int(), [self.pyobj, self.voidptr]) fn = self._get_function(fnty, name="numba_adapt_ndarray") fn.args[0].add_attribute(lc.ATTR_NO_CAPTURE) fn.args[1].add_attribute(lc.ATTR_NO_CAPTURE) return self.builder.call(fn, (ary, ptr)) def numba_buffer_adaptor(self, buf, ptr): fnty = Type.function(Type.void(), [ir.PointerType(self.py_buffer_t), self.voidptr]) fn = self._get_function(fnty, name="numba_adapt_buffer") fn.args[0].add_attribute(lc.ATTR_NO_CAPTURE) fn.args[1].add_attribute(lc.ATTR_NO_CAPTURE) return self.builder.call(fn, (buf, ptr)) def complex_adaptor(self, cobj, cmplx): fnty = Type.function(Type.int(), [self.pyobj, cmplx.type]) fn = self._get_function(fnty, name="numba_complex_adaptor") return self.builder.call(fn, [cobj, cmplx]) def extract_record_data(self, obj, pbuf): fnty = Type.function(self.voidptr, [self.pyobj, ir.PointerType(self.py_buffer_t)]) fn = self._get_function(fnty, name="numba_extract_record_data") return self.builder.call(fn, [obj, pbuf]) def get_buffer(self, obj, pbuf): fnty = Type.function(Type.int(), [self.pyobj, ir.PointerType(self.py_buffer_t)]) fn = self._get_function(fnty, name="numba_get_buffer") return self.builder.call(fn, [obj, pbuf]) def release_buffer(self, pbuf): fnty = Type.function(Type.void(), [ir.PointerType(self.py_buffer_t)]) fn = self._get_function(fnty, name="numba_release_buffer") return self.builder.call(fn, [pbuf]) def extract_np_datetime(self, obj): fnty = Type.function(Type.int(64), [self.pyobj]) fn = self._get_function(fnty, name="numba_extract_np_datetime") return self.builder.call(fn, [obj]) def extract_np_timedelta(self, obj): fnty = Type.function(Type.int(64), [self.pyobj]) fn = self._get_function(fnty, name="numba_extract_np_timedelta") return self.builder.call(fn, [obj]) def create_np_datetime(self, val, unit_code): unit_code = Constant.int(Type.int(), unit_code) fnty = Type.function(self.pyobj, [Type.int(64), Type.int()]) fn = self._get_function(fnty, name="numba_create_np_datetime") return self.builder.call(fn, [val, unit_code]) def create_np_timedelta(self, val, unit_code): unit_code = Constant.int(Type.int(), unit_code) fnty = Type.function(self.pyobj, [Type.int(64), Type.int()]) fn = self._get_function(fnty, 
name="numba_create_np_timedelta") return self.builder.call(fn, [val, unit_code]) def recreate_record(self, pdata, size, dtype, env_manager): fnty = Type.function(self.pyobj, [Type.pointer(Type.int(8)), Type.int(), self.pyobj]) fn = self._get_function(fnty, name="numba_recreate_record") dtypeaddr = env_manager.read_const(env_manager.add_const(dtype)) return self.builder.call(fn, [pdata, size, dtypeaddr]) def string_from_constant_string(self, string): cstr = self.context.insert_const_string(self.module, string) sz = self.context.get_constant(types.intp, len(string)) return self.string_from_string_and_size(cstr, sz) def call_jit_code(self, func, sig, args): """Calls into Numba jitted code and propagate error using the Python calling convention. Parameters ---------- func : function The Python function to be compiled. This function is compiled in nopython-mode. sig : numba.typing.Signature The function signature for *func*. args : Sequence[llvmlite.binding.Value] LLVM values to use as arguments. Returns ------- (is_error, res) : 2-tuple of llvmlite.binding.Value. is_error : true iff *func* raised an exception. res : Returned value from *func* iff *is_error* is false. If *is_error* is true, this method will adapt the nopython exception into a Python exception. Caller should return NULL to Python to indicate an error. """ # Compile *func* builder = self.builder cres = self.context.compile_subroutine(builder, func, sig) got_retty = cres.signature.return_type retty = sig.return_type if got_retty != retty: # This error indicates an error in *func* or the caller of this # method. raise errors.LoweringError( f'mismatching signature {got_retty} != {retty}.\n' ) # Call into *func* status, res = self.context.call_internal_no_propagate( builder, cres.fndesc, sig, args, ) # Post-call handling for *func* is_error_ptr = cgutils.alloca_once(builder, cgutils.bool_t, zfill=True) res_type = self.context.get_value_type(sig.return_type) res_ptr = cgutils.alloca_once(builder, res_type, zfill=True) # Handle error and adapt the nopython exception into cpython exception with builder.if_else(status.is_error) as (has_err, no_err): with has_err: builder.store(status.is_error, is_error_ptr) # Set error state in the Python interpreter self.context.call_conv.raise_error(builder, self, status) with no_err: # Handle returned value res = imputils.fix_returning_optional( self.context, builder, sig, status, res, ) builder.store(res, res_ptr) is_error = builder.load(is_error_ptr) res = builder.load(res_ptr) return is_error, res class ObjModeUtils: """Internal utils for calling objmode dispatcher from within NPM code. """ def __init__(self, pyapi): self.pyapi = pyapi def load_dispatcher(self, fnty, argtypes): builder = self.pyapi.builder tyctx = self.pyapi.context m = builder.module # Add a global variable to cache the objmode dispatcher gv = ir.GlobalVariable( m, self.pyapi.pyobj, name=m.get_unique_name("cached_objmode_dispatcher"), ) gv.initializer = gv.type.pointee(None) gv.linkage = 'internal' # Make a basic-block to common exit bb_end = builder.append_basic_block("bb_end") if serialize.is_serialiable(fnty.dispatcher): serialized_dispatcher = self.pyapi.serialize_object( (fnty.dispatcher, tuple(argtypes)), ) compile_args = self.pyapi.unserialize(serialized_dispatcher) # unserialize (unpickling) can fail failed_unser = cgutils.is_null(builder, compile_args) with builder.if_then(failed_unser): # early exit. `gv` is still null. 
builder.branch(bb_end) cached = builder.load(gv) with builder.if_then(cgutils.is_null(builder, cached)): if serialize.is_serialiable(fnty.dispatcher): cls = type(self) compiler = self.pyapi.unserialize( self.pyapi.serialize_object(cls._call_objmode_dispatcher) ) callee = self.pyapi.call_function_objargs( compiler, [compile_args], ) # Clean up self.pyapi.decref(compiler) self.pyapi.decref(compile_args) else: entry_pt = fnty.dispatcher.compile(tuple(argtypes)) callee = tyctx.add_dynamic_addr( builder, id(entry_pt), info="with_objectmode", ) # Incref the dispatcher and cache it self.pyapi.incref(callee) builder.store(callee, gv) # Jump to the exit block builder.branch(bb_end) # Define the exit block builder.position_at_end(bb_end) callee = builder.load(gv) return callee @staticmethod def _call_objmode_dispatcher(compile_args): dispatcher, argtypes = compile_args entrypt = dispatcher.compile(argtypes) return entrypt
numba-0.55.1/numba/core/registry.py000664 000000 000000 00000006710 14174536160 017153 0ustar00rootroot000000 000000 import contextlib from numba.core.descriptors import TargetDescriptor from numba.core import utils, typing, dispatcher, cpu # ----------------------------------------------------------------------------- # Default CPU target descriptors class _NestedContext(object): _typing_context = None _target_context = None @contextlib.contextmanager def nested(self, typing_context, target_context): old_nested = self._typing_context, self._target_context try: self._typing_context = typing_context self._target_context = target_context yield finally: self._typing_context, self._target_context = old_nested class CPUTarget(TargetDescriptor): options = cpu.CPUTargetOptions _nested = _NestedContext() @utils.cached_property def _toplevel_target_context(self): # Lazily-initialized top-level target context, for all threads return cpu.CPUContext(self.typing_context, self._target_name) @utils.cached_property def _toplevel_typing_context(self): # Lazily-initialized top-level typing context, for all threads return typing.Context() @property def target_context(self): """ The target context for CPU targets. """ nested = self._nested._target_context if nested is not None: return nested else: return self._toplevel_target_context @property def typing_context(self): """ The typing context for CPU targets. """ nested = self._nested._typing_context if nested is not None: return nested else: return self._toplevel_typing_context def nested_context(self, typing_context, target_context): """ A context manager temporarily replacing the contexts with the given ones, for the current thread of execution. """ return self._nested.nested(typing_context, target_context) # The global CPU target cpu_target = CPUTarget('cpu') class CPUDispatcher(dispatcher.Dispatcher): targetdescr = cpu_target class DelayedRegistry(utils.UniqueDict): """ A unique dictionary but with deferred initialisation of the values. Attributes ---------- ondemand: A dictionary of key -> value, where value is a zero-argument callable that is executed the first time the key is used. It is used as part of a deferred initialization strategy.
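    Example (an illustrative sketch; the key and factory below are hypothetical)::

        reg = DelayedRegistry()
        reg.ondemand['cpu'] = lambda: make_cpu_value()   # not called yet
        value = reg['cpu']   # the factory runs once here; its result is stored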
""" def __init__(self, *args, **kws): self.ondemand = utils.UniqueDict() self.key_type = kws.pop('key_type', None) self.value_type = kws.pop('value_type', None) self._type_check = self.key_type or self.value_type super(DelayedRegistry, self).__init__(*args, **kws) def __getitem__(self, item): if item in self.ondemand: self[item] = self.ondemand[item]() del self.ondemand[item] return super(DelayedRegistry, self).__getitem__(item) def __setitem__(self, key, value): if self._type_check: def check(x, ty_x): if isinstance(ty_x, type): assert ty_x in x.__mro__, (x, ty_x) else: assert isinstance(x, ty_x), (x, ty_x) if self.key_type is not None: check(key, self.key_type) if self.value_type is not None: check(value, self.value_type) return super(DelayedRegistry, self).__setitem__(key, value) numba-0.55.1/numba/core/removerefctpass.py000664 000000 000000 00000006513 14174536160 020514 0ustar00rootroot000000 000000 """ Implement a rewrite pass on a LLVM module to remove unnecessary refcount operations. """ from llvmlite.ir.transforms import CallVisitor from numba.core import types class _MarkNrtCallVisitor(CallVisitor): """ A pass to mark all NRT_incref and NRT_decref. """ def __init__(self): self.marked = set() def visit_Call(self, instr): if getattr(instr.callee, 'name', '') in _accepted_nrtfns: self.marked.add(instr) def _rewrite_function(function): # Mark NRT usage markpass = _MarkNrtCallVisitor() markpass.visit_Function(function) # Remove NRT usage for bb in function.basic_blocks: for inst in list(bb.instructions): if inst in markpass.marked: bb.instructions.remove(inst) _accepted_nrtfns = 'NRT_incref', 'NRT_decref' def _legalize(module, dmm, fndesc): """ Legalize the code in the module. Returns True if the module is legal for the rewrite pass that removes unnecessary refcounts. """ def valid_output(ty): """ Valid output are any type that does not need refcount """ model = dmm[ty] return not model.contains_nrt_meminfo() def valid_input(ty): """ Valid input are any type that does not need refcount except Array. """ return valid_output(ty) or isinstance(ty, types.Array) # Ensure no reference to function marked as # "numba_args_may_always_need_nrt" try: nmd = module.get_named_metadata("numba_args_may_always_need_nrt") except KeyError: # Nothing marked pass else: # Has functions marked as "numba_args_may_always_need_nrt" if len(nmd.operands) > 0: # The pass is illegal for this compilation unit. return False # More legalization base on function type argtypes = fndesc.argtypes restype = fndesc.restype calltypes = fndesc.calltypes # Legalize function arguments for argty in argtypes: if not valid_input(argty): return False # Legalize function return if not valid_output(restype): return False # Legalize all called functions for callty in calltypes.values(): if callty is not None and not valid_output(callty.return_type): return False # Ensure no allocation for fn in module.functions: if fn.name.startswith("NRT_"): if fn.name not in _accepted_nrtfns: return False return True def remove_unnecessary_nrt_usage(function, context, fndesc): """ Remove unnecessary NRT incref/decref in the given LLVM function. It uses highlevel type info to determine if the function does not need NRT. Such a function does not: - return array object(s); - take arguments that need refcounting except array; - call function(s) that return refcounted object. In effect, the function will not capture or create references that extend the lifetime of any refcounted objects beyound the lifetime of the function. 
The rewrite is performed in place. If the rewrite happened, this function returns True, otherwise, it returns False. """ dmm = context.data_model_manager if _legalize(function.module, dmm, fndesc): _rewrite_function(function) return True else: return False
numba-0.55.1/numba/core/retarget.py000664 000000 000000 00000007652 14174536160 017126 0ustar00rootroot000000 000000 """ Implement utils for supporting retargeting of dispatchers. WARNING: Features defined in this file are experimental. The API may change without notice. """ import abc import weakref from numba.core import errors class RetargetCache: """Cache for retargeted dispatchers. The cache uses the original dispatcher as the key. """ container_type = weakref.WeakKeyDictionary def __init__(self): self._cache = self.container_type() self._stat_hit = 0 self._stat_miss = 0 def save_cache(self, orig_disp, new_disp): """Save a dispatcher associated with the given key. """ self._cache[orig_disp] = new_disp def load_cache(self, orig_disp): """Load a dispatcher associated with the given key. """ out = self._cache.get(orig_disp) if out is None: self._stat_miss += 1 else: self._stat_hit += 1 return out def items(self): """Returns the contents of the cache. """ return self._cache.items() def stats(self): """Returns stats regarding cache hits/misses. """ return {'hit': self._stat_hit, 'miss': self._stat_miss} class BaseRetarget(abc.ABC): """Abstract base class for retargeting logic. """ @abc.abstractmethod def check_compatible(self, orig_disp): """Check that the retarget is compatible. This method does not return anything meaningful (e.g. None). Incompatibility is signalled via raising an exception. """ pass @abc.abstractmethod def retarget(self, orig_disp): """Retargets the given dispatcher and returns a new dispatcher-like callable. Or, returns the original dispatcher if the target_backend will not change. """ pass class BasicRetarget(BaseRetarget): """A basic retargeting implementation for a single output target. This class has two abstract methods/properties that subclasses must define. - `output_target` must return the output target name. - `compile_retarget` must define the logic to retarget the given dispatcher. By default, this class uses `RetargetCache` as the internal cache. This can be modified by overriding the `.cache_type` class attribute. """ cache_type = RetargetCache def __init__(self): self.cache = self.cache_type() @abc.abstractproperty def output_target(self) -> str: """Returns the output target name. See numba/tests/test_retargeting.py for example usage. """ pass @abc.abstractmethod def compile_retarget(self, orig_disp): """Returns the retargeted dispatcher. See numba/tests/test_retargeting.py for example usage. """ pass def check_compatible(self, orig_disp): """ This implementation checks that `self.output_target == orig_disp._required_target_backend` """ required_target = orig_disp._required_target_backend output_target = self.output_target if required_target is not None: if output_target != required_target: m = ("The output target does not match the required target: " f"{output_target} != {required_target}.") raise errors.CompilerError(m) def retarget(self, orig_disp): """Apply retargeting to orig_disp. The retargeted dispatchers are cached for future use. """ cache = self.cache opts = orig_disp.targetoptions # Skip if the original dispatcher is targeting the same output target if opts.get('target_backend') == self.output_target: return orig_disp cached = cache.load_cache(orig_disp) # No cache?
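        # (Illustrative walk-through: on a miss, compile a retargeted
        # dispatcher and remember it; the WeakKeyDictionary-based cache keys
        # on orig_disp, so entries die together with the original dispatcher.)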
if cached is None: out = self.compile_retarget(orig_disp) cache.save_cache(orig_disp, out) else: out = cached return out numba-0.55.1/numba/core/rewrites/000775 000000 000000 00000000000 14174536160 016571 5ustar00rootroot000000 000000 numba-0.55.1/numba/core/rewrites/__init__.py000664 000000 000000 00000000434 14174536160 020703 0ustar00rootroot000000 000000 """ A subpackage hosting Numba IR rewrite passes. """ from .registry import register_rewrite, rewrite_registry, Rewrite # Register various built-in rewrite passes from numba.core.rewrites import (static_getitem, static_raise, static_binop, ir_print) numba-0.55.1/numba/core/rewrites/ir_print.py000664 000000 000000 00000005631 14174536160 020776 0ustar00rootroot000000 000000 from numba.core import errors, ir from numba.core.rewrites import register_rewrite, Rewrite @register_rewrite('before-inference') class RewritePrintCalls(Rewrite): """ Rewrite calls to the print() global function to dedicated IR print() nodes. """ def match(self, func_ir, block, typemap, calltypes): self.prints = prints = {} self.block = block # Find all assignments with a right-hand print() call for inst in block.find_insts(ir.Assign): if isinstance(inst.value, ir.Expr) and inst.value.op == 'call': expr = inst.value try: callee = func_ir.infer_constant(expr.func) except errors.ConstantInferenceError: continue if callee is print: if expr.kws: # Only positional args are supported msg = ("Numba's print() function implementation does not " "support keyword arguments.") raise errors.UnsupportedError(msg, inst.loc) prints[inst] = expr return len(prints) > 0 def apply(self): """ Rewrite `var = call (...)` as a sequence of `print(...)` and `var = const(None)`. """ new_block = self.block.copy() new_block.clear() for inst in self.block.body: if inst in self.prints: expr = self.prints[inst] print_node = ir.Print(args=expr.args, vararg=expr.vararg, loc=expr.loc) new_block.append(print_node) assign_node = ir.Assign(value=ir.Const(None, loc=expr.loc), target=inst.target, loc=inst.loc) new_block.append(assign_node) else: new_block.append(inst) return new_block @register_rewrite('before-inference') class DetectConstPrintArguments(Rewrite): """ Detect and store constant arguments to print() nodes. """ def match(self, func_ir, block, typemap, calltypes): self.consts = consts = {} self.block = block for inst in block.find_insts(ir.Print): if inst.consts: # Already rewritten continue for idx, var in enumerate(inst.args): try: const = func_ir.infer_constant(var) except errors.ConstantInferenceError: continue consts.setdefault(inst, {})[idx] = const return len(consts) > 0 def apply(self): """ Store detected constant arguments on their nodes. """ for inst in self.block.body: if inst in self.consts: inst.consts = self.consts[inst] return self.block numba-0.55.1/numba/core/rewrites/registry.py000664 000000 000000 00000007103 14174536160 021014 0ustar00rootroot000000 000000 from collections import defaultdict from numba.core import config class Rewrite(object): '''Defines the abstract base class for Numba rewrites. ''' def __init__(self, state=None): '''Constructor for the Rewrite class. ''' pass def match(self, func_ir, block, typemap, calltypes): '''Overload this method to check an IR block for matching terms in the rewrite. ''' return False def apply(self): '''Overload this method to return a rewritten IR basic block when a match has been found. ''' raise NotImplementedError("Abstract Rewrite.apply() called!") class RewriteRegistry(object): '''Defines a registry for Numba rewrites. 
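    Typical use (an illustrative sketch)::

        @register_rewrite('before-inference')
        class MyRewrite(Rewrite):
            def match(self, func_ir, block, typemap, calltypes):
                ...   # record matching instructions, return True/False
            def apply(self):
                ...   # return the rewritten block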
''' _kinds = frozenset(['before-inference', 'after-inference']) def __init__(self): '''Constructor for the rewrite registry. Initializes the rewrites member to an empty list. ''' self.rewrites = defaultdict(list) def register(self, kind): """ Decorator adding a subclass of Rewrite to the registry for the given *kind*. """ if kind not in self._kinds: raise KeyError("invalid kind %r" % (kind,)) def do_register(rewrite_cls): if not issubclass(rewrite_cls, Rewrite): raise TypeError('{0} is not a subclass of Rewrite'.format( rewrite_cls)) self.rewrites[kind].append(rewrite_cls) return rewrite_cls return do_register def apply(self, kind, state): '''Given a pipeline and a dictionary of basic blocks, exhaustively attempt to apply all registered rewrites to all basic blocks. ''' assert kind in self._kinds blocks = state.func_ir.blocks old_blocks = blocks.copy() for rewrite_cls in self.rewrites[kind]: # Exhaustively apply a rewrite until it stops matching. rewrite = rewrite_cls(state) work_list = list(blocks.items()) while work_list: key, block = work_list.pop() matches = rewrite.match(state.func_ir, block, state.typemap, state.calltypes) if matches: if config.DEBUG or config.DUMP_IR: print("_" * 70) print("REWRITING (%s):" % rewrite_cls.__name__) block.dump() print("_" * 60) new_block = rewrite.apply() blocks[key] = new_block work_list.append((key, new_block)) if config.DEBUG or config.DUMP_IR: new_block.dump() print("_" * 70) # If any blocks were changed, perform a sanity check. for key, block in blocks.items(): if block != old_blocks[key]: block.verify() # Some passes, e.g. _inline_const_arraycall are known to occasionally # do invalid things WRT ir.Del, others, e.g. RewriteArrayExprs do valid # things with ir.Del, but the placement is not optimal. The lines below # fix-up the IR so that ref counts are valid and optimally placed, # see #4093 for context. This has to be run here opposed to in # apply() as the CFG needs computing so full IR is needed. from numba.core import postproc post_proc = postproc.PostProcessor(state.func_ir) post_proc.run() rewrite_registry = RewriteRegistry() register_rewrite = rewrite_registry.register numba-0.55.1/numba/core/rewrites/static_binop.py000664 000000 000000 00000002172 14174536160 021623 0ustar00rootroot000000 000000 from numba.core import errors, ir from numba.core.rewrites import register_rewrite, Rewrite @register_rewrite('before-inference') class DetectStaticBinops(Rewrite): """ Detect constant arguments to select binops. """ # Those operators can benefit from a constant-inferred argument rhs_operators = {'**'} def match(self, func_ir, block, typemap, calltypes): self.static_lhs = {} self.static_rhs = {} self.block = block # Find binop expressions with a constant lhs or rhs for expr in block.find_exprs(op='binop'): try: if (expr.fn in self.rhs_operators and expr.static_rhs is ir.UNDEFINED): self.static_rhs[expr] = func_ir.infer_constant(expr.rhs) except errors.ConstantInferenceError: continue return len(self.static_lhs) > 0 or len(self.static_rhs) > 0 def apply(self): """ Store constant arguments that were detected in match(). 
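        (E.g., for an expression ``x ** 2``, the constant ``2`` detected in match() is stored as the expression's ``static_rhs``, which later lets typing and lowering specialize the power operation.)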
""" for expr, rhs in self.static_rhs.items(): expr.static_rhs = rhs return self.block numba-0.55.1/numba/core/rewrites/static_getitem.py000664 000000 000000 00000014740 14174536160 022156 0ustar00rootroot000000 000000 from numba.core import errors, ir, types from numba.core.rewrites import register_rewrite, Rewrite @register_rewrite('before-inference') class RewriteConstGetitems(Rewrite): """ Rewrite IR expressions of the kind `getitem(value=arr, index=$constXX)` where `$constXX` is a known constant as `static_getitem(value=arr, index=)`. """ def match(self, func_ir, block, typemap, calltypes): self.getitems = getitems = {} self.block = block # Detect all getitem expressions and find which ones can be # rewritten for expr in block.find_exprs(op='getitem'): if expr.op == 'getitem': try: const = func_ir.infer_constant(expr.index) except errors.ConstantInferenceError: continue getitems[expr] = const return len(getitems) > 0 def apply(self): """ Rewrite all matching getitems as static_getitems. """ new_block = self.block.copy() new_block.clear() for inst in self.block.body: if isinstance(inst, ir.Assign): expr = inst.value if expr in self.getitems: const = self.getitems[expr] new_expr = ir.Expr.static_getitem(value=expr.value, index=const, index_var=expr.index, loc=expr.loc) inst = ir.Assign(value=new_expr, target=inst.target, loc=inst.loc) new_block.append(inst) return new_block @register_rewrite('after-inference') class RewriteStringLiteralGetitems(Rewrite): """ Rewrite IR expressions of the kind `getitem(value=arr, index=$XX)` where `$XX` is a StringLiteral value as `static_getitem(value=arr, index=)`. """ def match(self, func_ir, block, typemap, calltypes): """ Detect all getitem expressions and find which ones have string literal indexes """ self.getitems = getitems = {} self.block = block self.calltypes = calltypes for expr in block.find_exprs(op='getitem'): if expr.op == 'getitem': index_ty = typemap[expr.index.name] if isinstance(index_ty, types.StringLiteral): getitems[expr] = (expr.index, index_ty.literal_value) return len(getitems) > 0 def apply(self): """ Rewrite all matching getitems as static_getitems where the index is the literal value of the string. """ new_block = ir.Block(self.block.scope, self.block.loc) for inst in self.block.body: if isinstance(inst, ir.Assign): expr = inst.value if expr in self.getitems: const, lit_val = self.getitems[expr] new_expr = ir.Expr.static_getitem(value=expr.value, index=lit_val, index_var=expr.index, loc=expr.loc) self.calltypes[new_expr] = self.calltypes[expr] inst = ir.Assign(value=new_expr, target=inst.target, loc=inst.loc) new_block.append(inst) return new_block @register_rewrite('after-inference') class RewriteStringLiteralSetitems(Rewrite): """ Rewrite IR expressions of the kind `setitem(value=arr, index=$XX, value=)` where `$XX` is a StringLiteral value as `static_setitem(value=arr, index=, value=)`. """ def match(self, func_ir, block, typemap, calltypes): """ Detect all setitem expressions and find which ones have string literal indexes """ self.setitems = setitems = {} self.block = block self.calltypes = calltypes for inst in block.find_insts(ir.SetItem): index_ty = typemap[inst.index.name] if isinstance(index_ty, types.StringLiteral): setitems[inst] = (inst.index, index_ty.literal_value) return len(setitems) > 0 def apply(self): """ Rewrite all matching setitems as static_setitems where the index is the literal value of the string. 
""" new_block = ir.Block(self.block.scope, self.block.loc) for inst in self.block.body: if isinstance(inst, ir.SetItem): if inst in self.setitems: const, lit_val = self.setitems[inst] new_inst = ir.StaticSetItem(target=inst.target, index=lit_val, index_var=inst.index, value=inst.value, loc=inst.loc) self.calltypes[new_inst] = self.calltypes[inst] inst = new_inst new_block.append(inst) return new_block @register_rewrite('before-inference') class RewriteConstSetitems(Rewrite): """ Rewrite IR statements of the kind `setitem(target=arr, index=$constXX, ...)` where `$constXX` is a known constant as `static_setitem(target=arr, index=, ...)`. """ def match(self, func_ir, block, typemap, calltypes): self.setitems = setitems = {} self.block = block # Detect all setitem statements and find which ones can be # rewritten for inst in block.find_insts(ir.SetItem): try: const = func_ir.infer_constant(inst.index) except errors.ConstantInferenceError: continue setitems[inst] = const return len(setitems) > 0 def apply(self): """ Rewrite all matching setitems as static_setitems. """ new_block = self.block.copy() new_block.clear() for inst in self.block.body: if inst in self.setitems: const = self.setitems[inst] new_inst = ir.StaticSetItem(inst.target, const, inst.index, inst.value, inst.loc) new_block.append(new_inst) else: new_block.append(inst) return new_block numba-0.55.1/numba/core/rewrites/static_raise.py000664 000000 000000 00000006142 14174536160 021620 0ustar00rootroot000000 000000 from numba.core import errors, ir from numba.core.rewrites import register_rewrite, Rewrite @register_rewrite('before-inference') class RewriteConstRaises(Rewrite): """ Rewrite IR statements of the kind `raise(value)` where `value` is the result of instantiating an exception with constant arguments into `static_raise(exception_type, constant args)`. This allows lowering in nopython mode, where one can't instantiate exception instances from runtime data. """ def _is_exception_type(self, const): return isinstance(const, type) and issubclass(const, Exception) def _break_constant(self, const, loc): """ Break down constant exception. """ if isinstance(const, tuple): # it's a tuple(exception class, args) if not self._is_exception_type(const[0]): msg = "Encountered unsupported exception constant %r" raise errors.UnsupportedError(msg % (const[0],), loc) return const[0], tuple(const[1]) elif self._is_exception_type(const): return const, None else: if isinstance(const, str): msg = ("Directly raising a string constant as an exception is " "not supported.") else: msg = "Encountered unsupported constant type used for exception" raise errors.UnsupportedError(msg, loc) def match(self, func_ir, block, typemap, calltypes): self.raises = raises = {} self.tryraises = tryraises = {} self.block = block # Detect all raise statements and find which ones can be # rewritten for inst in block.find_insts((ir.Raise, ir.TryRaise)): if inst.exception is None: # re-reraise exc_type, exc_args = None, None else: # raise => find the definition site for const = func_ir.infer_constant(inst.exception) loc = inst.exception.loc exc_type, exc_args = self._break_constant(const, loc) if isinstance(inst, ir.Raise): raises[inst] = exc_type, exc_args elif isinstance(inst, ir.TryRaise): tryraises[inst] = exc_type, exc_args else: raise ValueError('unexpected: {}'.format(type(inst))) return (len(raises) + len(tryraises)) > 0 def apply(self): """ Rewrite all matching setitems as static_setitems. 
""" new_block = self.block.copy() new_block.clear() for inst in self.block.body: if inst in self.raises: exc_type, exc_args = self.raises[inst] new_inst = ir.StaticRaise(exc_type, exc_args, inst.loc) new_block.append(new_inst) elif inst in self.tryraises: exc_type, exc_args = self.tryraises[inst] new_inst = ir.StaticTryRaise(exc_type, exc_args, inst.loc) new_block.append(new_inst) else: new_block.append(inst) return new_block numba-0.55.1/numba/core/runtime/000775 000000 000000 00000000000 14174536160 016410 5ustar00rootroot000000 000000 numba-0.55.1/numba/core/runtime/__init__.py000664 000000 000000 00000000027 14174536160 020520 0ustar00rootroot000000 000000 from .nrt import rtsys numba-0.55.1/numba/core/runtime/_nrt_python.c000664 000000 000000 00000032536 14174536160 021130 0ustar00rootroot000000 000000 /* * Definition of NRT functions for marshalling from / to Python objects. * This module is included by _nrt_pythonmod.c and by pycc-compiled modules. */ #include "../../_pymodule.h" #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #include #include #include "../../_arraystruct.h" #include "../../_numba_common.h" #include "nrt.h" /* * Create a NRT MemInfo for data owned by a PyObject. */ static void pyobject_dtor(void *ptr, size_t size, void* info) { PyGILState_STATE gstate; PyObject *ownerobj = info; gstate = PyGILState_Ensure(); /* ensure the GIL */ Py_DECREF(ownerobj); /* release the python object */ PyGILState_Release(gstate); /* release the GIL */ } NUMBA_EXPORT_FUNC(NRT_MemInfo *) NRT_meminfo_new_from_pyobject(void *data, PyObject *ownerobj) { size_t dummy_size = 0; Py_INCREF(ownerobj); return NRT_MemInfo_new(data, dummy_size, pyobject_dtor, ownerobj); } /* * A Python object wrapping a NRT meminfo. */ typedef struct { PyObject_HEAD NRT_MemInfo *meminfo; } MemInfoObject; static int MemInfo_init(MemInfoObject *self, PyObject *args, PyObject *kwds) { static char *keywords[] = {"ptr", NULL}; PyObject *raw_ptr_obj; void *raw_ptr; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O", keywords, &raw_ptr_obj)) { return -1; } raw_ptr = PyLong_AsVoidPtr(raw_ptr_obj); NRT_Debug(nrt_debug_print("MemInfo_init self=%p raw_ptr=%p\n", self, raw_ptr)); if(PyErr_Occurred()) return -1; self->meminfo = (NRT_MemInfo *)raw_ptr; assert (NRT_MemInfo_refcount(self->meminfo) > 0 && "0 refcount"); return 0; } static int MemInfo_getbuffer(PyObject *exporter, Py_buffer *view, int flags) { Py_ssize_t len; void *buf; int readonly = 0; MemInfoObject *miobj = (MemInfoObject*)exporter; NRT_MemInfo *mi = miobj->meminfo; buf = NRT_MemInfo_data(mi); len = NRT_MemInfo_size(mi); return PyBuffer_FillInfo(view, exporter, buf, len, readonly, flags); } static PyBufferProcs MemInfo_bufferProcs = {MemInfo_getbuffer, NULL}; static PyObject* MemInfo_acquire(MemInfoObject *self) { NRT_MemInfo_acquire(self->meminfo); Py_RETURN_NONE; } static PyObject* MemInfo_release(MemInfoObject *self) { NRT_MemInfo_release(self->meminfo); Py_RETURN_NONE; } static PyObject* MemInfo_get_data(MemInfoObject *self, void *closure) { return PyLong_FromVoidPtr(NRT_MemInfo_data(self->meminfo)); } static PyObject* MemInfo_get_refcount(MemInfoObject *self, void *closure) { size_t refct = NRT_MemInfo_refcount(self->meminfo); if ( refct == (size_t)-1 ) { PyErr_SetString(PyExc_ValueError, "invalid MemInfo"); return NULL; } return PyLong_FromSize_t(refct); } static PyObject* MemInfo_get_external_allocator(MemInfoObject *self, void *closure) { void *p = NRT_MemInfo_external_allocator(self->meminfo); return PyLong_FromVoidPtr(p); } static PyObject* 
MemInfo_get_parent(MemInfoObject *self, void *closure) { void *p = NRT_MemInfo_parent(self->meminfo); if (p) { Py_INCREF(p); return (PyObject*)p; } else { Py_INCREF(Py_None); return Py_None; } } static void MemInfo_dealloc(MemInfoObject *self) { NRT_MemInfo_release(self->meminfo); Py_TYPE(self)->tp_free((PyObject*)self); } static PyMethodDef MemInfo_methods[] = { {"acquire", (PyCFunction)MemInfo_acquire, METH_NOARGS, "Increment the reference count" }, {"release", (PyCFunction)MemInfo_release, METH_NOARGS, "Decrement the reference count" }, {NULL} /* Sentinel */ }; static PyGetSetDef MemInfo_getsets[] = { {"data", (getter)MemInfo_get_data, NULL, "Get the data pointer as an integer", NULL}, {"refcount", (getter)MemInfo_get_refcount, NULL, "Get the refcount", NULL}, {"external_allocator", (getter)MemInfo_get_external_allocator, NULL, "Get the external allocator", NULL}, {"parent", (getter)MemInfo_get_parent, NULL, NULL}, {NULL} /* Sentinel */ }; static PyTypeObject MemInfoType = { PyVarObject_HEAD_INIT(NULL, 0) "_nrt_python._MemInfo", /* tp_name*/ sizeof(MemInfoObject), /* tp_basicsize*/ 0, /* tp_itemsize*/ (destructor)MemInfo_dealloc, /* tp_dealloc*/ 0, /* tp_print*/ 0, /* tp_getattr*/ 0, /* tp_setattr*/ 0, /* tp_compare*/ 0, /* tp_repr*/ 0, /* tp_as_number*/ 0, /* tp_as_sequence*/ 0, /* tp_as_mapping*/ 0, /* tp_hash */ 0, /* tp_call*/ 0, /* tp_str*/ 0, /* tp_getattro*/ 0, /* tp_setattro*/ &MemInfo_bufferProcs, /* tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags*/ 0, /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ MemInfo_methods, /* tp_methods */ 0, /* tp_members */ MemInfo_getsets, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ (initproc)MemInfo_init, /* tp_init */ 0, /* tp_alloc */ 0, /* tp_new */ }; /* Return a MemInfo* as a MemInfoObject* The NRT reference to the MemInfo is borrowed. */ NUMBA_EXPORT_FUNC(MemInfoObject*) NRT_meminfo_as_pyobject(NRT_MemInfo *meminfo) { MemInfoObject *mi; PyObject *addr; addr = PyLong_FromVoidPtr(meminfo); if (!addr) return NULL; mi = (MemInfoObject*)PyObject_CallFunctionObjArgs((PyObject *)&MemInfoType, addr, NULL); Py_DECREF(addr); if (!mi) return NULL; return mi; } /* Return a MemInfo* from a MemInfoObject* A new reference is returned. 
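 * (Illustrative note: NRT_MemInfo_acquire() below bumps the NRT refcount, so
 * the caller owns the returned reference and must balance it with
 * NRT_MemInfo_release().)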
*/ NUMBA_EXPORT_FUNC(NRT_MemInfo*) NRT_meminfo_from_pyobject(MemInfoObject *miobj) { NRT_MemInfo_acquire(miobj->meminfo); return miobj->meminfo; } /* * Array adaptor code */ NUMBA_EXPORT_FUNC(int) NRT_adapt_ndarray_from_python(PyObject *obj, arystruct_t* arystruct) { PyArrayObject *ndary; int i, ndim; npy_intp *p; void *data; if (!PyArray_Check(obj)) { return -1; } ndary = (PyArrayObject*)obj; ndim = PyArray_NDIM(ndary); data = PyArray_DATA(ndary); arystruct->meminfo = NRT_meminfo_new_from_pyobject((void*)data, obj); arystruct->data = data; arystruct->nitems = PyArray_SIZE(ndary); arystruct->itemsize = PyArray_ITEMSIZE(ndary); arystruct->parent = obj; p = arystruct->shape_and_strides; for (i = 0; i < ndim; i++, p++) { *p = PyArray_DIM(ndary, i); } for (i = 0; i < ndim; i++, p++) { *p = PyArray_STRIDE(ndary, i); } NRT_Debug(nrt_debug_print("NRT_adapt_ndarray_from_python %p\n", arystruct->meminfo)); return 0; } static PyObject* try_to_return_parent(arystruct_t *arystruct, int ndim, PyArray_Descr *descr) { int i; PyArrayObject *array = (PyArrayObject *)arystruct->parent; if (!PyArray_Check(arystruct->parent)) /* Parent is a generic buffer-providing object */ goto RETURN_ARRAY_COPY; if (PyArray_DATA(array) != arystruct->data) goto RETURN_ARRAY_COPY; if (PyArray_NDIM(array) != ndim) goto RETURN_ARRAY_COPY; if (PyObject_RichCompareBool((PyObject *) PyArray_DESCR(array), (PyObject *) descr, Py_EQ) <= 0) goto RETURN_ARRAY_COPY; for(i = 0; i < ndim; ++i) { if (PyArray_DIMS(array)[i] != arystruct->shape_and_strides[i]) goto RETURN_ARRAY_COPY; if (PyArray_STRIDES(array)[i] != arystruct->shape_and_strides[ndim + i]) goto RETURN_ARRAY_COPY; } /* Yes, it is the same array Return new reference */ Py_INCREF((PyObject *)array); return (PyObject *)array; RETURN_ARRAY_COPY: return NULL; } /** * This function is used during the boxing of ndarray type. * `arystruct` is a structure containing essential information from the * unboxed array. * `retty` is the subtype of the NumPy PyArray_Type this function should return. * This is related to `numba.core.types.Array.box_type`. * `ndim` is the number of dimension of the array. * `writeable` corresponds to the "writable" flag in NumPy ndarray. * `descr` is the NumPy data type description. * * This function was renamed in 0.52.0 to specify that it acquires references. * It used to steal the reference of the arystruct. * Refer to https://github.com/numba/numba/pull/6446 */ NUMBA_EXPORT_FUNC(PyObject *) NRT_adapt_ndarray_to_python_acqref(arystruct_t* arystruct, PyTypeObject *retty, int ndim, int writeable, PyArray_Descr *descr) { PyArrayObject *array; MemInfoObject *miobj = NULL; PyObject *args; npy_intp *shape, *strides; int flags = 0; if (descr == NULL) { PyErr_Format(PyExc_RuntimeError, "In 'NRT_adapt_ndarray_to_python', 'descr' is NULL"); return NULL; } if (!NUMBA_PyArray_DescrCheck(descr)) { PyErr_Format(PyExc_TypeError, "expected dtype object, got '%.200s'", Py_TYPE(descr)->tp_name); return NULL; } if (arystruct->parent) { PyObject *obj = try_to_return_parent(arystruct, ndim, descr); if (obj) { return obj; } } if (arystruct->meminfo) { /* wrap into MemInfoObject */ miobj = PyObject_New(MemInfoObject, &MemInfoType); args = PyTuple_New(1); /* SETITEM steals reference */ PyTuple_SET_ITEM(args, 0, PyLong_FromVoidPtr(arystruct->meminfo)); NRT_Debug(nrt_debug_print("NRT_adapt_ndarray_to_python arystruct->meminfo=%p\n", arystruct->meminfo)); /* Note: MemInfo_init() does not incref. This function steals the * NRT reference, which we need to acquire. 
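 * (Hence NRT_MemInfo_acquire() is called below before MemInfo_init() adopts
 * the pointer.)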
*/ NRT_Debug(nrt_debug_print("NRT_adapt_ndarray_to_python_acqref created MemInfo=%p\n", miobj)); NRT_MemInfo_acquire(arystruct->meminfo); if (MemInfo_init(miobj, args, NULL)) { NRT_Debug(nrt_debug_print("MemInfo_init failed.\n")); return NULL; } Py_DECREF(args); } shape = arystruct->shape_and_strides; strides = shape + ndim; Py_INCREF((PyObject *) descr); array = (PyArrayObject *) PyArray_NewFromDescr(retty, descr, ndim, shape, strides, arystruct->data, flags, (PyObject *) miobj); if (array == NULL) return NULL; /* Set writable */ #if NPY_API_VERSION >= 0x00000007 if (writeable) { PyArray_ENABLEFLAGS(array, NPY_ARRAY_WRITEABLE); } else { PyArray_CLEARFLAGS(array, NPY_ARRAY_WRITEABLE); } #else if (writeable) { array->flags |= NPY_WRITEABLE; } else { array->flags &= ~NPY_WRITEABLE; } #endif if (miobj) { /* Set the MemInfoObject as the base object */ #if NPY_API_VERSION >= 0x00000007 if (-1 == PyArray_SetBaseObject(array, (PyObject *) miobj)) { Py_DECREF(array); Py_DECREF(miobj); return NULL; } #else PyArray_BASE(array) = (PyObject *) miobj; #endif } return (PyObject *) array; } NUMBA_EXPORT_FUNC(void) NRT_adapt_buffer_from_python(Py_buffer *buf, arystruct_t *arystruct) { int i; npy_intp *p; if (buf->obj) { /* Allocate new MemInfo only if the buffer has a parent */ arystruct->meminfo = NRT_meminfo_new_from_pyobject((void*)buf->buf, buf->obj); } arystruct->data = buf->buf; arystruct->itemsize = buf->itemsize; arystruct->parent = buf->obj; arystruct->nitems = 1; p = arystruct->shape_and_strides; for (i = 0; i < buf->ndim; i++, p++) { *p = buf->shape[i]; arystruct->nitems *= buf->shape[i]; } for (i = 0; i < buf->ndim; i++, p++) { *p = buf->strides[i]; } } /* Initialization subroutines for modules including this source file */ static int init_nrt_python_module(PyObject *module) { MemInfoType.tp_new = PyType_GenericNew; if (PyType_Ready(&MemInfoType)) return -1; return 0; } numba-0.55.1/numba/core/runtime/_nrt_pythonmod.c000664 000000 000000 00000013367 14174536160 021631 0ustar00rootroot000000 000000 #define NUMBA_EXPORT_FUNC(_rettype) static _rettype #define NUMBA_EXPORT_DATA(_vartype) static _vartype #include "_nrt_python.c" static PyObject * memsys_shutdown(PyObject *self, PyObject *args) { NRT_MemSys_shutdown(); Py_RETURN_NONE; } static PyObject * memsys_use_cpython_allocator(PyObject *self, PyObject *args) { NRT_MemSys_set_allocator(PyMem_RawMalloc, PyMem_RawRealloc, PyMem_RawFree); Py_RETURN_NONE; } static PyObject * memsys_set_atomic_inc_dec(PyObject *self, PyObject *args) { PyObject *addr_inc_obj, *addr_dec_obj; void *addr_inc, *addr_dec; if (!PyArg_ParseTuple(args, "OO", &addr_inc_obj, &addr_dec_obj)) { return NULL; } addr_inc = PyLong_AsVoidPtr(addr_inc_obj); if(PyErr_Occurred()) return NULL; addr_dec = PyLong_AsVoidPtr(addr_dec_obj); if(PyErr_Occurred()) return NULL; NRT_MemSys_set_atomic_inc_dec(addr_inc, addr_dec); Py_RETURN_NONE; } static PyObject * memsys_set_atomic_cas(PyObject *self, PyObject *args) { PyObject *addr_cas_obj; void *addr_cas; if (!PyArg_ParseTuple(args, "O", &addr_cas_obj)) { return NULL; } addr_cas = PyLong_AsVoidPtr(addr_cas_obj); if(PyErr_Occurred()) return NULL; NRT_MemSys_set_atomic_cas(addr_cas); Py_RETURN_NONE; } static PyObject * memsys_get_stats_alloc(PyObject *self, PyObject *args) { return PyLong_FromSize_t(NRT_MemSys_get_stats_alloc()); } static PyObject * memsys_get_stats_free(PyObject *self, PyObject *args) { return PyLong_FromSize_t(NRT_MemSys_get_stats_free()); } static PyObject * memsys_get_stats_mi_alloc(PyObject *self, PyObject *args) { return 
PyLong_FromSize_t(NRT_MemSys_get_stats_mi_alloc()); } static PyObject * memsys_get_stats_mi_free(PyObject *self, PyObject *args) { return PyLong_FromSize_t(NRT_MemSys_get_stats_mi_free()); } /* * Create a new MemInfo with a owner PyObject */ static PyObject * meminfo_new(PyObject *self, PyObject *args) { PyObject *addr_data_obj; void *addr_data; PyObject *ownerobj; NRT_MemInfo *mi; if (!PyArg_ParseTuple(args, "OO", &addr_data_obj, &ownerobj)) { return NULL; } addr_data = PyLong_AsVoidPtr(addr_data_obj); if (PyErr_Occurred()) return NULL; mi = NRT_meminfo_new_from_pyobject(addr_data, ownerobj); return PyLong_FromVoidPtr(mi); } /* * Create a new MemInfo with a new NRT allocation */ static PyObject * meminfo_alloc(PyObject *self, PyObject *args) { NRT_MemInfo *mi; Py_ssize_t size; if (!PyArg_ParseTuple(args, "n", &size)) { return NULL; } mi = NRT_MemInfo_alloc(size); return PyLong_FromVoidPtr(mi); } /* * Like meminfo_alloc but set memory to zero after allocation and before * deallocation. */ static PyObject * meminfo_alloc_safe(PyObject *self, PyObject *args) { NRT_MemInfo *mi; Py_ssize_t size; if (!PyArg_ParseTuple(args, "n", &size)) { return NULL; } mi = NRT_MemInfo_alloc_safe(size); return PyLong_FromVoidPtr(mi); } static PyMethodDef ext_methods[] = { #define declmethod(func) { #func , ( PyCFunction )func , METH_VARARGS , NULL } #define declmethod_noargs(func) { #func , ( PyCFunction )func , METH_NOARGS, NULL } declmethod_noargs(memsys_use_cpython_allocator), declmethod_noargs(memsys_shutdown), declmethod(memsys_set_atomic_inc_dec), declmethod(memsys_set_atomic_cas), declmethod_noargs(memsys_get_stats_alloc), declmethod_noargs(memsys_get_stats_free), declmethod_noargs(memsys_get_stats_mi_alloc), declmethod_noargs(memsys_get_stats_mi_free), declmethod(meminfo_new), declmethod(meminfo_alloc), declmethod(meminfo_alloc_safe), { NULL }, #undef declmethod }; static PyObject * build_c_helpers_dict(void) { PyObject *dct = PyDict_New(); if (dct == NULL) goto error; #define _declpointer(name, value) do { \ PyObject *o = PyLong_FromVoidPtr(value); \ if (o == NULL) goto error; \ if (PyDict_SetItemString(dct, name, o)) { \ Py_DECREF(o); \ goto error; \ } \ Py_DECREF(o); \ } while (0) #define declmethod(func) _declpointer(#func, &NRT_##func) #define declmethod_internal(func) _declpointer(#func, &func) declmethod(adapt_ndarray_from_python); declmethod(adapt_ndarray_to_python_acqref); declmethod(adapt_buffer_from_python); declmethod(meminfo_new_from_pyobject); declmethod(meminfo_as_pyobject); declmethod(meminfo_from_pyobject); declmethod(MemInfo_alloc); declmethod(MemInfo_alloc_safe); declmethod(MemInfo_alloc_aligned); declmethod(MemInfo_alloc_safe_aligned); declmethod(MemInfo_alloc_safe_aligned_external); declmethod_internal(_nrt_get_sample_external_allocator); declmethod(MemInfo_alloc_dtor_safe); declmethod(MemInfo_call_dtor); declmethod(MemInfo_new_varsize); declmethod(MemInfo_new_varsize_dtor); declmethod(MemInfo_varsize_alloc); declmethod(MemInfo_data); declmethod(MemInfo_varsize_free); declmethod(MemInfo_varsize_realloc); declmethod(MemInfo_release); declmethod(Allocate); declmethod(Free); declmethod(get_api); #undef declmethod #undef declmethod_internal return dct; error: Py_XDECREF(dct); return NULL; } MOD_INIT(_nrt_python) { PyObject *m; MOD_DEF(m, "_nrt_python", "No docs", ext_methods) if (m == NULL) return MOD_ERROR_VAL; import_array(); NRT_MemSys_init(); if (init_nrt_python_module(m)) return MOD_ERROR_VAL; Py_INCREF(&MemInfoType); PyModule_AddObject(m, "_MemInfo", (PyObject *) (&MemInfoType)); 
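    /* Expose the NRT C entry points by name: the "c_helpers" dict built below
       maps function names to raw addresses so that jitted code and
       numba.core.runtime can look them up at runtime. */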
PyModule_AddObject(m, "c_helpers", build_c_helpers_dict()); return MOD_SUCCESS_VAL(m); } numba-0.55.1/numba/core/runtime/context.py000664 000000 000000 00000022737 14174536160 020461 0ustar00rootroot000000 000000 from llvmlite import ir from numba.core import types, cgutils, errors class NRTContext(object): """ An object providing access to NRT APIs in the lowering pass. """ def __init__(self, context, enabled): self._context = context self._enabled = enabled def _require_nrt(self): if not self._enabled: raise errors.NumbaRuntimeError("NRT required but not enabled") def allocate(self, builder, size): """ Low-level allocate a new memory area of `size` bytes. """ self._require_nrt() mod = builder.module fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t]) fn = cgutils.get_or_insert_function(mod, fnty, "NRT_Allocate") fn.return_value.add_attribute("noalias") return builder.call(fn, [size]) def free(self, builder, ptr): """ Low-level free a memory area allocated with allocate(). """ self._require_nrt() mod = builder.module fnty = ir.FunctionType(ir.VoidType(), [cgutils.voidptr_t]) fn = cgutils.get_or_insert_function(mod, fnty, "NRT_Free") return builder.call(fn, [ptr]) def meminfo_alloc(self, builder, size): """ Allocate a new MemInfo with a data payload of `size` bytes. A pointer to the MemInfo is returned. """ self._require_nrt() mod = builder.module fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t]) fn = cgutils.get_or_insert_function(mod, fnty, "NRT_MemInfo_alloc_safe") fn.return_value.add_attribute("noalias") return builder.call(fn, [size]) def meminfo_alloc_dtor(self, builder, size, dtor): self._require_nrt() mod = builder.module fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t, cgutils.voidptr_t]) fn = cgutils.get_or_insert_function(mod, fnty, "NRT_MemInfo_alloc_dtor_safe") fn.return_value.add_attribute("noalias") return builder.call(fn, [size, builder.bitcast(dtor, cgutils.voidptr_t)]) def meminfo_alloc_aligned(self, builder, size, align): """ Allocate a new MemInfo with an aligned data payload of `size` bytes. The data pointer is aligned to `align` bytes. `align` can be either a Python int or a LLVM uint32 value. A pointer to the MemInfo is returned. """ self._require_nrt() mod = builder.module u32 = ir.IntType(32) fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t, u32]) fn = cgutils.get_or_insert_function(mod, fnty, "NRT_MemInfo_alloc_safe_aligned") fn.return_value.add_attribute("noalias") if isinstance(align, int): align = self._context.get_constant(types.uint32, align) else: assert align.type == u32, "align must be a uint32" return builder.call(fn, [size, align]) def meminfo_new_varsize(self, builder, size): """ Allocate a MemInfo pointing to a variable-sized data area. The area is separately allocated (i.e. two allocations are made) so that re-allocating it doesn't change the MemInfo's address. A pointer to the MemInfo is returned. """ self._require_nrt() mod = builder.module fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t]) fn = cgutils.get_or_insert_function(mod, fnty, "NRT_MemInfo_new_varsize") fn.return_value.add_attribute("noalias") return builder.call(fn, [size]) def meminfo_new_varsize_dtor(self, builder, size, dtor): """ Like meminfo_new_varsize() but also set the destructor for cleaning up references to objects inside the allocation. 
""" self._require_nrt() mod = builder.module fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t, cgutils.voidptr_t]) fn = cgutils.get_or_insert_function( mod, fnty, "NRT_MemInfo_new_varsize_dtor") return builder.call(fn, [size, dtor]) def meminfo_varsize_alloc(self, builder, meminfo, size): """ Allocate a new data area for a MemInfo created by meminfo_new_varsize(). The new data pointer is returned, for convenience. Contrary to realloc(), this always allocates a new area and doesn't copy the old data. This is useful if resizing a container needs more than simply copying the data area (e.g. for hash tables). The old pointer will have to be freed with meminfo_varsize_free(). """ return self._call_varsize_alloc(builder, meminfo, size, "NRT_MemInfo_varsize_alloc") def meminfo_varsize_realloc(self, builder, meminfo, size): """ Reallocate a data area allocated by meminfo_new_varsize(). The new data pointer is returned, for convenience. """ return self._call_varsize_alloc(builder, meminfo, size, "NRT_MemInfo_varsize_realloc") def meminfo_varsize_free(self, builder, meminfo, ptr): """ Free a memory area allocated for a NRT varsize object. Note this does *not* free the NRT object itself! """ self._require_nrt() mod = builder.module fnty = ir.FunctionType(ir.VoidType(), [cgutils.voidptr_t, cgutils.voidptr_t]) fn = cgutils.get_or_insert_function(mod, fnty, "NRT_MemInfo_varsize_free") return builder.call(fn, (meminfo, ptr)) def _call_varsize_alloc(self, builder, meminfo, size, funcname): self._require_nrt() mod = builder.module fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.voidptr_t, cgutils.intp_t]) fn = cgutils.get_or_insert_function(mod, fnty, funcname) fn.return_value.add_attribute("noalias") return builder.call(fn, [meminfo, size]) def meminfo_data(self, builder, meminfo): """ Given a MemInfo pointer, return a pointer to the allocated data managed by it. This works for MemInfos allocated with all the above methods. """ self._require_nrt() from numba.core.runtime.nrtdynmod import meminfo_data_ty mod = builder.module fn = cgutils.get_or_insert_function(mod, meminfo_data_ty, "NRT_MemInfo_data_fast") return builder.call(fn, [meminfo]) def get_meminfos(self, builder, ty, val): """Return a list of *(type, meminfo)* inside the given value. """ datamodel = self._context.data_model_manager[ty] members = datamodel.traverse(builder) meminfos = [] if datamodel.has_nrt_meminfo(): mi = datamodel.get_nrt_meminfo(builder, val) meminfos.append((ty, mi)) for mtyp, getter in members: field = getter(val) inner_meminfos = self.get_meminfos(builder, mtyp, field) meminfos.extend(inner_meminfos) return meminfos def _call_incref_decref(self, builder, typ, value, funcname): """Call function of *funcname* on every meminfo found in *value*. """ self._require_nrt() from numba.core.runtime.nrtdynmod import incref_decref_ty meminfos = self.get_meminfos(builder, typ, value) for _, mi in meminfos: mod = builder.module fn = cgutils.get_or_insert_function(mod, incref_decref_ty, funcname) # XXX "nonnull" causes a crash in test_dyn_array: can this # function be called with a NULL pointer? fn.args[0].add_attribute("noalias") fn.args[0].add_attribute("nocapture") builder.call(fn, [mi]) def incref(self, builder, typ, value): """ Recursively incref the given *value* and its members. """ self._call_incref_decref(builder, typ, value, "NRT_incref") def decref(self, builder, typ, value): """ Recursively decref the given *value* and its members. 
""" self._call_incref_decref(builder, typ, value, "NRT_decref") def get_nrt_api(self, builder): """Calls NRT_get_api(), which returns the NRT API function table. """ self._require_nrt() fnty = ir.FunctionType(cgutils.voidptr_t, ()) mod = builder.module fn = cgutils.get_or_insert_function(mod, fnty, "NRT_get_api") return builder.call(fn, ()) def eh_check(self, builder): """Check if an exception is raised """ ctx = self._context cc = ctx.call_conv # Inspect the excinfo argument on the function trystatus = cc.check_try_status(builder) excinfo = trystatus.excinfo has_raised = builder.not_(cgutils.is_null(builder, excinfo)) with builder.if_then(has_raised): self.eh_end_try(builder) return has_raised def eh_try(self, builder): """Begin a try-block. """ ctx = self._context cc = ctx.call_conv cc.set_try_status(builder) def eh_end_try(self, builder): """End a try-block """ ctx = self._context cc = ctx.call_conv cc.unset_try_status(builder) numba-0.55.1/numba/core/runtime/nrt.c000664 000000 000000 00000040143 14174536160 017361 0ustar00rootroot000000 000000 #include #include /* for memset */ #include "nrt.h" #include "assert.h" #if !defined MIN #define MIN(a, b) ((a) < (b)) ? (a) : (b) #endif typedef int (*atomic_meminfo_cas_func)(void **ptr, void *cmp, void *repl, void **oldptr); /* NOTE: if changing the layout, please update numba.core.runtime.atomicops */ struct MemInfo { size_t refct; NRT_dtor_function dtor; void *dtor_info; void *data; size_t size; /* only used for NRT allocated memory */ NRT_ExternalAllocator *external_allocator; }; /* * Misc helpers. */ static void nrt_fatal_error(const char *msg) { fprintf(stderr, "Fatal Numba error: %s\n", msg); fflush(stderr); /* it helps in Windows debug build */ #if defined(MS_WINDOWS) && defined(_DEBUG) DebugBreak(); #endif abort(); } /* * Global resources. */ struct MemSys { /* Atomic increment and decrement function */ NRT_atomic_inc_dec_func atomic_inc, atomic_dec; /* Atomic CAS */ atomic_meminfo_cas_func atomic_cas; /* Shutdown flag */ int shutting; /* Stats */ size_t stats_alloc, stats_free, stats_mi_alloc, stats_mi_free; /* System allocation functions */ struct { NRT_malloc_func malloc; NRT_realloc_func realloc; NRT_free_func free; } allocator; }; /* The Memory System object */ static NRT_MemSys TheMSys; void NRT_MemSys_init(void) { memset(&TheMSys, 0, sizeof(NRT_MemSys)); /* Bind to libc allocator */ TheMSys.allocator.malloc = malloc; TheMSys.allocator.realloc = realloc; TheMSys.allocator.free = free; } void NRT_MemSys_shutdown(void) { TheMSys.shutting = 1; /* Revert to use our non-atomic stub for all atomic operations because the JIT-ed version will be removed. Since we are at interpreter shutdown, it cannot be running multiple threads anymore. 
*/ NRT_MemSys_set_atomic_inc_dec_stub(); NRT_MemSys_set_atomic_cas_stub(); } void NRT_MemSys_set_allocator(NRT_malloc_func malloc_func, NRT_realloc_func realloc_func, NRT_free_func free_func) { if ((malloc_func != TheMSys.allocator.malloc || realloc_func != TheMSys.allocator.realloc || free_func != TheMSys.allocator.free) && (TheMSys.stats_alloc != TheMSys.stats_free || TheMSys.stats_mi_alloc != TheMSys.stats_mi_free)) { nrt_fatal_error("cannot change allocator while blocks are allocated"); } TheMSys.allocator.malloc = malloc_func; TheMSys.allocator.realloc = realloc_func; TheMSys.allocator.free = free_func; } void NRT_MemSys_set_atomic_inc_dec(NRT_atomic_inc_dec_func inc, NRT_atomic_inc_dec_func dec) { TheMSys.atomic_inc = inc; TheMSys.atomic_dec = dec; } void NRT_MemSys_set_atomic_cas(NRT_atomic_cas_func cas) { TheMSys.atomic_cas = (atomic_meminfo_cas_func) cas; } size_t NRT_MemSys_get_stats_alloc() { return TheMSys.stats_alloc; } size_t NRT_MemSys_get_stats_free() { return TheMSys.stats_free; } size_t NRT_MemSys_get_stats_mi_alloc() { return TheMSys.stats_mi_alloc; } size_t NRT_MemSys_get_stats_mi_free() { return TheMSys.stats_mi_free; } static size_t nrt_testing_atomic_inc(size_t *ptr){ /* non atomic */ size_t out = *ptr; out += 1; *ptr = out; return out; } static size_t nrt_testing_atomic_dec(size_t *ptr){ /* non atomic */ size_t out = *ptr; out -= 1; *ptr = out; return out; } static int nrt_testing_atomic_cas(void* volatile *ptr, void *cmp, void *val, void * *oldptr){ /* non atomic */ void *old = *ptr; *oldptr = old; if (old == cmp) { *ptr = val; return 1; } return 0; } void NRT_MemSys_set_atomic_inc_dec_stub(void){ NRT_MemSys_set_atomic_inc_dec(nrt_testing_atomic_inc, nrt_testing_atomic_dec); } void NRT_MemSys_set_atomic_cas_stub(void) { NRT_MemSys_set_atomic_cas(nrt_testing_atomic_cas); } /* * The MemInfo structure. 
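 *
 * A MemInfo starts life with refct == 1. An illustrative lifecycle using
 * the functions defined below:
 *
 *   NRT_MemInfo *mi = NRT_MemInfo_alloc(n);   // refct == 1
 *   NRT_MemInfo_acquire(mi);                  // refct == 2
 *   NRT_MemInfo_release(mi);                  // refct == 1
 *   NRT_MemInfo_release(mi);                  // refct == 0: dtor runs and
 *                                             // the MemInfo is destroyed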
*/ void NRT_MemInfo_init(NRT_MemInfo *mi,void *data, size_t size, NRT_dtor_function dtor, void *dtor_info, NRT_ExternalAllocator *external_allocator) { mi->refct = 1; /* starts with 1 refct */ mi->dtor = dtor; mi->dtor_info = dtor_info; mi->data = data; mi->size = size; mi->external_allocator = external_allocator; NRT_Debug(nrt_debug_print("NRT_MemInfo_init mi=%p external_allocator=%p\n", mi, external_allocator)); /* Update stats */ TheMSys.atomic_inc(&TheMSys.stats_mi_alloc); } NRT_MemInfo *NRT_MemInfo_new(void *data, size_t size, NRT_dtor_function dtor, void *dtor_info) { NRT_MemInfo *mi = NRT_Allocate(sizeof(NRT_MemInfo)); NRT_Debug(nrt_debug_print("NRT_MemInfo_new mi=%p\n", mi)); NRT_MemInfo_init(mi, data, size, dtor, dtor_info, NULL); return mi; } size_t NRT_MemInfo_refcount(NRT_MemInfo *mi) { /* Should never returns 0 for a valid MemInfo */ if (mi && mi->data) return mi->refct; else{ return (size_t)-1; } } static void nrt_internal_dtor_safe(void *ptr, size_t size, void *info) { NRT_Debug(nrt_debug_print("nrt_internal_dtor_safe %p, %p\n", ptr, info)); /* See NRT_MemInfo_alloc_safe() */ memset(ptr, 0xDE, MIN(size, 256)); } static void *nrt_allocate_meminfo_and_data(size_t size, NRT_MemInfo **mi_out, NRT_ExternalAllocator *allocator) { NRT_MemInfo *mi; NRT_Debug(nrt_debug_print("nrt_allocate_meminfo_and_data %p\n", allocator)); char *base = NRT_Allocate_External(sizeof(NRT_MemInfo) + size, allocator); mi = (NRT_MemInfo *) base; *mi_out = mi; return base + sizeof(NRT_MemInfo); } static void nrt_internal_custom_dtor_safe(void *ptr, size_t size, void *info) { NRT_dtor_function dtor = info; NRT_Debug(nrt_debug_print("nrt_internal_custom_dtor_safe %p, %p\n", ptr, info)); if (dtor) { dtor(ptr, size, NULL); } nrt_internal_dtor_safe(ptr, size, NULL); } NRT_MemInfo *NRT_MemInfo_alloc(size_t size) { NRT_MemInfo *mi; void *data = nrt_allocate_meminfo_and_data(size, &mi, NULL); NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc %p\n", data)); NRT_MemInfo_init(mi, data, size, NULL, NULL, NULL); return mi; } NRT_MemInfo *NRT_MemInfo_alloc_external(size_t size, NRT_ExternalAllocator *allocator) { NRT_MemInfo *mi; void *data = nrt_allocate_meminfo_and_data(size, &mi, allocator); NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc %p\n", data)); NRT_MemInfo_init(mi, data, size, NULL, NULL, allocator); return mi; } NRT_MemInfo *NRT_MemInfo_alloc_safe(size_t size) { return NRT_MemInfo_alloc_dtor_safe(size, NULL); } NRT_MemInfo* NRT_MemInfo_alloc_dtor_safe(size_t size, NRT_dtor_function dtor) { NRT_MemInfo *mi; void *data = nrt_allocate_meminfo_and_data(size, &mi, NULL); /* Only fill up a couple cachelines with debug markers, to minimize overhead. 
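
       (0xCB marks freshly allocated "safe" memory, while
       nrt_internal_dtor_safe overwrites freed memory with 0xDE; a stale
       read after free therefore shows a recognizable 0xDEDE.. pattern
       instead of plausible-looking data.)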
*/ memset(data, 0xCB, MIN(size, 256)); NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_dtor_safe %p %zu\n", data, size)); NRT_MemInfo_init(mi, data, size, nrt_internal_custom_dtor_safe, dtor, NULL); return mi; } static void *nrt_allocate_meminfo_and_data_align(size_t size, unsigned align, NRT_MemInfo **mi, NRT_ExternalAllocator *allocator) { size_t offset, intptr, remainder; NRT_Debug(nrt_debug_print("nrt_allocate_meminfo_and_data_align %p\n", allocator)); char *base = nrt_allocate_meminfo_and_data(size + 2 * align, mi, allocator); intptr = (size_t) base; /* See if we are aligned */ remainder = intptr % align; if (remainder == 0){ /* Yes */ offset = 0; } else { /* No, move forward `offset` bytes */ offset = align - remainder; } return base + offset; } NRT_MemInfo *NRT_MemInfo_alloc_aligned(size_t size, unsigned align) { NRT_MemInfo *mi; void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi, NULL); NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_aligned %p\n", data)); NRT_MemInfo_init(mi, data, size, NULL, NULL, NULL); return mi; } NRT_MemInfo *NRT_MemInfo_alloc_safe_aligned(size_t size, unsigned align) { NRT_MemInfo *mi; void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi, NULL); /* Only fill up a couple cachelines with debug markers, to minimize overhead. */ memset(data, 0xCB, MIN(size, 256)); NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_safe_aligned %p %zu\n", data, size)); NRT_MemInfo_init(mi, data, size, nrt_internal_dtor_safe, (void*)size, NULL); return mi; } NRT_MemInfo *NRT_MemInfo_alloc_safe_aligned_external(size_t size, unsigned align, NRT_ExternalAllocator *allocator) { NRT_MemInfo *mi; NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_safe_aligned_external %p\n", allocator)); void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi, allocator); /* Only fill up a couple cachelines with debug markers, to minimize overhead. 
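
       Alignment recap for the call above, with illustrative numbers: the
       payload starts right after the MemInfo header; if that address is
       0x1003 and align == 8, then remainder == 3 and the data pointer is
       bumped by offset == 8 - 3 == 5 to the aligned 0x1008. Allocating
       size + 2 * align bytes guarantees the bump always fits.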
*/ memset(data, 0xCB, MIN(size, 256)); NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_safe_aligned %p %zu\n", data, size)); NRT_MemInfo_init(mi, data, size, nrt_internal_dtor_safe, (void*)size, allocator); return mi; } void NRT_dealloc(NRT_MemInfo *mi) { NRT_Debug(nrt_debug_print("NRT_dealloc meminfo: %p external_allocator: %p\n", mi, mi->external_allocator)); if (mi->external_allocator) { mi->external_allocator->free(mi, mi->external_allocator->opaque_data); TheMSys.atomic_inc(&TheMSys.stats_free); } else { NRT_Free(mi); } } void NRT_MemInfo_destroy(NRT_MemInfo *mi) { NRT_dealloc(mi); TheMSys.atomic_inc(&TheMSys.stats_mi_free); } void NRT_MemInfo_acquire(NRT_MemInfo *mi) { NRT_Debug(nrt_debug_print("NRT_MemInfo_acquire %p refct=%zu\n", mi, mi->refct)); assert(mi->refct > 0 && "RefCt cannot be zero"); TheMSys.atomic_inc(&mi->refct); } void NRT_MemInfo_call_dtor(NRT_MemInfo *mi) { NRT_Debug(nrt_debug_print("NRT_MemInfo_call_dtor %p\n", mi)); if (mi->dtor && !TheMSys.shutting) /* We have a destructor and the system is not shutting down */ mi->dtor(mi->data, mi->size, mi->dtor_info); /* Clear and release MemInfo */ NRT_MemInfo_destroy(mi); } void NRT_MemInfo_release(NRT_MemInfo *mi) { NRT_Debug(nrt_debug_print("NRT_MemInfo_release %p refct=%zu\n", mi, mi->refct)); assert (mi->refct > 0 && "RefCt cannot be 0"); /* RefCt drop to zero */ if (TheMSys.atomic_dec(&mi->refct) == 0) { NRT_MemInfo_call_dtor(mi); } } void* NRT_MemInfo_data(NRT_MemInfo* mi) { return mi->data; } size_t NRT_MemInfo_size(NRT_MemInfo* mi) { return mi->size; } void * NRT_MemInfo_external_allocator(NRT_MemInfo *mi) { NRT_Debug(nrt_debug_print("NRT_MemInfo_external_allocator meminfo: %p external_allocator: %p\n", mi, mi->external_allocator)); return mi->external_allocator; } void *NRT_MemInfo_parent(NRT_MemInfo *mi) { return mi->dtor_info; } void NRT_MemInfo_dump(NRT_MemInfo *mi, FILE *out) { fprintf(out, "MemInfo %p refcount %zu\n", mi, mi->refct); } /* * Resizable buffer API. 
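 *
 * An illustrative round-trip through the functions below:
 *
 *   NRT_MemInfo *mi = NRT_MemInfo_new_varsize(16);
 *   void *p = NRT_MemInfo_varsize_realloc(mi, 32);  // grow, keep contents
 *   NRT_MemInfo_release(mi);                        // dtor frees the area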
*/ static void nrt_varsize_dtor(void *ptr, size_t size, void *info) { NRT_Debug(nrt_debug_print("nrt_varsize_dtor %p\n", ptr)); if (info) { /* call element dtor */ typedef void dtor_fn_t(void *ptr); dtor_fn_t *dtor = info; dtor(ptr); } NRT_Free(ptr); } NRT_MemInfo *NRT_MemInfo_new_varsize(size_t size) { NRT_MemInfo *mi; void *data = NRT_Allocate(size); if (data == NULL) return NULL; mi = NRT_MemInfo_new(data, size, nrt_varsize_dtor, NULL); NRT_Debug(nrt_debug_print("NRT_MemInfo_new_varsize size=%zu " "-> meminfo=%p, data=%p\n", size, mi, data)); return mi; } NRT_MemInfo *NRT_MemInfo_new_varsize_dtor(size_t size, NRT_dtor_function dtor) { NRT_MemInfo *mi = NRT_MemInfo_new_varsize(size); if (mi) { mi->dtor_info = dtor; } return mi; } void *NRT_MemInfo_varsize_alloc(NRT_MemInfo *mi, size_t size) { if (mi->dtor != nrt_varsize_dtor) { nrt_fatal_error("ERROR: NRT_MemInfo_varsize_alloc called " "with a non varsize-allocated meminfo"); return NULL; /* unreachable */ } mi->data = NRT_Allocate(size); if (mi->data == NULL) return NULL; mi->size = size; NRT_Debug(nrt_debug_print("NRT_MemInfo_varsize_alloc %p size=%zu " "-> data=%p\n", mi, size, mi->data)); return mi->data; } void *NRT_MemInfo_varsize_realloc(NRT_MemInfo *mi, size_t size) { if (mi->dtor != nrt_varsize_dtor) { nrt_fatal_error("ERROR: NRT_MemInfo_varsize_realloc called " "with a non varsize-allocated meminfo"); return NULL; /* unreachable */ } mi->data = NRT_Reallocate(mi->data, size); if (mi->data == NULL) return NULL; mi->size = size; NRT_Debug(nrt_debug_print("NRT_MemInfo_varsize_realloc %p size=%zu " "-> data=%p\n", mi, size, mi->data)); return mi->data; } void NRT_MemInfo_varsize_free(NRT_MemInfo *mi, void *ptr) { NRT_Free(ptr); if (ptr == mi->data) mi->data = NULL; } /* * Low-level allocation wrappers. */ void* NRT_Allocate(size_t size) { return NRT_Allocate_External(size, NULL); } void* NRT_Allocate_External(size_t size, NRT_ExternalAllocator *allocator) { void *ptr = NULL; if (allocator) { ptr = allocator->malloc(size, allocator->opaque_data); NRT_Debug(nrt_debug_print("NRT_Allocate_External custom bytes=%zu ptr=%p\n", size, ptr)); } else { ptr = TheMSys.allocator.malloc(size); NRT_Debug(nrt_debug_print("NRT_Allocate_External bytes=%zu ptr=%p\n", size, ptr)); } TheMSys.atomic_inc(&TheMSys.stats_alloc); return ptr; } void *NRT_Reallocate(void *ptr, size_t size) { void *new_ptr = TheMSys.allocator.realloc(ptr, size); NRT_Debug(nrt_debug_print("NRT_Reallocate bytes=%zu ptr=%p -> %p\n", size, ptr, new_ptr)); return new_ptr; } void NRT_Free(void *ptr) { NRT_Debug(nrt_debug_print("NRT_Free %p\n", ptr)); TheMSys.allocator.free(ptr); TheMSys.atomic_inc(&TheMSys.stats_free); } /* * Sample external allocator implementation for internal testing. 
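 *
 * The sample below simply forwards to the system allocator after checking
 * its opaque_data cookie; an illustrative use:
 *
 *   NRT_ExternalAllocator *a = _nrt_get_sample_external_allocator();
 *   NRT_MemInfo *mi = NRT_MemInfo_alloc_external(64, a);
 *   NRT_MemInfo_release(mi);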
*/ static int sample_external_opaque_data = 0xabacad; static void* sample_external_malloc(size_t size, void* opaque_data) { if (opaque_data != &sample_external_opaque_data) return NULL; return TheMSys.allocator.malloc(size); } static void* sample_external_realloc(void *ptr, size_t new_size, void *opaque_data) { if (opaque_data != &sample_external_opaque_data) return NULL; return TheMSys.allocator.realloc(ptr, new_size); } static void sample_external_free(void *ptr, void* opaque_data) { TheMSys.allocator.free(ptr); } static NRT_ExternalAllocator sample_external_allocator = { // malloc sample_external_malloc, // realloc sample_external_realloc, // free sample_external_free, // opaque_data &sample_external_opaque_data }; NRT_ExternalAllocator* _nrt_get_sample_external_allocator() { return &sample_external_allocator; } /* * Debugging printf function used internally */ void nrt_debug_print(char *fmt, ...) { va_list args; va_start(args, fmt); vfprintf(stderr, fmt, args); va_end(args); } static void nrt_manage_memory_dtor(void *data, size_t size, void *info) { NRT_managed_dtor* dtor = (NRT_managed_dtor*)info; dtor(data); } static NRT_MemInfo* nrt_manage_memory(void *data, NRT_managed_dtor dtor) { return NRT_MemInfo_new(data, 0, nrt_manage_memory_dtor, dtor); } static const NRT_api_functions nrt_functions_table = { NRT_MemInfo_alloc, NRT_MemInfo_alloc_external, nrt_manage_memory, NRT_MemInfo_acquire, NRT_MemInfo_release, NRT_MemInfo_data }; const NRT_api_functions* NRT_get_api(void) { return &nrt_functions_table; } numba-0.55.1/numba/core/runtime/nrt.h000664 000000 000000 00000014777 14174536160 017404 0ustar00rootroot000000 000000 /* All functions described here are threadsafe. */ #ifndef NUMBA_NRT_H_ #define NUMBA_NRT_H_ #include #include #include "../../_numba_common.h" #include "nrt_external.h" /* Debugging facilities - enabled at compile-time */ /* #undef NDEBUG */ #if 0 # define NRT_Debug(X) {X; fflush(stdout); } #else # define NRT_Debug(X) if (0) { X; } #endif /* TypeDefs */ typedef void (*NRT_dtor_function)(void *ptr, size_t size, void *info); typedef void (*NRT_dealloc_func)(void *ptr, void *dealloc_info); typedef size_t (*NRT_atomic_inc_dec_func)(size_t *ptr); typedef int (*NRT_atomic_cas_func)(void * volatile *ptr, void *cmp, void *repl, void **oldptr); typedef struct MemSys NRT_MemSys; typedef void *(*NRT_malloc_func)(size_t size); typedef void *(*NRT_realloc_func)(void *ptr, size_t new_size); typedef void (*NRT_free_func)(void *ptr); /* Memory System API */ /* Initialize the memory system */ VISIBILITY_HIDDEN void NRT_MemSys_init(void); /* Shutdown the memory system */ VISIBILITY_HIDDEN void NRT_MemSys_shutdown(void); /* * Register the system allocation functions */ VISIBILITY_HIDDEN void NRT_MemSys_set_allocator(NRT_malloc_func, NRT_realloc_func, NRT_free_func); /* * Register the atomic increment and decrement functions */ VISIBILITY_HIDDEN void NRT_MemSys_set_atomic_inc_dec(NRT_atomic_inc_dec_func inc, NRT_atomic_inc_dec_func dec); /* * Register the atomic compare and swap function */ VISIBILITY_HIDDEN void NRT_MemSys_set_atomic_cas(NRT_atomic_cas_func cas); /* * Register a non-atomic STUB for increment and decrement */ VISIBILITY_HIDDEN void NRT_MemSys_set_atomic_inc_dec_stub(void); /* * Register a non-atomic STUB for compare and swap */ VISIBILITY_HIDDEN void NRT_MemSys_set_atomic_cas_stub(void); /* * The following functions get internal statistics of the memory subsystem. 
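 * The alloc/free and mi_alloc/mi_free counters come in pairs, so a simple
 * (illustrative) leak check is:
 *
 *   size_t leaked = NRT_MemSys_get_stats_alloc() - NRT_MemSys_get_stats_free();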
*/ VISIBILITY_HIDDEN size_t NRT_MemSys_get_stats_alloc(void); VISIBILITY_HIDDEN size_t NRT_MemSys_get_stats_free(void); VISIBILITY_HIDDEN size_t NRT_MemSys_get_stats_mi_alloc(void); VISIBILITY_HIDDEN size_t NRT_MemSys_get_stats_mi_free(void); /* Memory Info API */ /* Create a new MemInfo for external memory * * data: data pointer being tracked * dtor: destructor to execute * dtor_info: additional information to pass to the destructor */ VISIBILITY_HIDDEN NRT_MemInfo* NRT_MemInfo_new(void *data, size_t size, NRT_dtor_function dtor, void *dtor_info); /* * The `external_allocator` is for experimental API to customize the allocator. * Set to NULL to use the default builtin allocator. */ VISIBILITY_HIDDEN void NRT_MemInfo_init(NRT_MemInfo *mi, void *data, size_t size, NRT_dtor_function dtor, void *dtor_info, NRT_ExternalAllocator *external_allocator); /* * Returns the refcount of a MemInfo or (size_t)-1 if error. */ VISIBILITY_HIDDEN size_t NRT_MemInfo_refcount(NRT_MemInfo *mi); /* * Allocate memory of `size` bytes and return a pointer to a MemInfo structure * that describes the allocation */ VISIBILITY_HIDDEN NRT_MemInfo *NRT_MemInfo_alloc(size_t size); NRT_MemInfo *NRT_MemInfo_alloc_external(size_t size, NRT_ExternalAllocator *allocator); /* * The "safe" NRT_MemInfo_alloc performs additional steps to help debug * memory errors. * It is guaranteed to: * - zero-fill to the memory region after allocation and before deallocation. * - may do more in the future */ VISIBILITY_HIDDEN NRT_MemInfo *NRT_MemInfo_alloc_safe(size_t size); /* * Similar to NRT_MemInfo_alloc_safe but with a custom dtor. */ VISIBILITY_HIDDEN NRT_MemInfo* NRT_MemInfo_alloc_dtor_safe(size_t size, NRT_dtor_function dtor); /* * Aligned versions of the NRT_MemInfo_alloc and NRT_MemInfo_alloc_safe. * These take an additional argument `align` for number of bytes to align to. */ VISIBILITY_HIDDEN NRT_MemInfo *NRT_MemInfo_alloc_aligned(size_t size, unsigned align); VISIBILITY_HIDDEN NRT_MemInfo *NRT_MemInfo_alloc_safe_aligned(size_t size, unsigned align); /* * Experimental. * A variation to use an external allocator. */ NRT_MemInfo *NRT_MemInfo_alloc_safe_aligned_external(size_t size, unsigned align, NRT_ExternalAllocator *allocator); /* * Internal API. * Release a MemInfo. Calls NRT_MemSys_insert_meminfo. */ VISIBILITY_HIDDEN void NRT_MemInfo_destroy(NRT_MemInfo *mi); /* * Acquire a reference to a MemInfo */ VISIBILITY_HIDDEN void NRT_MemInfo_acquire(NRT_MemInfo* mi); /* * Release a reference to a MemInfo */ VISIBILITY_HIDDEN void NRT_MemInfo_release(NRT_MemInfo* mi); /* * Internal/Compiler API. * Invoke the registered destructor of a MemInfo. */ VISIBILITY_HIDDEN void NRT_MemInfo_call_dtor(NRT_MemInfo *mi); /* * Returns the data pointer */ VISIBILITY_HIDDEN void* NRT_MemInfo_data(NRT_MemInfo* mi); /* * Returns the allocated size */ VISIBILITY_HIDDEN size_t NRT_MemInfo_size(NRT_MemInfo* mi); /* * Experimental. * Returns the external allocator */ VISIBILITY_HIDDEN void* NRT_MemInfo_external_allocator(NRT_MemInfo* mi); /* * Returns the parent MemInfo */ VISIBILITY_HIDDEN void* NRT_MemInfo_parent(NRT_MemInfo* mi); /* * NRT API for resizable buffers. 
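 *
 * varsize_alloc hands back a fresh area without preserving contents, while
 * varsize_realloc preserves them; resizing a hash table, for instance, can
 * varsize_alloc the new table, rehash into it, then varsize_free the old
 * pointer.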
*/ VISIBILITY_HIDDEN NRT_MemInfo *NRT_MemInfo_new_varsize(size_t size); VISIBILITY_HIDDEN NRT_MemInfo *NRT_MemInfo_new_varsize_dtor(size_t size, NRT_dtor_function dtor); VISIBILITY_HIDDEN void *NRT_MemInfo_varsize_alloc(NRT_MemInfo *mi, size_t size); VISIBILITY_HIDDEN void *NRT_MemInfo_varsize_realloc(NRT_MemInfo *mi, size_t size); VISIBILITY_HIDDEN void NRT_MemInfo_varsize_free(NRT_MemInfo *mi, void *ptr); /* * Print debug info to FILE */ VISIBILITY_HIDDEN void NRT_MemInfo_dump(NRT_MemInfo *mi, FILE *out); /* Low-level allocation wrappers. */ /* * Allocate memory of `size` bytes. */ VISIBILITY_HIDDEN void* NRT_Allocate(size_t size); /* * Experimental * * An alternative allocator that allows using an external allocator. */ VISIBILITY_HIDDEN void* NRT_Allocate_External(size_t size, NRT_ExternalAllocator *allocator); /* * Deallocate memory pointed by `ptr`. */ VISIBILITY_HIDDEN void NRT_Free(void *ptr); /* * Reallocate memory at `ptr`. */ VISIBILITY_HIDDEN void *NRT_Reallocate(void *ptr, size_t size); /* * Debugging printf function used internally */ VISIBILITY_HIDDEN void nrt_debug_print(char *fmt, ...); /* * Get API function table. */ VISIBILITY_HIDDEN const NRT_api_functions* NRT_get_api(void); /* * FOR INTERNAL USE ONLY. * Get a sample external allocator for testing */ VISIBILITY_HIDDEN NRT_ExternalAllocator* _nrt_get_sample_external_allocator(void); #endif /* NUMBA_NRT_H_ */ numba-0.55.1/numba/core/runtime/nrt.py000664 000000 000000 00000007702 14174536160 017573 0ustar00rootroot000000 000000 from collections import namedtuple from weakref import finalize as _finalize from numba.core.runtime import nrtdynmod from llvmlite import binding as ll from numba.core.compiler_lock import global_compiler_lock from numba.core.typing.typeof import typeof_impl from numba.core import types from numba.core.runtime import _nrt_python as _nrt _nrt_mstats = namedtuple("nrt_mstats", ["alloc", "free", "mi_alloc", "mi_free"]) class _Runtime(object): def __init__(self): self._init = False @global_compiler_lock def initialize(self, ctx): """Initializes the NRT Must be called before any actual call to the NRT API. Safe to be called multiple times. """ if self._init: # Already initialized return # Register globals into the system for py_name in _nrt.c_helpers: if py_name.startswith("_"): # internal API c_name = py_name else: c_name = "NRT_" + py_name c_address = _nrt.c_helpers[py_name] ll.add_symbol(c_name, c_address) # Compile atomic operations self._library = nrtdynmod.compile_nrt_functions(ctx) self._ptr_inc = self._library.get_pointer_to_function("nrt_atomic_add") self._ptr_dec = self._library.get_pointer_to_function("nrt_atomic_sub") self._ptr_cas = self._library.get_pointer_to_function("nrt_atomic_cas") # Install atomic ops to NRT _nrt.memsys_set_atomic_inc_dec(self._ptr_inc, self._ptr_dec) _nrt.memsys_set_atomic_cas(self._ptr_cas) self._init = True def _init_guard(self): if not self._init: msg = "Runtime must be initialized before use." raise RuntimeError(msg) @staticmethod def shutdown(): """ Shutdown the NRT Safe to be called without calling Runtime.initialize first """ _nrt.memsys_shutdown() @property def library(self): """ Return the Library object containing the various NRT functions. """ self._init_guard() return self._library def meminfo_new(self, data, pyobj): """ Returns a MemInfo object that tracks memory at `data` owned by `pyobj`. MemInfo will acquire a reference on `pyobj`. The release of MemInfo will release a reference on `pyobj`. 
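
        A minimal sketch (``addr`` and ``owner`` are hypothetical):

            mi = rtsys.meminfo_new(addr, owner)  # acquires a ref on owner
            del mi                               # the ref on owner is
                                                 # released once the
                                                 # MemInfo is finalized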
""" self._init_guard() mi = _nrt.meminfo_new(data, pyobj) return MemInfo(mi) def meminfo_alloc(self, size, safe=False): """ Allocate a new memory of `size` bytes and returns a MemInfo object that tracks the allocation. When there is no more reference to the MemInfo object, the underlying memory will be deallocated. If `safe` flag is True, the memory is allocated using the `safe` scheme. This is used for debugging and testing purposes. See `NRT_MemInfo_alloc_safe()` in "nrt.h" for details. """ self._init_guard() if safe: mi = _nrt.meminfo_alloc_safe(size) else: mi = _nrt.meminfo_alloc(size) return MemInfo(mi) def get_allocation_stats(self): """ Returns a namedtuple of (alloc, free, mi_alloc, mi_free) for count of each memory operations. """ # No init guard needed to access stats members return _nrt_mstats(alloc=_nrt.memsys_get_stats_alloc(), free=_nrt.memsys_get_stats_free(), mi_alloc=_nrt.memsys_get_stats_mi_alloc(), mi_free=_nrt.memsys_get_stats_mi_free()) # Alias to _nrt_python._MemInfo MemInfo = _nrt._MemInfo @typeof_impl.register(MemInfo) def typeof_meminfo(val, c): return types.MemInfoPointer(types.voidptr) # Create runtime _nrt.memsys_use_cpython_allocator() rtsys = _Runtime() # Install finalizer _finalize(rtsys, _Runtime.shutdown) # Avoid future use of the class del _Runtime numba-0.55.1/numba/core/runtime/nrt_external.h000664 000000 000000 00000003075 14174536160 021273 0ustar00rootroot000000 000000 #ifndef NUMBA_NRT_EXTERNAL_H_ #define NUMBA_NRT_EXTERNAL_H_ #include typedef struct MemInfo NRT_MemInfo; typedef void NRT_managed_dtor(void *data); typedef void *(*NRT_external_malloc_func)(size_t size, void *opaque_data); typedef void *(*NRT_external_realloc_func)(void *ptr, size_t new_size, void *opaque_data); typedef void (*NRT_external_free_func)(void *ptr, void *opaque_data); struct ExternalMemAllocator { NRT_external_malloc_func malloc; NRT_external_realloc_func realloc; NRT_external_free_func free; void *opaque_data; }; typedef struct ExternalMemAllocator NRT_ExternalAllocator; typedef struct { /* Methods to create MemInfos. MemInfos are like smart pointers for objects that are managed by the Numba. */ /* Allocate memory *nbytes* is the number of bytes to be allocated Returning a new reference. */ NRT_MemInfo* (*allocate)(size_t nbytes); /* Allocates memory using an external allocator but still using Numba's MemInfo. */ NRT_MemInfo* (*allocate_external)(size_t nbytes, NRT_ExternalAllocator *allocator); /* Convert externally allocated memory into a MemInfo. 
*data* is the memory pointer *dtor* is the deallocator of the memory */ NRT_MemInfo* (*manage_memory)(void *data, NRT_managed_dtor dtor); /* Acquire a reference */ void (*acquire)(NRT_MemInfo* mi); /* Release a reference */ void (*release)(NRT_MemInfo* mi); /* Get MemInfo data pointer */ void* (*get_data)(NRT_MemInfo* mi); } NRT_api_functions; #endif /* NUMBA_NRT_EXTERNAL_H_ */ numba-0.55.1/numba/core/runtime/nrtdynmod.py000664 000000 000000 00000016502 14174536160 021004 0ustar00rootroot000000 000000 """ Dynamically generate the NRT module """ from numba.core import config from numba.core import types, cgutils from llvmlite import ir, binding _word_type = ir.IntType(config.MACHINE_BITS) _pointer_type = ir.PointerType(ir.IntType(8)) _meminfo_struct_type = ir.LiteralStructType([ _word_type, # size_t refct _pointer_type, # dtor_function dtor _pointer_type, # void *dtor_info _pointer_type, # void *data _word_type, # size_t size ]) incref_decref_ty = ir.FunctionType(ir.VoidType(), [_pointer_type]) meminfo_data_ty = ir.FunctionType(_pointer_type, [_pointer_type]) def _define_nrt_meminfo_data(module): """ Implement NRT_MemInfo_data_fast in the module. This allows LLVM to inline lookup of the data pointer. """ fn = cgutils.get_or_insert_function(module, meminfo_data_ty, "NRT_MemInfo_data_fast") builder = ir.IRBuilder(fn.append_basic_block()) [ptr] = fn.args struct_ptr = builder.bitcast(ptr, _meminfo_struct_type.as_pointer()) data_ptr = builder.load(cgutils.gep(builder, struct_ptr, 0, 3)) builder.ret(data_ptr) def _define_nrt_incref(module, atomic_incr): """ Implement NRT_incref in the module """ fn_incref = cgutils.get_or_insert_function(module, incref_decref_ty, "NRT_incref") # Cannot inline this for refcount pruning to work fn_incref.attributes.add('noinline') builder = ir.IRBuilder(fn_incref.append_basic_block()) [ptr] = fn_incref.args is_null = builder.icmp_unsigned("==", ptr, cgutils.get_null_value(ptr.type)) with cgutils.if_unlikely(builder, is_null): builder.ret_void() word_ptr = builder.bitcast(ptr, atomic_incr.args[0].type) if config.DEBUG_NRT: cgutils.printf(builder, "*** NRT_Incref %zu [%p]\n", builder.load(word_ptr), ptr) builder.call(atomic_incr, [word_ptr]) builder.ret_void() def _define_nrt_decref(module, atomic_decr): """ Implement NRT_decref in the module """ fn_decref = cgutils.get_or_insert_function(module, incref_decref_ty, "NRT_decref") # Cannot inline this for refcount pruning to work fn_decref.attributes.add('noinline') calldtor = ir.Function(module, ir.FunctionType(ir.VoidType(), [_pointer_type]), name="NRT_MemInfo_call_dtor") builder = ir.IRBuilder(fn_decref.append_basic_block()) [ptr] = fn_decref.args is_null = builder.icmp_unsigned("==", ptr, cgutils.get_null_value(ptr.type)) with cgutils.if_unlikely(builder, is_null): builder.ret_void() # For memory fence usage, see https://llvm.org/docs/Atomics.html # A release fence is used before the relevant write operation. # No-op on x86. On POWER, it lowers to lwsync. builder.fence("release") word_ptr = builder.bitcast(ptr, atomic_decr.args[0].type) if config.DEBUG_NRT: cgutils.printf(builder, "*** NRT_Decref %zu [%p]\n", builder.load(word_ptr), ptr) newrefct = builder.call(atomic_decr, [word_ptr]) refct_eq_0 = builder.icmp_unsigned("==", newrefct, ir.Constant(newrefct.type, 0)) with cgutils.if_unlikely(builder, refct_eq_0): # An acquire fence is used after the relevant read operation. # No-op on x86. On POWER, it lowers to lwsync. 
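        # (The acquire fence below pairs with the release fence above: it
        # ensures that writes made by other threads before their final
        # decref are visible to the destructor invoked here.)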
builder.fence("acquire") builder.call(calldtor, [ptr]) builder.ret_void() # Set this to True to measure the overhead of atomic refcounts compared # to non-atomic. _disable_atomicity = 0 def _define_atomic_inc_dec(module, op, ordering): """Define a llvm function for atomic increment/decrement to the given module Argument ``op`` is the operation "add"/"sub". Argument ``ordering`` is the memory ordering. The generated function returns the new value. """ ftype = ir.FunctionType(_word_type, [_word_type.as_pointer()]) fn_atomic = ir.Function(module, ftype, name="nrt_atomic_{0}".format(op)) [ptr] = fn_atomic.args bb = fn_atomic.append_basic_block() builder = ir.IRBuilder(bb) ONE = ir.Constant(_word_type, 1) if not _disable_atomicity: oldval = builder.atomic_rmw(op, ptr, ONE, ordering=ordering) # Perform the operation on the old value so that we can pretend returning # the "new" value. res = getattr(builder, op)(oldval, ONE) builder.ret(res) else: oldval = builder.load(ptr) newval = getattr(builder, op)(oldval, ONE) builder.store(newval, ptr) builder.ret(oldval) return fn_atomic def _define_atomic_cas(module, ordering): """Define a llvm function for atomic compare-and-swap. The generated function is a direct wrapper of the LLVM cmpxchg with the difference that the a int indicate success (1) or failure (0) is returned and the last argument is a output pointer for storing the old value. Note ---- On failure, the generated function behaves like an atomic load. The loaded value is stored to the last argument. """ ftype = ir.FunctionType(ir.IntType(32), [_word_type.as_pointer(), _word_type, _word_type, _word_type.as_pointer()]) fn_cas = ir.Function(module, ftype, name="nrt_atomic_cas") [ptr, cmp, repl, oldptr] = fn_cas.args bb = fn_cas.append_basic_block() builder = ir.IRBuilder(bb) outtup = builder.cmpxchg(ptr, cmp, repl, ordering=ordering) old, ok = cgutils.unpack_tuple(builder, outtup, 2) builder.store(old, oldptr) builder.ret(builder.zext(ok, ftype.return_type)) return fn_cas def _define_nrt_unresolved_abort(ctx, module): """ Defines an abort function due to unresolved symbol. The function takes no args and will always raise an exception. It should be safe to call this function with incorrect number of arguments. """ fnty = ctx.call_conv.get_function_type(types.none, ()) fn = ir.Function(module, fnty, name="nrt_unresolved_abort") bb = fn.append_basic_block() builder = ir.IRBuilder(bb) msg = "numba jitted function aborted due to unresolved symbol" ctx.call_conv.return_user_exc(builder, RuntimeError, (msg,)) return fn def create_nrt_module(ctx): """ Create an IR module defining the LLVM NRT functions. A (IR module, library) tuple is returned. """ codegen = ctx.codegen() library = codegen.create_library("nrt") # Implement LLVM module with atomic ops ir_mod = library.create_ir_module("nrt_module") atomic_inc = _define_atomic_inc_dec(ir_mod, "add", ordering='monotonic') atomic_dec = _define_atomic_inc_dec(ir_mod, "sub", ordering='monotonic') _define_atomic_cas(ir_mod, ordering='monotonic') _define_nrt_meminfo_data(ir_mod) _define_nrt_incref(ir_mod, atomic_inc) _define_nrt_decref(ir_mod, atomic_dec) _define_nrt_unresolved_abort(ctx, ir_mod) return ir_mod, library def compile_nrt_functions(ctx): """ Compile all LLVM NRT functions and return a library containing them. The library is created using the given target context. 
""" ir_mod, library = create_nrt_module(ctx) library.add_ir_module(ir_mod) library.finalize() return library numba-0.55.1/numba/core/runtime/nrtopt.py000664 000000 000000 00000013310 14174536160 020306 0ustar00rootroot000000 000000 """ NRT specific optimizations """ import re from collections import defaultdict, deque from llvmlite import binding as ll from numba.core import cgutils _regex_incref = re.compile(r'\s*(?:tail)?\s*call void @NRT_incref\((.*)\)') _regex_decref = re.compile(r'\s*(?:tail)?\s*call void @NRT_decref\((.*)\)') _regex_bb = re.compile( r'|'.join([ # unamed BB is just a plain number r'[0-9]+:', # with a proper identifer (see llvm langref) r'[\'"]?[-a-zA-Z$._0-9][-a-zA-Z$._0-9]*[\'"]?:', # is a start of a function definition r'^define', # no name r'^;\s*