pax_global_header00006660000000000000000000000064132036473450014521gustar00rootroot0000000000000052 comment=c61a0ae67e773c768e8ac714059ac71fec869186 dask-0.16.0/000077500000000000000000000000001320364734500125275ustar00rootroot00000000000000dask-0.16.0/.coveragerc000066400000000000000000000001371320364734500146510ustar00rootroot00000000000000[run] omit = */test_*.py dask/compatibility.py dask/_version.py source = dask dask-0.16.0/.github/000077500000000000000000000000001320364734500140675ustar00rootroot00000000000000dask-0.16.0/.github/CONTRIBUTING.md000066400000000000000000000001551320364734500163210ustar00rootroot00000000000000See [developer documentation](http://dask.pydata.org/en/latest/develop.html) for tips on how to get started. dask-0.16.0/.github/ISSUE_TEMPLATE.md000066400000000000000000000020311320364734500165700ustar00rootroot00000000000000Thank you for reporting an issue. Please read the following guidelines to ensure that your issue is handled properly. 1. If you have a general usage question like *"How do I filter a dask.dataframe?"* then please consider asking the question on StackOverflow instead using the #Dask tag 2. If you have found a bug, then please include a self-contained copy-pastable example that generates the issue if possible. 3. Please also include an exception and full traceback of the error if available. 4. If you have an issue with fastparquet, the distributed system, s3fs, etc. then please consider submitting a report to their issue trackers instead. A full list of repositories can be found here: [https://github.com/dask](https://github.com/dask) 5. Please include the versions of Python, dask, and other relevant libraries if they are applicable, (pandas, dask.distributed, etc..) Thank you again for your efforts. [How to create a Minimal, Complete, and Verifiable example](http://stackoverflow.com/help/mcve) dask-0.16.0/.github/PULL_REQUEST_TEMPLATE.md000066400000000000000000000003031320364734500176640ustar00rootroot00000000000000- [ ] Tests added / passed - [ ] Passes `flake8 dask` - [ ] Fully documented, including `docs/source/changelog.rst` for all changes and one of the `docs/source/*-api.rst` files for new API dask-0.16.0/.gitignore000066400000000000000000000001421320364734500145140ustar00rootroot00000000000000*.pyc *.egg-info docs/build build/ dist/ .idea/ log.* log .coverage .DS_Store *.swp *.swo .cache/ dask-0.16.0/.travis.yml000066400000000000000000000031361320364734500146430ustar00rootroot00000000000000language: generic sudo: false dist: trusty os: linux _base_envs: - &coverage COVERAGE='true' PARALLEL='false' - &no_coverage COVERAGE='false' PARALLEL='true' - &optimize PYTHONOPTIMIZE=2 XTRATESTARGS=--ignore=dask/diagnostics - &no_optimize XTRATESTARGS= - &imports TEST_IMPORTS='true' - &no_imports TEST_IMPORTS='false' jobs: fast_finish: true include: - env: - PYTHON=2.7 - NUMPY=1.12.1 - PANDAS=0.19.2 - *coverage - *no_optimize - *no_imports - env: - PYTHON=2.7 - NUMPY=1.13.0 - PANDAS=0.20.2 - *no_coverage - *optimize - *no_imports - env: - PYTHON=3.4 - NUMPY=1.10.4 - PANDAS=0.19.1 - *no_coverage - *optimize - *no_imports if: type != pull_request - env: - PYTHON=3.5 - NUMPY=1.12.1 - PANDAS=0.19.2 - *no_coverage - *no_optimize - *no_imports - env: &py36_env - PYTHON=3.6 - UPSTREAM_DEV=1 # Install nightly versions of NumPy and pandas - NUMPY=1.13.0 # these are overridden later - PANDAS=0.20.3 - *no_coverage - *no_optimize - *imports - env: *py36_env if: type != pull_request os: osx allow_failures: - os: osx install: - source 
continuous_integration/travis/install.sh script: - source continuous_integration/travis/run_tests.sh - flake8 dask - if [[ $TEST_IMPORTS == 'true' ]]; then source continuous_integration/travis/test_imports.sh; fi after_success: - source continuous_integration/travis/after_success.sh notifications: email: false dask-0.16.0/AUTHORS.md000066400000000000000000000053631320364734500142050ustar00rootroot00000000000000[Arve](http://arve0.github.io/) [Bartosz Telenczuk](https://github.com/btel) [Benjamin Regan-Kelley](https://github.com/minrk) [Blake Griffith](http://github.com/cowlicks) [Bradley McElroy](https://github.com/limx0) [Carlos De la Guardia](https://github.com/dela3499) [Casey Clements](https://github.com/caseyclements) [Christine Doig](https://github.com/chdoig) [Clark Fitzgerald](https://github.com/clarkfitzg) [Daniel Davis](https://github.com/wabu) [Erik Welch](http://github.com/eriknw/) [Erkka Rinne](https://github.com/ererkka) [Eyad Sibai](https://github.com/eyadsibai) [Gabriele Lanaro](http://gabrielelanaro.github.io/) [Hussain Sultan](https://github.com/hussainsultan) [Isaac Slaavitt](https://github.com/isms) [Israel Saeta Perez](https://github.com/dukebody) [jgoutin](https://github.com/JGoutin) [Jim Crist](http://jcrist.github.io/) [John Kirkham](https://github.com/jakirkham) [Jon Renner](https://github.com/jrenner/) [Joshua Corbin](https://github.com/jcorbin) [jslmann](https://github.com/jslmann) [Kristopher Overholt](https://github.com/koverholt) [Kurt Smith](https://github.com/kwmsmith) [Linwood Creekmore](https://github.com/linwoodc3) [Mariano Tepper](http://www.marianotepper.com.ar/) [Matthew Rocklin](http://matthewrocklin.com/) [Max Hutchinson](https://github.com/maxhutch) [Michael Heilman](https://github.com/mheilman) [Mike Graham](https://github.com/mikegraham) [Nicholaus Jackson](https://github.com/NoonienSoong/) [Nir](https://github.com/nirizr) [Olivier Grisel](http://ogrisel.com/) [Pedro Duarte](https://github.com/PedroMDuarte) [Peter Steinberg](https://github.com/PeterDSteinberg/) [Peter Quackenbush](https://github.com/thequackdaddy/) [Phillip Cloud](https://github.com/cpcloud) [Phil Elson](https://github.com/pelson) [Phillip Wolfram](https://github.com/pwolfram) [Rami Chowdhury](https://github.com/necaris) [Rich Postelnik](https://github.com/postelrich) [Rolando Espinoza](https://github.com/rolando) [Ruggero Turra](https://github.com/wiso) [Scott Sanderson](https://github.com/ssanderson) [Sigurd Spieckermann](https://github.com/sisp) [sinhrks](https://github.com/sinhrks) [Skipper Seabold](https://github.com/jseabold) [Stefan Seefeld](https://github.com/stefanseefeld) [Stephan Hoyer](http://stephanhoyer.com) [Stuart Owen](http://stuartowen.com/) [Thomas Smith](https://github.com/tgs) [Tom Augspurger](https://github.com/TomAugspurger) [Tomas Ostasevicius](https://github.com/to266) [Travis Oliphant](https://github.com/teoliphant) [Valentin Haenel](http://haenel.co/) [Vikhyat Korrapati](http://vikhyat.net/) [Vlad Frolov](https://github.com/frol) [Wesley Emeneker](http://github.com/nevermindewe/) [wikiped](https://github.com/wikiped) [Will Warner](https://github.com/electronwill/) dask-0.16.0/LICENSE.txt000066400000000000000000000027201320364734500143530ustar00rootroot00000000000000Copyright (c) 2014-2017, Anaconda, Inc. and contributors All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. Neither the name of Anaconda nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. dask-0.16.0/MANIFEST.in000066400000000000000000000003351320364734500142660ustar00rootroot00000000000000recursive-include dask *.py recursive-include docs/source * include docs/Makefile docs/make.bat include setup.py include README.rst include LICENSE.txt include MANIFEST.in include versioneer.py include dask/_version.py dask-0.16.0/README.rst000066400000000000000000000021141320364734500142140ustar00rootroot00000000000000Dask ==== |Build Status| |Coverage| |Doc Status| |Gitter| |Version Status| Dask is a flexible parallel computing library for analytics. See documentation_ for more information. LICENSE ------- New BSD. See `License File `__. .. _documentation: http://dask.pydata.org/en/latest/ .. |Build Status| image:: https://travis-ci.org/dask/dask.svg?branch=master :target: https://travis-ci.org/dask/dask .. |Coverage| image:: https://coveralls.io/repos/dask/dask/badge.svg :target: https://coveralls.io/r/dask/dask :alt: Coverage status .. |Doc Status| image:: http://readthedocs.org/projects/dask/badge/?version=latest :target: http://dask.pydata.org/en/latest/ :alt: Documentation Status .. |Gitter| image:: https://badges.gitter.im/Join%20Chat.svg :alt: Join the chat at https://gitter.im/dask/dask :target: https://gitter.im/dask/dask?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge .. 
|Version Status| image:: https://img.shields.io/pypi/v/dask.svg :target: https://pypi.python.org/pypi/dask/ dask-0.16.0/appveyor.yml000066400000000000000000000024761320364734500151300ustar00rootroot00000000000000# Environment loosely based on https://github.com/conda/conda/blob/master/appveyor.yml environment: global: # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the # /E:ON and /V:ON options are not enabled in the batch script intepreter # See: http://stackoverflow.com/a/13751649/163740 CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\continuous_integration\\appveyor\\run_with_env.cmd" matrix: # Since appveyor is quite slow, we only use a single configuration - PYTHON: "3.5" ARCH: "64" NUMPY: "1.12.1" PANDAS: "0.19.2" CONDA_ENV: testenv init: # Use AppVeyor's provided Miniconda: https://www.appveyor.com/docs/installed-software#python - if "%ARCH%" == "64" set MINICONDA=C:\Miniconda35-x64 - if "%ARCH%" == "32" set MINICONDA=C:\Miniconda35 - set PATH=%MINICONDA%;%MINICONDA%/Scripts;%MINICONDA%/Library/bin;%PATH% install: # Update to a known good conda # (to workaround http://help.appveyor.com/discussions/problems/4910) - conda install -q -y conda=4.2.9 - continuous_integration\\appveyor\\setup_conda_environment.cmd build_script: - continuous_integration\\appveyor\\build.cmd test_script: # %CMD_IN_ENV% is needed for distutils/setuptools-based tests # on certain build configurations. - "%CMD_IN_ENV% continuous_integration\\appveyor\\run_tests.cmd" dask-0.16.0/conftest.py000066400000000000000000000005771320364734500147370ustar00rootroot00000000000000import pytest def pytest_addoption(parser): parser.addoption("--runslow", action="store_true", help="run slow tests") def pytest_runtest_setup(item): if 'slow' in item.keywords and not item.config.getoption("--runslow"): pytest.skip("need --runslow option to run") def pytest_ignore_collect(path, config): if 'run_test.py' in str(path): return True dask-0.16.0/continuous_integration/000077500000000000000000000000001320364734500173405ustar00rootroot00000000000000dask-0.16.0/continuous_integration/appveyor/000077500000000000000000000000001320364734500212055ustar00rootroot00000000000000dask-0.16.0/continuous_integration/appveyor/build.cmd000066400000000000000000000001361320364734500227710ustar00rootroot00000000000000call activate %CONDA_ENV% @echo on @rem Install Dask %PIP_INSTALL% --no-deps -e .[complete] dask-0.16.0/continuous_integration/appveyor/run_tests.cmd000066400000000000000000000003671320364734500237260ustar00rootroot00000000000000call activate %CONDA_ENV% @echo on set PYTHONFAULTHANDLER=1 @rem `--capture=sys` avoids clobbering faulthandler tracebacks on crash set PYTEST=py.test --capture=sys @rem %PYTEST% -v --runslow dask\dataframe\tests\test_groupby.py %PYTEST% dask dask-0.16.0/continuous_integration/appveyor/run_with_env.cmd000066400000000000000000000064721320364734500244120ustar00rootroot00000000000000:: From https://github.com/ogrisel/python-appveyor-demo :: :: To build extensions for 64 bit Python 3, we need to configure environment :: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: :: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) :: :: To build extensions for 64 bit Python 2, we need to configure environment :: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: :: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) :: :: 32 bit builds, and 64-bit builds for 3.5 and beyond, do not require specific :: environment configurations. 
:: :: Note: this script needs to be run with the /E:ON and /V:ON flags for the :: cmd interpreter, at least for (SDK v7.0) :: :: More details at: :: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows :: http://stackoverflow.com/a/13751649/163740 :: :: Author: Olivier Grisel :: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ :: :: Notes about batch files for Python people: :: :: Quotes in values are literally part of the values: :: SET FOO="bar" :: FOO is now five characters long: " b a r " :: If you don't want quotes, don't include them on the right-hand side. :: :: The CALL lines at the end of this file look redundant, but if you move them :: outside of the IF clauses, they do not run properly in the SET_SDK_64==Y :: case, I don't know why. @ECHO OFF SET COMMAND_TO_RUN=%* SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows SET WIN_WDK=c:\Program Files (x86)\Windows Kits\10\Include\wdf :: Extract the major and minor versions, and allow for the minor version to be :: more than 9. This requires the version number to have two dots in it. SET MAJOR_PYTHON_VERSION=%PYTHON:~0,1% IF "%PYTHON:~3,1%" == "." ( SET MINOR_PYTHON_VERSION=%PYTHON:~2,1% ) ELSE ( SET MINOR_PYTHON_VERSION=%PYTHON:~2,2% ) :: Based on the Python version, determine what SDK version to use, and whether :: to set the SDK for 64-bit. IF %MAJOR_PYTHON_VERSION% == 2 ( SET WINDOWS_SDK_VERSION="v7.0" SET SET_SDK_64=Y ) ELSE ( IF %MAJOR_PYTHON_VERSION% == 3 ( SET WINDOWS_SDK_VERSION="v7.1" IF %MINOR_PYTHON_VERSION% LEQ 4 ( SET SET_SDK_64=Y ) ELSE ( SET SET_SDK_64=N IF EXIST "%WIN_WDK%" ( :: See: https://connect.microsoft.com/VisualStudio/feedback/details/1610302/ REN "%WIN_WDK%" 0wdf ) ) ) ELSE ( ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%" EXIT 1 ) ) IF %ARCH% == 64 ( IF %SET_SDK_64% == Y ( ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture SET DISTUTILS_USE_SDK=1 SET MSSdk=1 "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release ECHO Executing: %COMMAND_TO_RUN% call %COMMAND_TO_RUN% || EXIT 1 ) ELSE ( ECHO Using default MSVC build environment for 64 bit architecture ECHO Executing: %COMMAND_TO_RUN% call %COMMAND_TO_RUN% || EXIT 1 ) ) ELSE ( ECHO Using default MSVC build environment for 32 bit architecture ECHO Executing: %COMMAND_TO_RUN% call %COMMAND_TO_RUN% || EXIT 1 ) dask-0.16.0/continuous_integration/appveyor/setup_conda_environment.cmd000066400000000000000000000027211320364734500266240ustar00rootroot00000000000000@rem The cmd /C hack circumvents a regression where conda installs a conda.bat @rem script in non-root environments. set CONDA=cmd /C conda set CONDA_INSTALL=%CONDA% install -q -y set PIP_INSTALL=pip install -q @echo on @rem Deactivate any environment call deactivate @rem Display root environment (for debugging) conda list @rem Clean up any left-over from a previous build conda remove --all -q -y -n %CONDA_ENV% @rem Create test environment @rem (note: no cytoolz as it seems to prevent faulthandler tracebacks on crash) conda create -n %CONDA_ENV% -q -y python=%PYTHON% pytest toolz call activate %CONDA_ENV% @rem Pin matrix items @rem Please see PR ( https://github.com/dask/dask/pull/2185 ) for details. 
copy NUL %CONDA_PREFIX%\conda-meta\pinned echo numpy %NUMPY% >> %CONDA_PREFIX%\conda-meta\pinned echo pandas %PANDAS% >> %CONDA_PREFIX%\conda-meta\pinned @rem Install optional dependencies for tests %CONDA_INSTALL% numpy pandas cloudpickle distributed %CONDA_INSTALL% s3fs psutil pytables bokeh bcolz scipy h5py ipython %PIP_INSTALL% git+https://github.com/dask/partd --upgrade %PIP_INSTALL% git+https://github.com/dask/cachey --upgrade %PIP_INSTALL% git+https://github.com/dask/distributed --upgrade %PIP_INSTALL% git+https://github.com/mrocklin/sparse --upgrade %PIP_INSTALL% blosc --upgrade %PIP_INSTALL% moto if %PYTHON% LSS 3.0 (%PIP_INSTALL% backports.lzma mock) @rem Display final environment (for reproducing) %CONDA% list %CONDA% list --explicit python -m site dask-0.16.0/continuous_integration/travis/000077500000000000000000000000001320364734500206505ustar00rootroot00000000000000dask-0.16.0/continuous_integration/travis/after_success.sh000066400000000000000000000001611320364734500240330ustar00rootroot00000000000000if [[ $COVERAGE == 'true' ]]; then coverage report --show-missing pip install coveralls coveralls fi dask-0.16.0/continuous_integration/travis/install.sh000066400000000000000000000050131320364734500226510ustar00rootroot00000000000000# Install conda case "$(uname -s)" in 'Darwin') MINICONDA_FILENAME="Miniconda3-4.3.21-MacOSX-x86_64.sh" ;; 'Linux') MINICONDA_FILENAME="Miniconda3-4.3.21-Linux-x86_64.sh" ;; *) ;; esac wget https://repo.continuum.io/miniconda/$MINICONDA_FILENAME -O miniconda.sh bash miniconda.sh -b -p $HOME/miniconda export PATH="$HOME/miniconda/bin:$PATH" conda config --set always_yes yes --set changeps1 no # Create conda environment conda create -q -n test-environment python=$PYTHON source activate test-environment # Pin matrix items # Please see PR ( https://github.com/dask/dask/pull/2185 ) for details. touch $CONDA_PREFIX/conda-meta/pinned if ! [[ ${UPSTREAM_DEV} ]]; then echo "Pinning NumPy $NUMPY, pandas $PANDAS" echo "numpy $NUMPY" >> $CONDA_PREFIX/conda-meta/pinned echo "pandas $PANDAS" >> $CONDA_PREFIX/conda-meta/pinned fi; # Install dependencies. 
conda install -q -c conda-forge \ numpy \ pandas \ bcolz \ blosc \ bokeh \ boto3 \ chest \ cloudpickle \ coverage \ cytoolz \ distributed \ graphviz \ h5py \ ipython \ partd \ psutil \ "pytest<=3.1.1" \ scikit-image \ scikit-learn \ scipy \ sqlalchemy \ toolz if [[ ${UPSTREAM_DEV} ]]; then echo "Installing NumPy and Pandas dev" conda uninstall -y --force numpy pandas PRE_WHEELS="https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com" pip install -q --pre --no-deps --upgrade --timeout=60 -f $PRE_WHEELS numpy pandas fi; # install pytables from defaults for now conda install -q pytables pip install -q --upgrade --no-deps git+https://github.com/dask/partd pip install -q --upgrade --no-deps git+https://github.com/dask/zict pip install -q --upgrade --no-deps git+https://github.com/dask/distributed pip install -q --upgrade --no-deps git+https://github.com/mrocklin/sparse pip install -q --upgrade --no-deps git+https://github.com/dask/s3fs if [[ $PYTHONOPTIMIZE != '2' ]] && [[ $NUMPY > '1.11.0' ]]; then conda install -q -c conda-forge fastparquet python-snappy pip install -q --no-deps git+https://github.com/dask/fastparquet fi if [[ $PYTHON == '2.7' ]]; then pip install -q --no-deps backports.lzma mock fi pip install -q --upgrade --no-deps \ cachey \ graphviz \ pyarrow \ pandas_datareader pip install -q --upgrade \ cityhash \ flake8 \ moto \ mmh3 \ pytest-xdist \ xxhash # Install dask pip install -q --no-deps -e .[complete] echo conda list conda list dask-0.16.0/continuous_integration/travis/run_tests.sh000066400000000000000000000011271320364734500232330ustar00rootroot00000000000000# Need to make test order deterministic when parallelizing tests, hence PYTHONHASHSEED # (see https://github.com/pytest-dev/pytest-xdist/issues/63) if [[ $PARALLEL == 'true' ]]; then export XTRATESTARGS="-n3 $XTRATESTARGS" export PYTHONHASHSEED=42 fi if [[ $COVERAGE == 'true' ]]; then echo "coverage run `which py.test` dask --runslow --doctest-modules --verbose $XTRATESTARGS" coverage run `which py.test` dask --runslow --doctest-modules --verbose $XTRATESTARGS else echo "py.test dask --runslow --verbose $XTRATESTARGS" py.test dask --runslow --verbose $XTRATESTARGS fi dask-0.16.0/continuous_integration/travis/test_imports.sh000066400000000000000000000020441320364734500237400ustar00rootroot00000000000000test_import () { # Install dependencies if [[ -n "$2" ]]; then output=$(conda install -c conda-forge $2) if [[ $? -eq 1 ]]; then echo $output echo "$1 install failed" >&2 exit 1 fi fi # Check import python -c "$3" if [[ $? 
-eq 1 ]]; then echo "$1 import failed" >&2 exit 1 else echo "$1 import succeeded" fi # Uninstall dependencies if [[ -n "$2" ]]; then output=$(conda uninstall $2) fi } # Create an empty environment conda create -n test-imports python=$PYTHON source activate test-imports (test_import "Core" "" "import dask, dask.threaded, dask.optimize") && \ (test_import "Delayed" "toolz" "import dask.delayed") && \ (test_import "Bag" "toolz partd cloudpickle" "import dask.bag") && \ (test_import "Array" "toolz numpy" "import dask.array") && \ (test_import "Dataframe" "numpy pandas toolz partd cloudpickle" "import dask.dataframe") && \ (test_import "Distributed" "distributed s3fs" "import dask.distributed") dask-0.16.0/dask/000077500000000000000000000000001320364734500134515ustar00rootroot00000000000000dask-0.16.0/dask/__init__.py000066400000000000000000000007641320364734500155710ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from .core import istask from .context import set_options from .local import get_sync as get try: from .delayed import delayed except ImportError: pass try: from .base import visualize, compute, persist, is_dask_collection except ImportError: pass from ._version import get_versions versions = get_versions() __version__ = versions['version'] __git_revision__ = versions['full-revisionid'] del get_versions, versions dask-0.16.0/dask/_version.py000066400000000000000000000405451320364734500156570ustar00rootroot00000000000000 # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. # This file is released into the public domain. Generated by # versioneer-0.16 (https://github.com/warner/python-versioneer) """Git implementation of _version.py.""" import errno import os import re import subprocess import sys def get_keywords(): """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). 
git_refnames = "$Format:%d$" git_full = "$Format:%H$" keywords = {"refnames": git_refnames, "full": git_full} return keywords class VersioneerConfig: """Container for Versioneer configuration parameters.""" def get_config(): """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py cfg = VersioneerConfig() cfg.VCS = "git" cfg.style = "pep440" cfg.tag_prefix = "" cfg.parentdir_prefix = "dask-" cfg.versionfile_source = "dask/_version.py" cfg.verbose = False return cfg class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" LONG_VERSION_PY = {} HANDLERS = {} def register_vcs_handler(vcs, method): # decorator """Decorator to mark a method as the handler for a particular VCS.""" def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): """Call the given command(s).""" assert isinstance(commands, list) p = None for c in commands: try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None)) break except EnvironmentError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue if verbose: print("unable to run %s" % dispcmd) print(e) return None else: if verbose: print("unable to find command, tried %s" % (commands,)) return None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: stdout = stdout.decode() if p.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) return None return stdout def versions_from_parentdir(parentdir_prefix, root, verbose): """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both the project name and a version string. """ dirname = os.path.basename(root) if not dirname.startswith(parentdir_prefix): if verbose: print("guessing rootdir is '%s', but '%s' doesn't start with " "prefix '%s'" % (root, dirname, parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None} @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs): """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. 
keywords = {} try: f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) f.close() except EnvironmentError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords(keywords, tag_prefix, verbose): """Get version information from git keywords.""" if not keywords: raise NotThisMethod("no keywords at all, weird") refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = set([r.strip() for r in refnames.strip("()").split(",")]) # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = set([r for r in refs if re.search(r'\d', r)]) if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: print("likely tags: %s" % ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] if verbose: print("picking %s" % r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags"} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. """ if not os.path.exists(os.path.join(root, ".git")): if verbose: print("no .git in %s" % root) raise NotThisMethod("no .git directory") GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out = run_command(GITS, ["describe", "--tags", "--dirty", "--always", "--long", "--match", "%s*" % tag_prefix], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None # parse describe_out. 
It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? pieces["error"] = ("unable to parse git-describe output: '%s'" % describe_out) return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits return pieces def plus_or_dot(pieces): """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" def render_pep440(pieces): """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty Exceptions: 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_pre(pieces): """TAG[.post.devDISTANCE] -- No -dirty. Exceptions: 1: no tags. 0.post.devDISTANCE """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += ".post.dev%d" % pieces["distance"] else: # exception #1 rendered = "0.post.dev%d" % pieces["distance"] return rendered def render_pep440_post(pieces): """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards (a dirty tree will appear "older" than the corresponding clean one), but you shouldn't be releasing software with -dirty anyways. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%s" % pieces["short"] else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += "+g%s" % pieces["short"] return rendered def render_pep440_old(pieces): """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. Eexceptions: 1: no tags. 
0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" return rendered def render_git_describe(pieces): """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render_git_describe_long(pieces): """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. The distance/hash is unconditional. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", "full-revisionid": pieces.get("long"), "dirty": None, "error": pieces["error"]} if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": rendered = render_git_describe(pieces) elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: raise ValueError("unknown style '%s'" % style) return {"version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None} def get_versions(): """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which # case we can only use expanded keywords. cfg = get_config() verbose = cfg.verbose try: return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass try: root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. 
for i in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to find root of source tree"} try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) return render(pieces, cfg.style) except NotThisMethod: pass try: if cfg.parentdir_prefix: return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) except NotThisMethod: pass return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to compute version"} dask-0.16.0/dask/array/000077500000000000000000000000001320364734500145675ustar00rootroot00000000000000dask-0.16.0/dask/array/NUMPY_LICENSE.txt000066400000000000000000000030071320364734500174020ustar00rootroot00000000000000Copyright (c) 2005-2015, NumPy Developers. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the NumPy Developers nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
dask-0.16.0/dask/array/__init__.py000066400000000000000000000050621320364734500167030ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from ..utils import ignoring from .core import (Array, concatenate, stack, from_array, store, map_blocks, atop, to_hdf5, to_npy_stack, from_npy_stack, from_delayed, asarray, asanyarray, broadcast_to) from .routines import (take, choose, argwhere, where, coarsen, insert, ravel, roll, unique, squeeze, topk, ptp, diff, ediff1d, bincount, digitize, histogram, cov, array, dstack, vstack, hstack, compress, extract, round, count_nonzero, flatnonzero, nonzero, around, isnull, notnull, isclose, allclose, corrcoef, swapaxes, tensordot, transpose, dot, apply_along_axis, apply_over_axes, result_type, atleast_1d, atleast_2d, atleast_3d) from .reshape import reshape from .ufunc import (add, subtract, multiply, divide, logaddexp, logaddexp2, true_divide, floor_divide, negative, power, remainder, mod, conj, exp, exp2, log, log2, log10, log1p, expm1, sqrt, square, cbrt, reciprocal, sin, cos, tan, arcsin, arccos, arctan, arctan2, hypot, sinh, cosh, tanh, arcsinh, arccosh, arctanh, deg2rad, rad2deg, greater, greater_equal, less, less_equal, not_equal, equal, logical_and, logical_or, logical_xor, logical_not, maximum, minimum, fmax, fmin, isreal, iscomplex, isfinite, isinf, isnan, signbit, copysign, nextafter, spacing, ldexp, fmod, floor, ceil, trunc, degrees, radians, rint, fix, angle, real, imag, clip, fabs, sign, absolute, i0, sinc, nan_to_num, frexp, modf, divide) from .reductions import (sum, prod, mean, std, var, any, all, min, max, vnorm, moment, argmin, argmax, nansum, nanmean, nanstd, nanvar, nanmin, nanmax, nanargmin, nanargmax, cumsum, cumprod) from .percentile import percentile with ignoring(ImportError): from .reductions import nanprod, nancumprod, nancumsum with ignoring(ImportError): from . import ma from . import random, linalg, ghost, learn, fft from .wrap import ones, zeros, empty, full from .creation import ones_like, zeros_like, empty_like, full_like from .rechunk import rechunk from ..context import set_options from ..base import compute from .optimization import optimize from .creation import (arange, linspace, indices, diag, eye, triu, tril, fromfunction, tile, repeat) dask-0.16.0/dask/array/chunk.py000066400000000000000000000123731320364734500162570ustar00rootroot00000000000000""" A set of NumPy functions to apply per chunk """ from __future__ import absolute_import, division, print_function from collections import Container, Iterable, Sequence from functools import wraps from toolz import concat import numpy as np from . import numpy_compat as npcompat from ..compatibility import getargspec from ..utils import ignoring def keepdims_wrapper(a_callable): """ A wrapper for functions that don't provide keepdims to ensure that they do. """ if "keepdims" in getargspec(a_callable).args: return a_callable @wraps(a_callable) def keepdims_wrapped_callable(x, axis=None, keepdims=None, *args, **kwargs): r = a_callable(x, axis=axis, *args, **kwargs) if not keepdims: return r axes = axis if axes is None: axes = range(x.ndim) if not isinstance(axes, (Container, Iterable, Sequence)): axes = [axes] r_slice = tuple() for each_axis in range(x.ndim): if each_axis in axes: r_slice += (None,) else: r_slice += (slice(None),) r = r[r_slice] return r return keepdims_wrapped_callable # Wrap NumPy functions to ensure they provide keepdims. 
sum = keepdims_wrapper(np.sum) prod = keepdims_wrapper(np.prod) min = keepdims_wrapper(np.min) max = keepdims_wrapper(np.max) argmin = keepdims_wrapper(np.argmin) nanargmin = keepdims_wrapper(np.nanargmin) argmax = keepdims_wrapper(np.argmax) nanargmax = keepdims_wrapper(np.nanargmax) any = keepdims_wrapper(np.any) all = keepdims_wrapper(np.all) nansum = keepdims_wrapper(np.nansum) try: from numpy import nanprod, nancumprod, nancumsum except ImportError: # pragma: no cover nanprod = npcompat.nanprod nancumprod = npcompat.nancumprod nancumsum = npcompat.nancumsum nanprod = keepdims_wrapper(nanprod) nancumprod = keepdims_wrapper(nancumprod) nancumsum = keepdims_wrapper(nancumsum) nanmin = keepdims_wrapper(np.nanmin) nanmax = keepdims_wrapper(np.nanmax) mean = keepdims_wrapper(np.mean) with ignoring(AttributeError): nanmean = keepdims_wrapper(np.nanmean) var = keepdims_wrapper(np.var) with ignoring(AttributeError): nanvar = keepdims_wrapper(np.nanvar) std = keepdims_wrapper(np.std) with ignoring(AttributeError): nanstd = keepdims_wrapper(np.nanstd) def coarsen(reduction, x, axes, trim_excess=False): """ Coarsen array by applying reduction to fixed size neighborhoods Parameters ---------- reduction: function Function like np.sum, np.mean, etc... x: np.ndarray Array to be coarsened axes: dict Mapping of axis to coarsening factor Examples -------- >>> x = np.array([1, 2, 3, 4, 5, 6]) >>> coarsen(np.sum, x, {0: 2}) array([ 3, 7, 11]) >>> coarsen(np.max, x, {0: 3}) array([3, 6]) Provide dictionary of scale per dimension >>> x = np.arange(24).reshape((4, 6)) >>> x array([[ 0, 1, 2, 3, 4, 5], [ 6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16, 17], [18, 19, 20, 21, 22, 23]]) >>> coarsen(np.min, x, {0: 2, 1: 3}) array([[ 0, 3], [12, 15]]) You must avoid excess elements explicitly >>> x = np.array([1, 2, 3, 4, 5, 6, 7, 8]) >>> coarsen(np.min, x, {0: 3}, trim_excess=True) array([1, 4]) """ # Insert singleton dimensions if they don't exist already for i in range(x.ndim): if i not in axes: axes[i] = 1 if trim_excess: ind = tuple(slice(0, -(d % axes[i])) if d % axes[i] else slice(None, None) for i, d in enumerate(x.shape)) x = x[ind] # (10, 10) -> (5, 2, 5, 2) newshape = tuple(concat([(x.shape[i] // axes[i], axes[i]) for i in range(x.ndim)])) return reduction(x.reshape(newshape), axis=tuple(range(1, x.ndim * 2, 2))) def trim(x, axes=None): """ Trim boundaries off of array >>> x = np.arange(24).reshape((4, 6)) >>> trim(x, axes={0: 0, 1: 1}) array([[ 1, 2, 3, 4], [ 7, 8, 9, 10], [13, 14, 15, 16], [19, 20, 21, 22]]) >>> trim(x, axes={0: 1, 1: 1}) array([[ 7, 8, 9, 10], [13, 14, 15, 16]]) """ if isinstance(axes, int): axes = [axes] * x.ndim if isinstance(axes, dict): axes = [axes.get(i, 0) for i in range(x.ndim)] return x[tuple(slice(ax, -ax if ax else None) for ax in axes)] try: from numpy import broadcast_to except ImportError: # pragma: no cover broadcast_to = npcompat.broadcast_to def topk(k, x): """ Top k elements of an array >>> topk(2, np.array([5, 1, 3, 6])) array([6, 5]) """ # http://stackoverflow.com/a/23734295/616616 by larsmans k = np.minimum(k, len(x)) ind = np.argpartition(x, -k)[-k:] return np.sort(x[ind])[::-1] def arange(start, stop, step, length, dtype): res = np.arange(start, stop, step, dtype) return res[:-1] if len(res) > length else res def astype(x, astype_dtype=None, **kwargs): return x.astype(astype_dtype, **kwargs) def view(x, dtype, order='C'): if order == 'C': x = np.ascontiguousarray(x) return x.view(dtype) else: x = np.asfortranarray(x) return x.T.view(dtype).T 
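# A minimal usage sketch of the helpers above (guarded so it only runs when
# this file is executed directly), assuming the module-level names defined
# in this file (``sum``, ``coarsen``) and the ``np`` import at the top.
if __name__ == '__main__':  # pragma: no cover
    _x = np.arange(6).reshape(2, 3)
    # ``sum`` is ``keepdims_wrapper(np.sum)``; np.sum already accepts
    # keepdims, so the wrapper returns it unchanged and the reduced axis
    # is retained as a length-1 dimension.
    assert sum(_x, axis=1, keepdims=True).shape == (2, 1)
    # With keepdims=False the reduced axis is dropped, as with plain np.sum.
    assert sum(_x, axis=1, keepdims=False).shape == (2,)
    # ``coarsen`` reduces fixed-size neighborhoods via the reshape trick
    # described in its docstring: (4, 6) -> (2, 2, 2, 3), then reduce over
    # the interleaved axes (1, 3).
    _y = np.arange(24).reshape(4, 6)
    assert coarsen(np.sum, _y, {0: 2, 1: 3}).shape == (2, 2)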
dask-0.16.0/dask/array/conftest.py000066400000000000000000000002021320364734500167600ustar00rootroot00000000000000import os def pytest_ignore_collect(path, config): if os.path.split(str(path))[1].startswith("fft.py"): return True dask-0.16.0/dask/array/core.py000066400000000000000000003261071320364734500161020ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from bisect import bisect from collections import Iterable, Mapping from collections import Iterator from functools import partial, wraps from itertools import product import math from numbers import Number import operator from operator import add, getitem, mul import os import sys import traceback import pickle from threading import Lock import uuid import warnings try: from cytoolz import (partition, concat, join, first, groupby, valmap, accumulate, assoc) from cytoolz.curried import filter, pluck except ImportError: from toolz import (partition, concat, join, first, groupby, valmap, accumulate, assoc) from toolz.curried import filter, pluck from toolz import pipe, map, reduce import numpy as np from . import chunk from .slicing import slice_array, replace_ellipsis from ..base import Base, tokenize, dont_optimize, compute_as_if_collection from ..context import _globals, globalmethod from ..utils import (homogeneous_deepmap, ndeepmap, ignoring, concrete, is_integer, IndexCallable, funcname, derived_from, SerializableLock, ensure_dict, Dispatch) from ..compatibility import unicode, long, getargspec, zip_longest, apply from ..delayed import to_task_dask from .. import threaded, core from .. import sharedict from ..sharedict import ShareDict concatenate_lookup = Dispatch('concatenate') tensordot_lookup = Dispatch('tensordot') concatenate_lookup.register((object, np.ndarray), np.concatenate) tensordot_lookup.register((object, np.ndarray), np.tensordot) @tensordot_lookup.register_lazy('sparse') @concatenate_lookup.register_lazy('sparse') def register_sparse(): import sparse concatenate_lookup.register(sparse.COO, sparse.concatenate) tensordot_lookup.register(sparse.COO, sparse.tensordot) def getter(a, b, asarray=True, lock=None): if isinstance(b, tuple) and any(x is None for x in b): b2 = tuple(x for x in b if x is not None) b3 = tuple(None if x is None else slice(None, None) for x in b if not isinstance(x, (int, long))) return getter(a, b2, asarray=asarray, lock=lock)[b3] if lock: lock.acquire() try: c = a[b] if asarray: c = np.asarray(c) finally: if lock: lock.release() return c def getter_nofancy(a, b, asarray=True, lock=None): """ A simple wrapper around ``getter``. Used to indicate to the optimization passes that the backend doesn't support fancy indexing. """ return getter(a, b, asarray=asarray, lock=lock) def getter_inline(a, b, asarray=True, lock=None): """ A getter function that optimizations feel comfortable inlining Slicing operations with this function may be inlined into a graph, such as in the following rewrite **Before** >>> a = x[:10] # doctest: +SKIP >>> b = a + 1 # doctest: +SKIP >>> c = a * 2 # doctest: +SKIP **After** >>> b = x[:10] + 1 # doctest: +SKIP >>> c = x[:10] * 2 # doctest: +SKIP This inlining can be relevant to operations when running off of disk. 
""" return getter(a, b, asarray=asarray, lock=lock) from .optimization import optimize, fuse_slice def slices_from_chunks(chunks): """ Translate chunks tuple to a set of slices in product order >>> slices_from_chunks(((2, 2), (3, 3, 3))) # doctest: +NORMALIZE_WHITESPACE [(slice(0, 2, None), slice(0, 3, None)), (slice(0, 2, None), slice(3, 6, None)), (slice(0, 2, None), slice(6, 9, None)), (slice(2, 4, None), slice(0, 3, None)), (slice(2, 4, None), slice(3, 6, None)), (slice(2, 4, None), slice(6, 9, None))] """ cumdims = [list(accumulate(add, (0,) + bds[:-1])) for bds in chunks] shapes = product(*chunks) starts = product(*cumdims) return [tuple(slice(s, s + dim) for s, dim in zip(start, shape)) for start, shape in zip(starts, shapes)] def getem(arr, chunks, getitem=getter, shape=None, out_name=None, lock=False, asarray=True): """ Dask getting various chunks from an array-like >>> getem('X', chunks=(2, 3), shape=(4, 6)) # doctest: +SKIP {('X', 0, 0): (getter, 'X', (slice(0, 2), slice(0, 3))), ('X', 1, 0): (getter, 'X', (slice(2, 4), slice(0, 3))), ('X', 1, 1): (getter, 'X', (slice(2, 4), slice(3, 6))), ('X', 0, 1): (getter, 'X', (slice(0, 2), slice(3, 6)))} >>> getem('X', chunks=((2, 2), (3, 3))) # doctest: +SKIP {('X', 0, 0): (getter, 'X', (slice(0, 2), slice(0, 3))), ('X', 1, 0): (getter, 'X', (slice(2, 4), slice(0, 3))), ('X', 1, 1): (getter, 'X', (slice(2, 4), slice(3, 6))), ('X', 0, 1): (getter, 'X', (slice(0, 2), slice(3, 6)))} """ out_name = out_name or arr chunks = normalize_chunks(chunks, shape) keys = list(product([out_name], *[range(len(bds)) for bds in chunks])) slices = slices_from_chunks(chunks) if not asarray or lock: values = [(getitem, arr, x, asarray, lock) for x in slices] else: # Common case, drop extra parameters values = [(getitem, arr, x) for x in slices] return dict(zip(keys, values)) def dotmany(A, B, leftfunc=None, rightfunc=None, **kwargs): """ Dot product of many aligned chunks >>> x = np.array([[1, 2], [1, 2]]) >>> y = np.array([[10, 20], [10, 20]]) >>> dotmany([x, x, x], [y, y, y]) array([[ 90, 180], [ 90, 180]]) Optionally pass in functions to apply to the left and right chunks >>> dotmany([x, x, x], [y, y, y], rightfunc=np.transpose) array([[150, 150], [150, 150]]) """ if leftfunc: A = map(leftfunc, A) if rightfunc: B = map(rightfunc, B) return sum(map(partial(np.dot, **kwargs), A, B)) def lol_tuples(head, ind, values, dummies): """ List of list of tuple keys Parameters ---------- head : tuple The known tuple so far ind : Iterable An iterable of indices not yet covered values : dict Known values for non-dummy indices dummies : dict Ranges of values for dummy indices Examples -------- >>> lol_tuples(('x',), 'ij', {'i': 1, 'j': 0}, {}) ('x', 1, 0) >>> lol_tuples(('x',), 'ij', {'i': 1}, {'j': range(3)}) [('x', 1, 0), ('x', 1, 1), ('x', 1, 2)] >>> lol_tuples(('x',), 'ij', {'i': 1}, {'j': range(3)}) [('x', 1, 0), ('x', 1, 1), ('x', 1, 2)] >>> lol_tuples(('x',), 'ijk', {'i': 1}, {'j': [0, 1, 2], 'k': [0, 1]}) # doctest: +NORMALIZE_WHITESPACE [[('x', 1, 0, 0), ('x', 1, 0, 1)], [('x', 1, 1, 0), ('x', 1, 1, 1)], [('x', 1, 2, 0), ('x', 1, 2, 1)]] """ if not ind: return head if ind[0] not in dummies: return lol_tuples(head + (values[ind[0]],), ind[1:], values, dummies) else: return [lol_tuples(head + (v,), ind[1:], values, dummies) for v in dummies[ind[0]]] def zero_broadcast_dimensions(lol, nblocks): """ >>> lol = [('x', 1, 0), ('x', 1, 1), ('x', 1, 2)] >>> nblocks = (4, 1, 2) # note singleton dimension in second place >>> lol = [[('x', 1, 0, 0), ('x', 1, 0, 1)], ... 
[('x', 1, 1, 0), ('x', 1, 1, 1)], ... [('x', 1, 2, 0), ('x', 1, 2, 1)]] >>> zero_broadcast_dimensions(lol, nblocks) # doctest: +NORMALIZE_WHITESPACE [[('x', 1, 0, 0), ('x', 1, 0, 1)], [('x', 1, 0, 0), ('x', 1, 0, 1)], [('x', 1, 0, 0), ('x', 1, 0, 1)]] See Also -------- lol_tuples """ f = lambda t: (t[0],) + tuple(0 if d == 1 else i for i, d in zip(t[1:], nblocks)) return homogeneous_deepmap(f, lol) def broadcast_dimensions(argpairs, numblocks, sentinels=(1, (1,)), consolidate=None): """ Find block dimensions from arguments Parameters ---------- argpairs: iterable name, ijk index pairs numblocks: dict maps {name: number of blocks} sentinels: iterable (optional) values for singleton dimensions consolidate: func (optional) use this to reduce each set of common blocks into a smaller set Examples -------- >>> argpairs = [('x', 'ij'), ('y', 'ji')] >>> numblocks = {'x': (2, 3), 'y': (3, 2)} >>> broadcast_dimensions(argpairs, numblocks) {'i': 2, 'j': 3} Supports numpy broadcasting rules >>> argpairs = [('x', 'ij'), ('y', 'ij')] >>> numblocks = {'x': (2, 1), 'y': (1, 3)} >>> broadcast_dimensions(argpairs, numblocks) {'i': 2, 'j': 3} Works in other contexts too >>> argpairs = [('x', 'ij'), ('y', 'ij')] >>> d = {'x': ('Hello', 1), 'y': (1, (2, 3))} >>> broadcast_dimensions(argpairs, d) {'i': 'Hello', 'j': (2, 3)} """ # List like [('i', 2), ('j', 1), ('i', 1), ('j', 2)] argpairs2 = [(a, ind) for a, ind in argpairs if ind is not None] L = concat([zip(inds, dims) for (x, inds), (x, dims) in join(first, argpairs2, first, numblocks.items())]) g = groupby(0, L) g = dict((k, set([d for i, d in v])) for k, v in g.items()) g2 = dict((k, v - set(sentinels) if len(v) > 1 else v) for k, v in g.items()) if consolidate: return valmap(consolidate, g2) if g2 and not set(map(len, g2.values())) == set([1]): raise ValueError("Shapes do not align %s" % g) return valmap(first, g2) def top(func, output, out_indices, *arrind_pairs, **kwargs): """ Tensor operation Applies a function, ``func``, across blocks from many different input dasks. We arrange the pattern with which those blocks interact with sets of matching indices. E.g.:: top(func, 'z', 'i', 'x', 'i', 'y', 'i') yield an embarrassingly parallel communication pattern and is read as $$ z_i = func(x_i, y_i) $$ More complex patterns may emerge, including multiple indices:: top(func, 'z', 'ij', 'x', 'ij', 'y', 'ji') $$ z_{ij} = func(x_{ij}, y_{ji}) $$ Indices missing in the output but present in the inputs results in many inputs being sent to one function (see examples). Examples -------- Simple embarrassing map operation >>> inc = lambda x: x + 1 >>> top(inc, 'z', 'ij', 'x', 'ij', numblocks={'x': (2, 2)}) # doctest: +SKIP {('z', 0, 0): (inc, ('x', 0, 0)), ('z', 0, 1): (inc, ('x', 0, 1)), ('z', 1, 0): (inc, ('x', 1, 0)), ('z', 1, 1): (inc, ('x', 1, 1))} Simple operation on two datasets >>> add = lambda x, y: x + y >>> top(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (2, 2), ... 'y': (2, 2)}) # doctest: +SKIP {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)), ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)), ('z', 1, 0): (add, ('x', 1, 0), ('y', 1, 0)), ('z', 1, 1): (add, ('x', 1, 1), ('y', 1, 1))} Operation that flips one of the datasets >>> addT = lambda x, y: x + y.T # Transpose each chunk >>> # z_ij ~ x_ij y_ji >>> # .. .. .. notice swap >>> top(addT, 'z', 'ij', 'x', 'ij', 'y', 'ji', numblocks={'x': (2, 2), ... 
'y': (2, 2)}) # doctest: +SKIP {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)), ('z', 0, 1): (add, ('x', 0, 1), ('y', 1, 0)), ('z', 1, 0): (add, ('x', 1, 0), ('y', 0, 1)), ('z', 1, 1): (add, ('x', 1, 1), ('y', 1, 1))} Dot product with contraction over ``j`` index. Yields list arguments >>> top(dotmany, 'z', 'ik', 'x', 'ij', 'y', 'jk', numblocks={'x': (2, 2), ... 'y': (2, 2)}) # doctest: +SKIP {('z', 0, 0): (dotmany, [('x', 0, 0), ('x', 0, 1)], [('y', 0, 0), ('y', 1, 0)]), ('z', 0, 1): (dotmany, [('x', 0, 0), ('x', 0, 1)], [('y', 0, 1), ('y', 1, 1)]), ('z', 1, 0): (dotmany, [('x', 1, 0), ('x', 1, 1)], [('y', 0, 0), ('y', 1, 0)]), ('z', 1, 1): (dotmany, [('x', 1, 0), ('x', 1, 1)], [('y', 0, 1), ('y', 1, 1)])} Pass ``concatenate=True`` to concatenate arrays ahead of time >>> top(f, 'z', 'i', 'x', 'ij', 'y', 'ij', concatenate=True, ... numblocks={'x': (2, 2), 'y': (2, 2,)}) # doctest: +SKIP {('z', 0): (f, (concatenate_axes, [('x', 0, 0), ('x', 0, 1)], (1,)), (concatenate_axes, [('y', 0, 0), ('y', 0, 1)], (1,))) ('z', 1): (f, (concatenate_axes, [('x', 1, 0), ('x', 1, 1)], (1,)), (concatenate_axes, [('y', 1, 0), ('y', 1, 1)], (1,)))} Supports Broadcasting rules >>> top(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (1, 2), ... 'y': (2, 2)}) # doctest: +SKIP {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)), ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)), ('z', 1, 0): (add, ('x', 0, 0), ('y', 1, 0)), ('z', 1, 1): (add, ('x', 0, 1), ('y', 1, 1))} Support keyword arguments with apply >>> def f(a, b=0): return a + b >>> top(f, 'z', 'i', 'x', 'i', numblocks={'x': (2,)}, b=10) # doctest: +SKIP {('z', 0): (apply, f, [('x', 0)], {'b': 10}), ('z', 1): (apply, f, [('x', 1)], {'b': 10})} Include literals by indexing with ``None`` >>> top(add, 'z', 'i', 'x', 'i', 100, None, numblocks={'x': (2,)}) # doctest: +SKIP {('z', 0): (add, ('x', 0), 100), ('z', 1): (add, ('x', 1), 100)} See Also -------- atop """ numblocks = kwargs.pop('numblocks') concatenate = kwargs.pop('concatenate', None) new_axes = kwargs.pop('new_axes', {}) argpairs = list(partition(2, arrind_pairs)) assert set(numblocks) == {name for name, ind in argpairs if ind is not None} all_indices = pipe(argpairs, pluck(1), filter(None), concat, set) dummy_indices = all_indices - set(out_indices) # Dictionary mapping {i: 3, j: 4, ...} for i, j, ... the dimensions dims = broadcast_dimensions(argpairs, numblocks) for k in new_axes: dims[k] = 1 # (0, 0), (0, 1), (0, 2), (1, 0), ... keytups = list(product(*[range(dims[i]) for i in out_indices])) # {i: 0, j: 0}, {i: 0, j: 1}, ... 
keydicts = [dict(zip(out_indices, tup)) for tup in keytups] # {j: [1, 2, 3], ...} For j a dummy index of dimension 3 dummies = dict((i, list(range(dims[i]))) for i in dummy_indices) dsk = {} # Unpack dask values in non-array arguments for i, (arg, ind) in enumerate(argpairs): if ind is None: arg2, dsk2 = to_task_dask(arg) if dsk2: dsk.update(ensure_dict(dsk2)) argpairs[i] = (arg2, ind) # Create argument lists valtups = [] for kd in keydicts: args = [] for arg, ind in argpairs: if ind is None: args.append(arg) else: tups = lol_tuples((arg,), ind, kd, dummies) if any(nb == 1 for nb in numblocks[arg]): tups2 = zero_broadcast_dimensions(tups, numblocks[arg]) else: tups2 = tups if concatenate and isinstance(tups2, list): axes = [n for n, i in enumerate(ind) if i in dummies] tups2 = (concatenate_axes, tups2, axes) args.append(tups2) valtups.append(args) if not kwargs: # will not be used in an apply, should be a tuple valtups = [tuple(vt) for vt in valtups] # Add heads to tuples keys = [(output,) + kt for kt in keytups] # Unpack delayed objects in kwargs if kwargs: task, dsk2 = to_task_dask(kwargs) if dsk2: dsk.update(ensure_dict(dsk2)) kwargs2 = task else: kwargs2 = kwargs vals = [(apply, func, vt, kwargs2) for vt in valtups] else: vals = [(func,) + vt for vt in valtups] dsk.update(dict(zip(keys, vals))) return dsk def _concatenate2(arrays, axes=[]): """ Recursively Concatenate nested lists of arrays along axes Each entry in axes corresponds to each level of the nested list. The length of axes should correspond to the level of nesting of arrays. >>> x = np.array([[1, 2], [3, 4]]) >>> _concatenate2([x, x], axes=[0]) array([[1, 2], [3, 4], [1, 2], [3, 4]]) >>> _concatenate2([x, x], axes=[1]) array([[1, 2, 1, 2], [3, 4, 3, 4]]) >>> _concatenate2([[x, x], [x, x]], axes=[0, 1]) array([[1, 2, 1, 2], [3, 4, 3, 4], [1, 2, 1, 2], [3, 4, 3, 4]]) Supports Iterators >>> _concatenate2(iter([x, x]), axes=[1]) array([[1, 2, 1, 2], [3, 4, 3, 4]]) """ if isinstance(arrays, Iterator): arrays = list(arrays) if not isinstance(arrays, (list, tuple)): return arrays if len(axes) > 1: arrays = [_concatenate2(a, axes=axes[1:]) for a in arrays] concatenate = concatenate_lookup.dispatch(type(max(arrays, key=lambda x: x.__array_priority__))) return concatenate(arrays, axis=axes[0]) def apply_infer_dtype(func, args, kwargs, funcname, suggest_dtype=True): args = [np.ones((1,) * x.ndim, dtype=x.dtype) if isinstance(x, Array) else x for x in args] try: with np.errstate(all='ignore'): o = func(*args, **kwargs) except Exception as e: exc_type, exc_value, exc_traceback = sys.exc_info() tb = ''.join(traceback.format_tb(exc_traceback)) suggest = ("Please specify the dtype explicitly using the " "`dtype` kwarg.\n\n") if suggest_dtype else "" msg = ("`dtype` inference failed in `{0}`.\n\n" "{1}" "Original error is below:\n" "------------------------\n" "{2}\n\n" "Traceback:\n" "---------\n" "{3}").format(funcname, suggest, repr(e), tb) else: msg = None if msg is not None: raise ValueError(msg) return o.dtype def map_blocks(func, *args, **kwargs): """ Map a function across all blocks of a dask array. Parameters ---------- func : callable Function to apply to every block in the array. args : dask arrays or constants dtype : np.dtype, optional The ``dtype`` of the output array. It is recommended to provide this. If not provided, will be inferred by applying the function to a small set of fake data. chunks : tuple, optional Chunk shape of resulting blocks if the function does not preserve shape. 
If not provided, the resulting array is assumed to have the same block structure as the first input array. drop_axis : number or iterable, optional Dimensions lost by the function. new_axis : number or iterable, optional New dimensions created by the function. Note that these are applied after ``drop_axis`` (if present). token : string, optional The key prefix to use for the output array. If not provided, will be determined from the function name. name : string, optional The key name to use for the output array. Note that this fully specifies the output key name, and must be unique. If not provided, will be determined by a hash of the arguments. **kwargs : Other keyword arguments to pass to function. Values must be constants (not dask.arrays) Examples -------- >>> import dask.array as da >>> x = da.arange(6, chunks=3) >>> x.map_blocks(lambda x: x * 2).compute() array([ 0, 2, 4, 6, 8, 10]) The ``da.map_blocks`` function can also accept multiple arrays. >>> d = da.arange(5, chunks=2) >>> e = da.arange(5, chunks=2) >>> f = map_blocks(lambda a, b: a + b**2, d, e) >>> f.compute() array([ 0, 2, 6, 12, 20]) If the function changes shape of the blocks then you must provide chunks explicitly. >>> y = x.map_blocks(lambda x: x[::2], chunks=((2, 2),)) You have a bit of freedom in specifying chunks. If all of the output chunk sizes are the same, you can provide just that chunk size as a single tuple. >>> a = da.arange(18, chunks=(6,)) >>> b = a.map_blocks(lambda x: x[:3], chunks=(3,)) If the function changes the dimension of the blocks you must specify the created or destroyed dimensions. >>> b = a.map_blocks(lambda x: x[None, :, None], chunks=(1, 6, 1), ... new_axis=[0, 2]) Map_blocks aligns blocks by block positions without regard to shape. In the following example we have two arrays with the same number of blocks but with different shape and chunk sizes. >>> x = da.arange(1000, chunks=(100,)) >>> y = da.arange(100, chunks=(10,)) The relevant attribute to match is numblocks. >>> x.numblocks (10,) >>> y.numblocks (10,) If these match (up to broadcasting rules) then we can map arbitrary functions across blocks >>> def func(a, b): ... return np.array([a.max(), b.max()]) >>> da.map_blocks(func, x, y, chunks=(2,), dtype='i8') dask.array >>> _.compute() array([ 99, 9, 199, 19, 299, 29, 399, 39, 499, 49, 599, 59, 699, 69, 799, 79, 899, 89, 999, 99]) Your block function can learn where in the array it is if it supports a ``block_id`` keyword argument. This will receive entries like (2, 0, 1), the position of the block in the dask array. >>> def func(block, block_id=None): ... pass You may specify the key name prefix of the resulting task in the graph with the optional ``token`` keyword argument. 
>>> x.map_blocks(lambda x: x + 1, token='increment') # doctest: +SKIP dask.array """ if not callable(func): msg = ("First argument must be callable function, not %s\n" "Usage: da.map_blocks(function, x)\n" " or: da.map_blocks(function, x, y, z)") raise TypeError(msg % type(func).__name__) name = kwargs.pop('name', None) token = kwargs.pop('token', None) if not name: name = '%s-%s' % (token or funcname(func), tokenize(token or func, args, **kwargs)) dtype = kwargs.pop('dtype', None) chunks = kwargs.pop('chunks', None) drop_axis = kwargs.pop('drop_axis', []) new_axis = kwargs.pop('new_axis', []) if isinstance(drop_axis, Number): drop_axis = [drop_axis] if isinstance(new_axis, Number): new_axis = [new_axis] arrs = [a for a in args if isinstance(a, Array)] argpairs = [(a.name, tuple(range(a.ndim))[::-1]) if isinstance(a, Array) else (a, None) for a in args] numblocks = {a.name: a.numblocks for a in arrs} arginds = list(concat(argpairs)) out_ind = tuple(range(max(a.ndim for a in arrs)))[::-1] try: spec = getargspec(func) block_id = ('block_id' in spec.args or 'block_id' in getattr(spec, 'kwonly_args', ())) except Exception: block_id = False if block_id: kwargs['block_id'] = '__dummy__' dsk = top(func, name, out_ind, *arginds, numblocks=numblocks, **kwargs) # If func has block_id as an argument, add it to the kwargs for each call if block_id: for k in dsk.keys(): dsk[k] = dsk[k][:-1] + (assoc(dsk[k][-1], 'block_id', k[1:]),) if dtype is None: if block_id: kwargs2 = assoc(kwargs, 'block_id', first(dsk.keys())[1:]) else: kwargs2 = kwargs dtype = apply_infer_dtype(func, args, kwargs2, 'map_blocks') if len(arrs) == 1: numblocks = list(arrs[0].numblocks) else: dims = broadcast_dimensions(argpairs, numblocks) numblocks = [b for (_, b) in sorted(dims.items(), reverse=True)] if drop_axis: if any(numblocks[i] > 1 for i in drop_axis): raise ValueError("Can't drop an axis with more than 1 block. " "Please use `atop` instead.") dsk = dict((tuple(k for i, k in enumerate(k) if i - 1 not in drop_axis), v) for k, v in dsk.items()) numblocks = [n for i, n in enumerate(numblocks) if i not in drop_axis] if new_axis: new_axis = sorted(new_axis) for i in new_axis: if not 0 <= i <= len(numblocks): ndim = len(numblocks) raise ValueError("Can't add axis %d when current " "axis are %r. Missing axis: " "%r" % (i, list(range(ndim)), list(range(ndim, i)))) numblocks.insert(i, 1) dsk, old_dsk = dict(), dsk for key in old_dsk: new_key = list(key) for i in new_axis: new_key.insert(i + 1, 0) dsk[tuple(new_key)] = old_dsk[key] if chunks: if len(chunks) != len(numblocks): raise ValueError("Provided chunks have {0} dims, expected {1} " "dims.".format(len(chunks), len(numblocks))) chunks2 = [] for i, (c, nb) in enumerate(zip(chunks, numblocks)): if isinstance(c, tuple): if not len(c) == nb: raise ValueError("Dimension {0} has {1} blocks, " "chunks specified with " "{2} blocks".format(i, nb, len(c))) chunks2.append(c) else: chunks2.append(nb * (c,)) else: if len(arrs) == 1: chunks2 = list(arrs[0].chunks) else: try: chunks2 = list(broadcast_chunks(*[a.chunks for a in arrs])) except Exception: raise ValueError("Arrays in `map_blocks` don't align, can't " "infer output chunks. 
Please provide " "`chunks` kwarg.") if drop_axis: chunks2 = [c for (i, c) in enumerate(chunks2) if i not in drop_axis] if new_axis: for i in sorted(new_axis): chunks2.insert(i, (1,)) chunks = tuple(chunks2) return Array(sharedict.merge((name, dsk), *[a.dask for a in arrs]), name, chunks, dtype) def broadcast_chunks(*chunkss): """ Construct a chunks tuple that broadcasts many chunks tuples >>> a = ((5, 5),) >>> b = ((5, 5),) >>> broadcast_chunks(a, b) ((5, 5),) >>> a = ((10, 10, 10), (5, 5),) >>> b = ((5, 5),) >>> broadcast_chunks(a, b) ((10, 10, 10), (5, 5)) >>> a = ((10, 10, 10), (5, 5),) >>> b = ((1,), (5, 5),) >>> broadcast_chunks(a, b) ((10, 10, 10), (5, 5)) >>> a = ((10, 10, 10), (5, 5),) >>> b = ((3, 3,), (5, 5),) >>> broadcast_chunks(a, b) Traceback (most recent call last): ... ValueError: Chunks do not align: [(10, 10, 10), (3, 3)] """ if len(chunkss) == 1: return chunkss[0] n = max(map(len, chunkss)) chunkss2 = [((1,),) * (n - len(c)) + c for c in chunkss] result = [] for i in range(n): step1 = [c[i] for c in chunkss2] if all(c == (1,) for c in step1): step2 = step1 else: step2 = [c for c in step1 if c != (1,)] if len(set(step2)) != 1: raise ValueError("Chunks do not align: %s" % str(step2)) result.append(step2[0]) return tuple(result) def store(sources, targets, lock=True, regions=None, compute=True, **kwargs): """ Store dask arrays in array-like objects, overwrite data in target This stores dask arrays into object that supports numpy-style setitem indexing. It stores values chunk by chunk so that it does not have to fill up memory. For best performance you can align the block size of the storage target with the block size of your array. If your data fits in memory then you may prefer calling ``np.array(myarray)`` instead. Parameters ---------- sources: Array or iterable of Arrays targets: array-like or iterable of array-likes These should support setitem syntax ``target[10:20] = ...`` lock: boolean or threading.Lock, optional Whether or not to lock the data stores while storing. Pass True (lock each file individually), False (don't lock) or a particular ``threading.Lock`` object to be shared among all writes. regions: tuple of slices or iterable of tuple of slices Each ``region`` tuple in ``regions`` should be such that ``target[region].shape = source.shape`` for the corresponding source and target in sources and targets, respectively. compute: boolean, optional If true compute immediately, return ``dask.delayed.Delayed`` otherwise Examples -------- >>> x = ... # doctest: +SKIP >>> import h5py # doctest: +SKIP >>> f = h5py.File('myfile.hdf5') # doctest: +SKIP >>> dset = f.create_dataset('/data', shape=x.shape, ... chunks=x.chunks, ... 
dtype='f8') # doctest: +SKIP >>> store(x, dset) # doctest: +SKIP Alternatively store many arrays at the same time >>> store([x, y, z], [dset1, dset2, dset3]) # doctest: +SKIP """ if isinstance(sources, Array): sources = [sources] targets = [targets] if any(not isinstance(s, Array) for s in sources): raise ValueError("All sources must be dask array objects") if len(sources) != len(targets): raise ValueError("Different number of sources [%d] and targets [%d]" % (len(sources), len(targets))) if isinstance(regions, tuple) or regions is None: regions = [regions] if len(sources) > 1 and len(regions) == 1: regions *= len(sources) if len(sources) != len(regions): raise ValueError("Different number of sources [%d] and targets [%d] than regions [%d]" % (len(sources), len(targets), len(regions))) updates = {} keys = [] for tgt, src, reg in zip(targets, sources, regions): # if out is a delayed object update dictionary accordingly try: dsk = {} dsk.update(tgt.dask) tgt = tgt.key except AttributeError: dsk = {} update = insert_to_ooc(tgt, src, lock=lock, region=reg) keys.extend(update) update.update(dsk) updates.update(update) name = 'store-' + tokenize(*keys) dsk = sharedict.merge((name, updates), *[src.dask for src in sources]) if compute: compute_as_if_collection(Array, dsk, keys, **kwargs) else: from ..delayed import Delayed dsk.update({name: keys}) return Delayed(name, dsk) def blockdims_from_blockshape(shape, chunks): """ >>> blockdims_from_blockshape((10, 10), (4, 3)) ((4, 4, 2), (3, 3, 3, 1)) >>> blockdims_from_blockshape((10, 0), (4, 0)) ((4, 4, 2), (0,)) """ if chunks is None: raise TypeError("Must supply chunks= keyword argument") if shape is None: raise TypeError("Must supply shape= keyword argument") if np.isnan(sum(shape)) or np.isnan(sum(chunks)): raise ValueError("Array chunk sizes are unknown. shape: %s, chunks: %s" % (shape, chunks)) if not all(map(is_integer, chunks)): raise ValueError("chunks can only contain integers.") if not all(map(is_integer, shape)): raise ValueError("shape can only contain integers.") shape = tuple(map(int, shape)) chunks = tuple(map(int, chunks)) return tuple(((bd,) * (d // bd) + ((d % bd,) if d % bd else ()) if d else (0,)) for d, bd in zip(shape, chunks)) def finalize(results): if not results: return concatenate3(results) results2 = results while isinstance(results2, (tuple, list)): if len(results2) > 1: return concatenate3(results) else: results2 = results2[0] return unpack_singleton(results) CHUNKS_NONE_ERROR_MESSAGE = """ You must specify a chunks= keyword argument. This specifies the chunksize of your array blocks. See the following documentation page for details: http://dask.pydata.org/en/latest/array-creation.html#chunks """.strip() class Array(Base): """ Parallel Dask Array A parallel nd-array comprised of many numpy arrays arranged in a grid. This constructor is for advanced uses only. For normal use see the ``da.from_array`` function. 
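A minimal sketch of the usual construction path via ``da.from_array`` (illustrative values only; the repr is elided, so the lines are marked to be skipped):

>>> import numpy as np  # doctest: +SKIP
>>> import dask.array as da  # doctest: +SKIP
>>> x = da.from_array(np.arange(12), chunks=4)  # doctest: +SKIP
>>> x.numblocks  # doctest: +SKIP
(3,)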
Parameters ---------- dask : dict Task dependency graph name : string Name of array in dask shape : tuple of ints Shape of the entire array chunks: iterable of tuples block sizes along each dimension See Also -------- dask.array.from_array """ __slots__ = 'dask', '_name', '_cached_keys', '_chunks', 'dtype' def __new__(cls, dask, name, chunks, dtype, shape=None): self = super(Array, cls).__new__(cls) assert isinstance(dask, Mapping) if not isinstance(dask, ShareDict): s = ShareDict() s.update_with_key(dask, key=name) dask = s self.dask = dask self.name = name self._chunks = normalize_chunks(chunks, shape) if self._chunks is None: raise ValueError(CHUNKS_NONE_ERROR_MESSAGE) if dtype is None: raise ValueError("You must specify the dtype of the array") self.dtype = np.dtype(dtype) for plugin in _globals.get('array_plugins', ()): result = plugin(self) if result is not None: self = result return self def __reduce__(self): return (Array, (self.dask, self.name, self.chunks, self.dtype)) def __dask_graph__(self): return self.dask def __dask_keys__(self): if self._cached_keys is not None: return self._cached_keys name, chunks, numblocks = self.name, self.chunks, self.numblocks def keys(*args): if not chunks: return [(name,)] ind = len(args) if ind + 1 == len(numblocks): result = [(name,) + args + (i,) for i in range(numblocks[ind])] else: result = [keys(*(args + (i,))) for i in range(numblocks[ind])] return result self._cached_keys = result = keys() return result def __dask_tokenize__(self): return self.name __dask_optimize__ = globalmethod(optimize, key='array_optimize', falsey=dont_optimize) __dask_scheduler__ = staticmethod(threaded.get) def __dask_postcompute__(self): return finalize, () def __dask_postpersist__(self): return Array, (self.name, self.chunks, self.dtype) @property def numblocks(self): return tuple(map(len, self.chunks)) @property def npartitions(self): return reduce(mul, self.numblocks, 1) @property def shape(self): return tuple(map(sum, self.chunks)) def _get_chunks(self): return self._chunks def _set_chunks(self, chunks): raise TypeError("Can not set chunks directly\n\n" "Please use the rechunk method instead:\n" " x.rechunk(%s)" % str(chunks)) chunks = property(_get_chunks, _set_chunks, "chunks property") def __len__(self): return sum(self.chunks[0]) def __array_ufunc__(self, numpy_ufunc, method, *inputs, **kwargs): out = kwargs.get('out', ()) for x in inputs + out: if not isinstance(x, (np.ndarray, Number, Array)): return NotImplemented if method == '__call__': if numpy_ufunc.signature is not None: return NotImplemented if numpy_ufunc.nout > 1: from . import ufunc try: da_ufunc = getattr(ufunc, numpy_ufunc.__name__) except AttributeError: return NotImplemented return da_ufunc(*inputs, **kwargs) else: return elemwise(numpy_ufunc, *inputs, **kwargs) elif method == 'outer': from . 
import ufunc try: da_ufunc = getattr(ufunc, numpy_ufunc.__name__) except AttributeError: return NotImplemented return da_ufunc.outer(*inputs, **kwargs) else: return NotImplemented def __repr__(self): """ >>> import dask.array as da >>> da.ones((10, 10), chunks=(5, 5), dtype='i4') dask.array<..., shape=(10, 10), dtype=int32, chunksize=(5, 5)> """ chunksize = str(tuple(c[0] for c in self.chunks)) name = self.name.rsplit('-', 1)[0] return ("dask.array<%s, shape=%s, dtype=%s, chunksize=%s>" % (name, self.shape, self.dtype, chunksize)) @property def ndim(self): return len(self.shape) @property def size(self): """ Number of elements in array """ return reduce(mul, self.shape, 1) @property def nbytes(self): """ Number of bytes in array """ return self.size * self.dtype.itemsize @property def itemsize(self): """ Length of one array element in bytes """ return self.dtype.itemsize @property def name(self): return self._name @name.setter def name(self, val): self._name = val # Clear the key cache when the name is reset self._cached_keys = None __array_priority__ = 11 # higher than numpy.ndarray and numpy.matrix def __array__(self, dtype=None, **kwargs): x = self.compute() if dtype and x.dtype != dtype: x = x.astype(dtype) if not isinstance(x, np.ndarray): x = np.array(x) return x @property def _elemwise(self): return elemwise @wraps(store) def store(self, target, **kwargs): return store([self], [target], **kwargs) def to_hdf5(self, filename, datapath, **kwargs): """ Store array in HDF5 file >>> x.to_hdf5('myfile.hdf5', '/x') # doctest: +SKIP Optionally provide arguments as though to ``h5py.File.create_dataset`` >>> x.to_hdf5('myfile.hdf5', '/x', compression='lzf', shuffle=True) # doctest: +SKIP See Also -------- da.store h5py.File.create_dataset """ return to_hdf5(filename, datapath, self, **kwargs) def to_dask_dataframe(self, columns=None): """ Convert dask Array to dask Dataframe Parameters ---------- columns: list or string list of column names if DataFrame, single string if Series See Also -------- dask.dataframe.from_dask_array """ from ..dataframe import from_dask_array return from_dask_array(self, columns=columns) def __int__(self): return int(self.compute()) def __bool__(self): return bool(self.compute()) __nonzero__ = __bool__ # python 2 def __float__(self): return float(self.compute()) def __complex__(self): return complex(self.compute()) def __setitem__(self, key, value): from .routines import where if isinstance(key, Array): if isinstance(value, Array) and value.ndim > 1: raise ValueError('boolean index array should have 1 dimension') y = where(key, value, self) self.dtype = y.dtype self.dask = y.dask self.name = y.name return self else: raise NotImplementedError("Item assignment with %s not supported" % type(key)) def __getitem__(self, index): # Field access, e.g. 
x['a'] or x[['a', 'b']] if (isinstance(index, (str, unicode)) or (isinstance(index, list) and index and all(isinstance(i, (str, unicode)) for i in index))): out = 'getitem-' + tokenize(self, index) if isinstance(index, (str, unicode)): dt = self.dtype[index] else: dt = np.dtype([(name, self.dtype[name]) for name in index]) if dt.shape: new_axis = list(range(self.ndim, self.ndim + len(dt.shape))) chunks = self.chunks + tuple((i,) for i in dt.shape) return self.map_blocks(getitem, index, dtype=dt.base, name=out, chunks=chunks, new_axis=new_axis) else: return self.map_blocks(getitem, index, dtype=dt, name=out) if not isinstance(index, tuple): index = (index,) from .slicing import normalize_index, slice_with_dask_array index2 = normalize_index(index, self.shape) if any(isinstance(i, Array) for i in index2): self, index2 = slice_with_dask_array(self, index2) if all(isinstance(i, slice) and i == slice(None) for i in index2): return self out = 'getitem-' + tokenize(self, index2) dsk, chunks = slice_array(out, self.name, self.chunks, index2) dsk2 = sharedict.merge(self.dask, (out, dsk)) return Array(dsk2, out, chunks, dtype=self.dtype) def _vindex(self, key): if not isinstance(key, tuple): key = (key,) if any(k is None for k in key): raise IndexError( "vindex does not support indexing with None (np.newaxis), " "got {}".format(key)) if all(isinstance(k, slice) for k in key): raise IndexError( "vindex requires at least one non-slice to vectorize over. " "Use normal slicing instead when only using slices. Got: {}" .format(key)) return _vindex(self, *key) @property def vindex(self): """Vectorized indexing with broadcasting. This is equivalent to numpy's advanced indexing, using arrays that are broadcast against each other. This allows for pointwise indexing: >>> x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) >>> x = from_array(x, chunks=2) >>> x.vindex[[0, 1, 2], [0, 1, 2]].compute() array([1, 5, 9]) Mixed basic/advanced indexing with slices/arrays is also supported. The order of dimensions in the result follows those proposed for ndarray.vindex [1]_: the subspace spanned by arrays is followed by all slices. Note: ``vindex`` provides more general functionality than standard indexing, but it also has fewer optimizations and can be significantly slower. _[1]: https://github.com/numpy/numpy/pull/6256 """ return IndexCallable(self._vindex) @derived_from(np.ndarray) def dot(self, other): from .routines import tensordot return tensordot(self, other, axes=((self.ndim - 1,), (other.ndim - 2,))) @property def A(self): return self @property def T(self): return self.transpose() @derived_from(np.ndarray) def transpose(self, *axes): from .routines import transpose if not axes: axes = None elif len(axes) == 1 and isinstance(axes[0], Iterable): axes = axes[0] return transpose(self, axes=axes) @derived_from(np.ndarray) def ravel(self): from .routines import ravel return ravel(self) flatten = ravel @derived_from(np.ndarray) def choose(self, choices): from .routines import choose return choose(self, choices) @derived_from(np.ndarray) def reshape(self, *shape): from .reshape import reshape if len(shape) == 1 and not isinstance(shape[0], Number): shape = shape[0] return reshape(self, shape) def topk(self, k): """The top k elements of an array. See ``da.topk`` for docstring""" from .routines import topk return topk(k, self) def astype(self, dtype, **kwargs): """Copy of the array, cast to a specified type. Parameters ---------- dtype : str or dtype Typecode or data-type to which the array is cast. 
casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional Controls what kind of data casting may occur. Defaults to 'unsafe' for backwards compatibility. * 'no' means the data types should not be cast at all. * 'equiv' means only byte-order changes are allowed. * 'safe' means only casts which can preserve values are allowed. * 'same_kind' means only safe casts or casts within a kind, like float64 to float32, are allowed. * 'unsafe' means any data conversions may be done. copy : bool, optional By default, astype always returns a newly allocated array. If this is set to False and the `dtype` requirement is satisfied, the input array is returned instead of a copy. """ # Scalars don't take `casting` or `copy` kwargs - as such we only pass # them to `map_blocks` if specified by user (different than defaults). extra = set(kwargs) - {'casting', 'copy'} if extra: raise TypeError("astype does not take the following keyword " "arguments: {0!s}".format(list(extra))) casting = kwargs.get('casting', 'unsafe') copy = kwargs.get('copy', True) dtype = np.dtype(dtype) if self.dtype == dtype: return self elif not np.can_cast(self.dtype, dtype, casting=casting): raise TypeError("Cannot cast array from {0!r} to {1!r}" " according to the rule " "{2!r}".format(self.dtype, dtype, casting)) name = 'astype-' + tokenize(self, dtype, casting, copy) return self.map_blocks(chunk.astype, dtype=dtype, name=name, astype_dtype=dtype, **kwargs) def __abs__(self): return elemwise(operator.abs, self) def __add__(self, other): return elemwise(operator.add, self, other) def __radd__(self, other): return elemwise(operator.add, other, self) def __and__(self, other): return elemwise(operator.and_, self, other) def __rand__(self, other): return elemwise(operator.and_, other, self) def __div__(self, other): return elemwise(operator.div, self, other) def __rdiv__(self, other): return elemwise(operator.div, other, self) def __eq__(self, other): return elemwise(operator.eq, self, other) def __gt__(self, other): return elemwise(operator.gt, self, other) def __ge__(self, other): return elemwise(operator.ge, self, other) def __invert__(self): return elemwise(operator.invert, self) def __lshift__(self, other): return elemwise(operator.lshift, self, other) def __rlshift__(self, other): return elemwise(operator.lshift, other, self) def __lt__(self, other): return elemwise(operator.lt, self, other) def __le__(self, other): return elemwise(operator.le, self, other) def __mod__(self, other): return elemwise(operator.mod, self, other) def __rmod__(self, other): return elemwise(operator.mod, other, self) def __mul__(self, other): return elemwise(operator.mul, self, other) def __rmul__(self, other): return elemwise(operator.mul, other, self) def __ne__(self, other): return elemwise(operator.ne, self, other) def __neg__(self): return elemwise(operator.neg, self) def __or__(self, other): return elemwise(operator.or_, self, other) def __pos__(self): return self def __ror__(self, other): return elemwise(operator.or_, other, self) def __pow__(self, other): return elemwise(operator.pow, self, other) def __rpow__(self, other): return elemwise(operator.pow, other, self) def __rshift__(self, other): return elemwise(operator.rshift, self, other) def __rrshift__(self, other): return elemwise(operator.rshift, other, self) def __sub__(self, other): return elemwise(operator.sub, self, other) def __rsub__(self, other): return elemwise(operator.sub, other, self) def __truediv__(self, other): return elemwise(operator.truediv, self, other) def 
__rtruediv__(self, other): return elemwise(operator.truediv, other, self) def __floordiv__(self, other): return elemwise(operator.floordiv, self, other) def __rfloordiv__(self, other): return elemwise(operator.floordiv, other, self) def __xor__(self, other): return elemwise(operator.xor, self, other) def __rxor__(self, other): return elemwise(operator.xor, other, self) def __matmul__(self, other): from .routines import tensordot if not hasattr(other, 'ndim'): other = np.asarray(other) # account for array-like RHS if other.ndim > 2: msg = ('The matrix multiplication operator (@) is not yet ' 'implemented for higher-dimensional Dask arrays. Try ' '`dask.array.tensordot` and see the discussion at ' 'https://github.com/dask/dask/pull/2349 for details.') raise NotImplementedError(msg) return tensordot(self, other, axes=((self.ndim - 1,), (other.ndim - 2,))) def __rmatmul__(self, other): from .routines import tensordot if not hasattr(other, 'ndim'): other = np.asarray(other) # account for array-like on LHS if self.ndim > 2: msg = ('The matrix multiplication operator (@) is not yet ' 'implemented for higher-dimensional Dask arrays. Try ' '`dask.array.tensordot` and see the discussion at ' 'https://github.com/dask/dask/pull/2349 for details.') raise NotImplementedError(msg) return tensordot(other, self, axes=((other.ndim - 1,), (self.ndim - 2,))) @derived_from(np.ndarray) def any(self, axis=None, keepdims=False, split_every=None, out=None): from .reductions import any return any(self, axis=axis, keepdims=keepdims, split_every=split_every, out=out) @derived_from(np.ndarray) def all(self, axis=None, keepdims=False, split_every=None, out=None): from .reductions import all return all(self, axis=axis, keepdims=keepdims, split_every=split_every, out=out) @derived_from(np.ndarray) def min(self, axis=None, keepdims=False, split_every=None, out=None): from .reductions import min return min(self, axis=axis, keepdims=keepdims, split_every=split_every, out=out) @derived_from(np.ndarray) def max(self, axis=None, keepdims=False, split_every=None, out=None): from .reductions import max return max(self, axis=axis, keepdims=keepdims, split_every=split_every, out=out) @derived_from(np.ndarray) def argmin(self, axis=None, split_every=None, out=None): from .reductions import argmin return argmin(self, axis=axis, split_every=split_every, out=out) @derived_from(np.ndarray) def argmax(self, axis=None, split_every=None, out=None): from .reductions import argmax return argmax(self, axis=axis, split_every=split_every, out=out) @derived_from(np.ndarray) def sum(self, axis=None, dtype=None, keepdims=False, split_every=None, out=None): from .reductions import sum return sum(self, axis=axis, dtype=dtype, keepdims=keepdims, split_every=split_every, out=out) @derived_from(np.ndarray) def prod(self, axis=None, dtype=None, keepdims=False, split_every=None, out=None): from .reductions import prod return prod(self, axis=axis, dtype=dtype, keepdims=keepdims, split_every=split_every, out=out) @derived_from(np.ndarray) def mean(self, axis=None, dtype=None, keepdims=False, split_every=None, out=None): from .reductions import mean return mean(self, axis=axis, dtype=dtype, keepdims=keepdims, split_every=split_every, out=out) @derived_from(np.ndarray) def std(self, axis=None, dtype=None, keepdims=False, ddof=0, split_every=None, out=None): from .reductions import std return std(self, axis=axis, dtype=dtype, keepdims=keepdims, ddof=ddof, split_every=split_every, out=out) @derived_from(np.ndarray) def var(self, axis=None, dtype=None, 
keepdims=False, ddof=0, split_every=None, out=None): from .reductions import var return var(self, axis=axis, dtype=dtype, keepdims=keepdims, ddof=ddof, split_every=split_every, out=out) def moment(self, order, axis=None, dtype=None, keepdims=False, ddof=0, split_every=None, out=None): """Calculate the nth centralized moment. Parameters ---------- order : int Order of the moment that is returned, must be >= 2. axis : int, optional Axis along which the central moment is computed. The default is to compute the moment of the flattened array. dtype : data-type, optional Type to use in computing the moment. For arrays of integer type the default is float64; for arrays of float types it is the same as the array type. keepdims : bool, optional If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With this option, the result will broadcast correctly against the original array. ddof : int, optional "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is zero. Returns ------- moment : ndarray References ---------- .. [1] Pebay, Philippe (2008), "Formulas for Robust, One-Pass Parallel Computation of Covariances and Arbitrary-Order Statistical Moments" (PDF), Technical Report SAND2008-6212, Sandia National Laboratories """ from .reductions import moment return moment(self, order, axis=axis, dtype=dtype, keepdims=keepdims, ddof=ddof, split_every=split_every, out=out) def vnorm(self, ord=None, axis=None, keepdims=False, split_every=None, out=None): """ Vector norm """ from .reductions import vnorm return vnorm(self, ord=ord, axis=axis, keepdims=keepdims, split_every=split_every, out=out) @wraps(map_blocks) def map_blocks(self, func, *args, **kwargs): return map_blocks(func, self, *args, **kwargs) def map_overlap(self, func, depth, boundary=None, trim=True, **kwargs): """ Map a function over blocks of the array with some overlap We share neighboring zones between blocks of the array, then map a function, then trim away the neighboring strips. Parameters ---------- func: function The function to apply to each extended block depth: int, tuple, or dict The number of cells that each block should share with its neighbors If a tuple or dict this can be different per axis boundary: str, tuple, dict how to handle the boundaries. Values include 'reflect', 'periodic', 'nearest', 'none', or any constant value like 0 or np.nan trim: bool Whether or not to trim the excess after the map function. Set this to false if your mapping function does this for you. **kwargs: Other keyword arguments valid in ``map_blocks`` Examples -------- >>> x = np.array([1, 1, 2, 3, 3, 3, 2, 1, 1]) >>> x = from_array(x, chunks=5) >>> def derivative(x): ... 
return x - np.roll(x, 1) >>> y = x.map_overlap(derivative, depth=1, boundary=0) >>> y.compute() array([ 1, 0, 1, 1, 0, 0, -1, -1, 0]) >>> import dask.array as da >>> x = np.arange(16).reshape((4, 4)) >>> d = da.from_array(x, chunks=(2, 2)) >>> d.map_overlap(lambda x: x + x.size, depth=1).compute() array([[16, 17, 18, 19], [20, 21, 22, 23], [24, 25, 26, 27], [28, 29, 30, 31]]) >>> func = lambda x: x + x.size >>> depth = {0: 1, 1: 1} >>> boundary = {0: 'reflect', 1: 'none'} >>> d.map_overlap(func, depth, boundary).compute() # doctest: +NORMALIZE_WHITESPACE array([[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23], [24, 25, 26, 27]]) """ from .ghost import map_overlap return map_overlap(self, func, depth, boundary, trim, **kwargs) def cumsum(self, axis, dtype=None, out=None): """ See da.cumsum for docstring """ from .reductions import cumsum return cumsum(self, axis, dtype, out=out) def cumprod(self, axis, dtype=None, out=None): """ See da.cumprod for docstring """ from .reductions import cumprod return cumprod(self, axis, dtype, out=out) @derived_from(np.ndarray) def squeeze(self): from .routines import squeeze return squeeze(self) def rechunk(self, chunks, threshold=None, block_size_limit=None): """ See da.rechunk for docstring """ from . import rechunk # avoid circular import return rechunk(self, chunks, threshold, block_size_limit) @property def real(self): from .ufunc import real return real(self) @property def imag(self): from .ufunc import imag return imag(self) def conj(self): from .ufunc import conj return conj(self) @derived_from(np.ndarray) def clip(self, min=None, max=None): from .ufunc import clip return clip(self, min, max) def view(self, dtype, order='C'): """ Get a view of the array as a new data type Parameters ---------- dtype: The dtype by which to view the array order: string 'C' or 'F' (Fortran) ordering This reinterprets the bytes of the array under a new dtype. If that dtype does not have the same size as the original array then the shape will change. Beware that both numpy and dask.array can behave oddly when taking shape-changing views of arrays under Fortran ordering. Under some versions of NumPy this function will fail when taking shape-changing views of Fortran ordered arrays if the first dimension has chunks of size one. """ dtype = np.dtype(dtype) mult = self.dtype.itemsize / dtype.itemsize if order == 'C': chunks = self.chunks[:-1] + (tuple(ensure_int(c * mult) for c in self.chunks[-1]),) elif order == 'F': chunks = ((tuple(ensure_int(c * mult) for c in self.chunks[0]), ) + self.chunks[1:]) else: raise ValueError("Order must be one of 'C' or 'F'") return self.map_blocks(chunk.view, dtype, order=order, dtype=dtype, chunks=chunks) @derived_from(np.ndarray) def swapaxes(self, axis1, axis2): from .routines import swapaxes return swapaxes(self, axis1, axis2) @derived_from(np.ndarray) def round(self, decimals=0): from .routines import round return round(self, decimals=decimals) def copy(self): """ Copy array. This is a no-op for dask.arrays, which are immutable """ return Array(self.dask, self.name, self.chunks, self.dtype) def __deepcopy__(self, memo): c = self.copy() memo[id(self)] = c return c def to_delayed(self): """ Convert Array into dask Delayed objects Returns an array of values, one value per chunk. 
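A minimal sketch (illustrative chunking; the shapes follow from the chunk grid, and the lines are marked to be skipped):

>>> import dask.array as da  # doctest: +SKIP
>>> x = da.ones((4, 4), chunks=(2, 2))  # doctest: +SKIP
>>> parts = x.to_delayed()  # doctest: +SKIP
>>> parts.shape  # doctest: +SKIP
(2, 2)
>>> parts[0, 0].compute().shape  # doctest: +SKIP
(2, 2)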
See Also -------- dask.array.from_delayed """ from ..delayed import Delayed keys = self.__dask_keys__() dsk = self.__dask_optimize__(self.__dask_graph__(), keys) L = ndeepmap(self.ndim, lambda k: Delayed(k, dsk), keys) return np.array(L, dtype=object) @derived_from(np.ndarray) def repeat(self, repeats, axis=None): from .creation import repeat return repeat(self, repeats, axis=axis) @derived_from(np.ndarray) def nonzero(self): from .routines import nonzero return nonzero(self) def ensure_int(f): i = int(f) if i != f: raise ValueError("Could not coerce %f to integer" % f) return i def normalize_chunks(chunks, shape=None): """ Normalize chunks to tuple of tuples >>> normalize_chunks((2, 2), shape=(5, 6)) ((2, 2, 1), (2, 2, 2)) >>> normalize_chunks(((2, 2, 1), (2, 2, 2)), shape=(4, 6)) # Idempotent ((2, 2, 1), (2, 2, 2)) >>> normalize_chunks([[2, 2], [3, 3]]) # Cleans up lists to tuples ((2, 2), (3, 3)) >>> normalize_chunks(10, shape=(30, 5)) # Supports integer inputs ((10, 10, 10), (5,)) >>> normalize_chunks((-1,), shape=(10,)) # -1 gets mapped to full size ((10,),) >>> normalize_chunks((), shape=(0, 0)) # respects null dimensions ((0,), (0,)) """ if chunks is None: raise ValueError(CHUNKS_NONE_ERROR_MESSAGE) if type(chunks) is not tuple: if type(chunks) is list: chunks = tuple(chunks) if isinstance(chunks, Number): chunks = (chunks,) * len(shape) if not chunks and shape and all(s == 0 for s in shape): chunks = ((0,),) * len(shape) if shape and len(chunks) != len(shape): if not (len(shape) == 1 and sum(chunks) == shape[0]): raise ValueError( "Chunks and shape must be of the same length/dimension. " "Got chunks=%s, shape=%s" % (chunks, shape)) if shape is not None: chunks = tuple(c if c not in {None, -1} else s for c, s in zip(chunks, shape)) if chunks and shape is not None: chunks = sum((blockdims_from_blockshape((s,), (c,)) if not isinstance(c, (tuple, list)) else (c,) for s, c in zip(shape, chunks)), ()) for c in chunks: if not c: raise ValueError("Empty tuples are not allowed in chunks. Express " "zero length dimensions with 0(s) in chunks") return tuple(tuple(int(x) if not math.isnan(x) else x for x in c) for c in chunks) def from_array(x, chunks, name=None, lock=False, asarray=True, fancy=True, getitem=None): """ Create dask array from something that looks like an array Input must have a ``.shape`` and support numpy-style slicing. Parameters ---------- x : array_like chunks : int, tuple How to chunk the array. Must be one of the following forms: - A blocksize like 1000. - A blockshape like (1000, 1000). - Explicit sizes of all blocks along all dimensions like ((1000, 1000, 500), (400, 400)). -1 as a blocksize indicates the size of the corresponding dimension. name : str, optional The key name to use for the array. Defaults to a hash of ``x``. Use ``name=False`` to generate a random name instead of hashing (fast) lock : bool or Lock, optional If ``x`` doesn't support concurrent reads then provide a lock here, or pass in True to have dask.array create one for you. asarray : bool, optional If True (default), then chunks will be converted to instances of ``ndarray``. Set to False to pass passed chunks through unchanged. fancy : bool, optional If ``x`` doesn't support fancy indexing (e.g. indexing with lists or arrays) then set to False. Default is True. 
Examples -------- >>> x = h5py.File('...')['/data/path'] # doctest: +SKIP >>> a = da.from_array(x, chunks=(1000, 1000)) # doctest: +SKIP If your underlying datastore does not support concurrent reads then include the ``lock=True`` keyword argument or ``lock=mylock`` if you want multiple arrays to coordinate around the same lock. >>> a = da.from_array(x, chunks=(1000, 1000), lock=True) # doctest: +SKIP """ chunks = normalize_chunks(chunks, x.shape) if len(chunks) != len(x.shape): raise ValueError("Input array has %d dimensions but the supplied " "chunks has only %d dimensions" % (len(x.shape), len(chunks))) if tuple(map(sum, chunks)) != x.shape: raise ValueError("Chunks do not add up to shape. " "Got chunks=%s, shape=%s" % (chunks, x.shape)) if name in (None, True): token = tokenize(x, chunks) original_name = 'array-original-' + token name = name or 'array-' + token elif name is False: original_name = name = 'array-' + str(uuid.uuid1()) else: original_name = name if lock is True: lock = SerializableLock() if getitem is None: getitem = getter if fancy else getter_nofancy dsk = getem(original_name, chunks, getitem=getitem, shape=x.shape, out_name=name, lock=lock, asarray=asarray) dsk[original_name] = x return Array(dsk, name, chunks, dtype=x.dtype) def from_delayed(value, shape, dtype, name=None): """ Create a dask array from a dask delayed value This routine is useful for constructing dask arrays in an ad-hoc fashion using dask delayed, particularly when combined with stack and concatenate. The dask array will consist of a single chunk. Examples -------- >>> from dask import delayed >>> value = delayed(np.ones)(5) >>> array = from_delayed(value, (5,), float) >>> array dask.array >>> array.compute() array([ 1., 1., 1., 1., 1.]) """ from dask.delayed import delayed, Delayed if not isinstance(value, Delayed) and hasattr(value, 'key'): value = delayed(value) name = name or 'from-value-' + tokenize(value, shape, dtype) dsk = {(name,) + (0,) * len(shape): value.key} chunks = tuple((d,) for d in shape) return Array(sharedict.merge(value.dask, (name, dsk)), name, chunks, dtype) def from_func(func, shape, dtype=None, name=None, args=(), kwargs={}): """ Create dask array in a single block by calling a function Calling the provided function with func(*args, **kwargs) should return a NumPy array of the indicated shape and dtype. Examples -------- >>> a = from_func(np.arange, (3,), dtype='i8', args=(3,)) >>> a.compute() array([0, 1, 2]) This works particularly well when coupled with dask.array functions like concatenate and stack: >>> arrays = [from_func(np.array, (), dtype='i8', args=(n,)) for n in range(5)] >>> stack(arrays).compute() array([0, 1, 2, 3, 4]) """ name = name or 'from_func-' + tokenize(func, shape, dtype, args, kwargs) if args or kwargs: func = partial(func, *args, **kwargs) dsk = {(name,) + (0,) * len(shape): (func,)} chunks = tuple((i,) for i in shape) return Array(dsk, name, chunks, dtype) def common_blockdim(blockdims): """ Find the common block dimensions from the list of block dimensions Currently only implements the simplest possible heuristic: the common block-dimension is the only one that does not span fully span a dimension. This is a conservative choice that allows us to avoid potentially very expensive rechunking. Assumes that each element of the input block dimensions has all the same sum (i.e., that they correspond to dimensions of the same size). 
Examples -------- >>> common_blockdim([(3,), (2, 1)]) (2, 1) >>> common_blockdim([(1, 2), (2, 1)]) (1, 1, 1) >>> common_blockdim([(2, 2), (3, 1)]) # doctest: +SKIP Traceback (most recent call last): ... ValueError: Chunks do not align """ if not any(blockdims): return () non_trivial_dims = set([d for d in blockdims if len(d) > 1]) if len(non_trivial_dims) == 1: return first(non_trivial_dims) if len(non_trivial_dims) == 0: return max(blockdims, key=first) if np.isnan(sum(map(sum, blockdims))): raise ValueError("Arrays chunk sizes are unknown: %s", blockdims) if len(set(map(sum, non_trivial_dims))) > 1: raise ValueError("Chunks do not add up to same value", blockdims) # We have multiple non-trivial chunks on this axis # e.g. (5, 2) and (4, 3) # We create a single chunk tuple with the same total length # that evenly divides both, e.g. (4, 1, 2) # To accomplish this we walk down all chunk tuples together, finding the # smallest element, adding it to the output, and subtracting it from all # other elements and remove the element itself. We stop once we have # burned through all of the chunk tuples. # For efficiency's sake we reverse the lists so that we can pop off the end rchunks = [list(ntd)[::-1] for ntd in non_trivial_dims] total = sum(first(non_trivial_dims)) i = 0 out = [] while i < total: m = min(c[-1] for c in rchunks) out.append(m) for c in rchunks: c[-1] -= m if c[-1] == 0: c.pop() i += m return tuple(out) def unify_chunks(*args, **kwargs): """ Unify chunks across a sequence of arrays Parameters ---------- *args: sequence of Array, index pairs Sequence like (x, 'ij', y, 'jk', z, 'i') Examples -------- >>> import dask.array as da >>> x = da.ones(10, chunks=((5, 2, 3),)) >>> y = da.ones(10, chunks=((2, 3, 5),)) >>> chunkss, arrays = unify_chunks(x, 'i', y, 'i') >>> chunkss {'i': (2, 3, 2, 3)} >>> x = da.ones((100, 10), chunks=(20, 5)) >>> y = da.ones((10, 100), chunks=(4, 50)) >>> chunkss, arrays = unify_chunks(x, 'ij', y, 'jk') >>> chunkss # doctest: +SKIP {'k': (50, 50), 'i': (20, 20, 20, 20, 20), 'j': (4, 1, 3, 2)} Returns ------- chunkss : dict Map like {index: chunks}. arrays : list List of rechunked arrays. 
See Also -------- common_blockdim """ arginds = [(asarray(a) if ind is not None else a, ind) for a, ind in partition(2, args)] # [x, ij, y, jk] args = list(concat(arginds)) # [(x, ij), (y, jk)] warn = kwargs.get('warn', True) arrays, inds = zip(*arginds) if all(ind == inds[0] for ind in inds) and all(a.chunks == arrays[0].chunks for a in arrays): return dict(zip(inds[0], arrays[0].chunks)), arrays nameinds = [(a.name if i is not None else a, i) for a, i in arginds] blockdim_dict = {a.name: a.chunks for a, ind in arginds if ind is not None} chunkss = broadcast_dimensions(nameinds, blockdim_dict, consolidate=common_blockdim) max_parts = max(arg.npartitions for arg, ind in arginds if ind is not None) nparts = np.prod(list(map(len, chunkss.values()))) if warn and nparts and nparts >= max_parts * 10: warnings.warn("Increasing number of chunks by factor of %d" % (nparts / max_parts)) arrays = [] for a, i in arginds: if i is None: arrays.append(a) else: chunks = tuple(chunkss[j] if a.shape[n] > 1 else a.shape[n] if not np.isnan(sum(chunkss[j])) else None for n, j in enumerate(i)) if chunks != a.chunks and all(a.chunks): arrays.append(a.rechunk(chunks)) else: arrays.append(a) return chunkss, arrays def atop(func, out_ind, *args, **kwargs): """ Tensor operation: Generalized inner and outer products A broad class of blocked algorithms and patterns can be specified with a concise multi-index notation. The ``atop`` function applies an in-memory function across multiple blocks of multiple inputs in a variety of ways. Many dask.array operations are special cases of atop including elementwise, broadcasting, reductions, tensordot, and transpose. Parameters ---------- func : callable Function to apply to individual tuples of blocks out_ind : iterable Block pattern of the output, something like 'ijk' or (1, 2, 3) *args : sequence of Array, index pairs Sequence like (x, 'ij', y, 'jk', z, 'i') **kwargs : dict Extra keyword arguments to pass to function dtype : np.dtype Datatype of resulting array. concatenate : bool, keyword only If true concatenate arrays along dummy indices, else provide lists adjust_chunks : dict Dictionary mapping index to function to be applied to chunk sizes new_axes : dict, keyword only New indexes and their dimension lengths Examples -------- 2D embarrassingly parallel operation from two arrays, x, and y. >>> z = atop(operator.add, 'ij', x, 'ij', y, 'ij', dtype='f8') # z = x + y # doctest: +SKIP Outer product multiplying x by y, two 1-d vectors >>> z = atop(operator.mul, 'ij', x, 'i', y, 'j', dtype='f8') # doctest: +SKIP z = x.T >>> z = atop(np.transpose, 'ji', x, 'ij', dtype=x.dtype) # doctest: +SKIP The transpose case above is illustrative because it does same transposition both on each in-memory block by calling ``np.transpose`` and on the order of the blocks themselves, by switching the order of the index ``ij -> ji``. We can compose these same patterns with more variables and more complex in-memory functions z = X + Y.T >>> z = atop(lambda x, y: x + y.T, 'ij', x, 'ij', y, 'ji', dtype='f8') # doctest: +SKIP Any index, like ``i`` missing from the output index is interpreted as a contraction (note that this differs from Einstein convention; repeated indices do not imply contraction.) In the case of a contraction the passed function should expect an iterable of blocks on any array that holds that index. To receive arrays concatenated along contracted dimensions instead pass ``concatenate=True``. 
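As a hedged sketch of the ``concatenate=True`` form: ``rowsum`` below is an illustrative helper (not part of this module) and ``x`` is assumed to be a 2-d dask array as in the examples above; each call then receives a whole row of blocks joined along the contracted ``j`` axis.

>>> def rowsum(block):
...     return block.sum(axis=1)
>>> z = atop(rowsum, 'i', x, 'ij', concatenate=True, dtype='f8')  # doctest: +SKIP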
Inner product multiplying x by y, two 1-d vectors >>> def sequence_dot(x_blocks, y_blocks): ... result = 0 ... for x, y in zip(x_blocks, y_blocks): ... result += x.dot(y) ... return result >>> z = atop(sequence_dot, '', x, 'i', y, 'i', dtype='f8') # doctest: +SKIP Add new single-chunk dimensions with the ``new_axes=`` keyword, including the length of the new dimension. New dimensions will always be in a single chunk. >>> def f(x): ... return x[:, None] * np.ones((1, 5)) >>> z = atop(f, 'az', x, 'a', new_axes={'z': 5}, dtype=x.dtype) # doctest: +SKIP If the applied function changes the size of each chunk you can specify this with a ``adjust_chunks={...}`` dictionary holding a function for each index that modifies the dimension size in that index. >>> def double(x): ... return np.concatenate([x, x]) >>> y = atop(double, 'ij', x, 'ij', ... adjust_chunks={'i': lambda n: 2 * n}, dtype=x.dtype) # doctest: +SKIP Include literals by indexing with None >>> y = atop(add, 'ij', x, 'ij', 1234, None, dtype=x.dtype) # doctest: +SKIP See Also -------- top - dict formulation of this function, contains most logic """ out = kwargs.pop('name', None) # May be None at this point token = kwargs.pop('token', None) dtype = kwargs.pop('dtype', None) adjust_chunks = kwargs.pop('adjust_chunks', None) new_axes = kwargs.get('new_axes', {}) if dtype is None: raise ValueError("Must specify dtype of output array") chunkss, arrays = unify_chunks(*args) for k, v in new_axes.items(): chunkss[k] = (v,) arginds = list(zip(arrays, args[1::2])) numblocks = {a.name: a.numblocks for a, ind in arginds if ind is not None} argindsstr = list(concat([(a if ind is None else a.name, ind) for a, ind in arginds])) # Finish up the name if not out: out = '%s-%s' % (token or funcname(func).strip('_'), tokenize(func, out_ind, argindsstr, dtype, **kwargs)) dsk = top(func, out, out_ind, *argindsstr, numblocks=numblocks, **kwargs) dsks = [a.dask for a, ind in arginds if ind is not None] chunks = [chunkss[i] for i in out_ind] if adjust_chunks: for i, ind in enumerate(out_ind): if ind in adjust_chunks: if callable(adjust_chunks[ind]): chunks[i] = tuple(map(adjust_chunks[ind], chunks[i])) elif isinstance(adjust_chunks[ind], int): chunks[i] = tuple(adjust_chunks[ind] for _ in chunks[i]) elif isinstance(adjust_chunks[ind], (tuple, list)): chunks[i] = tuple(adjust_chunks[ind]) else: raise NotImplementedError( "adjust_chunks values must be callable, int, or tuple") chunks = tuple(chunks) return Array(sharedict.merge((out, dsk), *dsks), out, chunks, dtype=dtype) def unpack_singleton(x): """ >>> unpack_singleton([[[[1]]]]) 1 >>> unpack_singleton(np.array(np.datetime64('2000-01-01'))) array(datetime.date(2000, 1, 1), dtype='datetime64[D]') """ while isinstance(x, (list, tuple)): try: x = x[0] except (IndexError, TypeError, KeyError): break return x def concatenate(seq, axis=0, allow_unknown_chunksizes=False): """ Concatenate arrays along an existing axis Given a sequence of dask Arrays form a new dask Array by stacking them along an existing dimension (axis=0 by default) Parameters ---------- seq: list of dask.arrays axis: int Dimension along which to align all of the arrays allow_unknown_chunksizes: bool Allow unknown chunksizes, such as come from converting from dask dataframes. Dask.array is unable to verify that chunks line up. If data comes from differently aligned sources then this can cause unexpected results. 
Examples -------- Create slices >>> import dask.array as da >>> import numpy as np >>> data = [from_array(np.ones((4, 4)), chunks=(2, 2)) ... for i in range(3)] >>> x = da.concatenate(data, axis=0) >>> x.shape (12, 4) >>> da.concatenate(data, axis=1).shape (4, 12) Result is a new dask Array See Also -------- stack """ n = len(seq) ndim = len(seq[0].shape) if axis < 0: axis = ndim + axis if axis >= ndim: msg = ("Axis must be less than the number of dimensions" "\nData has %d dimensions, but got axis=%d") raise ValueError(msg % (ndim, axis)) if n == 1: return seq[0] if (not allow_unknown_chunksizes and not all(i == axis or all(x.shape[i] == seq[0].shape[i] for x in seq) for i in range(ndim))): if any(map(np.isnan, seq[0].shape)): raise ValueError("Tried to concatenate arrays with unknown" " shape %s. To force concatenation pass" " allow_unknown_chunksizes=True." % str(seq[0].shape)) raise ValueError("Shapes do not align: %s" % str([x.shape for x in seq])) inds = [list(range(ndim)) for i in range(n)] for i, ind in enumerate(inds): ind[axis] = -(i + 1) uc_args = list(concat(zip(seq, inds))) _, seq = unify_chunks(*uc_args, warn=False) bds = [a.chunks for a in seq] chunks = (seq[0].chunks[:axis] + (sum([bd[axis] for bd in bds], ()), ) + seq[0].chunks[axis + 1:]) cum_dims = [0] + list(accumulate(add, [len(a.chunks[axis]) for a in seq])) dt = reduce(np.promote_types, [a.dtype for a in seq]) seq = [x.astype(dt) for x in seq] names = [a.name for a in seq] name = 'concatenate-' + tokenize(names, axis) keys = list(product([name], *[range(len(bd)) for bd in chunks])) values = [(names[bisect(cum_dims, key[axis + 1]) - 1],) + key[1:axis + 1] + (key[axis + 1] - cum_dims[bisect(cum_dims, key[axis + 1]) - 1], ) + key[axis + 2:] for key in keys] dsk = dict(zip(keys, values)) dsk2 = sharedict.merge((name, dsk), * [a.dask for a in seq]) return Array(dsk2, name, chunks, dtype=dt) def insert_to_ooc(out, arr, lock=True, region=None): if lock is True: lock = Lock() def store(out, x, index, lock, region): if lock: lock.acquire() try: if region is None: out[index] = np.asanyarray(x) else: out[fuse_slice(region, index)] = np.asanyarray(x) finally: if lock: lock.release() return None slices = slices_from_chunks(arr.chunks) name = 'store-%s' % arr.name dsk = {(name,) + t[1:]: (store, out, t, slc, lock, region) for t, slc in zip(core.flatten(arr.__dask_keys__()), slices)} return dsk def asarray(a): """Convert the input to a dask array. Parameters ---------- a : array-like Input data, in any form that can be converted to a dask array. Returns ------- out : dask array Dask array interpretation of a. Examples -------- >>> import dask.array as da >>> import numpy as np >>> x = np.arange(3) >>> da.asarray(x) dask.array >>> y = [[1, 2, 3], [4, 5, 6]] >>> da.asarray(y) dask.array """ if isinstance(a, Array): return a if isinstance(a, (list, tuple)) and any(isinstance(i, Array) for i in a): a = stack(a) elif not isinstance(getattr(a, 'shape', None), Iterable): a = np.asarray(a) return from_array(a, chunks=a.shape, getitem=getter_inline) def asanyarray(a): """Convert the input to a dask array. Subclasses of ``np.ndarray`` will be passed through as chunks unchanged. Parameters ---------- a : array-like Input data, in any form that can be converted to a dask array. Returns ------- out : dask array Dask array interpretation of a.
Examples -------- >>> import dask.array as da >>> import numpy as np >>> x = np.arange(3) >>> da.asanyarray(x) dask.array >>> y = [[1, 2, 3], [4, 5, 6]] >>> da.asanyarray(y) dask.array """ if isinstance(a, Array): return a if isinstance(a, (list, tuple)) and any(isinstance(i, Array) for i in a): a = stack(a) elif not isinstance(getattr(a, 'shape', None), Iterable): a = np.asanyarray(a) return from_array(a, chunks=a.shape, getitem=getter_inline, asarray=False) def is_scalar_for_elemwise(arg): """ >>> is_scalar_for_elemwise(42) True >>> is_scalar_for_elemwise('foo') True >>> is_scalar_for_elemwise(True) True >>> is_scalar_for_elemwise(np.array(42)) True >>> is_scalar_for_elemwise([1, 2, 3]) True >>> is_scalar_for_elemwise(np.array([1, 2, 3])) False >>> is_scalar_for_elemwise(from_array(np.array(0), chunks=())) False >>> is_scalar_for_elemwise(np.dtype('i4')) True """ return (np.isscalar(arg) or not isinstance(getattr(arg, 'shape', None), Iterable) or isinstance(arg, np.dtype) or (isinstance(arg, np.ndarray) and arg.ndim == 0)) def broadcast_shapes(*shapes): """ Determines output shape from broadcasting arrays. Parameters ---------- shapes : tuples The shapes of the arguments. Returns ------- output_shape : tuple Raises ------ ValueError If the input shapes cannot be successfully broadcast together. """ if len(shapes) == 1: return shapes[0] out = [] for sizes in zip_longest(*map(reversed, shapes), fillvalue=-1): dim = 0 if 0 in sizes else max(sizes) if any(i not in [-1, 0, 1, dim] and not np.isnan(i) for i in sizes): raise ValueError("operands could not be broadcast together with " "shapes {0}".format(' '.join(map(str, shapes)))) out.append(dim) return tuple(reversed(out)) def elemwise(op, *args, **kwargs): """ Apply elementwise function across arguments Respects broadcasting rules Examples -------- >>> elemwise(add, x, y) # doctest: +SKIP >>> elemwise(sin, x) # doctest: +SKIP See Also -------- atop """ out = kwargs.pop('out', None) if not set(['name', 'dtype']).issuperset(kwargs): msg = "%s does not take the following keyword arguments %s" raise TypeError(msg % (op.__name__, str(sorted(set(kwargs) - set(['name', 'dtype']))))) args = [np.asarray(a) if isinstance(a, (list, tuple)) else a for a in args] shapes = [getattr(arg, 'shape', ()) for arg in args] shapes = [s if isinstance(s, Iterable) else () for s in shapes] out_ndim = len(broadcast_shapes(*shapes)) # Raises ValueError if dimensions mismatch expr_inds = tuple(range(out_ndim))[::-1] need_enforce_dtype = False if 'dtype' in kwargs: dt = kwargs['dtype'] else: # We follow NumPy's rules for dtype promotion, which special cases # scalars and 0d ndarrays (which it considers equivalent) by using # their values to compute the result dtype: # https://github.com/numpy/numpy/issues/6240 # We don't inspect the values of 0d dask arrays, because these could # hold potentially very expensive calculations. Instead, we treat # them just like other arrays, and if necessary cast the result of op # to match. 
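# A hedged illustration of that special casing (the exact dtypes assume
# NumPy's legacy value-based promotion rules discussed in the issue above):
#     np.result_type(np.uint8, 1000)                     -> uint16 (scalar value inspected)
#     np.result_type(np.uint8, np.empty(1, dtype='i8'))  -> int64  (pure dtype promotion)
# The one-element dummies built below force the second, value-independent
# behaviour when inferring the output dtype.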
vals = [np.empty((1,) * max(1, a.ndim), dtype=a.dtype) if not is_scalar_for_elemwise(a) else a for a in args] dt = apply_infer_dtype(op, vals, {}, 'elemwise', suggest_dtype=False) need_enforce_dtype = any(not is_scalar_for_elemwise(a) and a.ndim == 0 for a in args) name = kwargs.get('name', None) or '%s-%s' % (funcname(op), tokenize(op, dt, *args)) atop_kwargs = dict(dtype=dt, name=name, token=funcname(op).strip('_')) if need_enforce_dtype: atop_kwargs['enforce_dtype'] = dt atop_kwargs['enforce_dtype_function'] = op op = _enforce_dtype result = atop(op, expr_inds, *concat((a, tuple(range(a.ndim)[::-1]) if not is_scalar_for_elemwise(a) else None) for a in args), **atop_kwargs) return handle_out(out, result) def handle_out(out, result): """ Handle out parameters If out is a dask.array then this overwrites the contents of that array with the result """ if isinstance(out, tuple): if len(out) == 1: out = out[0] elif len(out) > 1: raise NotImplementedError("The out parameter is not fully supported") else: out = None if isinstance(out, Array): if out.shape != result.shape: raise NotImplementedError( "Mismatched shapes between result and out parameter. " "out=%s, result=%s" % (str(out.shape), str(result.shape))) out._chunks = result.chunks out.dask = result.dask out.dtype = result.dtype out.name = result.name elif out is not None: msg = ("The out parameter is not fully supported." " Received type %s, expected Dask Array" % type(out).__name__) raise NotImplementedError(msg) else: return result def _enforce_dtype(*args, **kwargs): """Calls a function and converts its result to the given dtype. The parameters have deliberately been given unwieldy names to avoid clashes with keyword arguments consumed by atop A dtype of `object` is treated as a special case and not enforced, because it is used as a dummy value in some places when the result will not be a block in an Array. Parameters ---------- enforce_dtype : dtype Result dtype enforce_dtype_function : callable The wrapped function, which will be passed the remaining arguments """ dtype = kwargs.pop('enforce_dtype') function = kwargs.pop('enforce_dtype_function') result = function(*args, **kwargs) if dtype != result.dtype and dtype != object: if not np.can_cast(result, dtype, casting='same_kind'): raise ValueError("Inferred dtype from function %r was %r " "but got %r, which can't be cast using " "casting='same_kind'" % (funcname(function), str(dtype), str(result.dtype))) if np.isscalar(result): # scalar astype method doesn't take the keyword arguments, so # have to convert via 0-dimensional array and back. 
result = result.astype(dtype) else: try: result = result.astype(dtype, copy=False) except TypeError: # Missing copy kwarg result = result.astype(dtype) return result @wraps(chunk.broadcast_to) def broadcast_to(x, shape): x = asarray(x) shape = tuple(shape) if x.shape == shape: return x ndim_new = len(shape) - x.ndim if ndim_new < 0 or any(new != old for new, old in zip(shape[ndim_new:], x.shape) if old != 1): raise ValueError('cannot broadcast shape %s to shape %s' % (x.shape, shape)) name = 'broadcast_to-' + tokenize(x, shape) chunks = (tuple((s,) for s in shape[:ndim_new]) + tuple(bd if old > 1 else (new,) for bd, old, new in zip(x.chunks, x.shape, shape[ndim_new:]))) dsk = {(name,) + (0,) * ndim_new + key[1:]: (chunk.broadcast_to, key, shape[:ndim_new] + tuple(bd[i] for i, bd in zip(key[1:], chunks[ndim_new:]))) for key in core.flatten(x.__dask_keys__())} return Array(sharedict.merge((name, dsk), x.dask), name, chunks, dtype=x.dtype) def offset_func(func, offset, *args): """ Offsets inputs by offset >>> double = lambda x: x * 2 >>> f = offset_func(double, (10,)) >>> f(1) 22 >>> f(300) 620 """ def _offset(*args): args2 = list(map(add, args, offset)) return func(*args2) with ignoring(Exception): _offset.__name__ = 'offset_' + func.__name__ return _offset def chunks_from_arrays(arrays): """ Chunks tuple from nested list of arrays >>> x = np.array([1, 2]) >>> chunks_from_arrays([x, x]) ((2, 2),) >>> x = np.array([[1, 2]]) >>> chunks_from_arrays([[x], [x]]) ((1, 1), (2,)) >>> x = np.array([[1, 2]]) >>> chunks_from_arrays([[x, x]]) ((1,), (2, 2)) >>> chunks_from_arrays([1, 1]) ((1, 1),) """ if not arrays: return () result = [] dim = 0 def shape(x): try: return x.shape except AttributeError: return (1,) while isinstance(arrays, (list, tuple)): result.append(tuple([shape(deepfirst(a))[dim] for a in arrays])) arrays = arrays[0] dim += 1 return tuple(result) def deepfirst(seq): """ First element in a nested list >>> deepfirst([[[1, 2], [3, 4]], [5, 6], [7, 8]]) 1 """ if not isinstance(seq, (list, tuple)): return seq else: return deepfirst(seq[0]) def ndimlist(seq): if not isinstance(seq, (list, tuple)): return 0 elif not seq: return 1 else: return 1 + ndimlist(seq[0]) def shapelist(a): """ Get the shape of nested list """ if type(a) is list: return tuple([len(a)] + list(shapelist(a[0]))) else: return () def reshapelist(shape, seq): """ Reshape iterator to nested shape >>> reshapelist((2, 3), range(6)) [[0, 1, 2], [3, 4, 5]] """ if len(shape) == 1: return list(seq) else: n = int(len(seq) / shape[0]) return [reshapelist(shape[1:], part) for part in partition(n, seq)] def transposelist(arrays, axes, extradims=0): """ Permute axes of nested list >>> transposelist([[1,1,1],[1,1,1]], [2,1]) [[[1, 1], [1, 1], [1, 1]]] >>> transposelist([[1,1,1],[1,1,1]], [2,1], extradims=1) [[[[1], [1]], [[1], [1]], [[1], [1]]]] """ if len(axes) != ndimlist(arrays): raise ValueError("Length of axes should equal depth of nested arrays") if extradims < 0: raise ValueError("`newdims` should be positive") if len(axes) > len(set(axes)): raise ValueError("`axes` should be unique") ndim = max(axes) + 1 shape = shapelist(arrays) newshape = [shape[axes.index(i)] if i in axes else 1 for i in range(ndim + extradims)] result = list(core.flatten(arrays)) return reshapelist(newshape, result) def stack(seq, axis=0): """ Stack arrays along a new axis Given a sequence of dask Arrays form a new dask Array by stacking them along a new dimension (axis=0 by default) Examples -------- Create slices >>> import dask.array as da >>> import 
numpy as np >>> data = [from_array(np.ones((4, 4)), chunks=(2, 2)) ... for i in range(3)] >>> x = da.stack(data, axis=0) >>> x.shape (3, 4, 4) >>> da.stack(data, axis=1).shape (4, 3, 4) >>> da.stack(data, axis=-1).shape (4, 4, 3) Result is a new dask Array See Also -------- concatenate """ n = len(seq) ndim = len(seq[0].shape) if axis < 0: axis = ndim + axis + 1 if axis > ndim: raise ValueError("Axis must not be greater than number of dimensions" "\nData has %d dimensions, but got axis=%d" % (ndim, axis)) if not all(x.shape == seq[0].shape for x in seq): raise ValueError("Stacked arrays must have the same shape. Got %s", [x.shape for x in seq]) ind = list(range(ndim)) uc_args = list(concat((x, ind) for x in seq)) _, seq = unify_chunks(*uc_args) dt = reduce(np.promote_types, [a.dtype for a in seq]) seq = [x.astype(dt) for x in seq] assert len(set(a.chunks for a in seq)) == 1 # same chunks chunks = (seq[0].chunks[:axis] + ((1,) * n,) + seq[0].chunks[axis:]) names = [a.name for a in seq] name = 'stack-' + tokenize(names, axis) keys = list(product([name], *[range(len(bd)) for bd in chunks])) inputs = [(names[key[axis + 1]], ) + key[1:axis + 1] + key[axis + 2:] for key in keys] values = [(getitem, inp, (slice(None, None, None),) * axis + (None, ) + (slice(None, None, None), ) * (ndim - axis)) for inp in inputs] dsk = dict(zip(keys, values)) dsk2 = sharedict.merge((name, dsk), *[a.dask for a in seq]) return Array(dsk2, name, chunks, dtype=dt) def concatenate3(arrays): """ Recursive np.concatenate Input should be a nested list of numpy arrays arranged in the order they should appear in the array itself. Each array should have the same number of dimensions as the desired output and the nesting of the lists. >>> x = np.array([[1, 2]]) >>> concatenate3([[x, x, x], [x, x, x]]) array([[1, 2, 1, 2, 1, 2], [1, 2, 1, 2, 1, 2]]) >>> concatenate3([[x, x], [x, x], [x, x]]) array([[1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2]]) """ arrays = concrete(arrays) if not arrays: return np.empty(0) advanced = max(core.flatten(arrays, container=(list, tuple)), key=lambda x: getattr(x, '__array_priority__', 0)) if concatenate_lookup.dispatch(type(advanced)) is not np.concatenate: x = unpack_singleton(arrays) return _concatenate2(arrays, axes=list(range(x.ndim))) ndim = ndimlist(arrays) if not ndim: return arrays chunks = chunks_from_arrays(arrays) shape = tuple(map(sum, chunks)) def dtype(x): try: return x.dtype except AttributeError: return type(x) result = np.empty(shape=shape, dtype=dtype(deepfirst(arrays))) for (idx, arr) in zip(slices_from_chunks(chunks), core.flatten(arrays)): if hasattr(arr, 'ndim'): while arr.ndim < ndim: arr = arr[None, ...] result[idx] = arr return result def concatenate_axes(arrays, axes): """ Recursively call np.concatenate along axes """ if len(axes) != ndimlist(arrays): raise ValueError("Length of axes should equal depth of nested arrays") extradims = max(0, deepfirst(arrays).ndim - (max(axes) + 1)) return concatenate3(transposelist(arrays, axes, extradims=extradims)) def to_hdf5(filename, *args, **kwargs): """ Store arrays in HDF5 file This saves several dask arrays into several datapaths in an HDF5 file. It creates the necessary datasets and handles clean file opening/closing. 
>>> da.to_hdf5('myfile.hdf5', '/x', x) # doctest: +SKIP or >>> da.to_hdf5('myfile.hdf5', {'/x': x, '/y': y}) # doctest: +SKIP Optionally provide arguments as though to ``h5py.File.create_dataset`` >>> da.to_hdf5('myfile.hdf5', '/x', x, compression='lzf', shuffle=True) # doctest: +SKIP This can also be used as a method on a single Array >>> x.to_hdf5('myfile.hdf5', '/x') # doctest: +SKIP See Also -------- da.store h5py.File.create_dataset """ if len(args) == 1 and isinstance(args[0], dict): data = args[0] elif (len(args) == 2 and isinstance(args[0], str) and isinstance(args[1], Array)): data = {args[0]: args[1]} else: raise ValueError("Please provide {'/data/path': array} dictionary") chunks = kwargs.pop('chunks', True) import h5py with h5py.File(filename) as f: dsets = [f.require_dataset(dp, shape=x.shape, dtype=x.dtype, chunks=tuple([c[0] for c in x.chunks]) if chunks is True else chunks, **kwargs) for dp, x in data.items()] store(list(data.values()), dsets) def interleave_none(a, b): """ >>> interleave_none([0, None, 2, None], [1, 3]) (0, 1, 2, 3) """ result = [] i = j = 0 n = len(a) + len(b) while i + j < n: if a[i] is not None: result.append(a[i]) i += 1 else: result.append(b[j]) i += 1 j += 1 return tuple(result) def keyname(name, i, okey): """ >>> keyname('x', 3, [None, None, 0, 2]) ('x', 3, 0, 2) """ return (name, i) + tuple(k for k in okey if k is not None) def _vindex(x, *indexes): """Point wise indexing with broadcasting. >>> x = np.arange(56).reshape((7, 8)) >>> x array([[ 0, 1, 2, 3, 4, 5, 6, 7], [ 8, 9, 10, 11, 12, 13, 14, 15], [16, 17, 18, 19, 20, 21, 22, 23], [24, 25, 26, 27, 28, 29, 30, 31], [32, 33, 34, 35, 36, 37, 38, 39], [40, 41, 42, 43, 44, 45, 46, 47], [48, 49, 50, 51, 52, 53, 54, 55]]) >>> d = from_array(x, chunks=(3, 4)) >>> result = _vindex(d, [0, 1, 6, 0], [0, 1, 0, 7]) >>> result.compute() array([ 0, 9, 48, 7]) """ indexes = replace_ellipsis(x.ndim, indexes) partial_slices = {i: ind for i, ind in enumerate(indexes) if isinstance(ind, slice) and ind != slice(None)} if partial_slices: key = tuple(partial_slices.get(i, slice(None)) for i in range(len(indexes))) x = x[key] array_indexes = {i: np.asarray(ind) for i, ind in enumerate(indexes) if not isinstance(ind, slice)} if any(ind.dtype.kind == 'b' for ind in array_indexes.values()): raise IndexError('vindex does not support indexing with boolean arrays') try: broadcast_indexes = np.broadcast_arrays(*array_indexes.values()) except ValueError: # note: error message exactly matches numpy shapes_str = ' '.join(str(a.shape) for a in array_indexes.values()) raise IndexError('shape mismatch: indexing arrays could not be ' 'broadcast together with shapes ' + shapes_str) broadcast_shape = broadcast_indexes[0].shape lookup = dict(zip(array_indexes, broadcast_indexes)) flat_indexes = [lookup[i].ravel().tolist() if i in lookup else None for i in range(len(indexes))] flat_indexes.extend([None] * (x.ndim - len(flat_indexes))) result_1d = _vindex_1d(x, *flat_indexes) return result_1d.reshape(broadcast_shape + result_1d.shape[1:]) def _vindex_1d(x, *indexes): """Point wise indexing with only 1D lists and full slices.""" indexes = [list(index) if index is not None else index for index in indexes] bounds = [list(accumulate(add, (0,) + c)) for c in x.chunks] bounds2 = [b for i, b in zip(indexes, bounds) if i is not None] axis = _get_axis(indexes) token = tokenize(x, indexes) out_name = 'vindex-merge-' + token points = list() for i, idx in enumerate(zip(*[i for i in indexes if i is not None])): block_idx = [np.searchsorted(b, ind, 
'right') - 1 for b, ind in zip(bounds2, idx)] inblock_idx = [ind - bounds2[k][j] for k, (ind, j) in enumerate(zip(idx, block_idx))] points.append((i, tuple(block_idx), tuple(inblock_idx))) chunks = [c for i, c in zip(indexes, x.chunks) if i is None] chunks.insert(0, (len(points),) if points else (0,)) chunks = tuple(chunks) if points: per_block = groupby(1, points) per_block = dict((k, v) for k, v in per_block.items() if v) other_blocks = list(product(*[list(range(len(c))) if i is None else [None] for i, c in zip(indexes, x.chunks)])) full_slices = [slice(None, None) if i is None else None for i in indexes] name = 'vindex-slice-' + token dsk = dict((keyname(name, i, okey), (_vindex_transpose, (_vindex_slice, (x.name,) + interleave_none(okey, key), interleave_none(full_slices, list(zip(*pluck(2, per_block[key]))))), axis)) for i, key in enumerate(per_block) for okey in other_blocks) dsk.update((keyname('vindex-merge-' + token, 0, okey), (_vindex_merge, [list(pluck(0, per_block[key])) for key in per_block], [keyname(name, i, okey) for i in range(len(per_block))])) for okey in other_blocks) return Array(sharedict.merge(x.dask, (out_name, dsk)), out_name, chunks, x.dtype) # output has a zero dimension, just create a new zero-shape array with the # same dtype from .wrap import empty return empty(tuple(map(sum, chunks)), chunks=chunks, dtype=x.dtype, name=out_name) def _get_axis(indexes): """ Get axis along which point-wise slicing results lie This is mostly a hack because I can't figure out NumPy's rule on this and can't be bothered to go reading. >>> _get_axis([[1, 2], None, [1, 2], None]) 0 >>> _get_axis([None, [1, 2], [1, 2], None]) 1 >>> _get_axis([None, None, [1, 2], [1, 2]]) 2 """ ndim = len(indexes) indexes = [slice(None, None) if i is None else [0] for i in indexes] x = np.empty((2,) * ndim) x2 = x[tuple(indexes)] return x2.shape.index(1) def _vindex_slice(block, points): """ Pull out point-wise slices from block """ points = [p if isinstance(p, slice) else list(p) for p in points] return block[tuple(points)] def _vindex_transpose(block, axis): """ Rotate block so that points are on the first dimension """ axes = [axis] + list(range(axis)) + list(range(axis + 1, block.ndim)) return block.transpose(axes) def _vindex_merge(locations, values): """ >>> locations = [0], [2, 1] >>> values = [np.array([[1, 2, 3]]), ... np.array([[10, 20, 30], [40, 50, 60]])] >>> _vindex_merge(locations, values) array([[ 1, 2, 3], [40, 50, 60], [10, 20, 30]]) """ locations = list(map(list, locations)) values = list(values) n = sum(map(len, locations)) shape = list(values[0].shape) shape[0] = n shape = tuple(shape) dtype = values[0].dtype x = np.empty(shape, dtype=dtype) ind = [slice(None, None) for i in range(x.ndim)] for loc, val in zip(locations, values): ind[0] = loc x[tuple(ind)] = val return x def to_npy_stack(dirname, x, axis=0): """ Write dask array to a stack of .npy files This partitions the dask.array along one axis and stores each block along that axis as a single .npy file in the specified directory Examples -------- >>> x = da.ones((5, 10, 10), chunks=(2, 4, 4)) # doctest: +SKIP >>> da.to_npy_stack('data/', x, axis=0) # doctest: +SKIP $ tree data/ data/ |-- 0.npy |-- 1.npy |-- 2.npy |-- info The ``.npy`` files store numpy arrays for ``x[0:2], x[2:4], and x[4:5]`` respectively, as is specified by the chunk size along the zeroth axis. The info file stores the dtype, chunks, and axis information of the array. You can load these stacks with the ``da.from_npy_stack`` function. 
>>> y = da.from_npy_stack('data/') # doctest: +SKIP See Also -------- from_npy_stack """ chunks = tuple((c if i == axis else (sum(c),)) for i, c in enumerate(x.chunks)) xx = x.rechunk(chunks) if not os.path.exists(dirname): os.mkdir(dirname) meta = {'chunks': chunks, 'dtype': x.dtype, 'axis': axis} with open(os.path.join(dirname, 'info'), 'wb') as f: pickle.dump(meta, f) name = 'to-npy-stack-' + str(uuid.uuid1()) dsk = {(name, i): (np.save, os.path.join(dirname, '%d.npy' % i), key) for i, key in enumerate(core.flatten(xx.__dask_keys__()))} compute_as_if_collection(Array, sharedict.merge(dsk, xx.dask), list(dsk)) def from_npy_stack(dirname, mmap_mode='r'): """ Load dask array from stack of npy files See ``da.to_npy_stack`` for docstring Parameters ---------- dirname: string Directory of .npy files mmap_mode: (None or 'r') Read data in memory map mode """ with open(os.path.join(dirname, 'info'), 'rb') as f: info = pickle.load(f) dtype = info['dtype'] chunks = info['chunks'] axis = info['axis'] name = 'from-npy-stack-%s' % dirname keys = list(product([name], *[range(len(c)) for c in chunks])) values = [(np.load, os.path.join(dirname, '%d.npy' % i), mmap_mode) for i in range(len(chunks[axis]))] dsk = dict(zip(keys, values)) return Array(dsk, name, chunks, dtype) dask-0.16.0/dask/array/creation.py000066400000000000000000000462311320364734500167530ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from functools import partial, wraps from itertools import chain, product from operator import add from numbers import Integral import numpy as np from toolz import accumulate, sliding_window from .. import sharedict from ..base import tokenize from ..utils import ignoring from . import chunk from .core import Array, asarray, normalize_chunks, stack, concatenate from .wrap import empty, ones, zeros, full def empty_like(a, dtype=None, chunks=None): """ Return a new array with the same shape and type as a given array. Parameters ---------- a : array_like The shape and data-type of `a` define these same attributes of the returned array. dtype : data-type, optional Overrides the data type of the result. chunks : sequence of ints The number of samples on each block. Note that the last block will have fewer samples if ``len(array) % chunks != 0``. Returns ------- out : ndarray Array of uninitialized (arbitrary) data with the same shape and type as `a`. See Also -------- ones_like : Return an array of ones with shape and type of input. zeros_like : Return an array of zeros with shape and type of input. empty : Return a new uninitialized array. ones : Return a new array setting values to one. zeros : Return a new array setting values to zero. Notes ----- This function does *not* initialize the returned array; to do that use `zeros_like` or `ones_like` instead. It may be marginally faster than the functions that do set the array values. """ a = asarray(a) return empty( a.shape, dtype=(dtype or a.dtype), chunks=(chunks or a.chunks) ) def ones_like(a, dtype=None, chunks=None): """ Return an array of ones with the same shape and type as a given array. Parameters ---------- a : array_like The shape and data-type of `a` define these same attributes of the returned array. dtype : data-type, optional Overrides the data type of the result. chunks : sequence of ints The number of samples on each block. Note that the last block will have fewer samples if ``len(array) % chunks != 0``. Returns ------- out : ndarray Array of ones with the same shape and type as `a`. 
See Also -------- zeros_like : Return an array of zeros with shape and type of input. empty_like : Return an empty array with shape and type of input. zeros : Return a new array setting values to zero. ones : Return a new array setting values to one. empty : Return a new uninitialized array. """ a = asarray(a) return ones( a.shape, dtype=(dtype or a.dtype), chunks=(chunks or a.chunks) ) def zeros_like(a, dtype=None, chunks=None): """ Return an array of zeros with the same shape and type as a given array. Parameters ---------- a : array_like The shape and data-type of `a` define these same attributes of the returned array. dtype : data-type, optional Overrides the data type of the result. chunks : sequence of ints The number of samples on each block. Note that the last block will have fewer samples if ``len(array) % chunks != 0``. Returns ------- out : ndarray Array of zeros with the same shape and type as `a`. See Also -------- ones_like : Return an array of ones with shape and type of input. empty_like : Return an empty array with shape and type of input. zeros : Return a new array setting values to zero. ones : Return a new array setting values to one. empty : Return a new uninitialized array. """ a = asarray(a) return zeros( a.shape, dtype=(dtype or a.dtype), chunks=(chunks or a.chunks) ) def full_like(a, fill_value, dtype=None, chunks=None): """ Return a full array with the same shape and type as a given array. Parameters ---------- a : array_like The shape and data-type of `a` define these same attributes of the returned array. fill_value : scalar Fill value. dtype : data-type, optional Overrides the data type of the result. chunks : sequence of ints The number of samples on each block. Note that the last block will have fewer samples if ``len(array) % chunks != 0``. Returns ------- out : ndarray Array of `fill_value` with the same shape and type as `a`. See Also -------- zeros_like : Return an array of zeros with shape and type of input. ones_like : Return an array of ones with shape and type of input. empty_like : Return an empty array with shape and type of input. zeros : Return a new array setting values to zero. ones : Return a new array setting values to one. empty : Return a new uninitialized array. full : Fill a new array. """ a = asarray(a) return full( a.shape, fill_value, dtype=(dtype or a.dtype), chunks=(chunks or a.chunks) ) def linspace(start, stop, num=50, chunks=None, dtype=None): """ Return `num` evenly spaced values over the closed interval [`start`, `stop`]. TODO: implement the `endpoint`, `restep`, and `dtype` keyword args Parameters ---------- start : scalar The starting value of the sequence. stop : scalar The last value of the sequence. num : int, optional Number of samples to include in the returned dask array, including the endpoints. chunks : int The number of samples on each block. 
Note that the last block will have fewer samples if `num % blocksize != 0` Returns ------- samples : dask array See Also -------- dask.array.arange """ num = int(num) if chunks is None: raise ValueError("Must supply a chunks= keyword argument") chunks = normalize_chunks(chunks, (num,)) range_ = stop - start space = float(range_) / (num - 1) if dtype is None: dtype = np.linspace(0, 1, 1).dtype name = 'linspace-' + tokenize((start, stop, num, chunks, dtype)) dsk = {} blockstart = start for i, bs in enumerate(chunks[0]): blockstop = blockstart + ((bs - 1) * space) task = (partial(np.linspace, dtype=dtype), blockstart, blockstop, bs) blockstart = blockstart + (space * bs) dsk[(name, i)] = task return Array(dsk, name, chunks, dtype=dtype) def arange(*args, **kwargs): """ Return evenly spaced values from `start` to `stop` with step size `step`. The values are half-open [start, stop), so including start and excluding stop. This is basically the same as python's range function but for dask arrays. When using a non-integer step, such as 0.1, the results will often not be consistent. It is better to use linspace for these cases. Parameters ---------- start : int, optional The starting value of the sequence. The default is 0. stop : int The end of the interval, this value is excluded from the interval. step : int, optional The spacing between the values. The default is 1 when not specified. The last value of the sequence. chunks : int The number of samples on each block. Note that the last block will have fewer samples if ``len(array) % chunks != 0``. Returns ------- samples : dask array See Also -------- dask.array.linspace """ if len(args) == 1: start = 0 stop = args[0] step = 1 elif len(args) == 2: start = args[0] stop = args[1] step = 1 elif len(args) == 3: start, stop, step = args else: raise TypeError(''' arange takes 3 positional arguments: arange([start], stop, [step]) ''') if 'chunks' not in kwargs: raise ValueError("Must supply a chunks= keyword argument") chunks = kwargs['chunks'] dtype = kwargs.get('dtype', None) if dtype is None: dtype = np.arange(0, 1, step).dtype num = max(np.ceil((stop - start) / step), 0) chunks = normalize_chunks(chunks, (num,)) name = 'arange-' + tokenize((start, stop, step, chunks, num)) dsk = {} elem_count = 0 for i, bs in enumerate(chunks[0]): blockstart = start + (elem_count * step) blockstop = start + ((elem_count + bs) * step) task = (chunk.arange, blockstart, blockstop, step, bs, dtype) dsk[(name, i)] = task elem_count += bs return Array(dsk, name, chunks, dtype=dtype) def indices(dimensions, dtype=int, chunks=None): """ Implements NumPy's ``indices`` for Dask Arrays. Generates a grid of indices covering the dimensions provided. The final array has the shape ``(len(dimensions), *dimensions)``. The chunks are used to specify the chunking for axis 1 up to ``len(dimensions)``. The 0th axis always has chunks of length 1. Parameters ---------- dimensions : sequence of ints The shape of the index grid. dtype : dtype, optional Type to use for the array. Default is ``int``. chunks : sequence of ints The number of samples on each block. Note that the last block will have fewer samples if ``len(array) % chunks != 0``. 
Returns ------- grid : dask array """ if chunks is None: raise ValueError("Must supply a chunks= keyword argument") dimensions = tuple(dimensions) dtype = np.dtype(dtype) chunks = tuple(chunks) if len(dimensions) != len(chunks): raise ValueError("Need same number of chunks as dimensions.") grid = [] if np.prod(dimensions): for i in range(len(dimensions)): s = len(dimensions) * [None] s[i] = slice(None) s = tuple(s) r = arange(dimensions[i], dtype=dtype, chunks=chunks[i]) r = r[s] for j in chain(range(i), range(i + 1, len(dimensions))): r = r.repeat(dimensions[j], axis=j) grid.append(r) if grid: grid = stack(grid) else: grid = empty( (len(dimensions),) + dimensions, dtype=dtype, chunks=(1,) + chunks ) return grid def eye(N, chunks, M=None, k=0, dtype=float): """ Return a 2-D Array with ones on the diagonal and zeros elsewhere. Parameters ---------- N : int Number of rows in the output. chunks: int chunk size of resulting blocks M : int, optional Number of columns in the output. If None, defaults to `N`. k : int, optional Index of the diagonal: 0 (the default) refers to the main diagonal, a positive value refers to an upper diagonal, and a negative value to a lower diagonal. dtype : data-type, optional Data-type of the returned array. Returns ------- I : Array of shape (N,M) An array where all elements are equal to zero, except for the `k`-th diagonal, whose values are equal to one. """ if not isinstance(chunks, int): raise ValueError('chunks must be an int') token = tokenize(N, chunk, M, k, dtype) name_eye = 'eye-' + token eye = {} if M is None: M = N vchunks = [chunks] * (N // chunks) if N % chunks != 0: vchunks.append(N % chunks) hchunks = [chunks] * (M // chunks) if M % chunks != 0: hchunks.append(M % chunks) for i, vchunk in enumerate(vchunks): for j, hchunk in enumerate(hchunks): if (j - i - 1) * chunks <= k <= (j - i + 1) * chunks: eye[name_eye, i, j] = (np.eye, vchunk, hchunk, k - (j - i) * chunks, dtype) else: eye[name_eye, i, j] = (np.zeros, (vchunk, hchunk), dtype) return Array(eye, name_eye, shape=(N, M), chunks=(chunks, chunks), dtype=dtype) @wraps(np.diag) def diag(v): name = 'diag-' + tokenize(v) if isinstance(v, np.ndarray): if v.ndim == 1: chunks = ((v.shape[0],), (v.shape[0],)) dsk = {(name, 0, 0): (np.diag, v)} elif v.ndim == 2: chunks = ((min(v.shape),),) dsk = {(name, 0): (np.diag, v)} else: raise ValueError("Array must be 1d or 2d only") return Array(dsk, name, chunks, dtype=v.dtype) if not isinstance(v, Array): raise TypeError("v must be a dask array or numpy array, " "got {0}".format(type(v))) if v.ndim != 1: if v.chunks[0] == v.chunks[1]: dsk = {(name, i): (np.diag, row[i]) for i, row in enumerate(v.__dask_keys__())} return Array(sharedict.merge(v.dask, (name, dsk)), name, (v.chunks[0],), dtype=v.dtype) else: raise NotImplementedError("Extracting diagonals from non-square " "chunked arrays") chunks_1d = v.chunks[0] blocks = v.__dask_keys__() dsk = {} for i, m in enumerate(chunks_1d): for j, n in enumerate(chunks_1d): key = (name, i, j) if i == j: dsk[key] = (np.diag, blocks[i]) else: dsk[key] = (np.zeros, (m, n)) return Array(sharedict.merge(v.dask, (name, dsk)), name, (chunks_1d, chunks_1d), dtype=v.dtype) def triu(m, k=0): """ Upper triangle of an array with elements above the `k`-th diagonal zeroed. Parameters ---------- m : array_like, shape (M, N) Input array. k : int, optional Diagonal above which to zero elements. `k = 0` (the default) is the main diagonal, `k < 0` is below it and `k > 0` is above. 
Returns ------- triu : ndarray, shape (M, N) Upper triangle of `m`, of same shape and data-type as `m`. See Also -------- tril : lower triangle of an array """ if m.ndim != 2: raise ValueError('input must be 2 dimensional') if m.shape[0] != m.shape[1]: raise NotImplementedError('input must be a square matrix') if m.chunks[0][0] != m.chunks[1][0]: msg = ('chunks must be a square. ' 'Use .rechunk method to change the size of chunks.') raise NotImplementedError(msg) rdim = len(m.chunks[0]) hdim = len(m.chunks[1]) chunk = m.chunks[0][0] token = tokenize(m, k) name = 'triu-' + token dsk = {} for i in range(rdim): for j in range(hdim): if chunk * (j - i + 1) < k: dsk[(name, i, j)] = (np.zeros, (m.chunks[0][i], m.chunks[1][j])) elif chunk * (j - i - 1) < k <= chunk * (j - i + 1): dsk[(name, i, j)] = (np.triu, (m.name, i, j), k - (chunk * (j - i))) else: dsk[(name, i, j)] = (m.name, i, j) return Array(sharedict.merge((name, dsk), m.dask), name, shape=m.shape, chunks=m.chunks, dtype=m.dtype) def tril(m, k=0): """ Lower triangle of an array with elements above the `k`-th diagonal zeroed. Parameters ---------- m : array_like, shape (M, M) Input array. k : int, optional Diagonal above which to zero elements. `k = 0` (the default) is the main diagonal, `k < 0` is below it and `k > 0` is above. Returns ------- tril : ndarray, shape (M, M) Lower triangle of `m`, of same shape and data-type as `m`. See Also -------- triu : upper triangle of an array """ if m.ndim != 2: raise ValueError('input must be 2 dimensional') if m.shape[0] != m.shape[1]: raise NotImplementedError('input must be a square matrix') if not len(set(m.chunks[0] + m.chunks[1])) == 1: msg = ('All chunks must be a square matrix to perform lu decomposition. ' 'Use .rechunk method to change the size of chunks.') raise ValueError(msg) rdim = len(m.chunks[0]) hdim = len(m.chunks[1]) chunk = m.chunks[0][0] token = tokenize(m, k) name = 'tril-' + token dsk = {} for i in range(rdim): for j in range(hdim): if chunk * (j - i + 1) < k: dsk[(name, i, j)] = (m.name, i, j) elif chunk * (j - i - 1) < k <= chunk * (j - i + 1): dsk[(name, i, j)] = (np.tril, (m.name, i, j), k - (chunk * (j - i))) else: dsk[(name, i, j)] = (np.zeros, (m.chunks[0][i], m.chunks[1][j])) dsk = sharedict.merge(m.dask, (name, dsk)) return Array(dsk, name, shape=m.shape, chunks=m.chunks, dtype=m.dtype) def offset_func(func, offset, *args): """ Offsets inputs by offset >>> double = lambda x: x * 2 >>> f = offset_func(double, (10,)) >>> f(1) 22 >>> f(300) 620 """ def _offset(*args): args2 = list(map(add, args, offset)) return func(*args2) with ignoring(Exception): _offset.__name__ = 'offset_' + func.__name__ return _offset @wraps(np.fromfunction) def fromfunction(func, chunks=None, shape=None, dtype=None): if chunks: chunks = normalize_chunks(chunks, shape) name = 'fromfunction-' + tokenize(func, chunks, shape, dtype) keys = list(product([name], *[range(len(bd)) for bd in chunks])) aggdims = [list(accumulate(add, (0,) + bd[:-1])) for bd in chunks] offsets = list(product(*aggdims)) shapes = list(product(*chunks)) values = [(np.fromfunction, offset_func(func, offset), shp) for offset, shp in zip(offsets, shapes)] dsk = dict(zip(keys, values)) return Array(dsk, name, chunks, dtype=dtype) @wraps(np.repeat) def repeat(a, repeats, axis=None): if axis is None: if a.ndim == 1: axis = 0 else: raise NotImplementedError("Must supply an integer axis value") if not isinstance(repeats, Integral): raise NotImplementedError("Only integer valued repeats supported") if -a.ndim <= axis < 0: axis += 
a.ndim elif not 0 <= axis <= a.ndim - 1: raise ValueError("axis(=%d) out of bounds" % axis) if repeats == 1: return a cchunks = np.cumsum((0,) + a.chunks[axis]) slices = [] for c_start, c_stop in sliding_window(2, cchunks): ls = np.linspace(c_start, c_stop, repeats).round(0) for ls_start, ls_stop in sliding_window(2, ls): if ls_start != ls_stop: slices.append(slice(ls_start, ls_stop)) all_slice = slice(None, None, None) slices = [(all_slice,) * axis + (s,) + (all_slice,) * (a.ndim - axis - 1) for s in slices] slabs = [a[slc] for slc in slices] out = [] for slab in slabs: chunks = list(slab.chunks) assert len(chunks[axis]) == 1 chunks[axis] = (chunks[axis][0] * repeats,) chunks = tuple(chunks) result = slab.map_blocks(np.repeat, repeats, axis=axis, chunks=chunks, dtype=slab.dtype) out.append(result) return concatenate(out, axis=axis) @wraps(np.tile) def tile(A, reps): if not isinstance(reps, Integral): raise NotImplementedError("Only integer valued `reps` supported.") if reps < 0: raise ValueError("Negative `reps` are not allowed.") elif reps == 0: return A[..., :0] elif reps == 1: return A return concatenate(reps * [A], axis=-1) dask-0.16.0/dask/array/fft.py000066400000000000000000000157431320364734500157320ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import collections from functools import wraps import inspect import numpy as np try: import scipy import scipy.fftpack except ImportError: scipy = None from .core import concatenate as _concatenate from .creation import arange as _arange chunk_error = ("Dask array only supports taking an FFT along an axis that \n" "has a single chunk. An FFT operation was tried on axis %s \n" "which has chunks %s. To change the array's chunks use " "dask.Array.rechunk.") fft_preamble = """ Wrapping of %s The axis along which the FFT is applied must have a one chunk. To change the array's chunking use dask.Array.rechunk. The %s docstring follows below: """ def _fft_out_chunks(a, s, axes): """ For computing the output chunks of [i]fft*""" if s is None: return a.chunks chunks = list(a.chunks) for i, axis in enumerate(axes): chunks[axis] = (s[i],) return chunks def _rfft_out_chunks(a, s, axes): """ For computing the output chunks of rfft*""" if s is None: s = [a.chunks[axis][0] for axis in axes] s = list(s) s[-1] = s[-1] // 2 + 1 chunks = list(a.chunks) for i, axis in enumerate(axes): chunks[axis] = (s[i],) return chunks def _irfft_out_chunks(a, s, axes): """ For computing the output chunks of irfft*""" if s is None: s = [a.chunks[axis][0] for axis in axes] s[-1] = 2 * (s[-1] - 1) chunks = list(a.chunks) for i, axis in enumerate(axes): chunks[axis] = (s[i],) return chunks def _hfft_out_chunks(a, s, axes): assert len(axes) == 1 axis = axes[0] if s is None: s = [2 * (a.chunks[axis][0] - 1)] n = s[0] chunks = list(a.chunks) chunks[axis] = (n,) return chunks def _ihfft_out_chunks(a, s, axes): assert len(axes) == 1 axis = axes[0] if s is None: s = [a.chunks[axis][0]] else: assert len(s) == 1 n = s[0] chunks = list(a.chunks) if n % 2 == 0: m = (n // 2) + 1 else: m = (n + 1) // 2 chunks[axis] = (m,) return chunks _out_chunk_fns = {'fft': _fft_out_chunks, 'ifft': _fft_out_chunks, 'rfft': _rfft_out_chunks, 'irfft': _irfft_out_chunks, 'hfft': _hfft_out_chunks, 'ihfft': _ihfft_out_chunks} def fft_wrap(fft_func, kind=None, dtype=None): """ Wrap 1D complex FFT functions Takes a function that behaves like ``numpy.fft`` functions and a specified kind to match it to that are named after the functions in the ``numpy.fft`` API. 
Supported kinds include: * fft * ifft * rfft * irfft * hfft * ihfft Examples -------- >>> parallel_fft = fft_wrap(np.fft.fft) >>> parallel_ifft = fft_wrap(np.fft.ifft) """ if scipy is not None: if fft_func is scipy.fftpack.rfft: raise ValueError("SciPy's `rfft` doesn't match the NumPy API.") elif fft_func is scipy.fftpack.irfft: raise ValueError("SciPy's `irfft` doesn't match the NumPy API.") if kind is None: kind = fft_func.__name__ try: out_chunk_fn = _out_chunk_fns[kind.rstrip("2n")] except KeyError: raise ValueError("Given unknown `kind` %s." % kind) def func(a, s=None, axes=None): if axes is None: if kind.endswith('2'): axes = (-2, -1) elif kind.endswith('n'): if s is None: axes = tuple(range(a.ndim)) else: axes = tuple(range(len(s))) else: axes = (-1,) else: if len(set(axes)) < len(axes): raise ValueError("Duplicate axes not allowed.") _dtype = dtype if _dtype is None: _dtype = fft_func(np.ones(len(axes) * (8,), dtype=a.dtype)).dtype for each_axis in axes: if len(a.chunks[each_axis]) != 1: raise ValueError(chunk_error % (each_axis, a.chunks[each_axis])) chunks = out_chunk_fn(a, s, axes) args = (s, axes) if kind.endswith('fft'): axis = None if axes is None else axes[0] n = None if s is None else s[0] args = (n, axis) return a.map_blocks(fft_func, *args, dtype=_dtype, chunks=chunks) if kind.endswith('fft'): _func = func def func(a, n=None, axis=None): s = None if n is not None: s = (n,) axes = None if axis is not None: axes = (axis,) return _func(a, s, axes) func_mod = inspect.getmodule(fft_func) func_name = fft_func.__name__ func_fullname = func_mod.__name__ + "." + func_name if fft_func.__doc__ is not None: func.__doc__ = (fft_preamble % (2 * (func_fullname,))) func.__doc__ += fft_func.__doc__ func.__name__ = func_name return func fft = fft_wrap(np.fft.fft, dtype=np.complex_) fft2 = fft_wrap(np.fft.fft2, dtype=np.complex_) fftn = fft_wrap(np.fft.fftn, dtype=np.complex_) ifft = fft_wrap(np.fft.ifft, dtype=np.complex_) ifft2 = fft_wrap(np.fft.ifft2, dtype=np.complex_) ifftn = fft_wrap(np.fft.ifftn, dtype=np.complex_) rfft = fft_wrap(np.fft.rfft, dtype=np.complex_) rfft2 = fft_wrap(np.fft.rfft2, dtype=np.complex_) rfftn = fft_wrap(np.fft.rfftn, dtype=np.complex_) irfft = fft_wrap(np.fft.irfft, dtype=np.float_) irfft2 = fft_wrap(np.fft.irfft2, dtype=np.float_) irfftn = fft_wrap(np.fft.irfftn, dtype=np.float_) hfft = fft_wrap(np.fft.hfft, dtype=np.float_) ihfft = fft_wrap(np.fft.ihfft, dtype=np.complex_) def _fftfreq_block(i, n, d): r = i.copy() r[i >= (n + 1) // 2] -= n r /= n * d return r @wraps(np.fft.fftfreq) def fftfreq(n, d=1.0, chunks=None): n = int(n) d = float(d) r = _arange(n, dtype=float, chunks=chunks) return r.map_blocks(_fftfreq_block, dtype=float, n=n, d=d) @wraps(np.fft.rfftfreq) def rfftfreq(n, d=1.0, chunks=None): n = int(n) d = float(d) r = _arange(n // 2 + 1, dtype=float, chunks=chunks) r /= n * d return r def _fftshift_helper(x, axes=None, inverse=False): if axes is None: axes = list(range(x.ndim)) elif not isinstance(axes, collections.Sequence): axes = (axes,) y = x for i in axes: n = y.shape[i] n_2 = (n + int(inverse is False)) // 2 l = y.ndim * [slice(None)] l[i] = slice(None, n_2) l = tuple(l) r = y.ndim * [slice(None)] r[i] = slice(n_2, None) r = tuple(r) y = _concatenate([y[r], y[l]], axis=i) return y @wraps(np.fft.fftshift) def fftshift(x, axes=None): return _fftshift_helper(x, axes=axes, inverse=False) @wraps(np.fft.ifftshift) def ifftshift(x, axes=None): return _fftshift_helper(x, axes=axes, inverse=True) 
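# Illustrative usage sketch for the wrappers defined above. The transformed
# axis must consist of a single chunk, so rechunk along that axis first if
# needed. The snippet below is a sketch meant to be run from user code via
# the ``dask.array.fft`` module, not executed on import of this file.
#
#     import numpy as np
#     import dask.array as da
#     import dask.array.fft as dff
#
#     x = da.random.random((1024, 1024), chunks=(256, 1024))
#     X = dff.fft(x, axis=-1)                  # ok: axis -1 is a single chunk
#     freqs = dff.fftfreq(1024, d=1.0, chunks=256)
#     y = dff.ifft(X, axis=-1).real
#     assert np.allclose(y.compute(), x.compute())
#
#     # dff.fft(x, axis=0) would raise ValueError, because axis 0 has four
#     # chunks; rechunk first, e.g. dff.fft(x.rechunk((1024, 256)), axis=0).
#
#     # fft_wrap can also wrap other NumPy-compatible FFT implementations:
#     parallel_fft = fft_wrap(np.fft.fft)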
dask-0.16.0/dask/array/ghost.py000066400000000000000000000311371320364734500162720ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from operator import getitem from itertools import product from numbers import Integral from toolz import merge, pipe, concat, partial from toolz.curried import map from . import chunk, wrap from .core import Array, map_blocks, concatenate, concatenate3, reshapelist from .. import sharedict from ..base import tokenize from ..core import flatten from ..utils import concrete def fractional_slice(task, axes): """ >>> fractional_slice(('x', 5.1), {0: 2}) # doctest: +SKIP (getitem, ('x', 6), (slice(0, 2),)) >>> fractional_slice(('x', 3, 5.1), {0: 2, 1: 3}) # doctest: +SKIP (getitem, ('x', 3, 5), (slice(None, None, None), slice(-3, None))) >>> fractional_slice(('x', 2.9, 5.1), {0: 2, 1: 3}) # doctest: +SKIP (getitem, ('x', 3, 5), (slice(0, 2), slice(-3, None))) """ rounded = (task[0],) + tuple(int(round(i)) for i in task[1:]) index = [] for i, (t, r) in enumerate(zip(task[1:], rounded[1:])): depth = axes.get(i, 0) if t == r: index.append(slice(None, None, None)) elif t < r: index.append(slice(0, depth)) elif t > r and depth == 0: index.append(slice(0, 0)) else: index.append(slice(-depth, None)) index = tuple(index) if all(ind == slice(None, None, None) for ind in index): return task else: return (getitem, rounded, index) def expand_key(k, dims): """ Get all neighboring keys around center >>> expand_key(('x', 2, 3), dims=[5, 5]) # doctest: +NORMALIZE_WHITESPACE [[('x', 1.1, 2.1), ('x', 1.1, 3), ('x', 1.1, 3.9)], [('x', 2, 2.1), ('x', 2, 3), ('x', 2, 3.9)], [('x', 2.9, 2.1), ('x', 2.9, 3), ('x', 2.9, 3.9)]] >>> expand_key(('x', 0, 4), dims=[5, 5]) # doctest: +NORMALIZE_WHITESPACE [[('x', 0, 3.1), ('x', 0, 4)], [('x', 0.9, 3.1), ('x', 0.9, 4)]] """ def inds(i, ind): rv = [] if ind - 0.9 > 0: rv.append(ind - 0.9) rv.append(ind) if ind + 0.9 < dims[i] - 1: rv.append(ind + 0.9) return rv shape = [] for i, ind in enumerate(k[1:]): num = 1 if ind > 0: num += 1 if ind < dims[i] - 1: num += 1 shape.append(num) seq = list(product([k[0]], *[inds(i, ind) for i, ind in enumerate(k[1:])])) return reshapelist(shape, seq) def ghost_internal(x, axes): """ Share boundaries between neighboring blocks Parameters ---------- x: da.Array A dask array axes: dict The size of the shared boundary per axis The axes input informs how many cells to overlap between neighboring blocks {0: 2, 2: 5} means share two cells in 0 axis, 5 cells in 2 axis """ dims = list(map(len, x.chunks)) expand_key2 = partial(expand_key, dims=dims) interior_keys = pipe(x.__dask_keys__(), flatten, map(expand_key2), map(flatten), concat, list) token = tokenize(x, axes) name = 'ghost-' + token interior_slices = {} ghost_blocks = {} for k in interior_keys: frac_slice = fractional_slice(k, axes) if k != frac_slice: interior_slices[k] = frac_slice else: ghost_blocks[(name,) + k[1:]] = (concatenate3, (concrete, expand_key2(k))) chunks = [] for i, bds in enumerate(x.chunks): if len(bds) == 1: chunks.append(bds) else: left = [bds[0] + axes.get(i, 0)] right = [bds[-1] + axes.get(i, 0)] mid = [] for bd in bds[1:-1]: mid.append(bd + axes.get(i, 0) * 2) chunks.append(left + mid + right) dsk = merge(interior_slices, ghost_blocks) dsk = sharedict.merge(x.dask, (name, dsk)) return Array(dsk, name, chunks, dtype=x.dtype) def trim_internal(x, axes): """ Trim sides from each block This couples well with the ghost operation, which may leave excess data on each block See also -------- 
dask.array.chunk.trim dask.array.map_blocks """ olist = [] for i, bd in enumerate(x.chunks): ilist = [] for d in bd: ilist.append(d - axes.get(i, 0) * 2) olist.append(tuple(ilist)) chunks = tuple(olist) return map_blocks(partial(chunk.trim, axes=axes), x, chunks=chunks, dtype=x.dtype) def periodic(x, axis, depth): """ Copy a slice of an array around to its other side Useful to create periodic boundary conditions for ghost """ left = ((slice(None, None, None),) * axis + (slice(0, depth),) + (slice(None, None, None),) * (x.ndim - axis - 1)) right = ((slice(None, None, None),) * axis + (slice(-depth, None),) + (slice(None, None, None),) * (x.ndim - axis - 1)) l = x[left] r = x[right] l, r = _remove_ghost_boundaries(l, r, axis, depth) return concatenate([r, x, l], axis=axis) def reflect(x, axis, depth): """ Reflect boundaries of array on the same side This is the converse of ``periodic`` """ if depth == 1: left = ((slice(None, None, None),) * axis + (slice(0, 1),) + (slice(None, None, None),) * (x.ndim - axis - 1)) else: left = ((slice(None, None, None),) * axis + (slice(depth - 1, None, -1),) + (slice(None, None, None),) * (x.ndim - axis - 1)) right = ((slice(None, None, None),) * axis + (slice(-1, -depth - 1, -1),) + (slice(None, None, None),) * (x.ndim - axis - 1)) l = x[left] r = x[right] l, r = _remove_ghost_boundaries(l, r, axis, depth) return concatenate([l, x, r], axis=axis) def nearest(x, axis, depth): """ Each reflect each boundary value outwards This mimics what the skimage.filters.gaussian_filter(... mode="nearest") does. """ left = ((slice(None, None, None),) * axis + (slice(0, 1),) + (slice(None, None, None),) * (x.ndim - axis - 1)) right = ((slice(None, None, None),) * axis + (slice(-1, -2, -1),) + (slice(None, None, None),) * (x.ndim - axis - 1)) l = concatenate([x[left]] * depth, axis=axis) r = concatenate([x[right]] * depth, axis=axis) l, r = _remove_ghost_boundaries(l, r, axis, depth) return concatenate([l, x, r], axis=axis) def constant(x, axis, depth, value): """ Add constant slice to either side of array """ chunks = list(x.chunks) chunks[axis] = (depth,) c = wrap.full(tuple(map(sum, chunks)), value, chunks=tuple(chunks), dtype=x.dtype) return concatenate([c, x, c], axis=axis) def _remove_ghost_boundaries(l, r, axis, depth): lchunks = list(l.chunks) lchunks[axis] = (depth,) rchunks = list(r.chunks) rchunks[axis] = (depth,) l = l.rechunk(tuple(lchunks)) r = r.rechunk(tuple(rchunks)) return l, r def boundaries(x, depth=None, kind=None): """ Add boundary conditions to an array before ghosting See Also -------- periodic constant """ if not isinstance(kind, dict): kind = dict((i, kind) for i in range(x.ndim)) if not isinstance(depth, dict): depth = dict((i, depth) for i in range(x.ndim)) for i in range(x.ndim): d = depth.get(i, 0) if d == 0: continue this_kind = kind.get(i, 'none') if this_kind == 'none': continue elif this_kind == 'periodic': x = periodic(x, i, d) elif this_kind == 'reflect': x = reflect(x, i, d) elif this_kind == 'nearest': x = nearest(x, i, d) elif i in kind: x = constant(x, i, d, kind[i]) return x def ghost(x, depth, boundary): """ Share boundaries between neighboring blocks Parameters ---------- x: da.Array A dask array depth: dict The size of the shared boundary per axis boundary: dict The boundary condition on each axis. Options are 'reflect', 'periodic', 'nearest', 'none', or an array value. Such a value will fill the boundary with that value. 
The depth input informs how many cells to overlap between neighboring blocks ``{0: 2, 2: 5}`` means share two cells in 0 axis, 5 cells in 2 axis. Axes missing from this input will not be overlapped. Examples -------- >>> import numpy as np >>> import dask.array as da >>> x = np.arange(64).reshape((8, 8)) >>> d = da.from_array(x, chunks=(4, 4)) >>> d.chunks ((4, 4), (4, 4)) >>> g = da.ghost.ghost(d, depth={0: 2, 1: 1}, ... boundary={0: 100, 1: 'reflect'}) >>> g.chunks ((8, 8), (6, 6)) >>> np.array(g) array([[100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100], [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100], [ 0, 0, 1, 2, 3, 4, 3, 4, 5, 6, 7, 7], [ 8, 8, 9, 10, 11, 12, 11, 12, 13, 14, 15, 15], [ 16, 16, 17, 18, 19, 20, 19, 20, 21, 22, 23, 23], [ 24, 24, 25, 26, 27, 28, 27, 28, 29, 30, 31, 31], [ 32, 32, 33, 34, 35, 36, 35, 36, 37, 38, 39, 39], [ 40, 40, 41, 42, 43, 44, 43, 44, 45, 46, 47, 47], [ 16, 16, 17, 18, 19, 20, 19, 20, 21, 22, 23, 23], [ 24, 24, 25, 26, 27, 28, 27, 28, 29, 30, 31, 31], [ 32, 32, 33, 34, 35, 36, 35, 36, 37, 38, 39, 39], [ 40, 40, 41, 42, 43, 44, 43, 44, 45, 46, 47, 47], [ 48, 48, 49, 50, 51, 52, 51, 52, 53, 54, 55, 55], [ 56, 56, 57, 58, 59, 60, 59, 60, 61, 62, 63, 63], [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100], [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100]]) """ depth2 = coerce_depth(x.ndim, depth) boundary2 = coerce_boundary(x.ndim, boundary) # is depth larger than chunk size? depth_values = [depth2.get(i, 0) for i in range(x.ndim)] for d, c in zip(depth_values, x.chunks): if d > min(c): raise ValueError("The overlapping depth %d is larger than your\n" "smallest chunk size %d. Rechunk your array\n" "with a larger chunk size or a chunk size that\n" "more evenly divides the shape of your array." % (d, min(c))) x2 = boundaries(x, depth2, boundary2) x3 = ghost_internal(x2, depth2) trim = dict((k, v * 2 if boundary2.get(k, 'none') != 'none' else 0) for k, v in depth2.items()) x4 = chunk.trim(x3, trim) return x4 def add_dummy_padding(x, depth, boundary): """ Pads an array which has 'none' as the boundary type. Used to simplify trimming arrays which use 'none'. 
>>> import dask.array as da >>> x = da.arange(6, chunks=3) >>> add_dummy_padding(x, {0: 1}, {0: 'none'}).compute() # doctest: +NORMALIZE_WHITESPACE array([..., 0, 1, 2, 3, 4, 5, ...]) """ for k, v in boundary.items(): d = depth[k] if v == 'none' and d > 0: empty_shape = list(x.shape) empty_shape[k] = d empty_chunks = list(x.chunks) empty_chunks[k] = (d,) empty = wrap.empty(empty_shape, chunks=empty_chunks, dtype=x.dtype) out_chunks = list(x.chunks) ax_chunks = list(out_chunks[k]) ax_chunks[0] += d ax_chunks[-1] += d out_chunks[k] = tuple(ax_chunks) x = concatenate([empty, x, empty], axis=k) x = x.rechunk(out_chunks) return x def map_overlap(x, func, depth, boundary=None, trim=True, **kwargs): depth2 = coerce_depth(x.ndim, depth) boundary2 = coerce_boundary(x.ndim, boundary) g = ghost(x, depth=depth2, boundary=boundary2) g2 = g.map_blocks(func, **kwargs) if trim: g3 = add_dummy_padding(g2, depth2, boundary2) return trim_internal(g3, depth2) else: return g2 def coerce_depth(ndim, depth): if isinstance(depth, Integral): depth = (depth,) * ndim if isinstance(depth, tuple): depth = dict(zip(range(ndim), depth)) return depth def coerce_boundary(ndim, boundary): if boundary is None: boundary = 'reflect' if not isinstance(boundary, (tuple, dict)): boundary = (boundary,) * ndim if isinstance(boundary, tuple): boundary = dict(zip(range(ndim), boundary)) return boundary dask-0.16.0/dask/array/image.py000066400000000000000000000037151320364734500162310ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from glob import glob import os try: from skimage.io import imread as sk_imread except ImportError: pass from .core import Array from ..base import tokenize def add_leading_dimension(x): return x[None, ...] def imread(filename, imread=None, preprocess=None): """ Read a stack of images into a dask array Parameters ---------- filename: string A globstring like 'myfile.*.png' imread: function (optional) Optionally provide custom imread function. Function should expect a filename and produce a numpy array. Defaults to ``skimage.io.imread``. preprocess: function (optional) Optionally provide custom function to preprocess the image. Function should expect a numpy array for a single image. Examples -------- >>> from dask.array.image import imread >>> im = imread('2015-*-*.png') # doctest: +SKIP >>> im.shape # doctest: +SKIP (365, 1000, 1000, 3) Returns ------- Dask array of all images stacked along the first dimension. All images will be treated as individual chunks """ imread = imread or sk_imread filenames = sorted(glob(filename)) if not filenames: raise ValueError("No files found under name %s" % filename) name = 'imread-%s' % tokenize(filenames, map(os.path.getmtime, filenames)) sample = imread(filenames[0]) if preprocess: sample = preprocess(sample) keys = [(name, i) + (0,) * len(sample.shape) for i in range(len(filenames))] if preprocess: values = [(add_leading_dimension, (preprocess, (imread, fn))) for fn in filenames] else: values = [(add_leading_dimension, (imread, fn)) for fn in filenames] dsk = dict(zip(keys, values)) chunks = ((1, ) * len(filenames), ) + tuple((d, ) for d in sample.shape) return Array(dsk, name, chunks, sample.dtype) dask-0.16.0/dask/array/learn.py000066400000000000000000000063561320364734500162540ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import numpy as np from toolz import merge, partial from ..base import tokenize from .. 
import threaded def _partial_fit(model, x, y, kwargs=None): kwargs = kwargs or dict() model.partial_fit(x, y, **kwargs) return model def fit(model, x, y, get=threaded.get, **kwargs): """ Fit scikit learn model against dask arrays Model must support the ``partial_fit`` interface for online or batch learning. This method will be called on dask arrays in sequential order. Ideally your rows are independent and identically distributed. Parameters ---------- model: sklearn model Any model supporting partial_fit interface x: dask Array Two dimensional array, likely tall and skinny y: dask Array One dimensional array with same chunks as x's rows kwargs: options to pass to partial_fit Examples -------- >>> import dask.array as da >>> X = da.random.random((10, 3), chunks=(5, 3)) >>> y = da.random.randint(0, 2, 10, chunks=(5,)) >>> from sklearn.linear_model import SGDClassifier >>> sgd = SGDClassifier() >>> sgd = da.learn.fit(sgd, X, y, classes=[1, 0]) >>> sgd # doctest: +SKIP SGDClassifier(alpha=0.0001, class_weight=None, epsilon=0.1, eta0=0.0, fit_intercept=True, l1_ratio=0.15, learning_rate='optimal', loss='hinge', n_iter=5, n_jobs=1, penalty='l2', power_t=0.5, random_state=None, shuffle=False, verbose=0, warm_start=False) This passes all of X and y through the classifier sequentially. We can use the classifier as normal on in-memory data >>> import numpy as np >>> sgd.predict(np.random.random((4, 3))) # doctest: +SKIP array([1, 0, 0, 1]) Or predict on a larger dataset >>> z = da.random.random((400, 3), chunks=(100, 3)) >>> da.learn.predict(sgd, z) # doctest: +SKIP dask.array """ assert x.ndim == 2 if y is not None: assert y.ndim == 1 assert x.chunks[0] == y.chunks[0] assert hasattr(model, 'partial_fit') if len(x.chunks[1]) > 1: x = x.reblock(chunks=(x.chunks[0], sum(x.chunks[1]))) nblocks = len(x.chunks[0]) name = 'fit-' + tokenize(model, x, y, kwargs) dsk = {(name, -1): model} dsk.update(dict(((name, i), (_partial_fit, (name, i - 1), (x.name, i, 0), (getattr(y, 'name', ''), i), kwargs)) for i in range(nblocks))) return get(merge(x.dask, getattr(y, 'dask', {}), dsk), (name, nblocks - 1)) def _predict(model, x): return model.predict(x)[:, None] def predict(model, x): """ Predict with a scikit learn model Parameters ---------- model : scikit learn classifier x : dask Array See docstring for ``da.learn.fit`` """ assert x.ndim == 2 if len(x.chunks[1]) > 1: x = x.reblock(chunks=(x.chunks[0], sum(x.chunks[1]))) func = partial(_predict, model) xx = np.zeros((1, x.shape[1]), dtype=x.dtype) dt = model.predict(xx).dtype return x.map_blocks(func, chunks=(x.chunks[0], (1,)), dtype=dt).squeeze() dask-0.16.0/dask/array/linalg.py000066400000000000000000000720061320364734500164140ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import operator from functools import wraps from numbers import Number import numpy as np import toolz from ..base import tokenize from ..compatibility import apply from .. import sharedict from .core import top, dotmany, Array from .creation import eye from .random import RandomState def _cumsum_blocks(it): total = 0 for x in it: total_previous = total total += x yield (total_previous, total) def _cumsum_part(last, new): return (last[1], last[1] + new) def tsqr(data, name=None, compute_svd=False): """ Direct Tall-and-Skinny QR algorithm As presented in: A. Benson, D. Gleich, and J. Demmel. Direct QR factorizations for tall-and-skinny matrices in MapReduce architectures. IEEE International Conference on Big Data, 2013. 
http://arxiv.org/abs/1301.1071 This algorithm is used to compute both the QR decomposition and the Singular Value Decomposition. It requires that the input array have a single column of blocks, each of which fit in memory. If blocks are of size ``(n, k)`` then this algorithm has memory use that scales as ``n**2 * k * nthreads``. Parameters ---------- data: Array compute_svd: bool Whether to compute the SVD rather than the QR decomposition See Also -------- dask.array.linalg.qr - Powered by this algorithm dask.array.linalg.svd - Powered by this algorithm """ if not (data.ndim == 2 and # Is a matrix len(data.chunks[1]) == 1): # Only one column block raise ValueError( "Input must have the following properties:\n" " 1. Have two dimensions\n" " 2. Have only one column of blocks") prefix = name or 'tsqr-' + tokenize(data, compute_svd) prefix += '_' m, n = data.shape numblocks = (len(data.chunks[0]), 1) name_qr_st1 = prefix + 'QR_st1' dsk_qr_st1 = top(np.linalg.qr, name_qr_st1, 'ij', data.name, 'ij', numblocks={data.name: numblocks}) # qr[0] name_q_st1 = prefix + 'Q_st1' dsk_q_st1 = dict(((name_q_st1, i, 0), (operator.getitem, (name_qr_st1, i, 0), 0)) for i in range(numblocks[0])) # qr[1] name_r_st1 = prefix + 'R_st1' dsk_r_st1 = dict(((name_r_st1, i, 0), (operator.getitem, (name_qr_st1, i, 0), 1)) for i in range(numblocks[0])) # Stacking for in-core QR computation to_stack = [(name_r_st1, i, 0) for i in range(numblocks[0])] name_r_st1_stacked = prefix + 'R_st1_stacked' dsk_r_st1_stacked = {(name_r_st1_stacked, 0, 0): (np.vstack, (tuple, to_stack))} # In-core QR computation name_qr_st2 = prefix + 'QR_st2' dsk_qr_st2 = top(np.linalg.qr, name_qr_st2, 'ij', name_r_st1_stacked, 'ij', numblocks={name_r_st1_stacked: (1, 1)}) # qr[0] name_q_st2_aux = prefix + 'Q_st2_aux' dsk_q_st2_aux = {(name_q_st2_aux, 0, 0): (operator.getitem, (name_qr_st2, 0, 0), 0)} if not any(np.isnan(c) for cs in data.chunks for c in cs): q2_block_sizes = [min(e, n) for e in data.chunks[0]] block_slices = [(slice(e[0], e[1]), slice(0, n)) for e in _cumsum_blocks(q2_block_sizes)] dsk_q_blockslices = {} else: name_q2bs = prefix + 'q2-shape' dsk_q2_shapes = {(name_q2bs, i): (min, (getattr, (data.name, i, 0), 'shape')) for i in range(numblocks[0])} dsk_n = {prefix + 'n': (operator.getitem, (getattr, (data.name, 0, 0), 'shape'), 1)} name_q2cs = prefix + 'q2-shape-cumsum' dsk_q2_cumsum = {(name_q2cs, 0): [0, (name_q2bs, 0)]} dsk_q2_cumsum.update({(name_q2cs, i): (_cumsum_part, (name_q2cs, i - 1), (name_q2bs, i)) for i in range(1, numblocks[0])}) name_blockslice = prefix + 'q2-blockslice' dsk_block_slices = {(name_blockslice, i): (tuple, [ (apply, slice, (name_q2cs, i)), (slice, 0, prefix + 'n')]) for i in range(numblocks[0])} dsk_q_blockslices = toolz.merge(dsk_n, dsk_q2_shapes, dsk_q2_cumsum, dsk_block_slices) block_slices = [(name_blockslice, i) for i in range(numblocks[0])] name_q_st2 = prefix + 'Q_st2' dsk_q_st2 = dict(((name_q_st2, i, 0), (operator.getitem, (name_q_st2_aux, 0, 0), b)) for i, b in enumerate(block_slices)) # qr[1] name_r_st2 = prefix + 'R' dsk_r_st2 = {(name_r_st2, 0, 0): (operator.getitem, (name_qr_st2, 0, 0), 1)} name_q_st3 = prefix + 'Q' dsk_q_st3 = top(np.dot, name_q_st3, 'ij', name_q_st1, 'ij', name_q_st2, 'ij', numblocks={name_q_st1: numblocks, name_q_st2: numblocks}) dsk = sharedict.ShareDict() dsk.update(data.dask) dsk.update_with_key(dsk_qr_st1, key=name_qr_st1) dsk.update_with_key(dsk_q_st1, key=name_q_st1) dsk.update_with_key(dsk_r_st1, key=name_r_st1) dsk.update_with_key(dsk_r_st1_stacked, 
key=name_r_st1_stacked) dsk.update_with_key(dsk_qr_st2, key=name_qr_st2) dsk.update_with_key(dsk_q_st2_aux, key=name_q_st2_aux) dsk.update_with_key(dsk_q_st2, key=name_q_st2) dsk.update_with_key(dsk_q_st3, key=name_q_st3) dsk.update_with_key(dsk_q_blockslices, key=prefix + '-q-blockslices') dsk.update_with_key(dsk_r_st2, key=name_r_st2) if not compute_svd: qq, rr = np.linalg.qr(np.ones(shape=(1, 1), dtype=data.dtype)) q = Array(dsk, name_q_st3, shape=data.shape, chunks=data.chunks, dtype=qq.dtype) r = Array(dsk, name_r_st2, shape=(n, n), chunks=(n, n), dtype=rr.dtype) return q, r else: # In-core SVD computation name_svd_st2 = prefix + 'SVD_st2' dsk_svd_st2 = top(np.linalg.svd, name_svd_st2, 'ij', name_r_st2, 'ij', numblocks={name_r_st2: (1, 1)}) # svd[0] name_u_st2 = prefix + 'U_st2' dsk_u_st2 = {(name_u_st2, 0, 0): (operator.getitem, (name_svd_st2, 0, 0), 0)} # svd[1] name_s_st2 = prefix + 'S' dsk_s_st2 = {(name_s_st2, 0): (operator.getitem, (name_svd_st2, 0, 0), 1)} # svd[2] name_v_st2 = prefix + 'V' dsk_v_st2 = {(name_v_st2, 0, 0): (operator.getitem, (name_svd_st2, 0, 0), 2)} # Q * U name_u_st4 = prefix + 'U' dsk_u_st4 = top(dotmany, name_u_st4, 'ij', name_q_st3, 'ik', name_u_st2, 'kj', numblocks={name_q_st3: numblocks, name_u_st2: (1, 1)}) dsk.update_with_key(dsk_svd_st2, key=name_svd_st2) dsk.update_with_key(dsk_u_st2, key=name_u_st2) dsk.update_with_key(dsk_u_st4, key=name_u_st4) dsk.update_with_key(dsk_s_st2, key=name_s_st2) dsk.update_with_key(dsk_v_st2, key=name_v_st2) uu, ss, vv = np.linalg.svd(np.ones(shape=(1, 1), dtype=data.dtype)) u = Array(dsk, name_u_st4, shape=data.shape, chunks=data.chunks, dtype=uu.dtype) s = Array(dsk, name_s_st2, shape=(n,), chunks=((n,),), dtype=ss.dtype) v = Array(dsk, name_v_st2, shape=(n, n), chunks=((n,), (n,)), dtype=vv.dtype) return u, s, v def compression_level(n, q, oversampling=10, min_subspace_size=20): """ Compression level to use in svd_compressed Given the size ``n`` of a space, compress that that to one of size ``q`` plus oversampling. The oversampling allows for greater flexibility in finding an appropriate subspace, a low value is often enough (10 is already a very conservative choice, it can be further reduced). ``q + oversampling`` should not be larger than ``n``. In this specific implementation, ``q + oversampling`` is at least ``min_subspace_size``. >>> compression_level(100, 10) 20 """ return min(max(min_subspace_size, q + oversampling), n) def compression_matrix(data, q, n_power_iter=0, seed=None): """ Randomly sample matrix to find most active subspace This compression matrix returned by this algorithm can be used to compute both the QR decomposition and the Singular Value Decomposition. Parameters ---------- data: Array q: int Size of the desired subspace (the actual size will be bigger, because of oversampling, see ``da.linalg.compression_level``) n_power_iter: int number of power iterations, useful when the singular values of the input matrix decay very slowly. References ---------- N. Halko, P. G. Martinsson, and J. A. Tropp. Finding structure with randomness: Probabilistic algorithms for constructing approximate matrix decompositions. SIAM Rev., Survey and Review section, Vol. 53, num. 2, pp. 
217-288, June 2011 http://arxiv.org/abs/0909.4061 """ n = data.shape[1] comp_level = compression_level(n, q) state = RandomState(seed) omega = state.standard_normal(size=(n, comp_level), chunks=(data.chunks[1], (comp_level,))) mat_h = data.dot(omega) for j in range(n_power_iter): mat_h = data.dot(data.T.dot(mat_h)) q, _ = tsqr(mat_h) return q.T def svd_compressed(a, k, n_power_iter=0, seed=None, name=None): """ Randomly compressed rank-k thin Singular Value Decomposition. This computes the approximate singular value decomposition of a large array. This algorithm is generally faster than the normal algorithm but does not provide exact results. One can balance between performance and accuracy with input parameters (see below). Parameters ---------- a: Array Input array k: int Rank of the desired thin SVD decomposition. n_power_iter: int Number of power iterations, useful when the singular values decay slowly. Error decreases exponentially as n_power_iter increases. In practice, set n_power_iter <= 4. Examples -------- >>> u, s, vt = svd_compressed(x, 20) # doctest: +SKIP Returns ------- u: Array, unitary / orthogonal s: Array, singular values in decreasing order (largest first) v: Array, unitary / orthogonal References ---------- N. Halko, P. G. Martinsson, and J. A. Tropp. Finding structure with randomness: Probabilistic algorithms for constructing approximate matrix decompositions. SIAM Rev., Survey and Review section, Vol. 53, num. 2, pp. 217-288, June 2011 http://arxiv.org/abs/0909.4061 """ comp = compression_matrix(a, k, n_power_iter=n_power_iter, seed=seed) a_compressed = comp.dot(a) v, s, u = tsqr(a_compressed.T, name, compute_svd=True) u = comp.T.dot(u) v = v.T u = u[:, :k] s = s[:k] v = v[:k, :] return u, s, v def qr(a, name=None): """ Compute the qr factorization of a matrix. Examples -------- >>> q, r = da.linalg.qr(x) # doctest: +SKIP Returns ------- q: Array, orthonormal r: Array, upper-triangular See Also -------- np.linalg.qr : Equivalent NumPy Operation dask.array.linalg.tsqr: Actual implementation with citation """ return tsqr(a, name) def svd(a, name=None): """ Compute the singular value decomposition of a matrix. Examples -------- >>> u, s, v = da.linalg.svd(x) # doctest: +SKIP Returns ------- u: Array, unitary / orthogonal s: Array, singular values in decreasing order (largest first) v: Array, unitary / orthogonal See Also -------- np.linalg.svd : Equivalent NumPy Operation dask.array.linalg.tsqr: Actual implementation with citation """ return tsqr(a, name, compute_svd=True) def _solve_triangular_lower(a, b): import scipy.linalg return scipy.linalg.solve_triangular(a, b, lower=True) def lu(a): """ Compute the lu decomposition of a matrix. Examples -------- >>> p, l, u = da.linalg.lu(x) # doctest: +SKIP Returns ------- p: Array, permutation matrix l: Array, lower triangular matrix with unit diagonal. u: Array, upper triangular matrix """ import scipy.linalg if a.ndim != 2: raise ValueError('Dimension must be 2 to perform lu decomposition') xdim, ydim = a.shape if xdim != ydim: raise ValueError('Input must be a square matrix to perform lu decomposition') if not len(set(a.chunks[0] + a.chunks[1])) == 1: msg = ('All chunks must be a square matrix to perform lu decomposition. 
' 'Use .rechunk method to change the size of chunks.') raise ValueError(msg) vdim = len(a.chunks[0]) hdim = len(a.chunks[1]) token = tokenize(a) name_lu = 'lu-lu-' + token name_p = 'lu-p-' + token name_l = 'lu-l-' + token name_u = 'lu-u-' + token # for internal calculation name_p_inv = 'lu-p-inv-' + token name_l_permuted = 'lu-l-permute-' + token name_u_transposed = 'lu-u-transpose-' + token name_plu_dot = 'lu-plu-dot-' + token name_lu_dot = 'lu-lu-dot-' + token dsk = {} for i in range(min(vdim, hdim)): target = (a.name, i, i) if i > 0: prevs = [] for p in range(i): prev = name_plu_dot, i, p, p, i dsk[prev] = (np.dot, (name_l_permuted, i, p), (name_u, p, i)) prevs.append(prev) target = (operator.sub, target, (sum, prevs)) # diagonal block dsk[name_lu, i, i] = (scipy.linalg.lu, target) # sweep to horizontal for j in range(i + 1, hdim): target = (np.dot, (name_p_inv, i, i), (a.name, i, j)) if i > 0: prevs = [] for p in range(i): prev = name_lu_dot, i, p, p, j dsk[prev] = (np.dot, (name_l, i, p), (name_u, p, j)) prevs.append(prev) target = (operator.sub, target, (sum, prevs)) dsk[name_lu, i, j] = (_solve_triangular_lower, (name_l, i, i), target) # sweep to vertical for k in range(i + 1, vdim): target = (a.name, k, i) if i > 0: prevs = [] for p in range(i): prev = name_plu_dot, k, p, p, i dsk[prev] = (np.dot, (name_l_permuted, k, p), (name_u, p, i)) prevs.append(prev) target = (operator.sub, target, (sum, prevs)) # solving x.dot(u) = target is equal to u.T.dot(x.T) = target.T dsk[name_lu, k, i] = (np.transpose, (_solve_triangular_lower, (name_u_transposed, i, i), (np.transpose, target))) for i in range(min(vdim, hdim)): for j in range(min(vdim, hdim)): if i == j: dsk[name_p, i, j] = (operator.getitem, (name_lu, i, j), 0) dsk[name_l, i, j] = (operator.getitem, (name_lu, i, j), 1) dsk[name_u, i, j] = (operator.getitem, (name_lu, i, j), 2) # permuted l is required to be propagated to i > j blocks dsk[name_l_permuted, i, j] = (np.dot, (name_p, i, j), (name_l, i, j)) dsk[name_u_transposed, i, j] = (np.transpose, (name_u, i, j)) # transposed permutation matrix is equal to its inverse dsk[name_p_inv, i, j] = (np.transpose, (name_p, i, j)) elif i > j: dsk[name_p, i, j] = (np.zeros, (a.chunks[0][i], a.chunks[1][j])) # calculations are performed using permuted l, # thus the result should be reverted by inverted (=transposed) p # to have the same row order as diagonal blocks dsk[name_l, i, j] = (np.dot, (name_p_inv, i, i), (name_lu, i, j)) dsk[name_u, i, j] = (np.zeros, (a.chunks[0][i], a.chunks[1][j])) dsk[name_l_permuted, i, j] = (name_lu, i, j) else: dsk[name_p, i, j] = (np.zeros, (a.chunks[0][i], a.chunks[1][j])) dsk[name_l, i, j] = (np.zeros, (a.chunks[0][i], a.chunks[1][j])) dsk[name_u, i, j] = (name_lu, i, j) # l_permuted is not referred in upper triangulars dsk = sharedict.merge(a.dask, ('lu-' + token, dsk)) pp, ll, uu = scipy.linalg.lu(np.ones(shape=(1, 1), dtype=a.dtype)) p = Array(dsk, name_p, shape=a.shape, chunks=a.chunks, dtype=pp.dtype) l = Array(dsk, name_l, shape=a.shape, chunks=a.chunks, dtype=ll.dtype) u = Array(dsk, name_u, shape=a.shape, chunks=a.chunks, dtype=uu.dtype) return p, l, u def solve_triangular(a, b, lower=False): """ Solve the equation `a x = b` for `x`, assuming a is a triangular matrix. Parameters ---------- a : (M, M) array_like A triangular matrix b : (M,) or (M, N) array_like Right-hand side matrix in `a x = b` lower : bool, optional Use only data contained in the lower triangle of `a`. Default is to use upper triangle. 
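    Notes
    -----
    ``a.chunks[1]`` must equal ``b.chunks[0]`` (use ``.rechunk`` otherwise),
    mirroring the check in the function body below. A small illustrative
    call (hypothetical values, not executed as a doctest):

    >>> import numpy as np
    >>> import dask.array as da
    >>> a = da.from_array(np.array([[3., 0.], [1., 2.]]), chunks=2)
    >>> b = da.from_array(np.array([6., 8.]), chunks=2)
    >>> da.linalg.solve_triangular(a, b, lower=True).compute()  # doctest: +SKIP
    array([ 2.,  3.])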
Returns ------- x : (M,) or (M, N) array Solution to the system `a x = b`. Shape of return matches `b`. """ import scipy.linalg if a.ndim != 2: raise ValueError('a must be 2 dimensional') if b.ndim <= 2: if a.shape[1] != b.shape[0]: raise ValueError('a.shape[1] and b.shape[0] must be equal') if a.chunks[1] != b.chunks[0]: msg = ('a.chunks[1] and b.chunks[0] must be equal. ' 'Use .rechunk method to change the size of chunks.') raise ValueError(msg) else: raise ValueError('b must be 1 or 2 dimensional') vchunks = len(a.chunks[1]) hchunks = 1 if b.ndim == 1 else len(b.chunks[1]) token = tokenize(a, b, lower) name = 'solve-triangular-' + token # for internal calculation # (name, i, j, k, l) corresponds to a_ij.dot(b_kl) name_mdot = 'solve-tri-dot-' + token def _b_init(i, j): if b.ndim == 1: return b.name, i else: return b.name, i, j def _key(i, j): if b.ndim == 1: return name, i else: return name, i, j dsk = {} if lower: for i in range(vchunks): for j in range(hchunks): target = _b_init(i, j) if i > 0: prevs = [] for k in range(i): prev = name_mdot, i, k, k, j dsk[prev] = (np.dot, (a.name, i, k), _key(k, j)) prevs.append(prev) target = (operator.sub, target, (sum, prevs)) dsk[_key(i, j)] = (_solve_triangular_lower, (a.name, i, i), target) else: for i in range(vchunks): for j in range(hchunks): target = _b_init(i, j) if i < vchunks - 1: prevs = [] for k in range(i + 1, vchunks): prev = name_mdot, i, k, k, j dsk[prev] = (np.dot, (a.name, i, k), _key(k, j)) prevs.append(prev) target = (operator.sub, target, (sum, prevs)) dsk[_key(i, j)] = (scipy.linalg.solve_triangular, (a.name, i, i), target) dsk = sharedict.merge(a.dask, b.dask, (name, dsk)) res = _solve_triangular_lower(np.array([[1, 0], [1, 2]], dtype=a.dtype), np.array([0, 1], dtype=b.dtype)) return Array(dsk, name, shape=b.shape, chunks=b.chunks, dtype=res.dtype) def solve(a, b, sym_pos=False): """ Solve the equation ``a x = b`` for ``x``. By default, use LU decomposition and forward / backward substitutions. When ``sym_pos`` is ``True``, use Cholesky decomposition. Parameters ---------- a : (M, M) array_like A square matrix. b : (M,) or (M, N) array_like Right-hand side matrix in ``a x = b``. sym_pos : bool Assume a is symmetric and positive definite. If ``True``, use Cholesky decomposition. Returns ------- x : (M,) or (M, N) Array Solution to the system ``a x = b``. Shape of the return matches the shape of `b`. """ if sym_pos: l, u = _cholesky(a) else: p, l, u = lu(a) b = p.T.dot(b) uy = solve_triangular(l, b, lower=True) return solve_triangular(u, uy) def inv(a): """ Compute the inverse of a matrix with LU decomposition and forward / backward substitutions. Parameters ---------- a : array_like Square matrix to be inverted. Returns ------- ainv : Array Inverse of the matrix `a`. """ return solve(a, eye(a.shape[0], chunks=a.chunks[0][0])) def _cholesky_lower(a): import scipy.linalg return scipy.linalg.cholesky(a, lower=True) def cholesky(a, lower=False): """ Returns the Cholesky decomposition, :math:`A = L L^*` or :math:`A = U^* U` of a Hermitian positive-definite matrix A. Parameters ---------- a : (M, M) array_like Matrix to be decomposed lower : bool, optional Whether to compute the upper or lower triangular Cholesky factorization. Default is upper-triangular. Returns ------- c : (M, M) Array Upper- or lower-triangular Cholesky factor of `a`. """ l, u = _cholesky(a) if lower: return l else: return u def _cholesky(a): """ Private function to perform Cholesky decomposition, which returns both lower and upper triangulars. 
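    Used by ``cholesky`` (which simply returns one of the two factors) and by
    ``solve`` when ``sym_pos=True``. Illustrative call (assumed example, not
    executed as a doctest):

    >>> lower, upper = _cholesky(x)  # doctest: +SKIP
    >>> da.linalg.cholesky(x, lower=True)  # same array as ``lower``  # doctest: +SKIP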
""" import scipy.linalg if a.ndim != 2: raise ValueError('Dimension must be 2 to perform cholesky decomposition') xdim, ydim = a.shape if xdim != ydim: raise ValueError('Input must be a square matrix to perform cholesky decomposition') if not len(set(a.chunks[0] + a.chunks[1])) == 1: msg = ('All chunks must be a square matrix to perform cholesky decomposition. ' 'Use .rechunk method to change the size of chunks.') raise ValueError(msg) vdim = len(a.chunks[0]) hdim = len(a.chunks[1]) token = tokenize(a) name = 'cholesky-' + token # (name_lt_dot, i, j, k, l) corresponds to l_ij.dot(l_kl.T) name_lt_dot = 'cholesky-lt-dot-' + token # because transposed results are needed for calculation, # we can build graph for upper triangular simultaneously name_upper = 'cholesky-upper-' + token # calculates lower triangulars because subscriptions get simpler dsk = {} for i in range(vdim): for j in range(hdim): if i < j: dsk[name, i, j] = (np.zeros, (a.chunks[0][i], a.chunks[1][j])) dsk[name_upper, j, i] = (name, i, j) elif i == j: target = (a.name, i, j) if i > 0: prevs = [] for p in range(i): prev = name_lt_dot, i, p, i, p dsk[prev] = (np.dot, (name, i, p), (name_upper, p, i)) prevs.append(prev) target = (operator.sub, target, (sum, prevs)) dsk[name, i, i] = (_cholesky_lower, target) dsk[name_upper, i, i] = (np.transpose, (name, i, i)) else: # solving x.dot(L11.T) = (A21 - L20.dot(L10.T)) is equal to # L11.dot(x.T) = A21.T - L10.dot(L20.T) # L11.dot(x.T) = A12 - L10.dot(L02) target = (a.name, j, i) if j > 0: prevs = [] for p in range(j): prev = name_lt_dot, j, p, i, p dsk[prev] = (np.dot, (name, j, p), (name_upper, p, i)) prevs.append(prev) target = (operator.sub, target, (sum, prevs)) dsk[name_upper, j, i] = (_solve_triangular_lower,(name, j, j), target) dsk[name, i, j] = (np.transpose, (name_upper, j, i)) dsk = sharedict.merge(a.dask, (name, dsk)) cho = scipy.linalg.cholesky(np.array([[1, 2], [2, 5]], dtype=a.dtype)) lower = Array(dsk, name, shape=a.shape, chunks=a.chunks, dtype=cho.dtype) # do not use .T, because part of transposed blocks are already calculated upper = Array(dsk, name_upper, shape=a.shape, chunks=a.chunks, dtype=cho.dtype) return lower, upper def _sort_decreasing(x): x[::-1].sort() return x def lstsq(a, b): """ Return the least-squares solution to a linear matrix equation using QR decomposition. Solves the equation `a x = b` by computing a vector `x` that minimizes the Euclidean 2-norm `|| b - a x ||^2`. The equation may be under-, well-, or over- determined (i.e., the number of linearly independent rows of `a` can be less than, equal to, or greater than its number of linearly independent columns). If `a` is square and of full rank, then `x` (but for round-off error) is the "exact" solution of the equation. Parameters ---------- a : (M, N) array_like "Coefficient" matrix. b : (M,) array_like Ordinate or "dependent variable" values. Returns ------- x : (N,) Array Least-squares solution. If `b` is two-dimensional, the solutions are in the `K` columns of `x`. residuals : (1,) Array Sums of residuals; squared Euclidean 2-norm for each column in ``b - a*x``. rank : Array Rank of matrix `a`. s : (min(M, N),) Array Singular values of `a`. 
""" q, r = qr(a) x = solve_triangular(r, q.T.dot(b)) residuals = b - a.dot(x) residuals = (residuals ** 2).sum(keepdims=True) token = tokenize(a, b) # r must be a triangular with single block # rank rname = 'lstsq-rank-' + token rdsk = {(rname, ): (np.linalg.matrix_rank, (r.name, 0, 0))} rdsk = sharedict.merge(r.dask, (rname, rdsk)) # rank must be an integer rank = Array(rdsk, rname, shape=(), chunks=(), dtype=int) # singular sname = 'lstsq-singular-' + token rt = r.T sdsk = {(sname, 0): (_sort_decreasing, (np.sqrt, (np.linalg.eigvals, (np.dot, (rt.name, 0, 0), (r.name, 0, 0)))))} sdsk = sharedict.merge(rt.dask, (sname, sdsk)) _, _, _, ss = np.linalg.lstsq(np.array([[1, 0], [1, 2]], dtype=a.dtype), np.array([0, 1], dtype=b.dtype)) s = Array(sdsk, sname, shape=(r.shape[0], ), chunks=r.shape[0], dtype=ss.dtype) return x, residuals, rank, s @wraps(np.linalg.norm) def norm(x, ord=None, axis=None, keepdims=False): if x.ndim > 2: raise ValueError("Improper number of dimensions to norm.") if axis is None: axis = tuple(range(x.ndim)) elif isinstance(axis, Number): axis = (int(axis),) else: axis = tuple(axis) if ord == "fro": ord = None if len(axis) == 1: raise ValueError("Invalid norm order for vectors.") # Coerce to double precision. r = x.astype(np.promote_types(x.dtype, float)) if ord is None: r = (abs(r) ** 2).sum(axis=axis, keepdims=keepdims) ** 0.5 elif ord == "nuc": if len(axis) == 1: raise ValueError("Invalid norm order for vectors.") r = svd(x)[1][None].sum(keepdims=keepdims) elif ord == np.inf: r = abs(r) if len(axis) == 1: r = r.max(axis=axis, keepdims=keepdims) else: r = r.sum(axis=axis[1], keepdims=keepdims).max(keepdims=keepdims) elif ord == -np.inf: r = abs(r) if len(axis) == 1: r = r.min(axis=axis, keepdims=keepdims) else: r = r.sum(axis=axis[1], keepdims=keepdims).min(keepdims=keepdims) elif ord == 0: if len(axis) == 2: raise ValueError("Invalid norm order for matrices.") r = (r != 0).astype(r.dtype).sum(axis=0, keepdims=keepdims) elif ord == 1: r = abs(r) if len(axis) == 1: r = r.sum(axis=axis, keepdims=keepdims) else: r = r.sum(axis=axis[0], keepdims=keepdims).max(keepdims=keepdims) elif len(axis) == 2 and ord == -1: r = abs(r).sum(axis=axis[0], keepdims=keepdims).min(keepdims=keepdims) elif len(axis) == 2 and ord == 2: r = svd(x)[1][None].max(keepdims=keepdims) elif len(axis) == 2 and ord == -2: r = svd(x)[1][None].min(keepdims=keepdims) else: if len(axis) == 2: raise ValueError("Invalid norm order for matrices.") r = (abs(r) ** ord).sum(axis=axis, keepdims=keepdims) ** (1.0 / ord) return r dask-0.16.0/dask/array/ma.py000066400000000000000000000164411320364734500155440ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from functools import wraps from distutils.version import LooseVersion import numpy as np from ..base import normalize_token from .core import (concatenate_lookup, tensordot_lookup, map_blocks, asanyarray, atop) if LooseVersion(np.__version__) < '1.11.0': raise ImportError("dask.array.ma requires numpy >= 1.11.0") @normalize_token.register(np.ma.masked_array) def normalize_masked_array(x): data = normalize_token(x.data) mask = normalize_token(x.mask) fill_value = normalize_token(x.fill_value) return (data, mask, fill_value) @concatenate_lookup.register(np.ma.masked_array) def _concatenate(arrays, axis=0): out = np.ma.concatenate(arrays, axis=axis) fill_values = [i.fill_value for i in arrays if hasattr(i, 'fill_value')] if any(isinstance(f, np.ndarray) for f in fill_values): raise ValueError("Dask doesn't support masked 
array's with " "non-scalar `fill_value`s") if fill_values: # If all the fill_values are the same copy over the fill value fill_values = np.unique(fill_values) if len(fill_values) == 1: out.fill_value = fill_values[0] return out @tensordot_lookup.register(np.ma.masked_array) def _tensordot(a, b, axes=2): # Much of this is stolen from numpy/core/numeric.py::tensordot # Please see license at https://github.com/numpy/numpy/blob/master/LICENSE.txt try: iter(axes) except TypeError: axes_a = list(range(-axes, 0)) axes_b = list(range(0, axes)) else: axes_a, axes_b = axes try: na = len(axes_a) axes_a = list(axes_a) except TypeError: axes_a = [axes_a] na = 1 try: nb = len(axes_b) axes_b = list(axes_b) except TypeError: axes_b = [axes_b] nb = 1 # a, b = asarray(a), asarray(b) # <--- modified as_ = a.shape nda = a.ndim bs = b.shape ndb = b.ndim equal = True if na != nb: equal = False else: for k in range(na): if as_[axes_a[k]] != bs[axes_b[k]]: equal = False break if axes_a[k] < 0: axes_a[k] += nda if axes_b[k] < 0: axes_b[k] += ndb if not equal: raise ValueError("shape-mismatch for sum") # Move the axes to sum over to the end of "a" # and to the front of "b" notin = [k for k in range(nda) if k not in axes_a] newaxes_a = notin + axes_a N2 = 1 for axis in axes_a: N2 *= as_[axis] newshape_a = (-1, N2) olda = [as_[axis] for axis in notin] notin = [k for k in range(ndb) if k not in axes_b] newaxes_b = axes_b + notin N2 = 1 for axis in axes_b: N2 *= bs[axis] newshape_b = (N2, -1) oldb = [bs[axis] for axis in notin] at = a.transpose(newaxes_a).reshape(newshape_a) bt = b.transpose(newaxes_b).reshape(newshape_b) res = np.ma.dot(at, bt) return res.reshape(olda + oldb) @wraps(np.ma.filled) def filled(a, fill_value=None): a = asanyarray(a) return a.map_blocks(np.ma.filled, fill_value=fill_value) def _wrap_masked(f): @wraps(f) def _(a, value): a = asanyarray(a) value = asanyarray(value) ainds = tuple(range(a.ndim))[::-1] vinds = tuple(range(value.ndim))[::-1] oinds = max(ainds, vinds, key=len) return atop(f, oinds, a, ainds, value, vinds, dtype=a.dtype) return _ masked_greater = _wrap_masked(np.ma.masked_greater) masked_greater_equal = _wrap_masked(np.ma.masked_greater_equal) masked_less = _wrap_masked(np.ma.masked_less) masked_less_equal = _wrap_masked(np.ma.masked_less_equal) masked_not_equal = _wrap_masked(np.ma.masked_not_equal) @wraps(np.ma.masked_equal) def masked_equal(a, value): a = asanyarray(a) if getattr(value, 'shape', ()): raise ValueError("da.ma.masked_equal doesn't support array `value`s") inds = tuple(range(a.ndim)) return atop(np.ma.masked_equal, inds, a, inds, value, (), dtype=a.dtype) @wraps(np.ma.masked_invalid) def masked_invalid(a): return asanyarray(a).map_blocks(np.ma.masked_invalid) @wraps(np.ma.masked_inside) def masked_inside(x, v1, v2): x = asanyarray(x) return x.map_blocks(np.ma.masked_inside, v1, v2) @wraps(np.ma.masked_outside) def masked_outside(x, v1, v2): x = asanyarray(x) return x.map_blocks(np.ma.masked_outside, v1, v2) @wraps(np.ma.masked_where) def masked_where(condition, a): cshape = getattr(condition, 'shape', ()) if cshape and cshape != a.shape: raise IndexError("Inconsistant shape between the condition and the " "input (got %s and %s)" % (cshape, a.shape)) condition = asanyarray(condition) a = asanyarray(a) ainds = tuple(range(a.ndim)) cinds = tuple(range(condition.ndim)) return atop(np.ma.masked_where, ainds, condition, cinds, a, ainds, dtype=a.dtype) @wraps(np.ma.masked_values) def masked_values(x, value, rtol=1e-05, atol=1e-08, shrink=True): x = asanyarray(x) if 
getattr(value, 'shape', ()): raise ValueError("da.ma.masked_values doesn't support array `value`s") return map_blocks(np.ma.masked_values, x, value, rtol=rtol, atol=atol, shrink=shrink) @wraps(np.ma.fix_invalid) def fix_invalid(a, fill_value=None): a = asanyarray(a) return a.map_blocks(np.ma.fix_invalid, fill_value=fill_value) @wraps(np.ma.getdata) def getdata(a): a = asanyarray(a) return a.map_blocks(np.ma.getdata) @wraps(np.ma.getmaskarray) def getmaskarray(a): a = asanyarray(a) return a.map_blocks(np.ma.getmaskarray) def _masked_array(data, mask=np.ma.nomask, **kwargs): dtype = kwargs.pop('masked_dtype', None) return np.ma.masked_array(data, mask=mask, dtype=dtype, **kwargs) @wraps(np.ma.masked_array) def masked_array(data, mask=np.ma.nomask, fill_value=None, **kwargs): data = asanyarray(data) inds = tuple(range(data.ndim)) arginds = [inds, data, inds] if getattr(fill_value, 'shape', ()): raise ValueError("non-scalar fill_value not supported") kwargs['fill_value'] = fill_value if mask is not np.ma.nomask: mask = asanyarray(mask) if mask.size == 1: mask = mask.reshape((1,) * data.ndim) elif data.shape != mask.shape: raise np.ma.MaskError("Mask and data not compatible: data shape " "is %s, and mask shape is " "%s." % (repr(data.shape), repr(mask.shape))) arginds.extend([mask, inds]) if 'dtype' in kwargs: kwargs['masked_dtype'] = kwargs['dtype'] else: kwargs['dtype'] = data.dtype return atop(_masked_array, *arginds, **kwargs) def _set_fill_value(x, fill_value): if isinstance(x, np.ma.masked_array): x = x.copy() np.ma.set_fill_value(x, fill_value=fill_value) return x @wraps(np.ma.set_fill_value) def set_fill_value(a, fill_value): a = asanyarray(a) if getattr(fill_value, 'shape', ()): raise ValueError("da.ma.set_fill_value doesn't support array `value`s") fill_value = np.ma.core._check_fill_value(fill_value, a.dtype) res = a.map_blocks(_set_fill_value, fill_value) a.dask = res.dask a.name = res.name dask-0.16.0/dask/array/numpy_compat.py000066400000000000000000000331671320364734500176660ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from ..compatibility import builtins import numpy as np import warnings try: isclose = np.isclose except AttributeError: def isclose(*args, **kwargs): raise RuntimeError("You need numpy version 1.7 or greater to use " "isclose.") try: full = np.full except AttributeError: def full(shape, fill_value, dtype=None, order=None): """Our implementation of numpy.full because your numpy is old.""" if order is not None: raise NotImplementedError("`order` kwarg is not supported upgrade " "to Numpy 1.8 or greater for support.") return np.multiply(fill_value, np.ones(shape, dtype=dtype), dtype=dtype) # Taken from scikit-learn: # https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/utils/fixes.py#L84 try: with warnings.catch_warnings(): if (not np.allclose(np.divide(.4, 1, casting="unsafe"), np.divide(.4, 1, casting="unsafe", dtype=np.float)) or not np.allclose(np.divide(1, .5, dtype='i8'), 2) or not np.allclose(np.divide(.4, 1), .4)): raise TypeError('Divide not working with dtype: ' 'https://github.com/numpy/numpy/issues/3484') divide = np.divide ma_divide = np.ma.divide except TypeError: # Divide with dtype doesn't work on Python 3 def divide(x1, x2, out=None, dtype=None): """Implementation of numpy.divide that works with dtype kwarg. Temporary compatibility fix for a bug in numpy's version. 
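        Behaves like ``np.divide`` but honours an explicit ``dtype`` argument
        (illustrative, assumed example; not executed as a doctest):

        >>> divide(np.array([1.0, 2.0]), 4, dtype='f4')  # doctest: +SKIP
        array([ 0.25,  0.5 ], dtype=float32)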
See https://github.com/numpy/numpy/issues/3484 for the relevant issue.""" x = np.divide(x1, x2, out) if dtype is not None: x = x.astype(dtype) return x ma_divide = np.ma.core._DomainedBinaryOperation(divide, np.ma.core._DomainSafeDivide(), 0, 1) # functions copied from numpy try: from numpy import broadcast_to, nanprod, nancumsum, nancumprod except ImportError: # pragma: no cover # these functions should arrive in numpy v1.10 to v1.12. Until then, # they are duplicated here # See https://github.com/numpy/numpy/blob/master/LICENSE.txt # or NUMPY_LICENSE.txt within this directory def _maybe_view_as_subclass(original_array, new_array): if type(original_array) is not type(new_array): # if input was an ndarray subclass and subclasses were OK, # then view the result as that subclass. new_array = new_array.view(type=type(original_array)) # Since we have done something akin to a view from original_array, we # should let the subclass finalize (if it has it implemented, i.e., is # not None). if new_array.__array_finalize__: new_array.__array_finalize__(original_array) return new_array def _broadcast_to(array, shape, subok, readonly): shape = tuple(shape) if np.iterable(shape) else (shape,) array = np.array(array, copy=False, subok=subok) if not shape and array.shape: raise ValueError('cannot broadcast a non-scalar to a scalar array') if builtins.any(size < 0 for size in shape): raise ValueError('all elements of broadcast shape must be non-' 'negative') broadcast = np.nditer( (array,), flags=['multi_index', 'zerosize_ok', 'refs_ok'], op_flags=['readonly'], itershape=shape, order='C').itviews[0] result = _maybe_view_as_subclass(array, broadcast) if not readonly and array.flags.writeable: result.flags.writeable = True return result def broadcast_to(array, shape, subok=False): """Broadcast an array to a new shape. Parameters ---------- array : array_like The array to broadcast. shape : tuple The shape of the desired array. subok : bool, optional If True, then sub-classes will be passed-through, otherwise the returned array will be forced to be a base-class array (default). Returns ------- broadcast : array A readonly view on the original array with the given shape. It is typically not contiguous. Furthermore, more than one element of a broadcasted array may refer to a single memory location. Raises ------ ValueError If the array is not compatible with the new shape according to NumPy's broadcasting rules. Examples -------- >>> x = np.array([1, 2, 3]) >>> np.broadcast_to(x, (3, 3)) # doctest: +SKIP array([[1, 2, 3], [1, 2, 3], [1, 2, 3]]) """ return _broadcast_to(array, shape, subok=subok, readonly=True) def _replace_nan(a, val): """ If `a` is of inexact type, make a copy of `a`, replace NaNs with the `val` value, and return the copy together with a boolean mask marking the locations where NaNs were present. If `a` is not of inexact type, do nothing and return `a` together with a mask of None. Note that scalars will end up as array scalars, which is important for using the result as the value of the out argument in some operations. Parameters ---------- a : array-like Input array. val : float NaN values are set to val before doing the operation. Returns ------- y : ndarray If `a` is of inexact type, return a copy of `a` with the NaNs replaced by the fill value, otherwise return `a`. mask: {bool, None} If `a` is of inexact type, return a boolean mask marking locations of NaNs, otherwise return None. 
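    Examples
    --------
    Illustrative behaviour (assumed example, not executed as a doctest):

    >>> a, mask = _replace_nan(np.array([1.0, np.nan]), 0)  # doctest: +SKIP
    >>> a  # doctest: +SKIP
    array([ 1.,  0.])
    >>> mask  # doctest: +SKIP
    array([False,  True], dtype=bool)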
""" is_new = not isinstance(a, np.ndarray) if is_new: a = np.array(a) if not issubclass(a.dtype.type, np.inexact): return a, None if not is_new: # need copy a = np.array(a, subok=True) mask = np.isnan(a) np.copyto(a, val, where=mask) return a, mask def nanprod(a, axis=None, dtype=None, out=None, keepdims=0): """ Return the product of array elements over a given axis treating Not a Numbers (NaNs) as zero. One is returned for slices that are all-NaN or empty. .. versionadded:: 1.10.0 Parameters ---------- a : array_like Array containing numbers whose sum is desired. If `a` is not an array, a conversion is attempted. axis : int, optional Axis along which the product is computed. The default is to compute the product of the flattened array. dtype : data-type, optional The type of the returned array and of the accumulator in which the elements are summed. By default, the dtype of `a` is used. An exception is when `a` has an integer type with less precision than the platform (u)intp. In that case, the default will be either (u)int32 or (u)int64 depending on whether the platform is 32 or 64 bits. For inexact inputs, dtype must be inexact. out : ndarray, optional Alternate output array in which to place the result. The default is ``None``. If provided, it must have the same shape as the expected output, but the type will be cast if necessary. See `doc.ufuncs` for details. The casting of NaN to integer can yield unexpected results. keepdims : bool, optional If True, the axes which are reduced are left in the result as dimensions with size one. With this option, the result will broadcast correctly against the original `arr`. Returns ------- y : ndarray or numpy scalar See Also -------- numpy.prod : Product across array propagating NaNs. isnan : Show which elements are NaN. Notes ----- Numpy integer arithmetic is modular. If the size of a product exceeds the size of an integer accumulator, its value will wrap around and the result will be incorrect. Specifying ``dtype=double`` can alleviate that problem. Examples -------- >>> np.nanprod(1) 1 >>> np.nanprod([1]) 1 >>> np.nanprod([1, np.nan]) 1.0 >>> a = np.array([[1, 2], [3, np.nan]]) >>> np.nanprod(a) 6.0 >>> np.nanprod(a, axis=0) array([ 3., 2.]) """ a, mask = _replace_nan(a, 1) return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) def nancumsum(a, axis=None, dtype=None, out=None): """ Return the cumulative sum of array elements over a given axis treating Not a Numbers (NaNs) as zero. The cumulative sum does not change when NaNs are encountered and leading NaNs are replaced by zeros. Zeros are returned for slices that are all-NaN or empty. .. versionadded:: 1.12.0 Parameters ---------- a : array_like Input array. axis : int, optional Axis along which the cumulative sum is computed. The default (None) is to compute the cumsum over the flattened array. dtype : dtype, optional Type of the returned array and of the accumulator in which the elements are summed. If `dtype` is not specified, it defaults to the dtype of `a`, unless `a` has an integer dtype with a precision less than that of the default platform integer. In that case, the default platform integer is used. out : ndarray, optional Alternative output array in which to place the result. It must have the same shape and buffer length as the expected output but the type will be cast if necessary. See `doc.ufuncs` (Section "Output arguments") for more details. Returns ------- nancumsum : ndarray. 
A new array holding the result is returned unless `out` is specified, in which it is returned. The result has the same size as `a`, and the same shape as `a` if `axis` is not None or `a` is a 1-d array. See Also -------- numpy.cumsum : Cumulative sum across array propagating NaNs. isnan : Show which elements are NaN. Examples -------- >>> np.nancumsum(1) #doctest: +SKIP array([1]) >>> np.nancumsum([1]) #doctest: +SKIP array([1]) >>> np.nancumsum([1, np.nan]) #doctest: +SKIP array([ 1., 1.]) >>> a = np.array([[1, 2], [3, np.nan]]) >>> np.nancumsum(a) #doctest: +SKIP array([ 1., 3., 6., 6.]) >>> np.nancumsum(a, axis=0) #doctest: +SKIP array([[ 1., 2.], [ 4., 2.]]) >>> np.nancumsum(a, axis=1) #doctest: +SKIP array([[ 1., 3.], [ 3., 3.]]) """ a, mask = _replace_nan(a, 0) return np.cumsum(a, axis=axis, dtype=dtype, out=out) def nancumprod(a, axis=None, dtype=None, out=None): """ Return the cumulative product of array elements over a given axis treating Not a Numbers (NaNs) as one. The cumulative product does not change when NaNs are encountered and leading NaNs are replaced by ones. Ones are returned for slices that are all-NaN or empty. .. versionadded:: 1.12.0 Parameters ---------- a : array_like Input array. axis : int, optional Axis along which the cumulative product is computed. By default the input is flattened. dtype : dtype, optional Type of the returned array, as well as of the accumulator in which the elements are multiplied. If *dtype* is not specified, it defaults to the dtype of `a`, unless `a` has an integer dtype with a precision less than that of the default platform integer. In that case, the default platform integer is used instead. out : ndarray, optional Alternative output array in which to place the result. It must have the same shape and buffer length as the expected output but the type of the resulting values will be cast if necessary. Returns ------- nancumprod : ndarray A new array holding the result is returned unless `out` is specified, in which case it is returned. See Also -------- numpy.cumprod : Cumulative product across array propagating NaNs. isnan : Show which elements are NaN. Examples -------- >>> np.nancumprod(1) #doctest: +SKIP array([1]) >>> np.nancumprod([1]) #doctest: +SKIP array([1]) >>> np.nancumprod([1, np.nan]) #doctest: +SKIP array([ 1., 1.]) >>> a = np.array([[1, 2], [3, np.nan]]) >>> np.nancumprod(a) #doctest: +SKIP array([ 1., 2., 6., 6.]) >>> np.nancumprod(a, axis=0) #doctest: +SKIP array([[ 1., 2.], [ 3., 2.]]) >>> np.nancumprod(a, axis=1) #doctest: +SKIP array([[ 1., 2.], [ 3., 3.]]) """ a, mask = _replace_nan(a, 1) return np.cumprod(a, axis=axis, dtype=dtype, out=out) dask-0.16.0/dask/array/optimization.py000066400000000000000000000250021320364734500176660ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from operator import getitem import numpy as np from .core import getter, getter_nofancy, getter_inline from ..compatibility import zip_longest from ..core import flatten, reverse_dict from ..optimize import cull, fuse, inline_functions from ..utils import ensure_dict # All get* functions the optimizations know about GETTERS = (getter, getter_nofancy, getter_inline, getitem) # These get* functions aren't ever completely removed from the graph, # even if the index should be a no-op by numpy semantics. Some array-like's # don't completely follow semantics, making indexing always necessary. 
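# (For example, an on-disk source such as an h5py Dataset only produces an
# in-memory numpy array once it is actually indexed, so even a seemingly
# redundant full slice like ``x[:]`` has to be kept for these getters.)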
GETNOREMOVE = (getter, getter_nofancy) def optimize(dsk, keys, fuse_keys=None, fast_functions=None, inline_functions_fast_functions=(getter_inline,), rename_fused_keys=True, **kwargs): """ Optimize dask for array computation 1. Cull tasks not necessary to evaluate keys 2. Remove full slicing, e.g. x[:] 3. Inline fast functions like getitem and np.transpose """ dsk = ensure_dict(dsk) keys = list(flatten(keys)) if fast_functions is not None: inline_functions_fast_functions = fast_functions dsk2, dependencies = cull(dsk, keys) hold = hold_keys(dsk2, dependencies) dsk3, dependencies = fuse(dsk2, hold + keys + (fuse_keys or []), dependencies, rename_keys=rename_fused_keys) if inline_functions_fast_functions: dsk4 = inline_functions(dsk3, keys, dependencies=dependencies, fast_functions=inline_functions_fast_functions) else: dsk4 = dsk3 dsk5 = optimize_slices(dsk4) return dsk5 def hold_keys(dsk, dependencies): """ Find keys to avoid fusion We don't want to fuse data present in the graph because it is easier to serialize as a raw value. We don't want to fuse chains after getitem/GETTERS because we want to move around only small pieces of data, rather than the underlying arrays. """ dependents = reverse_dict(dependencies) data = {k for k, v in dsk.items() if type(v) not in (tuple, str)} hold_keys = list(data) for dat in data: deps = dependents[dat] for dep in deps: task = dsk[dep] # If the task is a get* function, we walk up the chain, and stop # when there's either more than one dependent, or the dependent is # no longer a get* function or an alias. We then add the final # key to the list of keys not to fuse. if type(task) is tuple and task and task[0] in GETTERS: try: while len(dependents[dep]) == 1: new_dep = next(iter(dependents[dep])) new_task = dsk[new_dep] # If the task is a get* or an alias, continue up the # linear chain if new_task[0] in GETTERS or new_task in dsk: dep = new_dep else: break except (IndexError, TypeError): pass hold_keys.append(dep) return hold_keys def optimize_slices(dsk): """ Optimize slices 1. Fuse repeated slices, like x[5:][2:6] -> x[7:11] 2. Remove full slices, like x[:] -> x See also: fuse_slice_dict """ fancy_ind_types = (list, np.ndarray) dsk = dsk.copy() for k, v in dsk.items(): if type(v) is tuple and v[0] in GETTERS and len(v) in (3, 5): if len(v) == 3: get, a, a_index = v # getter defaults to asarray=True, getitem is semantically False a_asarray = get is not getitem a_lock = None else: get, a, a_index, a_asarray, a_lock = v while type(a) is tuple and a[0] in GETTERS and len(a) in (3, 5): if len(a) == 3: f2, b, b_index = a b_asarray = f2 is not getitem b_lock = None else: f2, b, b_index, b_asarray, b_lock = a if a_lock and a_lock is not b_lock: break if (type(a_index) is tuple) != (type(b_index) is tuple): break if type(a_index) is tuple: indices = b_index + a_index if (len(a_index) != len(b_index) and any(i is None for i in indices)): break if (f2 is getter_nofancy and any(isinstance(i, fancy_ind_types) for i in indices)): break elif (f2 is getter_nofancy and (type(a_index) in fancy_ind_types or type(b_index) in fancy_ind_types)): break try: c_index = fuse_slice(b_index, a_index) # rely on fact that nested gets never decrease in # strictness e.g. 
`(getter_nofancy, (getter, ...))` never # happens get = getter if f2 is getter_inline else f2 except NotImplementedError: break a, a_index, a_lock = b, c_index, b_lock a_asarray |= b_asarray # Skip the get call if not from from_array and nothing to do if (get not in GETNOREMOVE and ((type(a_index) is slice and not a_index.start and a_index.stop is None and a_index.step is None) or (type(a_index) is tuple and all(type(s) is slice and not s.start and s.stop is None and s.step is None for s in a_index)))): dsk[k] = a elif get is getitem or (a_asarray and not a_lock): # default settings are fine, drop the extra parameters Since we # always fallback to inner `get` functions, `get is getitem` # can only occur if all gets are getitem, meaning all # parameters must be getitem defaults. dsk[k] = (get, a, a_index) else: dsk[k] = (get, a, a_index, a_asarray, a_lock) return dsk def normalize_slice(s): """ Replace Nones in slices with integers >>> normalize_slice(slice(None, None, None)) slice(0, None, 1) """ start, stop, step = s.start, s.stop, s.step if start is None: start = 0 if step is None: step = 1 if start < 0 or step < 0 or stop is not None and stop < 0: raise NotImplementedError() return slice(start, stop, step) def check_for_nonfusible_fancy_indexing(fancy, normal): # Check for fancy indexing and normal indexing, where the fancy # indexed dimensions != normal indexed dimensions with integers. E.g.: # disallow things like: # x[:, [1, 2], :][0, :, :] -> x[0, [1, 2], :] or # x[0, :, :][:, [1, 2], :] -> x[0, [1, 2], :] for f, n in zip_longest(fancy, normal, fillvalue=slice(None)): if type(f) is not list and isinstance(n, int): raise NotImplementedError("Can't handle normal indexing with " "integers and fancy indexing if the " "integers and fancy indices don't " "align with the same dimensions.") def fuse_slice(a, b): """ Fuse stacked slices together Fuse a pair of repeated slices into a single slice: >>> fuse_slice(slice(1000, 2000), slice(10, 15)) slice(1010, 1015, None) This also works for tuples of slices >>> fuse_slice((slice(100, 200), slice(100, 200, 10)), ... 
(slice(10, 15), [5, 2])) (slice(110, 115, None), [150, 120]) And a variety of other interesting cases >>> fuse_slice(slice(1000, 2000), 10) # integers 1010 >>> fuse_slice(slice(1000, 2000, 5), slice(10, 20, 2)) slice(1050, 1100, 10) >>> fuse_slice(slice(1000, 2000, 5), [1, 2, 3]) # lists [1005, 1010, 1015] >>> fuse_slice(None, slice(None, None)) # doctest: +SKIP None """ # None only works if the second side is a full slice if a is None and b == slice(None, None): return None # Replace None with 0 and one in start and step if isinstance(a, slice): a = normalize_slice(a) if isinstance(b, slice): b = normalize_slice(b) if isinstance(a, slice) and isinstance(b, int): if b < 0: raise NotImplementedError() return a.start + b * a.step if isinstance(a, slice) and isinstance(b, slice): start = a.start + a.step * b.start if b.stop is not None: stop = a.start + a.step * b.stop else: stop = None if a.stop is not None: if stop is not None: stop = min(a.stop, stop) else: stop = a.stop step = a.step * b.step if step == 1: step = None return slice(start, stop, step) if isinstance(b, list): return [fuse_slice(a, bb) for bb in b] if isinstance(a, list) and isinstance(b, (int, slice)): return a[b] if isinstance(a, tuple) and not isinstance(b, tuple): b = (b,) # If given two tuples walk through both, being mindful of uneven sizes # and newaxes if isinstance(a, tuple) and isinstance(b, tuple): # Check for non-fusible cases with fancy-indexing a_has_lists = any(isinstance(item, list) for item in a) b_has_lists = any(isinstance(item, list) for item in b) if a_has_lists and b_has_lists: raise NotImplementedError("Can't handle multiple list indexing") elif a_has_lists: check_for_nonfusible_fancy_indexing(a, b) elif b_has_lists: check_for_nonfusible_fancy_indexing(b, a) j = 0 result = list() for i in range(len(a)): # axis ceased to exist or we're out of b if isinstance(a[i], int) or j == len(b): result.append(a[i]) continue while b[j] is None: # insert any Nones on the rhs result.append(None) j += 1 result.append(fuse_slice(a[i], b[j])) # Common case j += 1 while j < len(b): # anything leftover on the right? result.append(b[j]) j += 1 return tuple(result) raise NotImplementedError() dask-0.16.0/dask/array/percentile.py000066400000000000000000000142101320364734500172710ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from functools import wraps from collections import Iterator import numpy as np from toolz import merge, merge_sorted from .core import Array from ..base import tokenize from .. 
import sharedict @wraps(np.percentile) def _percentile(a, q, interpolation='linear'): if not len(a): return None if isinstance(q, Iterator): q = list(q) if a.dtype.name == 'category': result = np.percentile(a.codes, q, interpolation=interpolation) import pandas as pd return pd.Categorical.from_codes(result, a.categories, a.ordered) if np.issubdtype(a.dtype, np.datetime64): a2 = a.astype('i8') result = np.percentile(a2, q, interpolation=interpolation) return result.astype(a.dtype) if not np.issubdtype(a.dtype, np.number): interpolation = 'nearest' return np.percentile(a, q, interpolation=interpolation) def percentile(a, q, interpolation='linear'): """ Approximate percentile of 1-D array See numpy.percentile for more information """ if not a.ndim == 1: raise NotImplementedError( "Percentiles only implemented for 1-d arrays") q = np.array(q) token = tokenize(a, list(q), interpolation) name = 'percentile_chunk-' + token dsk = dict(((name, i), (_percentile, (key), q, interpolation)) for i, key in enumerate(a.__dask_keys__())) name2 = 'percentile-' + token dsk2 = {(name2, 0): (merge_percentiles, q, [q] * len(a.chunks[0]), sorted(dsk), a.chunks[0], interpolation)} dtype = a.dtype if np.issubdtype(dtype, np.integer): dtype = (np.array([], dtype=dtype) / 0.5).dtype dsk = merge(dsk, dsk2) dsk = sharedict.merge(a.dask, (name2, dsk)) return Array(dsk, name2, chunks=((len(q),),), dtype=dtype) def merge_percentiles(finalq, qs, vals, Ns, interpolation='lower'): """ Combine several percentile calculations of different data. Parameters ---------- finalq : numpy.array Percentiles to compute (must use same scale as ``qs``). qs : sequence of numpy.arrays Percentiles calculated on different sets of data. vals : sequence of numpy.arrays Resulting values associated with percentiles ``qs``. Ns : sequence of integers The number of data elements associated with each data set. interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} Specify the type of interpolation to use to calculate final percentiles. For more information, see numpy.percentile. Examples -------- >>> finalq = [10, 20, 30, 40, 50, 60, 70, 80] >>> qs = [[20, 40, 60, 80], [20, 40, 60, 80]] >>> vals = [np.array([1, 2, 3, 4]), np.array([10, 11, 12, 13])] >>> Ns = [100, 100] # Both original arrays had 100 elements >>> merge_percentiles(finalq, qs, vals, Ns) array([ 1, 2, 3, 4, 10, 11, 12, 13]) """ if isinstance(finalq, Iterator): finalq = list(finalq) finalq = np.array(finalq) qs = list(map(list, qs)) vals = list(vals) Ns = list(Ns) L = list(zip(*[(q, val, N) for q, val, N in zip(qs, vals, Ns) if N])) if not L: raise ValueError("No non-trivial arrays found") qs, vals, Ns = L # TODO: Perform this check above in percentile once dtype checking is easy # Here we silently change meaning if vals[0].dtype.name == 'category': result = merge_percentiles(finalq, qs, [v.codes for v in vals], Ns, interpolation) import pandas as pd return pd.Categorical.from_codes(result, vals[0].categories, vals[0].ordered) if not np.issubdtype(vals[0].dtype, np.number): interpolation = 'nearest' if len(vals) != len(qs) or len(Ns) != len(qs): raise ValueError('qs, vals, and Ns parameters must be the same length') # transform qs and Ns into number of observations between percentiles counts = [] for q, N in zip(qs, Ns): count = np.empty(len(q)) count[1:] = np.diff(q) count[0] = q[0] count *= N counts.append(count) # Sort by calculated percentile values, then number of observations. # >95% of the time in this function is spent in `merge_sorted` below. 
# An alternative that uses numpy sort is shown. It is sometimes # comparable to, but typically slower than, `merge_sorted`. # # >>> A = np.concatenate(map(np.array, map(zip, vals, counts))) # >>> A.sort(0, kind='mergesort') combined_vals_counts = merge_sorted(*map(zip, vals, counts)) combined_vals, combined_counts = zip(*combined_vals_counts) combined_vals = np.array(combined_vals) combined_counts = np.array(combined_counts) # percentile-like, but scaled by total number of observations combined_q = np.cumsum(combined_counts) # rescale finalq percentiles to match combined_q desired_q = finalq * sum(Ns) # the behavior of different interpolation methods should be # investigated further. if interpolation == 'linear': rv = np.interp(desired_q, combined_q, combined_vals) else: left = np.searchsorted(combined_q, desired_q, side='left') right = np.searchsorted(combined_q, desired_q, side='right') - 1 np.minimum(left, len(combined_vals) - 1, left) # don't exceed max index lower = np.minimum(left, right) upper = np.maximum(left, right) if interpolation == 'lower': rv = combined_vals[lower] elif interpolation == 'higher': rv = combined_vals[upper] elif interpolation == 'midpoint': rv = 0.5 * (combined_vals[lower] + combined_vals[upper]) elif interpolation == 'nearest': lower_residual = np.abs(combined_q[lower] - desired_q) upper_residual = np.abs(combined_q[upper] - desired_q) mask = lower_residual > upper_residual index = lower # alias; we no longer need lower index[mask] = upper[mask] rv = combined_vals[index] else: raise ValueError("interpolation can only be 'linear', 'lower', " "'higher', 'midpoint', or 'nearest'") return rv dask-0.16.0/dask/array/random.py000066400000000000000000000441761320364734500164350ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from itertools import product from numbers import Integral from operator import getitem import numpy as np from .core import (normalize_chunks, Array, slices_from_chunks, asarray, broadcast_shapes, broadcast_to) from .. import sharedict from ..base import tokenize from ..utils import ignoring, random_state_data def doc_wraps(func): """ Copy docstring from one function to another """ def _(func2): if func.__doc__ is not None: func2.__doc__ = func.__doc__.replace('>>>', '>>').replace('...', '..') return func2 return _ class RandomState(object): """ Mersenne Twister pseudo-random number generator This object contains state to deterministically generate pseudo-random numbers from a variety of probability distributions. It is identical to ``np.random.RandomState`` except that all functions also take a ``chunks=`` keyword argument. 
Examples -------- >>> import dask.array as da >>> state = da.random.RandomState(1234) # a seed >>> x = state.normal(10, 0.1, size=3, chunks=(2,)) >>> x.compute() array([ 10.01867852, 10.04812289, 9.89649746]) See Also: np.random.RandomState """ def __init__(self, seed=None): self._numpy_state = np.random.RandomState(seed) def seed(self, seed=None): self._numpy_state.seed(seed) def _wrap(self, func, *args, **kwargs): """ Wrap numpy random function to produce dask.array random function extra_chunks should be a chunks tuple to append to the end of chunks """ size = kwargs.pop('size', None) chunks = kwargs.pop('chunks') extra_chunks = kwargs.pop('extra_chunks', ()) if size is not None and not isinstance(size, (tuple, list)): size = (size,) args_shapes = {ar.shape for ar in args if isinstance(ar, (Array, np.ndarray))} args_shapes.union({ar.shape for ar in kwargs.values() if isinstance(ar, (Array, np.ndarray))}) shapes = list(args_shapes) if size is not None: shapes += [size] # broadcast to the final size(shape) size = broadcast_shapes(*shapes) chunks = normalize_chunks(chunks, size) slices = slices_from_chunks(chunks) def _broadcast_any(ar, shape, chunks): if isinstance(ar, Array): return broadcast_to(ar, shape).rechunk(chunks) if isinstance(ar, np.ndarray): return np.ascontiguousarray(np.broadcast_to(ar, shape)) # Broadcast all arguments, get tiny versions as well # Start adding the relevant bits to the graph dsk = {} dsks = [] lookup = {} small_args = [] for i, ar in enumerate(args): if isinstance(ar, (np.ndarray, Array)): res = _broadcast_any(ar, size, chunks) if isinstance(res, Array): dsks.append(res.dask) lookup[i] = res.name elif isinstance(res, np.ndarray): name = 'array-{}'.format(tokenize(res)) lookup[i] = name dsk[name] = res small_args.append(ar[tuple(0 for _ in ar.shape)]) else: small_args.append(ar) small_kwargs = {} for key, ar in kwargs.items(): if isinstance(ar, (np.ndarray, Array)): res = _broadcast_any(ar, size, chunks) if isinstance(res, Array): dsks.append(res.dask) lookup[key] = res.name elif isinstance(res, np.ndarray): name = 'array-{}'.format(tokenize(res)) lookup[key] = name dsk[name] = res small_kwargs[key] = ar[tuple(0 for _ in ar.shape)] else: small_kwargs[key] = ar # Get dtype small_kwargs['size'] = (0,) dtype = func(np.random.RandomState(), *small_args, **small_kwargs).dtype sizes = list(product(*chunks)) state_data = random_state_data(len(sizes), self._numpy_state) token = tokenize(state_data, size, chunks, args, kwargs) name = 'da.random.{0}-{1}'.format(func.__name__, token) keys = product([name], *([range(len(bd)) for bd in chunks] + [[0]] * len(extra_chunks))) blocks = product(*[range(len(bd)) for bd in chunks]) vals = [] for state, size, slc, block in zip(state_data, sizes, slices, blocks): arg = [] for i, ar in enumerate(args): if i not in lookup: arg.append(ar) else: if isinstance(ar, Array): arg.append((lookup[i], ) + block) else: # np.ndarray arg.append((getitem, lookup[i], slc)) kwrg = {} for k, ar in kwargs.items(): if k not in lookup: kwrg[k] = ar else: if isinstance(ar, Array): kwrg[k] = (lookup[k], ) + block else: # np.ndarray kwrg[k] = (getitem, lookup[k], slc) vals.append((_apply_random, func.__name__, state, size, arg, kwrg)) dsk.update(dict(zip(keys, vals))) dsk = sharedict.merge((name, dsk), *dsks) return Array(dsk, name, chunks + extra_chunks, dtype=dtype) @doc_wraps(np.random.RandomState.beta) def beta(self, a, b, size=None, chunks=None): return self._wrap(np.random.RandomState.beta, a, b, size=size, chunks=chunks) 
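    # Illustrative usage (assumed example, not part of the original source):
    # each distribution method accepts a ``chunks=`` keyword and returns a
    # lazy dask array whose blocks are generated from independently seeded
    # numpy RandomState objects, so a given seed reproduces the same values.
    #
    # >>> import dask.array as da
    # >>> rs = da.random.RandomState(42)
    # >>> x = rs.beta(2.0, 5.0, size=(1000,), chunks=(250,))
    # >>> x.chunks
    # ((250, 250, 250, 250),)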
@doc_wraps(np.random.RandomState.binomial) def binomial(self, n, p, size=None, chunks=None): return self._wrap(np.random.RandomState.binomial, n, p, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.chisquare) def chisquare(self, df, size=None, chunks=None): return self._wrap(np.random.RandomState.chisquare, df, size=size, chunks=chunks) with ignoring(AttributeError): @doc_wraps(np.random.RandomState.choice) def choice(self, a, size=None, replace=True, p=None, chunks=None): dsks = [] # Normalize and validate `a` if isinstance(a, Integral): # On windows the output dtype differs if p is provided or # absent, see https://github.com/numpy/numpy/issues/9867 dummy_p = np.array([1]) if p is not None else p dtype = np.random.choice(1, size=(), p=dummy_p).dtype len_a = a if a < 0: raise ValueError("a must be greater than 0") else: a = asarray(a).rechunk(a.shape) dtype = a.dtype if a.ndim != 1: raise ValueError("a must be one dimensional") len_a = len(a) dsks.append(a.dask) a = a.__dask_keys__()[0] # Normalize and validate `p` if p is not None: if not isinstance(p, Array): # If p is not a dask array, first check the sum is close # to 1 before converting. p = np.asarray(p) if not np.isclose(p.sum(), 1, rtol=1e-7, atol=0): raise ValueError("probabilities do not sum to 1") p = asarray(p) else: p = p.rechunk(p.shape) if p.ndim != 1: raise ValueError("p must be one dimensional") if len(p) != len_a: raise ValueError("a and p must have the same size") dsks.append(p.dask) p = p.__dask_keys__()[0] if size is None: size = () elif not isinstance(size, (tuple, list)): size = (size,) chunks = normalize_chunks(chunks, size) sizes = list(product(*chunks)) state_data = random_state_data(len(sizes), self._numpy_state) name = 'da.random.choice-%s' % tokenize(state_data, size, chunks, a, replace, p) keys = product([name], *(range(len(bd)) for bd in chunks)) dsk = {k: (_choice, state, a, size, replace, p) for k, state, size in zip(keys, state_data, sizes)} return Array(sharedict.merge((name, dsk), *dsks), name, chunks, dtype=dtype) # @doc_wraps(np.random.RandomState.dirichlet) # def dirichlet(self, alpha, size=None, chunks=None): @doc_wraps(np.random.RandomState.exponential) def exponential(self, scale=1.0, size=None, chunks=None): return self._wrap(np.random.RandomState.exponential, scale, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.f) def f(self, dfnum, dfden, size=None, chunks=None): return self._wrap(np.random.RandomState.f, dfnum, dfden, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.gamma) def gamma(self, shape, scale=1.0, size=None, chunks=None): return self._wrap(np.random.RandomState.gamma, shape, scale, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.geometric) def geometric(self, p, size=None, chunks=None): return self._wrap(np.random.RandomState.geometric, p, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.gumbel) def gumbel(self, loc=0.0, scale=1.0, size=None, chunks=None): return self._wrap(np.random.RandomState.gumbel, loc, scale, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.hypergeometric) def hypergeometric(self, ngood, nbad, nsample, size=None, chunks=None): return self._wrap(np.random.RandomState.hypergeometric, ngood, nbad, nsample, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.laplace) def laplace(self, loc=0.0, scale=1.0, size=None, chunks=None): return self._wrap(np.random.RandomState.laplace, loc, scale, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.logistic) def logistic(self, loc=0.0, scale=1.0, 
size=None, chunks=None): return self._wrap(np.random.RandomState.logistic, loc, scale, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.lognormal) def lognormal(self, mean=0.0, sigma=1.0, size=None, chunks=None): return self._wrap(np.random.RandomState.lognormal, mean, sigma, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.logseries) def logseries(self, p, size=None, chunks=None): return self._wrap(np.random.RandomState.logseries, p, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.multinomial) def multinomial(self, n, pvals, size=None, chunks=None): return self._wrap(np.random.RandomState.multinomial, n, pvals, size=size, chunks=chunks, extra_chunks=((len(pvals),),)) @doc_wraps(np.random.RandomState.negative_binomial) def negative_binomial(self, n, p, size=None, chunks=None): return self._wrap(np.random.RandomState.negative_binomial, n, p, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.noncentral_chisquare) def noncentral_chisquare(self, df, nonc, size=None, chunks=None): return self._wrap(np.random.RandomState.noncentral_chisquare, df, nonc, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.noncentral_f) def noncentral_f(self, dfnum, dfden, nonc, size=None, chunks=None): return self._wrap(np.random.RandomState.noncentral_f, dfnum, dfden, nonc, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.normal) def normal(self, loc=0.0, scale=1.0, size=None, chunks=None): return self._wrap(np.random.RandomState.normal, loc, scale, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.pareto) def pareto(self, a, size=None, chunks=None): return self._wrap(np.random.RandomState.pareto, a, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.poisson) def poisson(self, lam=1.0, size=None, chunks=None): return self._wrap(np.random.RandomState.poisson, lam, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.power) def power(self, a, size=None, chunks=None): return self._wrap(np.random.RandomState.power, a, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.randint) def randint(self, low, high=None, size=None, chunks=None): return self._wrap(np.random.RandomState.randint, low, high, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.random_integers) def random_integers(self, low, high=None, size=None, chunks=None): return self._wrap(np.random.RandomState.random_integers, low, high, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.random_sample) def random_sample(self, size=None, chunks=None): return self._wrap(np.random.RandomState.random_sample, size=size, chunks=chunks) random = random_sample @doc_wraps(np.random.RandomState.rayleigh) def rayleigh(self, scale=1.0, size=None, chunks=None): return self._wrap(np.random.RandomState.rayleigh, scale, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.standard_cauchy) def standard_cauchy(self, size=None, chunks=None): return self._wrap(np.random.RandomState.standard_cauchy, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.standard_exponential) def standard_exponential(self, size=None, chunks=None): return self._wrap(np.random.RandomState.standard_exponential, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.standard_gamma) def standard_gamma(self, shape, size=None, chunks=None): return self._wrap(np.random.RandomState.standard_gamma, shape, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.standard_normal) def standard_normal(self, size=None, chunks=None): return self._wrap(np.random.RandomState.standard_normal, 
size=size, chunks=chunks) @doc_wraps(np.random.RandomState.standard_t) def standard_t(self, df, size=None, chunks=None): return self._wrap(np.random.RandomState.standard_t, df, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.tomaxint) def tomaxint(self, size=None, chunks=None): return self._wrap(np.random.RandomState.tomaxint, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.triangular) def triangular(self, left, mode, right, size=None, chunks=None): return self._wrap(np.random.RandomState.triangular, left, mode, right, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.uniform) def uniform(self, low=0.0, high=1.0, size=None, chunks=None): return self._wrap(np.random.RandomState.uniform, low, high, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.vonmises) def vonmises(self, mu, kappa, size=None, chunks=None): return self._wrap(np.random.RandomState.vonmises, mu, kappa, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.wald) def wald(self, mean, scale, size=None, chunks=None): return self._wrap(np.random.RandomState.wald, mean, scale, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.weibull) def weibull(self, a, size=None, chunks=None): return self._wrap(np.random.RandomState.weibull, a, size=size, chunks=chunks) @doc_wraps(np.random.RandomState.zipf) def zipf(self, a, size=None, chunks=None): return self._wrap(np.random.RandomState.zipf, a, size=size, chunks=chunks) def _choice(state_data, a, size, replace, p): state = np.random.RandomState(state_data) return state.choice(a, size=size, replace=replace, p=p) def _apply_random(func, state_data, size, args, kwargs): """Apply RandomState method with seed""" state = np.random.RandomState(state_data) func = getattr(state, func) return func(*args, size=size, **kwargs) _state = RandomState() seed = _state.seed beta = _state.beta binomial = _state.binomial chisquare = _state.chisquare if hasattr(_state, 'choice'): choice = _state.choice exponential = _state.exponential f = _state.f gamma = _state.gamma geometric = _state.geometric gumbel = _state.gumbel hypergeometric = _state.hypergeometric laplace = _state.laplace logistic = _state.logistic lognormal = _state.lognormal logseries = _state.logseries multinomial = _state.multinomial negative_binomial = _state.negative_binomial noncentral_chisquare = _state.noncentral_chisquare noncentral_f = _state.noncentral_f normal = _state.normal pareto = _state.pareto poisson = _state.poisson power = _state.power rayleigh = _state.rayleigh random_sample = _state.random_sample random = random_sample randint = _state.randint random_integers = _state.random_integers triangular = _state.triangular uniform = _state.uniform vonmises = _state.vonmises wald = _state.wald weibull = _state.weibull zipf = _state.zipf """ Standard distributions """ standard_cauchy = _state.standard_cauchy standard_exponential = _state.standard_exponential standard_gamma = _state.standard_gamma standard_normal = _state.standard_normal standard_t = _state.standard_t dask-0.16.0/dask/array/rechunk.py000066400000000000000000000526331320364734500166110ustar00rootroot00000000000000""" The rechunk module defines: intersect_chunks: a function for converting chunks to new dimensions rechunk: a function to convert the blocks of an existing dask array to new chunks or blockshape """ from __future__ import absolute_import, division, print_function import math import heapq from itertools import product, chain, count from operator import getitem, add, mul, itemgetter import numpy as np import 
toolz from toolz import accumulate, reduce from ..base import tokenize from .core import concatenate3, Array, normalize_chunks from .wrap import empty from .. import sharedict def cumdims_label(chunks, const): """ Internal utility for cumulative sum with label. >>> cumdims_label(((5, 3, 3), (2, 2, 1)), 'n') # doctest: +NORMALIZE_WHITESPACE [(('n', 0), ('n', 5), ('n', 8), ('n', 11)), (('n', 0), ('n', 2), ('n', 4), ('n', 5))] """ return [tuple(zip((const,) * (1 + len(bds)), accumulate(add, (0,) + bds))) for bds in chunks] def _breakpoints(cumold, cumnew): """ >>> new = cumdims_label(((2, 3), (2, 2, 1)), 'n') >>> old = cumdims_label(((2, 2, 1), (5,)), 'o') >>> _breakpoints(new[0], old[0]) (('n', 0), ('o', 0), ('n', 2), ('o', 2), ('o', 4), ('n', 5), ('o', 5)) >>> _breakpoints(new[1], old[1]) (('n', 0), ('o', 0), ('n', 2), ('n', 4), ('n', 5), ('o', 5)) """ return tuple(sorted(cumold + cumnew, key=itemgetter(1))) def _intersect_1d(breaks): """ Internal utility to intersect chunks for 1d after preprocessing. >>> new = cumdims_label(((2, 3), (2, 2, 1)), 'n') >>> old = cumdims_label(((2, 2, 1), (5,)), 'o') >>> _intersect_1d(_breakpoints(old[0], new[0])) # doctest: +NORMALIZE_WHITESPACE [[(0, slice(0, 2, None))], [(1, slice(0, 2, None)), (2, slice(0, 1, None))]] >>> _intersect_1d(_breakpoints(old[1], new[1])) # doctest: +NORMALIZE_WHITESPACE [[(0, slice(0, 2, None))], [(0, slice(2, 4, None))], [(0, slice(4, 5, None))]] Parameters ---------- breaks: list of tuples Each tuple is ('o', 8) or ('n', 8) These are pairs of 'o' old or new 'n' indicator with a corresponding cumulative sum. Uses 'o' and 'n' to make new tuples of slices for the new block crosswalk to old blocks. """ start = 0 last_end = 0 old_idx = 0 ret = [] ret_next = [] for idx in range(1, len(breaks)): label, br = breaks[idx] last_label, last_br = breaks[idx - 1] if last_label == 'n': if ret_next: ret.append(ret_next) ret_next = [] if last_label == 'o': start = 0 else: start = last_end end = br - last_br + start last_end = end if br == last_br: continue ret_next.append((old_idx, slice(start, end))) if label == 'o': old_idx += 1 start = 0 if ret_next: ret.append(ret_next) return ret def _old_to_new(old_chunks, new_chunks): """ Helper to build old_chunks to new_chunks. Handles missing values, as long as the missing dimension is unchanged. 
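    Returns one list per dimension; each list maps every new chunk, in order,
    to the ``(old block index, slice)`` pairs that must be taken from the old
    blocks to assemble it (see the example below).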
Examples -------- >>> old = ((10, 10, 10, 10, 10), ) >>> new = ((25, 5, 20), ) >>> _old_to_new(old, new) # doctest: +NORMALIZE_WHITESPACE [[[(0, slice(0, 10, None)), (1, slice(0, 10, None)), (2, slice(0, 5, None))], [(2, slice(5, 10, None))], [(3, slice(0, 10, None)), (4, slice(0, 10, None))]]] """ old_known = [x for x in old_chunks if not any(math.isnan(y) for y in x)] new_known = [x for x in new_chunks if not any(math.isnan(y) for y in x)] n_missing = [sum(math.isnan(y) for y in x) for x in old_chunks] n_missing2 = [sum(math.isnan(y) for y in x) for x in new_chunks] cmo = cumdims_label(old_known, 'o') cmn = cumdims_label(new_known, 'n') sums = [sum(o) for o in old_known] sums2 = [sum(n) for n in new_known] if not sums == sums2: raise ValueError('Cannot change dimensions from to %r' % sums2) if not n_missing == n_missing2: raise ValueError('Chunks must be unchanging along unknown dimensions') old_to_new = [_intersect_1d(_breakpoints(cm[0], cm[1])) for cm in zip(cmo, cmn)] for idx, missing in enumerate(n_missing): if missing: # Missing dimensions are always unchanged, so old -> new is everything extra = [[(i, slice(0, None))] for i in range(missing)] old_to_new.insert(idx, extra) return old_to_new def intersect_chunks(old_chunks, new_chunks): """ Make dask.array slices as intersection of old and new chunks. >>> intersections = intersect_chunks(((4, 4), (2,)), ... ((8,), (1, 1))) >>> list(intersections) # doctest: +NORMALIZE_WHITESPACE [(((0, slice(0, 4, None)), (0, slice(0, 1, None))), ((1, slice(0, 4, None)), (0, slice(0, 1, None)))), (((0, slice(0, 4, None)), (0, slice(1, 2, None))), ((1, slice(0, 4, None)), (0, slice(1, 2, None))))] Parameters ---------- old_chunks : iterable of tuples block sizes along each dimension (convert from old_chunks) new_chunks: iterable of tuples block sizes along each dimension (converts to new_chunks) """ old_to_new = _old_to_new(old_chunks, new_chunks) cross1 = product(*old_to_new) cross = chain(tuple(product(*cr)) for cr in cross1) return cross def blockdims_dict_to_tuple(old, new): """ >>> blockdims_dict_to_tuple((4, 5, 6), {1: 10}) (4, 10, 6) """ newlist = list(old) for k, v in new.items(): newlist[k] = v return tuple(newlist) def blockshape_dict_to_tuple(old_chunks, d): """ >>> blockshape_dict_to_tuple(((4, 4), (5, 5)), {1: 3}) ((4, 4), (3, 3, 3, 1)) """ shape = tuple(map(sum, old_chunks)) new_chunks = list(old_chunks) for k, v in d.items(): div = shape[k] // v mod = shape[k] % v new_chunks[k] = (v,) * div + ((mod,) if mod else ()) return tuple(new_chunks) DEFAULT_THRESHOLD = 4 DEFAULT_BLOCK_SIZE_LIMIT = 1e8 def rechunk(x, chunks, threshold=DEFAULT_THRESHOLD, block_size_limit=DEFAULT_BLOCK_SIZE_LIMIT): """ Convert blocks in dask array x for new chunks. 
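    When converting directly would make the task graph grow too much, the
    conversion is staged through one or more intermediate chunkings chosen by
    ``plan_rechunk`` (controlled by ``threshold`` and ``block_size_limit``
    below).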
>>> import dask.array as da >>> a = np.random.uniform(0, 1, 7**4).reshape((7,) * 4) >>> x = da.from_array(a, chunks=((2, 3, 2),)*4) >>> x.chunks ((2, 3, 2), (2, 3, 2), (2, 3, 2), (2, 3, 2)) >>> y = rechunk(x, chunks=((2, 4, 1), (4, 2, 1), (4, 3), (7,))) >>> y.chunks ((2, 4, 1), (4, 2, 1), (4, 3), (7,)) chunks also accept dict arguments mapping axis to blockshape >>> y = rechunk(x, chunks={1: 2}) # rechunk axis 1 with blockshape 2 Parameters ---------- x: dask array chunks: tuple The new block dimensions to create threshold: int The graph growth factor under which we don't bother introducing an intermediate step block_size_limit: int The maximum block size (in bytes) we want to produce during an intermediate step """ threshold = threshold or DEFAULT_THRESHOLD block_size_limit = block_size_limit or DEFAULT_BLOCK_SIZE_LIMIT if isinstance(chunks, dict): if not chunks or isinstance(next(iter(chunks.values())), int): chunks = blockshape_dict_to_tuple(x.chunks, chunks) else: chunks = blockdims_dict_to_tuple(x.chunks, chunks) if isinstance(chunks, (tuple, list)): chunks = tuple(lc if lc is not None else rc for lc, rc in zip(chunks, x.chunks)) chunks = normalize_chunks(chunks, x.shape) if chunks == x.chunks: return x ndim = x.ndim if not len(chunks) == ndim: raise ValueError("Provided chunks are not consistent with shape") new_shapes = tuple(map(sum, chunks)) for new, old in zip(new_shapes, x.shape): if new != old and not math.isnan(old) and not math.isnan(new): raise ValueError("Provided chunks are not consistent with shape") steps = plan_rechunk(x.chunks, chunks, x.dtype.itemsize, threshold, block_size_limit) for c in steps: x = _compute_rechunk(x, c) return x def _number_of_blocks(chunks): return reduce(mul, map(len, chunks)) def _largest_block_size(chunks): return reduce(mul, map(max, chunks)) def estimate_graph_size(old_chunks, new_chunks): """ Estimate the graph size during a rechunk computation. """ # Estimate the number of intermediate blocks that will be produced # (we don't use intersect_chunks() which is much more expensive) crossed_size = reduce(mul, (len(oc) + len(nc) for oc, nc in zip(old_chunks, new_chunks))) return crossed_size def divide_to_width(desired_chunks, max_width): """ Minimally divide the given chunks so as to make the largest chunk width less or equal than *max_width*. """ chunks = [] for c in desired_chunks: nb_divides = int(np.ceil(c / max_width)) for i in range(nb_divides): n = c // (nb_divides - i) chunks.append(n) c -= n assert c == 0 return tuple(chunks) def merge_to_number(desired_chunks, max_number): """ Minimally merge the given chunks so as to drop the number of chunks below *max_number*, while minimizing the largest width. """ if len(desired_chunks) <= max_number: return desired_chunks distinct = set(desired_chunks) if len(distinct) == 1: # Fast path for homogeneous target, also ensuring a regular result w = distinct.pop() n = len(desired_chunks) total = n * w desired_width = total // max_number width = w * (desired_width // w) adjust = (total - max_number * width) // w return (width + w,) * adjust + (width,) * (max_number - adjust) desired_width = sum(desired_chunks) // max_number nmerges = len(desired_chunks) - max_number heap = [(desired_chunks[i] + desired_chunks[i + 1], i, i + 1) for i in range(len(desired_chunks) - 1)] heapq.heapify(heap) chunks = list(desired_chunks) while nmerges > 0: # Find smallest interval to merge width, i, j = heapq.heappop(heap) # If interval was made invalid by another merge, recompute # it, re-insert it and retry. 
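            # (A slot of 0 marks a chunk that has already been merged away;
            # the width recorded in a heap entry is stale whenever either
            # endpoint has changed since the entry was pushed.)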
if chunks[j] == 0: j += 1 while chunks[j] == 0: j += 1 heapq.heappush(heap, (chunks[i] + chunks[j], i, j)) continue elif chunks[i] + chunks[j] != width: heapq.heappush(heap, (chunks[i] + chunks[j], i, j)) continue # Merge assert chunks[i] != 0 chunks[i] = 0 # mark deleted chunks[j] = width nmerges -= 1 return tuple(filter(None, chunks)) def find_merge_rechunk(old_chunks, new_chunks, block_size_limit): """ Find an intermediate rechunk that would merge some adjacent blocks together in order to get us nearer the *new_chunks* target, without violating the *block_size_limit* (in number of elements). """ ndim = len(old_chunks) old_largest_width = [max(c) for c in old_chunks] new_largest_width = [max(c) for c in new_chunks] graph_size_effect = { dim: len(nc) / len(oc) for dim, (oc, nc) in enumerate(zip(old_chunks, new_chunks)) } block_size_effect = { dim: new_largest_width[dim] / (old_largest_width[dim] or 1) for dim in range(ndim) } # Our goal is to reduce the number of nodes in the rechunk graph # by merging some adjacent chunks, so consider dimensions where we can # reduce the # of chunks merge_candidates = [dim for dim in range(ndim) if graph_size_effect[dim] <= 1.0] # Merging along each dimension reduces the graph size by a certain factor # and increases memory largest block size by a certain factor. # We want to optimize the graph size while staying below the given # block_size_limit. This is in effect a knapsack problem, except with # multiplicative values and weights. Just use a greedy algorithm # by trying dimensions in decreasing value / weight order. def key(k): gse = graph_size_effect[k] bse = block_size_effect[k] if bse == 1: bse = 1 + 1e-9 return (np.log(gse) / np.log(bse)) if bse > 0 else 0 sorted_candidates = sorted(merge_candidates, key=key) largest_block_size = reduce(mul, old_largest_width) chunks = list(old_chunks) memory_limit_hit = False for dim in sorted_candidates: # Examine this dimension for possible graph reduction new_largest_block_size = ( largest_block_size * new_largest_width[dim] // (old_largest_width[dim] or 1)) if new_largest_block_size <= block_size_limit: # Full replacement by new chunks is possible chunks[dim] = new_chunks[dim] largest_block_size = new_largest_block_size else: # Try a partial rechunk, dividing the new chunks into # smaller pieces largest_width = old_largest_width[dim] chunk_limit = int(block_size_limit * largest_width / largest_block_size) c = divide_to_width(new_chunks[dim], chunk_limit) if len(c) <= len(old_chunks[dim]): # We manage to reduce the number of blocks, so do it chunks[dim] = c largest_block_size = largest_block_size * max(c) // largest_width memory_limit_hit = True assert largest_block_size == _largest_block_size(chunks) assert largest_block_size <= block_size_limit return tuple(chunks), memory_limit_hit def find_split_rechunk(old_chunks, new_chunks, graph_size_limit): """ Find an intermediate rechunk that would split some chunks to get us nearer *new_chunks*, without violating the *graph_size_limit*. 
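    A candidate split along a dimension is only kept when it does not reduce
    the number of chunks and does not increase the largest chunk width (see
    the check at the end of the loop below).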
""" ndim = len(old_chunks) chunks = list(old_chunks) for dim in range(ndim): graph_size = estimate_graph_size(chunks, new_chunks) if graph_size > graph_size_limit: break if len(old_chunks[dim]) > len(new_chunks[dim]): # It's not interesting to split continue # Merge the new chunks so as to stay within the graph size budget max_number = int(len(old_chunks[dim]) * graph_size_limit / graph_size) c = merge_to_number(new_chunks[dim], max_number) assert len(c) <= max_number # Consider the merge successful if its result has a greater length # and smaller max width than the old chunks if len(c) >= len(old_chunks[dim]) and max(c) <= max(old_chunks[dim]): chunks[dim] = c return tuple(chunks) def plan_rechunk(old_chunks, new_chunks, itemsize, threshold=DEFAULT_THRESHOLD, block_size_limit=DEFAULT_BLOCK_SIZE_LIMIT): """ Plan an iterative rechunking from *old_chunks* to *new_chunks*. The plan aims to minimize the rechunk graph size. Parameters ---------- itemsize: int The item size of the array threshold: int The graph growth factor under which we don't bother introducing an intermediate step block_size_limit: int The maximum block size (in bytes) we want to produce during an intermediate step Notes ----- No intermediate steps will be planned if any dimension of ``old_chunks`` is unknown. """ ndim = len(new_chunks) steps = [] has_nans = [any(math.isnan(y) for y in x) for x in old_chunks] if ndim <= 1 or not all(new_chunks) or any(has_nans): # Trivial array / unknown dim => no need / ability for an intermediate return steps + [new_chunks] # Make it a number ef elements block_size_limit /= itemsize # Fix block_size_limit if too small for either old_chunks or new_chunks largest_old_block = _largest_block_size(old_chunks) largest_new_block = _largest_block_size(new_chunks) block_size_limit = max([block_size_limit, largest_old_block, largest_new_block, ]) # The graph size above which to optimize graph_size_threshold = threshold * (_number_of_blocks(old_chunks) + _number_of_blocks(new_chunks)) current_chunks = old_chunks first_pass = True while True: graph_size = estimate_graph_size(current_chunks, new_chunks) if graph_size < graph_size_threshold: break if first_pass: chunks = current_chunks else: # We hit the block_size_limit in a previous merge pass => # accept a significant increase in graph size in exchange for # 1) getting nearer the goal 2) reducing the largest block size # to make place for the following merge. # To see this pass in action, make the block_size_limit very small. chunks = find_split_rechunk(current_chunks, new_chunks, graph_size * threshold) chunks, memory_limit_hit = find_merge_rechunk(chunks, new_chunks, block_size_limit) if (chunks == current_chunks and not first_pass) or chunks == new_chunks: break steps.append(chunks) current_chunks = chunks if not memory_limit_hit: break first_pass = False return steps + [new_chunks] def _compute_rechunk(x, chunks): """ Compute the rechunk of *x* to the given *chunks*. """ if x.size == 0: # Special case for empty array, as the algorithm below does not behave correctly return empty(x.shape, chunks=chunks, dtype=x.dtype) ndim = x.ndim crossed = intersect_chunks(x.chunks, chunks) x2 = dict() intermediates = dict() token = tokenize(x, chunks) merge_temp_name = 'rechunk-merge-' + token split_temp_name = 'rechunk-split-' + token split_name_suffixes = count() # Pre-allocate old block references, to allow re-use and reduce the # graph's memory footprint a bit. 
old_blocks = np.empty([len(c) for c in x.chunks], dtype='O') for index in np.ndindex(old_blocks.shape): old_blocks[index] = (x.name,) + index # Iterate over all new blocks new_index = product(*(range(len(c)) for c in chunks)) for new_idx, cross1 in zip(new_index, crossed): key = (merge_temp_name,) + new_idx old_block_indices = [[cr[i][0] for cr in cross1] for i in range(ndim)] subdims1 = [len(set(old_block_indices[i])) for i in range(ndim)] rec_cat_arg = np.empty(subdims1, dtype='O') rec_cat_arg_flat = rec_cat_arg.flat # Iterate over the old blocks required to build the new block for rec_cat_index, ind_slices in enumerate(cross1): old_block_index, slices = zip(*ind_slices) name = (split_temp_name, next(split_name_suffixes)) intermediates[name] = (getitem, old_blocks[old_block_index], slices) rec_cat_arg_flat[rec_cat_index] = name assert rec_cat_index == rec_cat_arg.size - 1 # New block is formed by concatenation of sliced old blocks if all(d == 1 for d in rec_cat_arg.shape): x2[key] = rec_cat_arg.flat[0] else: x2[key] = (concatenate3, rec_cat_arg.tolist()) assert new_idx == tuple(len(c) - 1 for c in chunks) del old_blocks, new_index x2 = sharedict.merge(x.dask, (merge_temp_name, toolz.merge(x2, intermediates))) return Array(x2, merge_temp_name, chunks, dtype=x.dtype) class _PrettyBlocks(object): def __init__(self, blocks): self.blocks = blocks def __str__(self): runs = [] run = [] repeats = 0 for c in self.blocks: if run and run[-1] == c: if repeats == 0 and len(run) > 1: runs.append((None, run[:-1])) run = run[-1:] repeats += 1 else: if repeats > 0: assert len(run) == 1 runs.append((repeats + 1, run[-1])) run = [] repeats = 0 run.append(c) if run: if repeats == 0: runs.append((None, run)) else: assert len(run) == 1 runs.append((repeats + 1, run[-1])) parts = [] for repeats, run in runs: if repeats is None: parts.append(str(run)) else: parts.append("%d*[%s]" % (repeats, run)) return " | ".join(parts) __repr__ = __str__ def format_blocks(blocks): """ Pretty-format *blocks*. >>> format_blocks((10, 10, 10)) 3*[10] >>> format_blocks((2, 3, 4)) [2, 3, 4] >>> format_blocks((10, 10, 5, 6, 2, 2, 2, 7)) 2*[10] | [5, 6] | 3*[2] | [7] """ assert (isinstance(blocks, tuple) and all(isinstance(x, int) or math.isnan(x) for x in blocks)) return _PrettyBlocks(blocks) def format_chunks(chunks): """ >>> format_chunks((10 * (3,), 3 * (10,))) (10*[3], 3*[10]) """ assert isinstance(chunks, tuple) return tuple(format_blocks(c) for c in chunks) def format_plan(plan): """ >>> format_plan([((10, 10, 10), (15, 15)), ((30,), (10, 10, 10))]) [(3*[10], 2*[15]), ([30], 3*[10])] """ return [format_chunks(c) for c in plan] dask-0.16.0/dask/array/reductions.py000066400000000000000000000623261320364734500173310ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import warnings from functools import partial, wraps from itertools import product, repeat from math import factorial, log, ceil import operator import numpy as np from toolz import compose, partition_all, get, accumulate, pluck from . import chunk from .core import _concatenate2, Array, atop, lol_tuples, handle_out from .ufunc import sqrt from .wrap import zeros, ones from .numpy_compat import ma_divide, divide as np_divide from ..compatibility import getargspec, builtins from ..base import tokenize from ..context import _globals from ..utils import ignoring, funcname, Dispatch from .. 
import sharedict # Generic functions to support chunks of different types empty_lookup = Dispatch('empty') empty_lookup.register((object, np.ndarray), np.empty) empty_lookup.register(np.ma.masked_array, np.ma.empty) divide_lookup = Dispatch('divide') divide_lookup.register((object, np.ndarray), np_divide) divide_lookup.register(np.ma.masked_array, ma_divide) def divide(a, b, dtype=None): key = lambda x: getattr(x, '__array_priority__', float('-inf')) f = divide_lookup.dispatch(type(builtins.max(a, b, key=key))) return f(a, b, dtype=dtype) def reduction(x, chunk, aggregate, axis=None, keepdims=None, dtype=None, split_every=None, combine=None, name=None, out=None): """ General version of reductions >>> reduction(my_array, np.sum, np.sum, axis=0, keepdims=False) # doctest: +SKIP """ if axis is None: axis = tuple(range(x.ndim)) if isinstance(axis, int): axis = (axis,) axis = tuple(validate_axis(x.ndim, a) for a in axis) if dtype is None: raise ValueError("Must specify dtype") if 'dtype' in getargspec(chunk).args: chunk = partial(chunk, dtype=dtype) if 'dtype' in getargspec(aggregate).args: aggregate = partial(aggregate, dtype=dtype) # Map chunk across all blocks inds = tuple(range(x.ndim)) # The dtype of `tmp` doesn't actually matter, and may be incorrect. tmp = atop(chunk, inds, x, inds, axis=axis, keepdims=True, dtype=x.dtype) tmp._chunks = tuple((1, ) * len(c) if i in axis else c for (i, c) in enumerate(tmp.chunks)) result = _tree_reduce(tmp, aggregate, axis, keepdims, dtype, split_every, combine, name=name) return handle_out(out, result) def _tree_reduce(x, aggregate, axis, keepdims, dtype, split_every=None, combine=None, name=None): """Perform the tree reduction step of a reduction. Lower level, users should use ``reduction`` or ``arg_reduction`` directly. """ # Normalize split_every split_every = split_every or _globals.get('split_every', 4) if isinstance(split_every, dict): split_every = dict((k, split_every.get(k, 2)) for k in axis) elif isinstance(split_every, int): n = builtins.max(int(split_every ** (1 / (len(axis) or 1))), 2) split_every = dict.fromkeys(axis, n) else: split_every = dict((k, v) for (k, v) in enumerate(x.numblocks) if k in axis) # Reduce across intermediates depth = 1 for i, n in enumerate(x.numblocks): if i in split_every and split_every[i] != 1: depth = int(builtins.max(depth, ceil(log(n, split_every[i])))) func = compose(partial(combine or aggregate, axis=axis, keepdims=True), partial(_concatenate2, axes=axis)) for i in range(depth - 1): x = partial_reduce(func, x, split_every, True, dtype=dtype, name=(name or funcname(combine or aggregate)) + '-partial') func = compose(partial(aggregate, axis=axis, keepdims=keepdims), partial(_concatenate2, axes=axis)) return partial_reduce(func, x, split_every, keepdims=keepdims, dtype=dtype, name=(name or funcname(aggregate)) + '-aggregate') def partial_reduce(func, x, split_every, keepdims=False, dtype=None, name=None): """Partial reduction across multiple axes. Parameters ---------- func : function x : Array split_every : dict Maximum reduction block sizes in each dimension. 
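    keepdims : bool, optional
        If True, the reduced axes are kept in the output (one chunk per merged
        group of blocks); if False they are dropped.
    dtype : dtype, optional
        dtype of the resulting array.
    name : str, optional
        Prefix for the output array name; a deterministic token is appended.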
Examples -------- Reduce across axis 0 and 2, merging a maximum of 1 block in the 0th dimension, and 3 blocks in the 2nd dimension: >>> partial_reduce(np.min, x, {0: 1, 2: 3}) # doctest: +SKIP """ name = (name or funcname(func)) + '-' + tokenize(func, x, split_every, keepdims, dtype) parts = [list(partition_all(split_every.get(i, 1), range(n))) for (i, n) in enumerate(x.numblocks)] keys = product(*map(range, map(len, parts))) out_chunks = [tuple(1 for p in partition_all(split_every[i], c)) if i in split_every else c for (i, c) in enumerate(x.chunks)] if not keepdims: out_axis = [i for i in range(x.ndim) if i not in split_every] getter = lambda k: get(out_axis, k) keys = map(getter, keys) out_chunks = list(getter(out_chunks)) dsk = {} for k, p in zip(keys, product(*parts)): decided = dict((i, j[0]) for (i, j) in enumerate(p) if len(j) == 1) dummy = dict(i for i in enumerate(p) if i[0] not in decided) g = lol_tuples((x.name,), range(x.ndim), decided, dummy) dsk[(name,) + k] = (func, g) return Array(sharedict.merge(x.dask, (name, dsk)), name, out_chunks, dtype=dtype) @wraps(chunk.sum) def sum(a, axis=None, dtype=None, keepdims=False, split_every=None, out=None): if dtype is not None: dt = dtype else: dt = np.empty((1,), dtype=a.dtype).sum().dtype return reduction(a, chunk.sum, chunk.sum, axis=axis, keepdims=keepdims, dtype=dt, split_every=split_every, out=out) @wraps(chunk.prod) def prod(a, axis=None, dtype=None, keepdims=False, split_every=None, out=None): if dtype is not None: dt = dtype else: dt = np.empty((1,), dtype=a.dtype).prod().dtype return reduction(a, chunk.prod, chunk.prod, axis=axis, keepdims=keepdims, dtype=dt, split_every=split_every, out=out) @wraps(chunk.min) def min(a, axis=None, keepdims=False, split_every=None, out=None): return reduction(a, chunk.min, chunk.min, axis=axis, keepdims=keepdims, dtype=a.dtype, split_every=split_every, out=out) @wraps(chunk.max) def max(a, axis=None, keepdims=False, split_every=None, out=None): return reduction(a, chunk.max, chunk.max, axis=axis, keepdims=keepdims, dtype=a.dtype, split_every=split_every, out=out) @wraps(chunk.any) def any(a, axis=None, keepdims=False, split_every=None, out=None): return reduction(a, chunk.any, chunk.any, axis=axis, keepdims=keepdims, dtype='bool', split_every=split_every, out=out) @wraps(chunk.all) def all(a, axis=None, keepdims=False, split_every=None, out=None): return reduction(a, chunk.all, chunk.all, axis=axis, keepdims=keepdims, dtype='bool', split_every=split_every, out=out) @wraps(chunk.nansum) def nansum(a, axis=None, dtype=None, keepdims=False, split_every=None, out=None): if dtype is not None: dt = dtype else: dt = chunk.nansum(np.empty((1,), dtype=a.dtype)).dtype return reduction(a, chunk.nansum, chunk.sum, axis=axis, keepdims=keepdims, dtype=dt, split_every=split_every, out=out) with ignoring(AttributeError): @wraps(chunk.nanprod) def nanprod(a, axis=None, dtype=None, keepdims=False, split_every=None, out=None): if dtype is not None: dt = dtype else: dt = chunk.nanprod(np.empty((1,), dtype=a.dtype)).dtype return reduction(a, chunk.nanprod, chunk.prod, axis=axis, keepdims=keepdims, dtype=dt, split_every=split_every, out=out) @wraps(chunk.nancumsum) def nancumsum(x, axis, dtype=None, out=None): return cumreduction(chunk.nancumsum, operator.add, 0, x, axis, dtype, out=out) @wraps(chunk.nancumprod) def nancumprod(x, axis, dtype=None, out=None): return cumreduction(chunk.nancumprod, operator.mul, 1, x, axis, dtype, out=out) @wraps(chunk.nanmin) def nanmin(a, axis=None, keepdims=False, split_every=None, 
out=None): return reduction(a, chunk.nanmin, chunk.nanmin, axis=axis, keepdims=keepdims, dtype=a.dtype, split_every=split_every, out=out) @wraps(chunk.nanmax) def nanmax(a, axis=None, keepdims=False, split_every=None, out=None): return reduction(a, chunk.nanmax, chunk.nanmax, axis=axis, keepdims=keepdims, dtype=a.dtype, split_every=split_every, out=out) def numel(x, **kwargs): """ A reduction to count the number of elements """ return chunk.sum(np.ones_like(x), **kwargs) def nannumel(x, **kwargs): """ A reduction to count the number of elements """ return chunk.sum(~np.isnan(x), **kwargs) def mean_chunk(x, sum=chunk.sum, numel=numel, dtype='f8', **kwargs): n = numel(x, dtype=dtype, **kwargs) total = sum(x, dtype=dtype, **kwargs) empty = empty_lookup.dispatch(type(n)) result = empty(n.shape, dtype=[('total', total.dtype), ('n', n.dtype)]) result['n'] = n result['total'] = total return result def mean_combine(pair, sum=chunk.sum, numel=numel, dtype='f8', **kwargs): n = sum(pair['n'], **kwargs) total = sum(pair['total'], **kwargs) empty = empty_lookup.dispatch(type(n)) result = empty(n.shape, dtype=pair.dtype) result['n'] = n result['total'] = total return result def mean_agg(pair, dtype='f8', **kwargs): return divide(pair['total'].sum(dtype=dtype, **kwargs), pair['n'].sum(dtype=dtype, **kwargs), dtype=dtype) @wraps(chunk.mean) def mean(a, axis=None, dtype=None, keepdims=False, split_every=None, out=None): if dtype is not None: dt = dtype else: dt = np.mean(np.empty(shape=(1,), dtype=a.dtype)).dtype return reduction(a, mean_chunk, mean_agg, axis=axis, keepdims=keepdims, dtype=dt, split_every=split_every, combine=mean_combine, out=out) def nanmean(a, axis=None, dtype=None, keepdims=False, split_every=None, out=None): if dtype is not None: dt = dtype else: dt = np.mean(np.empty(shape=(1,), dtype=a.dtype)).dtype return reduction(a, partial(mean_chunk, sum=chunk.nansum, numel=nannumel), mean_agg, axis=axis, keepdims=keepdims, dtype=dt, split_every=split_every, out=out, combine=partial(mean_combine, sum=chunk.nansum, numel=nannumel)) with ignoring(AttributeError): nanmean = wraps(chunk.nanmean)(nanmean) def moment_chunk(A, order=2, sum=chunk.sum, numel=numel, dtype='f8', **kwargs): total = sum(A, dtype=dtype, **kwargs) n = numel(A, **kwargs).astype(np.int64) u = total / n empty = empty_lookup.dispatch(type(n)) M = empty(n.shape + (order - 1,), dtype=dtype) for i in range(2, order + 1): M[..., i - 2] = sum((A - u)**i, dtype=dtype, **kwargs) result = empty(n.shape, dtype=[('total', total.dtype), ('n', n.dtype), ('M', M.dtype, (order - 1,))]) result['total'] = total result['n'] = n result['M'] = M return result def _moment_helper(Ms, ns, inner_term, order, sum, kwargs): M = Ms[..., order - 2].sum(**kwargs) + sum(ns * inner_term ** order, **kwargs) for k in range(1, order - 1): coeff = factorial(order) / (factorial(k) * factorial(order - k)) M += coeff * sum(Ms[..., order - k - 2] * inner_term**k, **kwargs) return M def moment_combine(data, order=2, ddof=0, dtype='f8', sum=np.sum, **kwargs): kwargs['dtype'] = dtype kwargs['keepdims'] = True totals = data['total'] ns = data['n'] Ms = data['M'] total = totals.sum(**kwargs) n = sum(ns, **kwargs) mu = divide(total, n, dtype=dtype) inner_term = divide(totals, ns, dtype=dtype) - mu empty = empty_lookup.dispatch(type(n)) M = empty(n.shape + (order - 1,), dtype=dtype) for o in range(2, order + 1): M[..., o - 2] = _moment_helper(Ms, ns, inner_term, o, sum, kwargs) result = empty(n.shape, dtype=[('total', total.dtype), ('n', n.dtype), ('M', Ms.dtype, (order - 
1,))]) result['total'] = total result['n'] = n result['M'] = M return result def moment_agg(data, order=2, ddof=0, dtype='f8', sum=np.sum, **kwargs): totals = data['total'] ns = data['n'] Ms = data['M'] kwargs['dtype'] = dtype # To properly handle ndarrays, the original dimensions need to be kept for # part of the calculation. keepdim_kw = kwargs.copy() keepdim_kw['keepdims'] = True n = sum(ns, **keepdim_kw) mu = divide(totals.sum(**keepdim_kw), n, dtype=dtype) inner_term = divide(totals, ns, dtype=dtype) - mu M = _moment_helper(Ms, ns, inner_term, order, sum, kwargs) return divide(M, sum(n, **kwargs) - ddof, dtype=dtype) def moment(a, order, axis=None, dtype=None, keepdims=False, ddof=0, split_every=None, out=None): if not isinstance(order, int) or order < 0: raise ValueError("Order must be an integer >= 0") if order < 2: reduced = a.sum(axis=axis) # get reduced shape and chunks if order == 0: # When order equals 0, the result is 1, by definition. return ones(reduced.shape, chunks=reduced.chunks, dtype='f8') # By definition the first order about the mean is 0. return zeros(reduced.shape, chunks=reduced.chunks, dtype='f8') if dtype is not None: dt = dtype else: dt = np.var(np.ones(shape=(1,), dtype=a.dtype)).dtype return reduction(a, partial(moment_chunk, order=order), partial(moment_agg, order=order, ddof=ddof), axis=axis, keepdims=keepdims, dtype=dt, split_every=split_every, out=out, combine=partial(moment_combine, order=order)) @wraps(chunk.var) def var(a, axis=None, dtype=None, keepdims=False, ddof=0, split_every=None, out=None): if dtype is not None: dt = dtype else: dt = np.var(np.ones(shape=(1,), dtype=a.dtype)).dtype return reduction(a, moment_chunk, partial(moment_agg, ddof=ddof), axis=axis, keepdims=keepdims, dtype=dt, split_every=split_every, combine=moment_combine, name='var', out=out) def nanvar(a, axis=None, dtype=None, keepdims=False, ddof=0, split_every=None, out=None): if dtype is not None: dt = dtype else: dt = np.var(np.ones(shape=(1,), dtype=a.dtype)).dtype return reduction(a, partial(moment_chunk, sum=chunk.nansum, numel=nannumel), partial(moment_agg, sum=np.nansum, ddof=ddof), axis=axis, keepdims=keepdims, dtype=dt, split_every=split_every, combine=partial(moment_combine, sum=np.nansum), out=out) with ignoring(AttributeError): nanvar = wraps(chunk.nanvar)(nanvar) @wraps(chunk.std) def std(a, axis=None, dtype=None, keepdims=False, ddof=0, split_every=None, out=None): result = sqrt(a.var(axis=axis, dtype=dtype, keepdims=keepdims, ddof=ddof, split_every=split_every, out=out)) if dtype and dtype != result.dtype: result = result.astype(dtype) return result def nanstd(a, axis=None, dtype=None, keepdims=False, ddof=0, split_every=None, out=None): result = sqrt(nanvar(a, axis=axis, dtype=dtype, keepdims=keepdims, ddof=ddof, split_every=split_every, out=out)) if dtype and dtype != result.dtype: result = result.astype(dtype) return result with ignoring(AttributeError): nanstd = wraps(chunk.nanstd)(nanstd) def vnorm(a, ord=None, axis=None, dtype=None, keepdims=False, split_every=None, out=None): """ Vector norm See np.linalg.norm """ warnings.warn( "DeprecationWarning: Please use `dask.array.linalg.norm` instead.", UserWarning ) if ord is None or ord == 'fro': ord = 2 if ord == np.inf: return max(abs(a), axis=axis, keepdims=keepdims, split_every=split_every, out=out) elif ord == -np.inf: return min(abs(a), axis=axis, keepdims=keepdims, split_every=split_every, out=out) elif ord == 1: return sum(abs(a), axis=axis, dtype=dtype, keepdims=keepdims, split_every=split_every, out=out) 
else: return sum(abs(a) ** ord, axis=axis, dtype=dtype, keepdims=keepdims, split_every=split_every, out=out) ** (1. / ord) def _arg_combine(data, axis, argfunc, keepdims=False): """Merge intermediate results from ``arg_*`` functions""" axis = None if len(axis) == data.ndim or data.ndim == 1 else axis[0] vals = data['vals'] arg = data['arg'] if axis is None: local_args = argfunc(vals, axis=axis, keepdims=keepdims) vals = vals.ravel()[local_args] arg = arg.ravel()[local_args] else: local_args = argfunc(vals, axis=axis) inds = np.ogrid[tuple(map(slice, local_args.shape))] inds.insert(axis, local_args) vals = vals[inds] arg = arg[inds] if keepdims: vals = np.expand_dims(vals, axis) arg = np.expand_dims(arg, axis) return arg, vals def arg_chunk(func, argfunc, x, axis, offset_info): arg_axis = None if len(axis) == x.ndim or x.ndim == 1 else axis[0] vals = func(x, axis=arg_axis, keepdims=True) arg = argfunc(x, axis=arg_axis, keepdims=True) if arg_axis is None: offset, total_shape = offset_info ind = np.unravel_index(arg.ravel()[0], x.shape) total_ind = tuple(o + i for (o, i) in zip(offset, ind)) arg[:] = np.ravel_multi_index(total_ind, total_shape) else: arg += offset_info if isinstance(vals, np.ma.masked_array): if 'min' in argfunc.__name__: fill_value = np.ma.minimum_fill_value(vals) else: fill_value = np.ma.maximum_fill_value(vals) vals = np.ma.filled(vals, fill_value) result = np.empty(shape=vals.shape, dtype=[('vals', vals.dtype), ('arg', arg.dtype)]) result['vals'] = vals result['arg'] = arg return result def arg_combine(func, argfunc, data, axis=None, **kwargs): arg, vals = _arg_combine(data, axis, argfunc, keepdims=True) result = np.empty(shape=vals.shape, dtype=[('vals', vals.dtype), ('arg', arg.dtype)]) result['vals'] = vals result['arg'] = arg return result def arg_agg(func, argfunc, data, axis=None, **kwargs): return _arg_combine(data, axis, argfunc, keepdims=False)[0] def nanarg_agg(func, argfunc, data, axis=None, **kwargs): arg, vals = _arg_combine(data, axis, argfunc, keepdims=False) if np.any(np.isnan(vals)): raise ValueError("All NaN slice encountered") return arg def arg_reduction(x, chunk, combine, agg, axis=None, split_every=None, out=None): """Generic function for argreduction. Parameters ---------- x : Array chunk : callable Partialed ``arg_chunk``. combine : callable Partialed ``arg_combine``. agg : callable Partialed ``arg_agg``. 
axis : int, optional split_every : int or dict, optional """ if axis is None: axis = tuple(range(x.ndim)) ravel = True elif isinstance(axis, int): if axis < 0: axis += x.ndim if axis < 0 or axis >= x.ndim: raise ValueError("axis entry is out of bounds") axis = (axis,) ravel = x.ndim == 1 else: raise TypeError("axis must be either `None` or int, " "got '{0}'".format(axis)) # Map chunk across all blocks name = 'arg-reduce-chunk-{0}'.format(tokenize(chunk, axis)) old = x.name keys = list(product(*map(range, x.numblocks))) offsets = list(product(*(accumulate(operator.add, bd[:-1], 0) for bd in x.chunks))) if ravel: offset_info = zip(offsets, repeat(x.shape)) else: offset_info = pluck(axis[0], offsets) chunks = tuple((1, ) * len(c) if i in axis else c for (i, c) in enumerate(x.chunks)) dsk = dict(((name,) + k, (chunk, (old,) + k, axis, off)) for (k, off) in zip(keys, offset_info)) # The dtype of `tmp` doesn't actually matter, just need to provide something tmp = Array(sharedict.merge(x.dask, (name, dsk)), name, chunks, dtype=x.dtype) dtype = np.argmin([1]).dtype result = _tree_reduce(tmp, agg, axis, False, dtype, split_every, combine) return handle_out(out, result) def make_arg_reduction(func, argfunc, is_nan_func=False): """Create a argreduction callable. Parameters ---------- func : callable The reduction (e.g. ``min``) argfunc : callable The argreduction (e.g. ``argmin``) """ chunk = partial(arg_chunk, func, argfunc) combine = partial(arg_combine, func, argfunc) if is_nan_func: agg = partial(nanarg_agg, func, argfunc) else: agg = partial(arg_agg, func, argfunc) @wraps(argfunc) def _(x, axis=None, split_every=None, out=None): return arg_reduction(x, chunk, combine, agg, axis, split_every=split_every, out=out) return _ def _nanargmin(x, axis, **kwargs): try: return chunk.nanargmin(x, axis, **kwargs) except ValueError: return chunk.nanargmin(np.where(np.isnan(x), np.inf, x), axis, **kwargs) def _nanargmax(x, axis, **kwargs): try: return chunk.nanargmax(x, axis, **kwargs) except ValueError: return chunk.nanargmax(np.where(np.isnan(x), -np.inf, x), axis, **kwargs) argmin = make_arg_reduction(chunk.min, chunk.argmin) argmax = make_arg_reduction(chunk.max, chunk.argmax) nanargmin = make_arg_reduction(chunk.nanmin, _nanargmin, True) nanargmax = make_arg_reduction(chunk.nanmax, _nanargmax, True) def cumreduction(func, binop, ident, x, axis=None, dtype=None, out=None): """ Generic function for cumulative reduction Parameters ---------- func: callable Cumulative function like np.cumsum or np.cumprod binop: callable Associated binary operator like ``np.cumsum->add`` or ``np.cumprod->mul`` ident: Number Associated identity like ``np.cumsum->0`` or ``np.cumprod->1`` x: dask Array axis: int dtype: dtype Returns ------- dask array See also -------- cumsum cumprod """ if axis is None: x = x.flatten() axis = 0 if dtype is None: dtype = func(np.empty((0,), dtype=x.dtype)).dtype assert isinstance(axis, int) axis = validate_axis(x.ndim, axis) m = x.map_blocks(func, axis=axis, dtype=dtype) name = '%s-axis=%d-%s' % (func.__name__, axis, tokenize(x, dtype)) n = x.numblocks[axis] full = slice(None, None, None) slc = (full,) * axis + (slice(-1, None),) + (full,) * (x.ndim - axis - 1) indices = list(product(*[range(nb) if i != axis else [0] for i, nb in enumerate(x.numblocks)])) dsk = dict() for ind in indices: shape = tuple(x.chunks[i][ii] if i != axis else 1 for i, ii in enumerate(ind)) dsk[(name, 'extra') + ind] = (np.full, shape, ident, m.dtype) dsk[(name,) + ind] = (m.name,) + ind for i in range(1, n): 
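        # Each block along ``axis`` combines, via ``binop``, a running offset
        # (the 'extra' key, built from the trailing slice of every earlier
        # block's blockwise cumulative result) with its own blockwise result
        # from ``m``.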
last_indices = indices indices = list(product(*[range(nb) if ii != axis else [i] for ii, nb in enumerate(x.numblocks)])) for old, ind in zip(last_indices, indices): this_slice = (name, 'extra') + ind dsk[this_slice] = (binop, (name, 'extra') + old, (operator.getitem, (m.name,) + old, slc)) dsk[(name,) + ind] = (binop, this_slice, (m.name,) + ind) result = Array(sharedict.merge(m.dask, (name, dsk)), name, x.chunks, m.dtype) return handle_out(out, result) def _cumsum_merge(a, b): if isinstance(a, np.ma.masked_array) or isinstance(b, np.ma.masked_array): values = np.ma.getdata(a) + np.ma.getdata(b) return np.ma.masked_array(values, mask=np.ma.getmaskarray(b)) return a + b def _cumprod_merge(a, b): if isinstance(a, np.ma.masked_array) or isinstance(b, np.ma.masked_array): values = np.ma.getdata(a) * np.ma.getdata(b) return np.ma.masked_array(values, mask=np.ma.getmaskarray(b)) return a * b @wraps(np.cumsum) def cumsum(x, axis=None, dtype=None, out=None): return cumreduction(np.cumsum, _cumsum_merge, 0, x, axis, dtype, out=out) @wraps(np.cumprod) def cumprod(x, axis=None, dtype=None, out=None): return cumreduction(np.cumprod, _cumprod_merge, 1, x, axis, dtype, out=out) def validate_axis(ndim, axis): """ Validate single axis dimension against number of dimensions """ if axis > ndim - 1 or axis < -ndim: raise ValueError("Axis must be between -%d and %d, got %d" % (ndim, ndim - 1, axis)) if axis < 0: return axis + ndim else: return axis dask-0.16.0/dask/array/reshape.py000066400000000000000000000134711320364734500165760ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from itertools import product from operator import mul import numpy as np from .core import Array from ..base import tokenize from ..core import flatten from ..compatibility import reduce from ..utils import M from .. 
import sharedict def reshape_rechunk(inshape, outshape, inchunks): assert all(isinstance(c, tuple) for c in inchunks) ii = len(inshape) - 1 oi = len(outshape) - 1 result_inchunks = [None for i in range(len(inshape))] result_outchunks = [None for i in range(len(outshape))] while ii >= 0 or oi >= 0: if inshape[ii] == outshape[oi]: result_inchunks[ii] = inchunks[ii] result_outchunks[oi] = inchunks[ii] ii -= 1 oi -= 1 continue din = inshape[ii] dout = outshape[oi] if din == 1: result_inchunks[ii] = (1,) ii -= 1 elif dout == 1: result_outchunks[oi] = (1,) oi -= 1 elif din < dout: # (4, 4, 4) -> (64,) ileft = ii - 1 while ileft >= 0 and reduce(mul, inshape[ileft:ii + 1]) < dout: # 4 < 64, 4*4 < 64, 4*4*4 == 64 ileft -= 1 if reduce(mul, inshape[ileft:ii + 1]) != dout: raise ValueError("Shapes not compatible") for i in range(ileft + 1, ii + 1): # need single-shape dimensions result_inchunks[i] = (inshape[i],) # chunks[i] = (4,) chunk_reduction = reduce(mul, map(len, inchunks[ileft + 1:ii + 1])) result_inchunks[ileft] = expand_tuple(inchunks[ileft], chunk_reduction) prod = reduce(mul, inshape[ileft + 1: ii + 1]) # 16 result_outchunks[oi] = tuple(prod * c for c in result_inchunks[ileft]) # (1, 1, 1, 1) .* 16 oi -= 1 ii = ileft - 1 elif din > dout: # (64,) -> (4, 4, 4) oleft = oi - 1 while oleft >= 0 and reduce(mul, outshape[oleft:oi + 1]) < din: oleft -= 1 if reduce(mul, outshape[oleft:oi + 1]) != din: raise ValueError("Shapes not compatible") # TODO: don't coalesce shapes unnecessarily cs = reduce(mul, outshape[oleft + 1: oi + 1]) result_inchunks[ii] = contract_tuple(inchunks[ii], cs) # (16, 16, 16, 16) for i in range(oleft + 1, oi + 1): result_outchunks[i] = (outshape[i],) result_outchunks[oleft] = tuple(c // cs for c in result_inchunks[ii]) oi = oleft - 1 ii -= 1 return tuple(result_inchunks), tuple(result_outchunks) def expand_tuple(chunks, factor): """ >>> expand_tuple((2, 4), 2) (1, 1, 2, 2) >>> expand_tuple((2, 4), 3) (1, 1, 1, 1, 2) >>> expand_tuple((3, 4), 2) (1, 2, 2, 2) >>> expand_tuple((7, 4), 3) (2, 2, 3, 1, 1, 2) """ if factor == 1: return chunks out = [] for c in chunks: x = c part = max(x / factor, 1) while x >= 2 * part: out.append(int(part)) x -= int(part) if x: out.append(x) assert sum(chunks) == sum(out) return tuple(out) def contract_tuple(chunks, factor): """ Return simple chunks tuple such that factor divides all elements Examples -------- >>> contract_tuple((2, 2, 8, 4), 4) (4, 8, 4) """ assert sum(chunks) % factor == 0 out = [] residual = 0 for chunk in chunks: chunk += residual div = chunk // factor residual = chunk % factor good = factor * div if good: out.append(good) return tuple(out) def reshape(x, shape): """ Reshape array to new shape This is a parallelized version of the ``np.reshape`` function with the following limitations: 1. It assumes that the array is stored in C-order 2. It only allows for reshapings that collapse or merge dimensions like ``(1, 2, 3, 4) -> (1, 6, 4)`` or ``(64,) -> (4, 4, 4)`` When communication is necessary this algorithm depends on the logic within rechunk. It endeavors to keep chunk sizes roughly the same when possible. 
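    For example, splitting a one-dimensional array into a cube (an
    illustrative sketch; the output chunking depends on the rechunking logic
    described above):

    >>> import dask.array as da
    >>> x = da.ones((64,), chunks=8)
    >>> x.reshape((4, 4, 4))  # doctest: +SKIP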
See Also -------- dask.array.rechunk numpy.reshape """ # Sanitize inputs, look for -1 in shape from .slicing import sanitize_index shape = tuple(map(sanitize_index, shape)) known_sizes = [s for s in shape if s != -1] if len(known_sizes) < len(shape): if len(known_sizes) - len(shape) > 1: raise ValueError('can only specify one unknown dimension') missing_size = sanitize_index(x.size / reduce(mul, known_sizes, 1)) shape = tuple(missing_size if s == -1 else s for s in shape) if np.isnan(sum(x.shape)): raise ValueError("Array chunk size or shape is unknown. shape: %s", x.shape) if reduce(mul, shape, 1) != x.size: raise ValueError('total size of new array must be unchanged') if x.shape == shape: return x name = 'reshape-' + tokenize(x, shape) if x.npartitions == 1: key = next(flatten(x.__dask_keys__())) dsk = {(name,) + (0,) * len(shape): (M.reshape, key, shape)} chunks = tuple((d,) for d in shape) return Array(sharedict.merge((name, dsk), x.dask), name, chunks, dtype=x.dtype) # Logic for how to rechunk inchunks, outchunks = reshape_rechunk(x.shape, shape, x.chunks) x2 = x.rechunk(inchunks) # Construct graph in_keys = list(product([x2.name], *[range(len(c)) for c in inchunks])) out_keys = list(product([name], *[range(len(c)) for c in outchunks])) shapes = list(product(*outchunks)) dsk = {a: (M.reshape, b, shape) for a, b, shape in zip(out_keys, in_keys, shapes)} return Array(sharedict.merge((name, dsk), x2.dask), name, outchunks, dtype=x.dtype) dask-0.16.0/dask/array/routines.py000066400000000000000000000747741320364734500170340ustar00rootroot00000000000000from __future__ import division, print_function, absolute_import import inspect import warnings from collections import Iterable from distutils.version import LooseVersion from functools import wraps, partial from itertools import product from numbers import Integral from operator import getitem import numpy as np from toolz import concat, sliding_window, interleave from .. import sharedict from ..core import flatten from ..base import tokenize from . 
import numpy_compat, chunk from .creation import arange from .wrap import ones from .core import (Array, map_blocks, elemwise, from_array, asarray, asanyarray, concatenate, stack, atop, broadcast_shapes, is_scalar_for_elemwise, broadcast_to, tensordot_lookup) @wraps(np.array) def array(x, dtype=None, ndmin=None): while ndmin is not None and x.ndim < ndmin: x = x[None, :] if dtype is not None and x.dtype != dtype: x = x.astype(dtype) return x @wraps(np.result_type) def result_type(*args): args = [a if is_scalar_for_elemwise(a) else a.dtype for a in args] return np.result_type(*args) @wraps(np.atleast_3d) def atleast_3d(*arys): new_arys = [] for x in arys: x = asanyarray(x) if x.ndim == 0: x = x[None, None, None] elif x.ndim == 1: x = x[None, :, None] elif x.ndim == 2: x = x[:, :, None] new_arys.append(x) if len(new_arys) == 1: return new_arys[0] else: return new_arys @wraps(np.atleast_2d) def atleast_2d(*arys): new_arys = [] for x in arys: x = asanyarray(x) if x.ndim == 0: x = x[None, None] elif x.ndim == 1: x = x[None, :] new_arys.append(x) if len(new_arys) == 1: return new_arys[0] else: return new_arys @wraps(np.atleast_1d) def atleast_1d(*arys): new_arys = [] for x in arys: x = asanyarray(x) if x.ndim == 0: x = x[None] new_arys.append(x) if len(new_arys) == 1: return new_arys[0] else: return new_arys @wraps(np.vstack) def vstack(tup): tup = tuple(atleast_2d(x) for x in tup) return concatenate(tup, axis=0) @wraps(np.hstack) def hstack(tup): if all(x.ndim == 1 for x in tup): return concatenate(tup, axis=0) else: return concatenate(tup, axis=1) @wraps(np.dstack) def dstack(tup): tup = tuple(atleast_3d(x) for x in tup) return concatenate(tup, axis=2) @wraps(np.swapaxes) def swapaxes(a, axis1, axis2): if axis1 == axis2: return a if axis1 < 0: axis1 = axis1 + a.ndim if axis2 < 0: axis2 = axis2 + a.ndim ind = list(range(a.ndim)) out = list(ind) out[axis1], out[axis2] = axis2, axis1 return atop(np.swapaxes, out, a, ind, axis1=axis1, axis2=axis2, dtype=a.dtype) @wraps(np.transpose) def transpose(a, axes=None): if axes: if len(axes) != a.ndim: raise ValueError("axes don't match array") else: axes = tuple(range(a.ndim))[::-1] axes = tuple(d + a.ndim if d < 0 else d for d in axes) return atop(np.transpose, axes, a, tuple(range(a.ndim)), dtype=a.dtype, axes=axes) alphabet = 'abcdefghijklmnopqrstuvwxyz' ALPHABET = alphabet.upper() def _tensordot(a, b, axes): x = max([a, b], key=lambda x: x.__array_priority__) tensordot = tensordot_lookup.dispatch(type(x)) x = tensordot(a, b, axes=axes) ind = [slice(None, None)] * x.ndim for a in sorted(axes[0]): ind.insert(a, None) x = x[tuple(ind)] return x @wraps(np.tensordot) def tensordot(lhs, rhs, axes=2): if isinstance(axes, Iterable): left_axes, right_axes = axes else: left_axes = tuple(range(lhs.ndim - 1, lhs.ndim - axes - 1, -1)) right_axes = tuple(range(0, axes)) if isinstance(left_axes, int): left_axes = (left_axes,) if isinstance(right_axes, int): right_axes = (right_axes,) if isinstance(left_axes, list): left_axes = tuple(left_axes) if isinstance(right_axes, list): right_axes = tuple(right_axes) dt = np.promote_types(lhs.dtype, rhs.dtype) left_index = list(alphabet[:lhs.ndim]) right_index = list(ALPHABET[:rhs.ndim]) out_index = left_index + right_index for l, r in zip(left_axes, right_axes): out_index.remove(right_index[r]) right_index[r] = left_index[l] intermediate = atop(_tensordot, out_index, lhs, left_index, rhs, right_index, dtype=dt, axes=(left_axes, right_axes)) result = intermediate.sum(axis=left_axes) return result @wraps(np.dot) def dot(a, b): 
return tensordot(a, b, axes=((a.ndim - 1,), (b.ndim - 2,))) def _inner_apply_along_axis(arr, func1d, func1d_axis, func1d_args, func1d_kwargs): return np.apply_along_axis( func1d, func1d_axis, arr, *func1d_args, **func1d_kwargs ) @wraps(np.apply_along_axis) def apply_along_axis(func1d, axis, arr, *args, **kwargs): arr = asarray(arr) # Validate and normalize axis. arr.shape[axis] axis = len(arr.shape[:axis]) # Test out some data with the function. test_data = np.ones((1,), dtype=arr.dtype) test_result = np.array(func1d(test_data, *args, **kwargs)) if (LooseVersion(np.__version__) < LooseVersion("1.13.0") and (np.array(test_result.shape) > 1).sum(dtype=int) > 1): raise ValueError( "No more than one non-trivial dimension allowed in result. " "Need NumPy 1.13.0+ for this functionality." ) # Rechunk so that func1d is applied over the full axis. arr = arr.rechunk( arr.chunks[:axis] + (arr.shape[axis:axis + 1],) + arr.chunks[axis + 1:] ) # Map func1d over the data to get the result # Adds other axes as needed. result = arr.map_blocks( _inner_apply_along_axis, token="apply_along_axis", dtype=test_result.dtype, chunks=(arr.chunks[:axis] + test_result.shape + arr.chunks[axis + 1:]), drop_axis=axis, new_axis=list(range(axis, axis + test_result.ndim, 1)), func1d=func1d, func1d_axis=axis, func1d_args=args, func1d_kwargs=kwargs, ) return result @wraps(np.apply_over_axes) def apply_over_axes(func, a, axes): # Validate arguments a = asarray(a) try: axes = tuple(axes) except TypeError: axes = (axes,) sl = a.ndim * (slice(None),) # Compute using `apply_along_axis`. result = a for i in axes: result = apply_along_axis(func, i, result, 0) # Restore original dimensionality or error. if result.ndim == (a.ndim - 1): result = result[sl[:i] + (None,)] elif result.ndim != a.ndim: raise ValueError( "func must either preserve dimensionality of the input" " or reduce it by one." 
) return result @wraps(np.ptp) def ptp(a, axis=None): return a.max(axis=axis) - a.min(axis=axis) @wraps(np.diff) def diff(a, n=1, axis=-1): a = asarray(a) n = int(n) axis = int(axis) sl_1 = a.ndim * [slice(None)] sl_2 = a.ndim * [slice(None)] sl_1[axis] = slice(1, None) sl_2[axis] = slice(None, -1) sl_1 = tuple(sl_1) sl_2 = tuple(sl_2) r = a for i in range(n): r = r[sl_1] - r[sl_2] return r @wraps(np.ediff1d) def ediff1d(ary, to_end=None, to_begin=None): ary = asarray(ary) aryf = ary.flatten() r = aryf[1:] - aryf[:-1] r = [r] if to_begin is not None: r = [asarray(to_begin).flatten()] + r if to_end is not None: r = r + [asarray(to_end).flatten()] r = concatenate(r) return r @wraps(np.bincount) def bincount(x, weights=None, minlength=None): if minlength is None: raise TypeError("Must specify minlength argument in da.bincount") assert x.ndim == 1 if weights is not None: assert weights.chunks == x.chunks # Call np.bincount on each block, possibly with weights token = tokenize(x, weights, minlength) name = 'bincount-' + token if weights is not None: dsk = {(name, i): (np.bincount, (x.name, i), (weights.name, i), minlength) for i, _ in enumerate(x.__dask_keys__())} dtype = np.bincount([1], weights=[1]).dtype else: dsk = {(name, i): (np.bincount, (x.name, i), None, minlength) for i, _ in enumerate(x.__dask_keys__())} dtype = np.bincount([]).dtype # Sum up all of the intermediate bincounts per block name = 'bincount-sum-' + token dsk[(name, 0)] = (np.sum, list(dsk), 0) chunks = ((minlength,),) dsk = sharedict.merge((name, dsk), x.dask) if weights is not None: dsk.update(weights.dask) return Array(dsk, name, chunks, dtype) @wraps(np.digitize) def digitize(a, bins, right=False): bins = np.asarray(bins) dtype = np.digitize([0], bins, right=False).dtype return a.map_blocks(np.digitize, dtype=dtype, bins=bins, right=right) def histogram(a, bins=None, range=None, normed=False, weights=None, density=None): """ Blocked variant of numpy.histogram. Follows the signature of numpy.histogram exactly with the following exceptions: - Either an iterable specifying the ``bins`` or the number of ``bins`` and a ``range`` argument is required as computing ``min`` and ``max`` over blocked arrays is an expensive operation that must be performed explicitly. - ``weights`` must be a dask.array.Array with the same block structure as ``a``. 
Examples -------- Using number of bins and range: >>> import dask.array as da >>> import numpy as np >>> x = da.from_array(np.arange(10000), chunks=10) >>> h, bins = da.histogram(x, bins=10, range=[0, 10000]) >>> bins array([ 0., 1000., 2000., 3000., 4000., 5000., 6000., 7000., 8000., 9000., 10000.]) >>> h.compute() array([1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]) Explicitly specifying the bins: >>> h, bins = da.histogram(x, bins=np.array([0, 5000, 10000])) >>> bins array([ 0, 5000, 10000]) >>> h.compute() array([5000, 5000]) """ if bins is None or (range is None and bins is None): raise ValueError('dask.array.histogram requires either bins ' 'or bins and range to be defined.') if weights is not None and weights.chunks != a.chunks: raise ValueError('Input array and weights must have the same ' 'chunked structure') if not np.iterable(bins): bin_token = bins mn, mx = range if mn == mx: mn -= 0.5 mx += 0.5 bins = np.linspace(mn, mx, bins + 1, endpoint=True) else: bin_token = bins token = tokenize(a, bin_token, range, normed, weights, density) nchunks = len(list(flatten(a.__dask_keys__()))) chunks = ((1,) * nchunks, (len(bins) - 1,)) name = 'histogram-sum-' + token # Map the histogram to all bins def block_hist(x, weights=None): return np.histogram(x, bins, weights=weights)[0][np.newaxis] if weights is None: dsk = {(name, i, 0): (block_hist, k) for i, k in enumerate(flatten(a.__dask_keys__()))} dtype = np.histogram([])[0].dtype else: a_keys = flatten(a.__dask_keys__()) w_keys = flatten(weights.__dask_keys__()) dsk = {(name, i, 0): (block_hist, k, w) for i, (k, w) in enumerate(zip(a_keys, w_keys))} dtype = weights.dtype all_dsk = sharedict.merge(a.dask, (name, dsk)) if weights is not None: all_dsk.update(weights.dask) mapped = Array(all_dsk, name, chunks, dtype=dtype) n = mapped.sum(axis=0) # We need to replicate normed and density options from numpy if density is not None: if density: db = from_array(np.diff(bins).astype(float), chunks=n.chunks) return n / db / n.sum(), bins else: return n, bins else: # deprecated, will be removed from Numpy 2.0 if normed: db = from_array(np.diff(bins).astype(float), chunks=n.chunks) return n / (n * db).sum(), bins else: return n, bins @wraps(np.cov) def cov(m, y=None, rowvar=1, bias=0, ddof=None): # This was copied almost verbatim from np.cov # See numpy license at https://github.com/numpy/numpy/blob/master/LICENSE.txt # or NUMPY_LICENSE.txt within this directory if ddof is not None and ddof != int(ddof): raise ValueError( "ddof must be integer") # Handles complex arrays too m = asarray(m) if y is None: dtype = np.result_type(m, np.float64) else: y = asarray(y) dtype = np.result_type(m, y, np.float64) X = array(m, ndmin=2, dtype=dtype) if X.shape[0] == 1: rowvar = 1 if rowvar: N = X.shape[1] axis = 0 else: N = X.shape[0] axis = 1 # check ddof if ddof is None: if bias == 0: ddof = 1 else: ddof = 0 fact = float(N - ddof) if fact <= 0: warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning) fact = 0.0 if y is not None: y = array(y, ndmin=2, dtype=dtype) X = concatenate((X, y), axis) X = X - X.mean(axis=1 - axis, keepdims=True) if not rowvar: return (dot(X.T, X.conj()) / fact).squeeze() else: return (dot(X, X.T.conj()) / fact).squeeze() @wraps(np.corrcoef) def corrcoef(x, y=None, rowvar=1): from .ufunc import sqrt from .creation import diag c = cov(x, y, rowvar) if c.shape == (): return c / c d = diag(c) d = d.reshape((d.shape[0], 1)) sqr_d = sqrt(d) return (c / sqr_d) / sqr_d.T @wraps(np.round) def round(a, decimals=0): return 
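# --- Illustrative usage sketch (not part of the original module) ---
# Rough example of the blocked ``cov``/``corrcoef`` defined above; the
# input shape and chunking are invented, and rows are treated as the
# variables (the default ``rowvar=1``).
import numpy as np
import dask.array as da

m = da.from_array(np.arange(20, dtype=float).reshape(4, 5), chunks=(2, 5))
c = da.cov(m)        # 4 x 4 covariance matrix
r = da.corrcoef(m)   # correlation matrix built on top of ``cov``
print(c.compute().shape, r.compute().shape)  # expected: (4, 4) (4, 4)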
a.map_blocks(np.round, decimals=decimals, dtype=a.dtype) def _unique_internal(ar, indices, counts, return_inverse=False): """ Helper/wrapper function for NumPy's ``unique``. Uses NumPy's ``unique`` to find the unique values for the array chunk. Given this chunk may not represent the whole array, also take the ``indices`` and ``counts`` that are in 1-to-1 correspondence to ``ar`` and reduce them in the same fashion as ``ar`` is reduced. Namely sum any counts that correspond to the same value and take the smallest index that corresponds to the same value. To handle the inverse mapping from the unique values to the original array, simply return a NumPy array created with ``arange`` with enough values to correspond 1-to-1 to the unique values. While there is more work needed to be done to create the full inverse mapping for the original array, this provides enough information to generate the inverse mapping in Dask. Given Dask likes to have one array returned from functions like ``atop``, some formatting is done to stuff all of the resulting arrays into one big NumPy structured array. Dask is then able to handle this object and can split it apart into the separate results on the Dask side, which then can be passed back to this function in concatenated chunks for further reduction or can be return to the user to perform other forms of analysis. By handling the problem in this way, it does not matter where a chunk is in a larger array or how big it is. The chunk can still be computed on the same way. Also it does not matter if the chunk is the result of other chunks being run through this function multiple times. The end result will still be just as accurate using this strategy. """ return_index = (indices is not None) return_counts = (counts is not None) u = np.unique(ar) dt = [("values", u.dtype)] if return_index: dt.append(("indices", np.int64)) if return_inverse: dt.append(("inverse", np.int64)) if return_counts: dt.append(("counts", np.int64)) r = np.empty(u.shape, dtype=dt) r["values"] = u if return_inverse: r["inverse"] = np.arange(len(r), dtype=np.int64) if return_index or return_counts: for i, v in enumerate(r["values"]): m = (ar == v) if return_index: indices[m].min(keepdims=True, out=r["indices"][i:i + 1]) if return_counts: counts[m].sum(keepdims=True, out=r["counts"][i:i + 1]) return r @wraps(np.unique) def unique(ar, return_index=False, return_inverse=False, return_counts=False): ar = ar.ravel() # Run unique on each chunk and collect results in a Dask Array of # unknown size. args = [ar, "i"] out_dtype = [("values", ar.dtype)] if return_index: args.extend([ arange(ar.shape[0], dtype=np.int64, chunks=ar.chunks[0]), "i" ]) out_dtype.append(("indices", np.int64)) else: args.extend([None, None]) if return_counts: args.extend([ ones((ar.shape[0],), dtype=np.int64, chunks=ar.chunks[0]), "i" ]) out_dtype.append(("counts", np.int64)) else: args.extend([None, None]) out = atop( _unique_internal, "i", *args, dtype=out_dtype, return_inverse=False ) out._chunks = tuple((np.nan,) * len(c) for c in out.chunks) # Take the results from the unique chunks and do the following. # # 1. Collect all results as arguments. # 2. Concatenate each result into one big array. # 3. Pass all results as arguments to the internal unique again. # # TODO: This should be replaced with a tree reduction using this strategy. 
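# --- Illustrative usage sketch (not part of the original module) ---
# Small example of the chunked ``unique`` defined above with
# ``return_counts``; the input values and chunking are arbitrary.
import numpy as np
import dask.array as da

x = da.from_array(np.array([1, 1, 2, 3, 3, 3]), chunks=3)
values, counts = da.unique(x, return_counts=True)
print(values.compute())  # expected: [1 2 3]
print(counts.compute())  # expected: [2 1 3]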
# xref: https://github.com/dask/dask/issues/2851 out_parts = [out["values"]] if return_index: out_parts.append(out["indices"]) else: out_parts.append(None) if return_counts: out_parts.append(out["counts"]) else: out_parts.append(None) name = 'unique-aggregate-' + out.name dsk = { (name, 0): ( (_unique_internal,) + tuple( (np.concatenate, o. __dask_keys__()) if hasattr(o, "__dask_keys__") else o for o in out_parts ) + (return_inverse,) ) } out_dtype = [("values", ar.dtype)] if return_index: out_dtype.append(("indices", np.int64)) if return_inverse: out_dtype.append(("inverse", np.int64)) if return_counts: out_dtype.append(("counts", np.int64)) out = Array( sharedict.merge(*( [(name, dsk)] + [o.dask for o in out_parts if hasattr(o, "__dask_keys__")] )), name, ((np.nan,),), out_dtype ) # Split out all results to return to the user. result = [out["values"]] if return_index: result.append(out["indices"]) if return_inverse: # Using the returned unique values and arange of unknown length, find # each value matching a unique value and replace it with its # corresponding index or `0`. There should be only one entry for this # index in axis `1` (the one of unknown length). Reduce axis `1` # through summing to get an array with known dimensionality and the # mapping of the original values. mtches = (ar[:, None] == out["values"][None, :]).astype(np.int64) result.append((mtches * out["inverse"]).sum(axis=1)) if return_counts: result.append(out["counts"]) if len(result) == 1: result = result[0] else: result = tuple(result) return result @wraps(np.roll) def roll(array, shift, axis=None): result = array if axis is None: result = ravel(result) if not isinstance(shift, Integral): raise TypeError( "Expect `shift` to be an instance of Integral" " when `axis` is None." ) shift = (shift,) axis = (0,) else: try: len(shift) except TypeError: shift = (shift,) try: len(axis) except TypeError: axis = (axis,) if len(shift) != len(axis): raise ValueError("Must have the same number of shifts as axes.") for i, s in zip(axis, shift): s = -s s %= result.shape[i] sl1 = result.ndim * [slice(None)] sl2 = result.ndim * [slice(None)] sl1[i] = slice(s, None) sl2[i] = slice(None, s) sl1 = tuple(sl1) sl2 = tuple(sl2) result = concatenate([result[sl1], result[sl2]], axis=i) result = result.reshape(array.shape) return result @wraps(np.ravel) def ravel(array): return array.reshape((-1,)) @wraps(np.squeeze) def squeeze(a, axis=None): if 1 not in a.shape: return a if axis is None: axis = tuple(i for i, d in enumerate(a.shape) if d == 1) b = a.map_blocks(partial(np.squeeze, axis=axis), dtype=a.dtype) chunks = tuple(bd for bd in b.chunks if bd != (1,)) name = 'squeeze-' + tokenize(a, axis) old_keys = list(product([b.name], *[range(len(bd)) for bd in b.chunks])) new_keys = list(product([name], *[range(len(bd)) for bd in chunks])) dsk = {n: b.dask[o] for o, n in zip(old_keys, new_keys)} return Array(sharedict.merge(b.dask, (name, dsk)), name, chunks, dtype=a.dtype) def topk(k, x): """ The top k elements of an array Returns the k greatest elements of the array in sorted order. Only works on arrays of a single dimension. This assumes that ``k`` is small. All results will be returned in a single chunk. 
Examples -------- >>> x = np.array([5, 1, 3, 6]) >>> d = from_array(x, chunks=2) >>> d.topk(2).compute() array([6, 5]) """ if x.ndim != 1: raise ValueError("Topk only works on arrays of one dimension") token = tokenize(k, x) name = 'chunk.topk-' + token dsk = {(name, i): (chunk.topk, k, key) for i, key in enumerate(x.__dask_keys__())} name2 = 'topk-' + token dsk[(name2, 0)] = (getitem, (np.sort, (np.concatenate, list(dsk))), slice(-1, -k - 1, -1)) chunks = ((k,),) return Array(sharedict.merge((name2, dsk), x.dask), name2, chunks, dtype=x.dtype) @wraps(np.compress) def compress(condition, a, axis=None): if axis is None: a = a.ravel() axis = 0 if not -a.ndim <= axis < a.ndim: raise ValueError('axis=(%s) out of bounds' % axis) if axis < 0: axis += a.ndim # Only coerce non-lazy values to numpy arrays if not isinstance(condition, Array): condition = np.array(condition, dtype=bool) if condition.ndim != 1: raise ValueError("Condition must be one dimensional") if isinstance(condition, Array): if len(condition) < a.shape[axis]: a = a[tuple(slice(None, len(condition)) if i == axis else slice(None) for i in range(a.ndim))] inds = tuple(range(a.ndim)) out = atop(np.compress, inds, condition, (inds[axis],), a, inds, axis=axis, dtype=a.dtype) out._chunks = tuple((np.NaN,) * len(c) if i == axis else c for i, c in enumerate(out.chunks)) return out else: # Optimized case when condition is known if len(condition) < a.shape[axis]: condition = condition.copy() condition.resize(a.shape[axis]) slc = ((slice(None),) * axis + (condition, ) + (slice(None),) * (a.ndim - axis - 1)) return a[slc] @wraps(np.extract) def extract(condition, arr): if not isinstance(condition, Array): condition = np.array(condition, dtype=bool) return compress(condition.ravel(), arr.ravel()) @wraps(np.take) def take(a, indices, axis=0): if not -a.ndim <= axis < a.ndim: raise ValueError('axis=(%s) out of bounds' % axis) if axis < 0: axis += a.ndim if isinstance(a, np.ndarray) and isinstance(indices, Array): return _take_dask_array_from_numpy(a, indices, axis) else: return a[(slice(None),) * axis + (indices,)] def _take_dask_array_from_numpy(a, indices, axis): assert isinstance(a, np.ndarray) assert isinstance(indices, Array) return indices.map_blocks(lambda block: np.take(a, block, axis), chunks=indices.chunks, dtype=a.dtype) @wraps(np.around) def around(x, decimals=0): return map_blocks(partial(np.around, decimals=decimals), x, dtype=x.dtype) def isnull(values): """ pandas.isnull for dask arrays """ import pandas as pd return elemwise(pd.isnull, values, dtype='bool') def notnull(values): """ pandas.notnull for dask arrays """ return ~isnull(values) @wraps(numpy_compat.isclose) def isclose(arr1, arr2, rtol=1e-5, atol=1e-8, equal_nan=False): func = partial(numpy_compat.isclose, rtol=rtol, atol=atol, equal_nan=equal_nan) return elemwise(func, arr1, arr2, dtype='bool') @wraps(np.allclose) def allclose(arr1, arr2, rtol=1e-5, atol=1e-8, equal_nan=False): return isclose(arr1, arr2, rtol=rtol, atol=atol, equal_nan=equal_nan).all() def variadic_choose(a, *choices): return np.choose(a, choices) @wraps(np.choose) def choose(a, choices): return elemwise(variadic_choose, a, *choices) def _isnonzero_vec(v): return bool(np.count_nonzero(v)) _isnonzero_vec = np.vectorize(_isnonzero_vec, otypes=[bool]) def isnonzero(a): try: np.zeros(tuple(), dtype=a.dtype).astype(bool) except ValueError: ###################################################### # Handle special cases where conversion to bool does # # not work correctly. 
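# --- Illustrative usage sketch (not part of the original module) ---
# Example of the ``compress`` wrapper defined above with a plain
# (non-dask) boolean condition, which takes the optimized slicing path.
# The input array is invented.
import numpy as np
import dask.array as da

x = da.from_array(np.arange(6).reshape(2, 3), chunks=(1, 3))
out = da.compress([True, False, True], x, axis=1)
print(out.compute())
# expected:
# [[0 2]
#  [3 5]]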
# # # # xref: https://github.com/numpy/numpy/issues/9479 # ###################################################### return a.map_blocks(_isnonzero_vec, dtype=bool) else: return a.astype(bool) @wraps(np.argwhere) def argwhere(a): from .creation import indices a = asarray(a) nz = isnonzero(a).flatten() ind = indices(a.shape, dtype=np.int64, chunks=a.chunks) if ind.ndim > 1: ind = stack([ind[i].ravel() for i in range(len(ind))], axis=1) ind = compress(nz, ind, axis=0) return ind @wraps(np.where) def where(condition, x=None, y=None): if (x is None) != (y is None): raise ValueError("either both or neither of x and y should be given") if (x is None) and (y is None): return nonzero(condition) if np.isscalar(condition): dtype = result_type(x, y) x = asarray(x) y = asarray(y) shape = broadcast_shapes(x.shape, y.shape) out = x if condition else y return broadcast_to(out, shape).astype(dtype) else: return elemwise(np.where, condition, x, y) @wraps(np.count_nonzero) def count_nonzero(a, axis=None): return isnonzero(asarray(a)).astype(np.int64).sum(axis=axis) @wraps(np.flatnonzero) def flatnonzero(a): return argwhere(asarray(a).ravel())[:, 0] @wraps(np.nonzero) def nonzero(a): ind = argwhere(a) if ind.ndim > 1: return tuple(ind[:, i] for i in range(ind.shape[1])) else: return (ind,) @wraps(chunk.coarsen) def coarsen(reduction, x, axes, trim_excess=False): if (not trim_excess and not all(bd % div == 0 for i, div in axes.items() for bd in x.chunks[i])): msg = "Coarsening factor does not align with block dimensions" raise ValueError(msg) if 'dask' in inspect.getfile(reduction): reduction = getattr(np, reduction.__name__) name = 'coarsen-' + tokenize(reduction, x, axes, trim_excess) dsk = {(name,) + key[1:]: (chunk.coarsen, reduction, key, axes, trim_excess) for key in flatten(x.__dask_keys__())} chunks = tuple(tuple(int(bd // axes.get(i, 1)) for bd in bds) for i, bds in enumerate(x.chunks)) dt = reduction(np.empty((1,) * x.ndim, dtype=x.dtype)).dtype return Array(sharedict.merge(x.dask, (name, dsk)), name, chunks, dtype=dt) def split_at_breaks(array, breaks, axis=0): """ Split an array into a list of arrays (using slices) at the given breaks >>> split_at_breaks(np.arange(6), [3, 5]) [array([0, 1, 2]), array([3, 4]), array([5])] """ padded_breaks = concat([[None], breaks, [None]]) slices = [slice(i, j) for i, j in sliding_window(2, padded_breaks)] preslice = (slice(None),) * axis split_array = [array[preslice + (s,)] for s in slices] return split_array @wraps(np.insert) def insert(arr, obj, values, axis): # axis is a required argument here to avoid needing to deal with the numpy # default case (which reshapes the array to make it flat) if not -arr.ndim <= axis < arr.ndim: raise IndexError('axis %r is out of bounds for an array of dimension ' '%s' % (axis, arr.ndim)) if axis < 0: axis += arr.ndim if isinstance(obj, slice): obj = np.arange(*obj.indices(arr.shape[axis])) obj = np.asarray(obj) scalar_obj = obj.ndim == 0 if scalar_obj: obj = np.atleast_1d(obj) obj = np.where(obj < 0, obj + arr.shape[axis], obj) if (np.diff(obj) < 0).any(): raise NotImplementedError( 'da.insert only implemented for monotonic ``obj`` argument') split_arr = split_at_breaks(arr, np.unique(obj), axis) if getattr(values, 'ndim', 0) == 0: # we need to turn values into a dask array name = 'values-' + tokenize(values) dtype = getattr(values, 'dtype', type(values)) values = Array({(name,): values}, name, chunks=(), dtype=dtype) values_shape = tuple(len(obj) if axis == n else s for n, s in enumerate(arr.shape)) values = broadcast_to(values, 
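# --- Illustrative usage sketch (not part of the original module) ---
# Quick example of ``where`` and ``count_nonzero`` as defined above;
# the tiny input is arbitrary.
import numpy as np
import dask.array as da

x = da.from_array(np.array([[0, 1], [2, 0]]), chunks=1)
print(da.count_nonzero(x).compute())     # expected: 2
print(da.where(x > 0, x, -1).compute())
# expected:
# [[-1  1]
#  [ 2 -1]]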
values_shape) elif scalar_obj: values = values[(slice(None),) * axis + (None,)] values_chunks = tuple(values_bd if axis == n else arr_bd for n, (arr_bd, values_bd) in enumerate(zip(arr.chunks, values.chunks))) values = values.rechunk(values_chunks) counts = np.bincount(obj)[:-1] values_breaks = np.cumsum(counts[counts > 0]) split_values = split_at_breaks(values, values_breaks, axis) interleaved = list(interleave([split_arr, split_values])) interleaved = [i for i in interleaved if i.nbytes] return concatenate(interleaved, axis=axis) dask-0.16.0/dask/array/slicing.py000066400000000000000000000673021320364734500166010ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from itertools import product import math from numbers import Integral, Number from operator import add, getitem, itemgetter import numpy as np from toolz import accumulate, memoize, merge, pluck, concat from .. import core from .. import sharedict from ..base import tokenize, is_dask_collection colon = slice(None, None, None) def _sanitize_index_element(ind): """Sanitize a one-element index.""" if isinstance(ind, Number): ind2 = int(ind) if ind2 != ind: raise IndexError("Bad index. Must be integer-like: %s" % ind) else: return ind2 elif ind is None: return None else: raise TypeError("Invalid index type", type(ind), ind) def sanitize_index(ind): """ Sanitize the elements for indexing along one axis >>> sanitize_index([2, 3, 5]) array([2, 3, 5]) >>> sanitize_index([True, False, True, False]) array([0, 2]) >>> sanitize_index(np.array([1, 2, 3])) array([1, 2, 3]) >>> sanitize_index(np.array([False, True, True])) array([1, 2]) >>> type(sanitize_index(np.int32(0))) >>> sanitize_index(1.0) 1 >>> sanitize_index(0.5) Traceback (most recent call last): ... IndexError: Bad index. Must be integer-like: 0.5 """ if ind is None: return None elif isinstance(ind, slice): return slice(_sanitize_index_element(ind.start), _sanitize_index_element(ind.stop), _sanitize_index_element(ind.step)) elif isinstance(ind, Number): return _sanitize_index_element(ind) elif is_dask_collection(ind): return ind index_array = np.asanyarray(ind) if index_array.dtype == bool: nonzero = np.nonzero(index_array) if len(nonzero) == 1: # If a 1-element tuple, unwrap the element nonzero = nonzero[0] return np.asanyarray(nonzero) elif np.issubdtype(index_array.dtype, np.integer): return index_array elif np.issubdtype(index_array.dtype, float): int_index = index_array.astype(np.intp) if np.allclose(index_array, int_index): return int_index else: check_int = np.isclose(index_array, int_index) first_err = index_array.ravel( )[np.flatnonzero(~check_int)[0]] raise IndexError("Bad index. Must be integer-like: %s" % first_err) else: raise TypeError("Invalid index type", type(ind), ind) def slice_array(out_name, in_name, blockdims, index): """ Master function for array slicing This function makes a new dask that slices blocks along every dimension and aggregates (via cartesian product) each dimension's slices so that the resulting block slices give the same results as the original slice on the original structure Index must be a tuple. 
It may contain the following types int, slice, list (at most one list), None Parameters ---------- in_name - string This is the dask variable name that will be used as input out_name - string This is the dask variable output name blockshape - iterable of integers index - iterable of integers, slices, lists, or None Returns ------- Dict where the keys are tuples of (out_name, dim_index[, dim_index[, ...]]) and the values are (function, (in_name, dim_index, dim_index, ...), (slice(...), [slice()[,...]]) Also new blockdims with shapes of each block ((10, 10, 10, 10), (20, 20)) Examples -------- >>> dsk, blockdims = slice_array('y', 'x', [(20, 20, 20, 20, 20)], ... (slice(10, 35),)) # doctest: +SKIP >>> dsk # doctest: +SKIP {('y', 0): (getitem, ('x', 0), (slice(10, 20),)), ('y', 1): (getitem, ('x', 1), (slice(0, 15),))} >>> blockdims # doctest: +SKIP ((10, 15),) See Also -------- This function works by successively unwrapping cases and passing down through a sequence of functions. slice_with_newaxis - handle None/newaxis case slice_wrap_lists - handle fancy indexing with lists slice_slices_and_integers - handle everything else """ blockdims = tuple(map(tuple, blockdims)) # x[:, :, :] - Punt and return old value if all(isinstance(index, slice) and index == slice(None, None, None) for index in index): suffixes = product(*[range(len(bd)) for bd in blockdims]) dsk = dict(((out_name,) + s, (in_name,) + s) for s in suffixes) return dsk, blockdims # Add in missing colons at the end as needed. x[5] -> x[5, :, :] not_none_count = sum(i is not None for i in index) missing = len(blockdims) - not_none_count index += (slice(None, None, None),) * missing # Pass down to next function dsk_out, bd_out = slice_with_newaxes(out_name, in_name, blockdims, index) bd_out = tuple(map(tuple, bd_out)) return dsk_out, bd_out def slice_with_newaxes(out_name, in_name, blockdims, index): """ Handle indexing with Nones Strips out Nones then hands off to slice_wrap_lists """ # Strip Nones from index index2 = tuple([ind for ind in index if ind is not None]) where_none = [i for i, ind in enumerate(index) if ind is None] where_none_orig = list(where_none) for i, x in enumerate(where_none): n = sum(isinstance(ind, int) for ind in index[:x]) if n: where_none[i] -= n # Pass down and do work dsk, blockdims2 = slice_wrap_lists(out_name, in_name, blockdims, index2) if where_none: expand = expander(where_none) expand_orig = expander(where_none_orig) # Insert ",0" into the key: ('x', 2, 3) -> ('x', 0, 2, 0, 3) dsk2 = {(out_name,) + expand(k[1:], 0): (v[:2] + (expand_orig(v[2], None),)) for k, v in dsk.items() if k[0] == out_name} # Add back intermediate parts of the dask that weren't the output dsk3 = merge(dsk2, {k: v for k, v in dsk.items() if k[0] != out_name}) # Insert (1,) into blockdims: ((2, 2), (3, 3)) -> ((2, 2), (1,), (3, 3)) blockdims3 = expand(blockdims2, (1,)) return dsk3, blockdims3 else: return dsk, blockdims2 def slice_wrap_lists(out_name, in_name, blockdims, index): """ Fancy indexing along blocked array dasks Handles index of type list. Calls slice_slices_and_integers for the rest See Also -------- take - handle slicing with lists ("fancy" indexing) slice_slices_and_integers - handle slicing with slices and integers """ assert all(isinstance(i, (slice, list, Integral, np.ndarray)) for i in index) if not len(blockdims) == len(index): raise IndexError("Too many indices for array") # Do we have more than one list in the index? 
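# --- Illustrative sketch (not part of the original module) ---
# What ``slice_array``/``_slice_1d`` mean in practice: slicing a chunked
# array only touches the blocks that overlap the slice, and the output
# chunk sizes follow directly (compare the doctest above).
import dask.array as da

x = da.ones((100,), chunks=(20,))
y = x[10:35]
print(y.chunks)  # expected: ((10, 15),)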
where_list = [i for i, ind in enumerate(index) if isinstance(ind, np.ndarray) and ind.ndim > 0] if len(where_list) > 1: raise NotImplementedError("Don't yet support nd fancy indexing") # Is the single list an empty list? In this case just treat it as a zero # length slice if where_list and not index[where_list[0]].size: index = list(index) index[where_list.pop()] = slice(0, 0, 1) index = tuple(index) # No lists, hooray! just use slice_slices_and_integers if not where_list: return slice_slices_and_integers(out_name, in_name, blockdims, index) # Replace all lists with full slices [3, 1, 0] -> slice(None, None, None) index_without_list = tuple(slice(None, None, None) if isinstance(i, np.ndarray) else i for i in index) # lists and full slices. Just use take if all(isinstance(i, np.ndarray) or i == slice(None, None, None) for i in index): axis = where_list[0] blockdims2, dsk3 = take(out_name, in_name, blockdims, index[where_list[0]], axis=axis) # Mixed case. Both slices/integers and lists. slice/integer then take else: # Do first pass without lists tmp = 'slice-' + tokenize((out_name, in_name, blockdims, index)) dsk, blockdims2 = slice_slices_and_integers(tmp, in_name, blockdims, index_without_list) # After collapsing some axes due to int indices, adjust axis parameter axis = where_list[0] axis2 = axis - sum(1 for i, ind in enumerate(index) if i < axis and isinstance(ind, Integral)) # Do work blockdims2, dsk2 = take(out_name, tmp, blockdims2, index[axis], axis=axis2) dsk3 = merge(dsk, dsk2) return dsk3, blockdims2 def slice_slices_and_integers(out_name, in_name, blockdims, index): """ Dask array indexing with slices and integers See Also -------- _slice_1d """ shape = tuple(map(sum, blockdims)) for dim, ind in zip(shape, index): if np.isnan(dim) and ind != slice(None, None, None): raise ValueError("Arrays chunk sizes are unknown: %s", shape) assert all(isinstance(ind, (slice, Integral)) for ind in index) assert len(index) == len(blockdims) # Get a list (for each dimension) of dicts{blocknum: slice()} block_slices = list(map(_slice_1d, shape, blockdims, index)) sorted_block_slices = [sorted(i.items()) for i in block_slices] # (in_name, 1, 1, 2), (in_name, 1, 1, 4), (in_name, 2, 1, 2), ... in_names = list(product([in_name], *[pluck(0, s) for s in sorted_block_slices])) # (out_name, 0, 0, 0), (out_name, 0, 0, 1), (out_name, 0, 1, 0), ... out_names = list(product([out_name], *[range(len(d))[::-1] if i.step and i.step < 0 else range(len(d)) for d, i in zip(block_slices, index) if not isinstance(i, Integral)])) all_slices = list(product(*[pluck(1, s) for s in sorted_block_slices])) dsk_out = {out_name: (getitem, in_name, slices) for out_name, in_name, slices in zip(out_names, in_names, all_slices)} new_blockdims = [new_blockdim(d, db, i) for d, i, db in zip(shape, index, blockdims) if not isinstance(i, Integral)] return dsk_out, new_blockdims def _slice_1d(dim_shape, lengths, index): """Returns a dict of {blocknum: slice} This function figures out where each slice should start in each block for a single dimension. If the slice won't return any elements in the block, that block will not be in the output. Parameters ---------- dim_shape - the number of elements in this dimension. 
This should be a positive, non-zero integer blocksize - the number of elements per block in this dimension This should be a positive, non-zero integer index - a description of the elements in this dimension that we want This might be an integer, a slice(), or an Ellipsis Returns ------- dictionary where the keys are the integer index of the blocks that should be sliced and the values are the slices Examples -------- Trivial slicing >>> _slice_1d(100, [60, 40], slice(None, None, None)) {0: slice(None, None, None), 1: slice(None, None, None)} 100 length array cut into length 20 pieces, slice 0:35 >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(0, 35)) {0: slice(None, None, None), 1: slice(0, 15, 1)} Support irregular blocks and various slices >>> _slice_1d(100, [20, 10, 10, 10, 25, 25], slice(10, 35)) {0: slice(10, 20, 1), 1: slice(None, None, None), 2: slice(0, 5, 1)} Support step sizes >>> _slice_1d(100, [15, 14, 13], slice(10, 41, 3)) {0: slice(10, 15, 3), 1: slice(1, 14, 3), 2: slice(2, 12, 3)} >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(0, 100, 40)) # step > blocksize {0: slice(0, 20, 40), 2: slice(0, 20, 40), 4: slice(0, 20, 40)} Also support indexing single elements >>> _slice_1d(100, [20, 20, 20, 20, 20], 25) {1: 5} And negative slicing >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, 0, -3)) {0: slice(-2, -20, -3), 1: slice(-1, -21, -3), 2: slice(-3, -21, -3), 3: slice(-2, -21, -3), 4: slice(-1, -21, -3)} >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, 12, -3)) {0: slice(-2, -8, -3), 1: slice(-1, -21, -3), 2: slice(-3, -21, -3), 3: slice(-2, -21, -3), 4: slice(-1, -21, -3)} >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, -12, -3)) {4: slice(-1, -12, -3)} """ if isinstance(index, Integral): i = 0 ind = index lens = list(lengths) while ind >= lens[0]: i += 1 ind -= lens.pop(0) return {i: ind} assert isinstance(index, slice) if index == colon: return {k: colon for k in range(len(lengths))} step = index.step or 1 if step > 0: start = index.start or 0 stop = index.stop if index.stop is not None else dim_shape else: start = index.start or dim_shape - 1 start = dim_shape - 1 if start >= dim_shape else start stop = -(dim_shape + 1) if index.stop is None else index.stop # posify start and stop if start < 0: start += dim_shape if stop < 0: stop += dim_shape d = dict() if step > 0: for i, length in enumerate(lengths): if start < length and stop > 0: d[i] = slice(start, min(stop, length), step) start = (start - length) % step else: start = start - length stop -= length else: rstart = start # running start chunk_boundaries = list(accumulate(add, lengths)) for i, chunk_stop in reversed(list(enumerate(chunk_boundaries))): # create a chunk start and stop if i == 0: chunk_start = 0 else: chunk_start = chunk_boundaries[i - 1] # if our slice is in this chunk if (chunk_start <= rstart < chunk_stop) and (rstart > stop): d[i] = slice(rstart - chunk_stop, max(chunk_start - chunk_stop - 1, stop - chunk_stop), step) # compute the next running start point, offset = (rstart - (chunk_start - 1)) % step rstart = chunk_start + offset - 1 # replace 0:20:1 with : if appropriate for k, v in d.items(): if v == slice(0, lengths[k], 1): d[k] = slice(None, None, None) if not d: # special case x[:0] d[0] = slice(0, 0, 1) return d def partition_by_size(sizes, seq): """ >>> partition_by_size([10, 20, 10], [1, 5, 9, 12, 29, 35]) [array([1, 5, 9]), array([ 2, 19]), array([5])] """ seq = np.asanyarray(seq) left = np.empty(len(sizes) + 1, dtype=int) left[0] = 0 right = np.cumsum(sizes, out=left[1:]) locations = 
np.empty(len(sizes) + 1, dtype=int) locations[0] = 0 locations[1:] = np.searchsorted(seq, right) return [(seq[j:k] - l) for j, k, l in zip(locations[:-1], locations[1:], left)] def issorted(seq): """ Is sequence sorted? >>> issorted([1, 2, 3]) True >>> issorted([3, 1, 2]) False """ if len(seq) == 0: return True return np.all(seq[:-1] <= seq[1:]) def take_sorted(outname, inname, blockdims, index, axis=0): """ Index array with sorted list index Forms a dask for the following case x[:, [1, 3, 5, 10], ...] where the index, ``[1, 3, 5, 10]`` is sorted in non-decreasing order. >>> blockdims, dsk = take('y', 'x', [(20, 20, 20, 20)], [1, 3, 5, 47], axis=0) >>> blockdims ((3, 1),) >>> dsk # doctest: +SKIP {('y', 0): (getitem, ('x', 0), ([1, 3, 5],)), ('y', 1): (getitem, ('x', 2), ([7],))} See Also -------- take - calls this function """ sizes = blockdims[axis] # the blocksizes on the axis that we care about index_lists = partition_by_size(sizes, index) where_index = [i for i, il in enumerate(index_lists) if len(il)] index_lists = [il for il in index_lists if len(il)] dims = [range(len(bd)) for bd in blockdims] indims = list(dims) indims[axis] = list(range(len(where_index))) keys = list(product([outname], *indims)) outdims = list(dims) outdims[axis] = where_index slices = [[colon] * len(bd) for bd in blockdims] slices[axis] = index_lists slices = list(product(*slices)) inkeys = list(product([inname], *outdims)) values = [(getitem, inkey, slc) for inkey, slc in zip(inkeys, slices)] blockdims2 = list(blockdims) blockdims2[axis] = tuple(map(len, index_lists)) return tuple(blockdims2), dict(zip(keys, values)) def take(outname, inname, blockdims, index, axis=0): """ Index array with an iterable of index Handles a single index by a single list Mimics ``np.take`` >>> blockdims, dsk = take('y', 'x', [(20, 20, 20, 20)], [5, 1, 47, 3], axis=0) >>> blockdims ((4,),) >>> dsk # doctest: +SKIP {('y', 0): (getitem, (np.concatenate, [(getitem, ('x', 0), ([1, 3, 5],)), (getitem, ('x', 2), ([7],))], 0), (2, 0, 4, 1))} When list is sorted we retain original block structure >>> blockdims, dsk = take('y', 'x', [(20, 20, 20, 20)], [1, 3, 5, 47], axis=0) >>> blockdims ((3, 1),) >>> dsk # doctest: +SKIP {('y', 0): (getitem, ('x', 0), ([1, 3, 5],)), ('y', 2): (getitem, ('x', 2), ([7],))} """ index = np.asanyarray(index) if issorted(index): return take_sorted(outname, inname, blockdims, index, axis) if isinstance(index, np.ndarray) and index.ndim > 0: sorted_idx = np.sort(index) else: sorted_idx = index n = len(blockdims) sizes = blockdims[axis] # the blocksizes on the axis that we care about index_lists = partition_by_size(sizes, sorted_idx) dims = [[0] if axis == i else list(range(len(bd))) for i, bd in enumerate(blockdims)] keys = list(product([outname], *dims)) rev_index = np.searchsorted(sorted_idx, index) vals = [(getitem, (np.concatenate, [(getitem, ((inname, ) + d[:axis] + (i, ) + d[axis + 1:]), ((colon, ) * axis + (IL, ) + (colon, ) * (n - axis - 1))) for i, IL in enumerate(index_lists) if len(IL)], axis), ((colon, ) * axis + (rev_index, ) + (colon, ) * (n - axis - 1))) for d in product(*dims)] blockdims2 = list(blockdims) blockdims2[axis] = (len(index), ) return tuple(blockdims2), dict(zip(keys, vals)) def posify_index(shape, ind): """ Flip negative indices around to positive ones >>> posify_index(10, 3) 3 >>> posify_index(10, -3) 7 >>> posify_index(10, [3, -3]) array([3, 7]) >>> posify_index((10, 20), (3, -3)) (3, 17) >>> posify_index((10, 20), (3, [3, 4, -3])) # doctest: +NORMALIZE_WHITESPACE (3, array([ 3, 4, 
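# --- Illustrative sketch (not part of the original module) ---
# The observable effect of ``take``/``take_sorted`` above when fancy
# indexing a dask array: a sorted index list keeps the original block
# structure, while an unsorted one collapses into a single output chunk.
import numpy as np
import dask.array as da

x = da.from_array(np.arange(80), chunks=20)
print(x[[1, 3, 5, 47]].chunks)   # expected: ((3, 1),)
print(x[[5, 1, 47, 3]].chunks)   # expected: ((4,),)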
17])) """ if isinstance(ind, tuple): return tuple(map(posify_index, shape, ind)) if isinstance(ind, Integral): if ind < 0 and not math.isnan(shape): return ind + shape else: return ind if isinstance(ind, (np.ndarray, list)) and not math.isnan(shape): ind = np.asanyarray(ind) return np.where(ind < 0, ind + shape, ind) return ind @memoize def _expander(where): if not where: def expand(seq, val): return seq return expand else: decl = """def expand(seq, val): return ({left}) + tuple({right}) """ left = [] j = 0 for i in range(max(where) + 1): if i in where: left.append("val, ") else: left.append("seq[%d], " % j) j += 1 right = "seq[%d:]" % j left = "".join(left) decl = decl.format(**locals()) ns = {} exec(compile(decl, "", "exec"), ns, ns) return ns['expand'] def expander(where): """Create a function to insert value at many locations in sequence. >>> expander([0, 2])(['a', 'b', 'c'], 'z') ('z', 'a', 'z', 'b', 'c') """ return _expander(tuple(where)) def new_blockdim(dim_shape, lengths, index): """ >>> new_blockdim(100, [20, 10, 20, 10, 40], slice(0, 90, 2)) [10, 5, 10, 5, 15] >>> new_blockdim(100, [20, 10, 20, 10, 40], [5, 1, 30, 22]) [4] >>> new_blockdim(100, [20, 10, 20, 10, 40], slice(90, 10, -2)) [16, 5, 10, 5, 4] """ if index == slice(None, None, None): return lengths if isinstance(index, list): return [len(index)] assert not isinstance(index, Integral) pairs = sorted(_slice_1d(dim_shape, lengths, index).items(), key=itemgetter(0)) slices = [slice(0, lengths[i], 1) if slc == slice(None, None, None) else slc for i, slc in pairs] if isinstance(index, slice) and index.step and index.step < 0: slices = slices[::-1] return [int(math.ceil((1. * slc.stop - slc.start) / slc.step)) for slc in slices] def replace_ellipsis(n, index): """ Replace ... with slices, :, : ,: >>> replace_ellipsis(4, (3, Ellipsis, 2)) (3, slice(None, None, None), slice(None, None, None), 2) >>> replace_ellipsis(2, (Ellipsis, None)) (slice(None, None, None), slice(None, None, None), None) """ # Careful about using in or index because index may contain arrays isellipsis = [i for i, ind in enumerate(index) if ind is Ellipsis] if not isellipsis: return index else: loc = isellipsis[0] extra_dimensions = n - (len(index) - sum(i is None for i in index) - 1) return (index[:loc] + (slice(None, None, None),) * extra_dimensions + index[loc + 1:]) def normalize_slice(idx, dim): """ Normalize slices to canonical form Parameters ---------- idx: slice or other index dim: dimension length Examples -------- >>> normalize_slice(slice(0, 10, 1), 10) slice(None, None, None) """ if isinstance(idx, slice): start, stop, step = idx.start, idx.stop, idx.step if start is not None: if start < 0 and not math.isnan(dim): start = max(0, start + dim) elif start > dim: start = dim if stop is not None: if stop < 0 and not math.isnan(dim): stop = max(0, stop + dim) elif stop > dim: stop = dim if start == 0: start = None if stop == dim: stop = None if step == 1: step = None return slice(start, stop, step) return idx def normalize_index(idx, shape): """ Normalize slicing indexes 1. Replaces ellipses with many full slices 2. Adds full slices to end of index 3. Checks bounding conditions 4. Replaces numpy arrays with lists 5. Posify's integers and lists 6. 
Normalizes slices to canonical form Examples -------- >>> normalize_index(1, (10,)) (1,) >>> normalize_index(-1, (10,)) (9,) >>> normalize_index([-1], (10,)) (array([9]),) >>> normalize_index(slice(-3, 10, 1), (10,)) (slice(7, None, None),) >>> normalize_index((Ellipsis, None), (10,)) (slice(None, None, None), None) """ if not isinstance(idx, tuple): idx = (idx,) idx = replace_ellipsis(len(shape), idx) n_sliced_dims = 0 for i in idx: if hasattr(i, 'ndim') and i.ndim >= 1: n_sliced_dims += i.ndim elif i is None: continue else: n_sliced_dims += 1 idx = idx + (slice(None),) * (len(shape) - n_sliced_dims) if len([i for i in idx if i is not None]) > len(shape): raise IndexError("Too many indices for array") none_shape = [] i = 0 for ind in idx: if ind is not None: none_shape.append(shape[i]) i += 1 else: none_shape.append(None) for i, d in zip(idx, none_shape): if d is not None: check_index(i, d) idx = tuple(map(sanitize_index, idx)) idx = tuple(map(normalize_slice, idx, none_shape)) idx = posify_index(none_shape, idx) return idx def check_index(ind, dimension): """ Check validity of index for a given dimension Examples -------- >>> check_index(3, 5) >>> check_index(5, 5) Traceback (most recent call last): ... IndexError: Index is not smaller than dimension 5 >= 5 >>> check_index(6, 5) Traceback (most recent call last): ... IndexError: Index is not smaller than dimension 6 >= 5 >>> check_index(-1, 5) >>> check_index(-6, 5) Traceback (most recent call last): ... IndexError: Negative index is not greater than negative dimension -6 <= -5 >>> check_index([1, 2], 5) >>> check_index([6, 3], 5) Traceback (most recent call last): ... IndexError: Index out of bounds 5 >>> check_index(slice(0, 3), 5) """ # unknown dimension, assumed to be in bounds if np.isnan(dimension): return elif isinstance(ind, (list, np.ndarray)): x = np.asanyarray(ind) if (x >= dimension).any() or (x < -dimension).any(): raise IndexError("Index out of bounds %s" % dimension) elif isinstance(ind, slice): return elif is_dask_collection(ind): return elif ind is None: return elif ind >= dimension: raise IndexError("Index is not smaller than dimension %d >= %d" % (ind, dimension)) elif ind < -dimension: msg = "Negative index is not greater than negative dimension %d <= -%d" raise IndexError(msg % (ind, dimension)) def slice_with_dask_array(x, index): from .core import Array, atop, elemwise out_index = [slice(None) if isinstance(ind, Array) and ind.dtype == bool else ind for ind in index] if len(index) == 1 and index[0].ndim == x.ndim: y = elemwise(getitem, x, *index, dtype=x.dtype) name = 'getitem-' + tokenize(x, index) dsk = {(name, i): k for i, k in enumerate(core.flatten(y.__dask_keys__()))} chunks = ((np.nan,) * y.npartitions,) return (Array(sharedict.merge(y.dask, (name, dsk)), name, chunks, x.dtype), out_index) if any(isinstance(ind, Array) and ind.dtype == bool and ind.ndim != 1 for ind in index): raise NotImplementedError("Slicing with dask.array only permitted when " "the indexer has only one dimension or when " "it has the same dimension as the sliced " "array") indexes = [ind if isinstance(ind, Array) and ind.dtype == bool else slice(None) for ind in index] arginds = [] i = 0 for ind in indexes: if isinstance(ind, Array) and ind.dtype == bool: new = (ind, tuple(range(i, i + ind.ndim))) i += x.ndim else: new = (slice(None), None) i += 1 arginds.append(new) arginds = list(concat(arginds)) out = atop(getitem_variadic, tuple(range(x.ndim)), x, tuple(range(x.ndim)), *arginds, dtype=x.dtype) chunks = [] for ind, chunk in 
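# --- Illustrative sketch (not part of the original module) ---
# Example of ``slice_with_dask_array`` above: indexing with a boolean
# dask array of the same shape yields chunks of unknown (NaN) size until
# the graph is actually computed.
import dask.array as da

x = da.arange(10, chunks=5)
y = x[x % 3 == 0]
print(y.compute())  # expected: [0 3 6 9]
print(y.chunks)     # expected: ((nan, nan),)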
zip(index, out.chunks): if isinstance(ind, Array) and ind.dtype == bool: chunks.append((np.nan,) * len(chunk)) else: chunks.append(chunk) out._chunks = tuple(chunks) return out, tuple(out_index) def getitem_variadic(x, *index): return x[index] dask-0.16.0/dask/array/stats.py000066400000000000000000000336311320364734500163050ustar00rootroot00000000000000""" Statistical functions and tests, following scipy.stats. Some differences - We don't handle missing values at all """ # This is lightly adapted from scipy.stats 0.19 # http://github.com/scipy/scipy/blob/v0.19.0/scipy/stats/stats.py # The original copyright notice follows: # Copyright 2002 Gary Strangman. All rights reserved # Copyright 2002-2016 The SciPy Developers # # The original code from Gary Strangman was heavily adapted for # use in SciPy by Travis Oliphant. The original code came with the # following disclaimer: # # This software is provided "as-is". There are no expressed or implied # warranties of any kind, including, but not limited to, the warranties # of merchantability and fitness for a given application. In no event # shall Gary Strangman be liable for any direct, indirect, incidental, # special, exemplary or consequential damages (including, but not limited # to, loss of use, data or profits, or business interruption) however # caused and on any theory of liability, whether in contract, strict # liability or tort (including negligence or otherwise) arising in any way # out of the use of this software, even if advised of the possibility of # such damage. import math import numpy as np import dask.array as da from dask.array.random import doc_wraps from dask.array.ufunc import wrap_elemwise from dask import delayed try: import scipy.stats except ImportError: raise ImportError("`dask.array.stats` requires `scipy` to be installed.") from scipy.stats import distributions from scipy import special from scipy.stats.stats import (Ttest_indResult, Ttest_1sampResult, Ttest_relResult, Power_divergenceResult, NormaltestResult, SkewtestResult, KurtosistestResult, F_onewayResult) __all__ = ['ttest_ind', 'ttest_1samp', 'ttest_rel', 'chisquare', 'power_divergence', 'skew', 'skewtest', 'kurtosis', 'kurtosistest', 'normaltest', 'f_oneway', 'moment'] # ----------------- # Statistical Tests # ----------------- @doc_wraps(scipy.stats.ttest_ind) def ttest_ind(a, b, axis=0, equal_var=True): v1 = da.var(a, axis, ddof=1) # XXX: np -> da v2 = da.var(b, axis, ddof=1) # XXX: np -> da n1 = a.shape[axis] n2 = b.shape[axis] if equal_var: df, denom = _equal_var_ttest_denom(v1, n1, v2, n2) else: df, denom = _unequal_var_ttest_denom(v1, n1, v2, n2) res = _ttest_ind_from_stats(da.mean(a, axis), da.mean(b, axis), denom, df) return delayed(Ttest_indResult, nout=2)(*res) @doc_wraps(scipy.stats.ttest_1samp) def ttest_1samp(a, popmean, axis=0, nan_policy='propagate'): if nan_policy != 'propagate': raise NotImplementedError("`nan_policy` other than 'propagate' " "have not been implemented.") n = a.shape[axis] df = n - 1 d = da.mean(a, axis) - popmean v = da.var(a, axis, ddof=1) denom = da.sqrt(v / float(n)) with np.errstate(divide='ignore', invalid='ignore'): t = da.divide(d, denom) t, prob = _ttest_finish(df, t) return delayed(Ttest_1sampResult, nout=2)(t, prob) @doc_wraps(scipy.stats.ttest_rel) def ttest_rel(a, b, axis=0, nan_policy='propagate'): if nan_policy != 'propagate': raise NotImplementedError("`nan_policy` other than 'propagate' " "have not been implemented.") n = a.shape[axis] df = float(n - 1) d = (a - b).astype(np.float64) v = da.var(d, axis, 
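# --- Illustrative usage sketch (not part of the original module) ---
# Example of the blocked ``ttest_ind`` defined above.  It assumes scipy
# is installed (required for this submodule); the random inputs and
# chunk sizes are arbitrary, and the result is a dask ``Delayed``.
import dask.array as da
import dask.array.stats as dask_stats

a = da.random.normal(0.0, 1.0, size=(1000,), chunks=100)
b = da.random.normal(0.5, 1.0, size=(1000,), chunks=100)
res = dask_stats.ttest_ind(a, b)
print(res.compute())  # Ttest_indResult(statistic=..., pvalue=...)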
ddof=1) dm = da.mean(d, axis) denom = da.sqrt(v / float(n)) with np.errstate(divide='ignore', invalid='ignore'): t = da.divide(dm, denom) t, prob = _ttest_finish(df, t) return delayed(Ttest_relResult, nout=2)(t, prob) @doc_wraps(scipy.stats.chisquare) def chisquare(f_obs, f_exp=None, ddof=0, axis=0): return power_divergence(f_obs, f_exp=f_exp, ddof=ddof, axis=axis, lambda_="pearson") @doc_wraps(scipy.stats.power_divergence) def power_divergence(f_obs, f_exp=None, ddof=0, axis=0, lambda_=None): if isinstance(lambda_, str): # TODO: public api if lambda_ not in scipy.stats.stats._power_div_lambda_names: names = repr(list(scipy.stats.stats._power_div_lambda_names.keys()))[1:-1] raise ValueError("invalid string for lambda_: {0!r}. Valid strings " "are {1}".format(lambda_, names)) lambda_ = scipy.stats.stats._power_div_lambda_names[lambda_] elif lambda_ is None: lambda_ = 1 if f_exp is not None: # f_exp = np.atleast_1d(np.asanyarray(f_exp)) pass else: f_exp = f_obs.mean(axis=axis, keepdims=True) # `terms` is the array of terms that are summed along `axis` to create # the test statistic. We use some specialized code for a few special # cases of lambda_. if lambda_ == 1: # Pearson's chi-squared statistic terms = (f_obs - f_exp)**2 / f_exp elif lambda_ == 0: # Log-likelihood ratio (i.e. G-test) terms = 2.0 * _xlogy(f_obs, f_obs / f_exp) elif lambda_ == -1: # Modified log-likelihood ratio terms = 2.0 * _xlogy(f_exp, f_exp / f_obs) else: # General Cressie-Read power divergence. terms = f_obs * ((f_obs / f_exp)**lambda_ - 1) terms /= 0.5 * lambda_ * (lambda_ + 1) stat = terms.sum(axis=axis) num_obs = _count(terms, axis=axis) # ddof = asarray(ddof) p = delayed(distributions.chi2.sf)(stat, num_obs - 1 - ddof) return delayed(Power_divergenceResult, nout=2)(stat, p) @doc_wraps(scipy.stats.skew) def skew(a, axis=0, bias=True, nan_policy='propagate'): if nan_policy != 'propagate': raise NotImplementedError("`nan_policy` other than 'propagate' " "have not been implemented.") n = a.shape[axis] # noqa; for bias m2 = moment(a, 2, axis) m3 = moment(a, 3, axis) zero = (m2 == 0) vals = da.where(~zero, m3 / m2**1.5, 0.) # vals = da.where(~zero, (m2, m3), # lambda m2, m3: m3 / m2**1.5, # 0.) if not bias: # Need a version of np.place raise NotImplementedError("bias=False is not implemented.") if vals.ndim == 0: return vals # TODO: scalar # return vals.item() return vals @doc_wraps(scipy.stats.skewtest) def skewtest(a, axis=0, nan_policy='propagate'): if nan_policy != 'propagate': raise NotImplementedError("`nan_policy` other than 'propagate' " "have not been implemented.") b2 = skew(a, axis) n = float(a.shape[axis]) if n < 8: raise ValueError( "skewtest is not valid with less than 8 samples; %i samples" " were given." 
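# --- Illustrative usage sketch (not part of the original module) ---
# Example of the blocked ``chisquare`` defined above (a thin wrapper over
# ``power_divergence``).  Assumes scipy is installed; the observed counts
# are made up.
import numpy as np
import dask.array as da
import dask.array.stats as dask_stats

observed = da.from_array(np.array([10.0, 12.0, 9.0, 11.0]), chunks=2)
res = dask_stats.chisquare(observed)
print(res.compute())  # Power_divergenceResult(statistic=..., pvalue=...)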
% int(n)) y = b2 * math.sqrt(((n + 1) * (n + 3)) / (6.0 * (n - 2))) beta2 = (3.0 * (n**2 + 27 * n - 70) * (n + 1) * (n + 3) / ((n - 2.0) * (n + 5) * (n + 7) * (n + 9))) W2 = -1 + math.sqrt(2 * (beta2 - 1)) delta = 1 / math.sqrt(0.5 * math.log(W2)) alpha = math.sqrt(2.0 / (W2 - 1)) y = np.where(y == 0, 1, y) Z = delta * np.log(y / alpha + np.sqrt((y / alpha)**2 + 1)) return delayed(SkewtestResult, nout=2)(Z, 2 * distributions.norm.sf(np.abs(Z))) @doc_wraps(scipy.stats.kurtosis) def kurtosis(a, axis=0, fisher=True, bias=True, nan_policy='propagate'): if nan_policy != 'propagate': raise NotImplementedError("`nan_policy` other than 'propagate' " "have not been implemented.") n = a.shape[axis] # noqa; for bias m2 = moment(a, 2, axis) m4 = moment(a, 4, axis) zero = (m2 == 0) olderr = np.seterr(all='ignore') try: vals = da.where(zero, 0, m4 / m2**2.0) finally: np.seterr(**olderr) if not bias: # need a version of np.place raise NotImplementedError("bias=False is not implemented.") if vals.ndim == 0: return vals # TODO: scalar # vals = vals.item() # array scalar if fisher: return vals - 3 else: return vals # TODO: scalar; vals = vals.item() # array scalar @doc_wraps(scipy.stats.kurtosistest) def kurtosistest(a, axis=0, nan_policy='propagate'): if nan_policy != 'propagate': raise NotImplementedError("`nan_policy` other than 'propagate' " "have not been implemented.") n = float(a.shape[axis]) b2 = kurtosis(a, axis, fisher=False) E = 3.0 * (n - 1) / (n + 1) varb2 = 24.0 * n * (n - 2) * (n - 3) / ((n + 1) * (n + 1.) * (n + 3) * (n + 5)) # [1]_ Eq. 1 x = (b2 - E) / np.sqrt(varb2) # [1]_ Eq. 4 # [1]_ Eq. 2: sqrtbeta1 = 6.0 * (n * n - 5 * n + 2) / ((n + 7) * (n + 9)) * np.sqrt((6.0 * (n + 3) * (n + 5)) / (n * (n - 2) * (n - 3))) # [1]_ Eq. 3: A = 6.0 + 8.0 / sqrtbeta1 * (2.0 / sqrtbeta1 + np.sqrt(1 + 4.0 / (sqrtbeta1**2))) term1 = 1 - 2 / (9.0 * A) denom = 1 + x * np.sqrt(2 / (A - 4.0)) denom = np.where(denom < 0, 99, denom) term2 = np.where(denom < 0, term1, np.power((1 - 2.0 / A) / denom, 1 / 3.0)) Z = (term1 - term2) / np.sqrt(2 / (9.0 * A)) # [1]_ Eq. 5 Z = np.where(denom == 99, 0, Z) if Z.ndim == 0: Z = Z[()] # zprob uses upper tail, so Z needs to be positive return delayed(KurtosistestResult, nout=2)(Z, 2 * distributions.norm.sf(np.abs(Z))) @doc_wraps(scipy.stats.normaltest) def normaltest(a, axis=0, nan_policy='propagate'): if nan_policy != 'propagate': raise NotImplementedError("`nan_policy` other than 'propagate' " "have not been implemented.") s, _ = skewtest(a, axis) k, _ = kurtosistest(a, axis) k2 = s * s + k * k return delayed(NormaltestResult, nout=2)(k2, delayed(distributions.chi2.sf)(k2, 2)) @doc_wraps(scipy.stats.f_oneway) def f_oneway(*args): # args = [np.asarray(arg, dtype=float) for arg in args] # ANOVA on N groups, each in its own array num_groups = len(args) alldata = da.concatenate(args) bign = len(alldata) # Determine the mean of the data, and subtract that from all inputs to a # variance (via sum_of_sq / sq_of_sum) calculation. Variance is invariance # to a shift in location, and centering all data around zero vastly # improves numerical stability. 
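# --- Illustrative usage sketch (not part of the original module) ---
# Example of the blocked one-way ANOVA ``f_oneway`` defined above.
# Assumes scipy is installed; the three random groups are arbitrary.
import dask.array as da
import dask.array.stats as dask_stats

g1 = da.random.normal(0.0, 1.0, size=(200,), chunks=50)
g2 = da.random.normal(0.0, 1.0, size=(200,), chunks=50)
g3 = da.random.normal(1.0, 1.0, size=(200,), chunks=50)
res = dask_stats.f_oneway(g1, g2, g3)
print(res.compute())  # F_onewayResult(statistic=..., pvalue=...)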
offset = alldata.mean() alldata -= offset sstot = _sum_of_squares(alldata) - (_square_of_sums(alldata) / float(bign)) ssbn = 0 for a in args: ssbn += _square_of_sums(a - offset) / float(len(a)) # Naming: variables ending in bn/b are for "between treatments", wn/w are # for "within treatments" ssbn -= (_square_of_sums(alldata) / float(bign)) sswn = sstot - ssbn dfbn = num_groups - 1 dfwn = bign - num_groups msb = ssbn / float(dfbn) msw = sswn / float(dfwn) f = msb / msw prob = _fdtrc(dfbn, dfwn, f) # equivalent to stats.f.sf return delayed(F_onewayResult, nout=2)(f, prob) @doc_wraps(scipy.stats.moment) def moment(a, moment=1, axis=0, nan_policy='propagate'): if nan_policy != 'propagate': raise NotImplementedError("`nan_policy` other than 'propagate' " "have not been implemented.") return da.moment(a, moment, axis=axis) # ------- # Helpers # ------- # Don't really want to do all of scipy.special (or do we?) _xlogy = wrap_elemwise(special.xlogy) _fdtrc = wrap_elemwise(special.fdtrc) def _equal_var_ttest_denom(v1, n1, v2, n2): df = n1 + n2 - 2.0 svar = ((n1 - 1) * v1 + (n2 - 1) * v2) / df denom = da.sqrt(svar * (1.0 / n1 + 1.0 / n2)) # XXX: np -> da return df, denom def _unequal_var_ttest_denom(v1, n1, v2, n2): vn1 = v1 / n1 vn2 = v2 / n2 with np.errstate(divide='ignore', invalid='ignore'): df = (vn1 + vn2)**2 / (vn1**2 / (n1 - 1) + vn2**2 / (n2 - 1)) # If df is undefined, variances are zero (assumes n1 > 0 & n2 > 0). # Hence it doesn't matter what df is as long as it's not NaN. df = da.where(da.isnan(df), 1, df) # XXX: np -> da denom = da.sqrt(vn1 + vn2) return df, denom def _ttest_ind_from_stats(mean1, mean2, denom, df): d = mean1 - mean2 with np.errstate(divide='ignore', invalid='ignore'): t = da.divide(d, denom) t, prob = _ttest_finish(df, t) return (t, prob) def _ttest_finish(df, t): """Common code between all 3 t-test functions.""" # XXX: np.abs -> da.absolute # XXX: delayed(distributions.t.sf) prob = delayed(distributions.t.sf)(da.absolute(t), df) * 2 # use np.abs to get upper tail if t.ndim == 0: t = t[()] return t, prob def _count(x, axis=None): if axis is None: return x.size else: return x.shape[axis] def _sum_of_squares(a, axis=0): """ Squares each element of the input array, and returns the sum(s) of that. Parameters ---------- a : array_like Input array. axis : int or None, optional Axis along which to calculate. Default is 0. If None, compute over the whole array `a`. Returns ------- sum_of_squares : ndarray The sum along the given axis for (a**2). See also -------- _square_of_sums : The square(s) of the sum(s) (the opposite of `_sum_of_squares`). """ return da.sum(a * a, axis) def _square_of_sums(a, axis=0): """ Sums elements of the input array, and returns the square(s) of that sum. Parameters ---------- a : array_like Input array. axis : int or None, optional Axis along which to calculate. Default is 0. If None, compute over the whole array `a`. Returns ------- square_of_sums : float or ndarray The square of the sum over `axis`. See also -------- _sum_of_squares : The sum of squares (the opposite of `square_of_sums`). 
""" s = da.sum(a, axis) return s * s dask-0.16.0/dask/array/tests/000077500000000000000000000000001320364734500157315ustar00rootroot00000000000000dask-0.16.0/dask/array/tests/__init__.py000066400000000000000000000000001320364734500200300ustar00rootroot00000000000000dask-0.16.0/dask/array/tests/test_array_core.py000066400000000000000000002325771320364734500215100ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import copy import pytest np = pytest.importorskip('numpy') import os import sys import time from distutils.version import LooseVersion import operator from operator import add, sub, getitem from threading import Lock import warnings from toolz import merge, countby, concat from toolz.curried import identity import dask import dask.array as da from dask.base import tokenize, compute_as_if_collection from dask.delayed import delayed from dask.local import get_sync from dask.utils import ignoring, tmpfile, tmpdir from dask.utils_test import inc from dask.array import chunk from dask.array.core import (getem, getter, top, dotmany, concatenate3, broadcast_dimensions, Array, stack, concatenate, from_array, elemwise, broadcast_shapes, broadcast_to, blockdims_from_blockshape, store, optimize, from_func, normalize_chunks, broadcast_chunks, atop, from_delayed, concatenate_axes, common_blockdim) from dask.array.utils import assert_eq, same_keys # temporary until numpy functions migrated try: from numpy import nancumsum, nancumprod except ImportError: # pragma: no cover import dask.array.numpy_compat as npcompat nancumsum = npcompat.nancumsum nancumprod = npcompat.nancumprod def test_getem(): sol = {('X', 0, 0): (getter, 'X', (slice(0, 2), slice(0, 3))), ('X', 1, 0): (getter, 'X', (slice(2, 4), slice(0, 3))), ('X', 1, 1): (getter, 'X', (slice(2, 4), slice(3, 6))), ('X', 0, 1): (getter, 'X', (slice(0, 2), slice(3, 6)))} assert getem('X', (2, 3), shape=(4, 6)) == sol def test_top(): assert top(inc, 'z', 'ij', 'x', 'ij', numblocks={'x': (2, 2)}) == \ {('z', 0, 0): (inc, ('x', 0, 0)), ('z', 0, 1): (inc, ('x', 0, 1)), ('z', 1, 0): (inc, ('x', 1, 0)), ('z', 1, 1): (inc, ('x', 1, 1))} assert top(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (2, 2), 'y': (2, 2)}) == \ {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)), ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)), ('z', 1, 0): (add, ('x', 1, 0), ('y', 1, 0)), ('z', 1, 1): (add, ('x', 1, 1), ('y', 1, 1))} assert top(dotmany, 'z', 'ik', 'x', 'ij', 'y', 'jk', numblocks={'x': (2, 2), 'y': (2, 2)}) == \ {('z', 0, 0): (dotmany, [('x', 0, 0), ('x', 0, 1)], [('y', 0, 0), ('y', 1, 0)]), ('z', 0, 1): (dotmany, [('x', 0, 0), ('x', 0, 1)], [('y', 0, 1), ('y', 1, 1)]), ('z', 1, 0): (dotmany, [('x', 1, 0), ('x', 1, 1)], [('y', 0, 0), ('y', 1, 0)]), ('z', 1, 1): (dotmany, [('x', 1, 0), ('x', 1, 1)], [('y', 0, 1), ('y', 1, 1)])} assert top(identity, 'z', '', 'x', 'ij', numblocks={'x': (2, 2)}) ==\ {('z',): (identity, [[('x', 0, 0), ('x', 0, 1)], [('x', 1, 0), ('x', 1, 1)]])} def test_top_supports_broadcasting_rules(): assert top(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (1, 2), 'y': (2, 1)}) == \ {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)), ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 0)), ('z', 1, 0): (add, ('x', 0, 0), ('y', 1, 0)), ('z', 1, 1): (add, ('x', 0, 1), ('y', 1, 0))} def test_top_literals(): assert top(add, 'z', 'ij', 'x', 'ij', 123, None, numblocks={'x': (2, 2)}) == \ {('z', 0, 0): (add, ('x', 0, 0), 123), ('z', 0, 1): (add, ('x', 0, 1), 123), ('z', 1, 0): (add, ('x', 1, 0), 123), ('z', 1, 1): 
(add, ('x', 1, 1), 123)} def test_atop_literals(): x = da.ones((10, 10), chunks=(5, 5)) z = atop(add, 'ij', x, 'ij', 100, None, dtype=x.dtype) assert_eq(z, x + 100) z = atop(lambda x, y, z: x * y + z, 'ij', 2, None, x, 'ij', 100, None, dtype=x.dtype) assert_eq(z, 2 * x + 100) z = atop(getitem, 'ij', x, 'ij', slice(None), None, dtype=x.dtype) assert_eq(z, x) def test_concatenate3_on_scalars(): assert_eq(concatenate3([1, 2]), np.array([1, 2])) def test_chunked_dot_product(): x = np.arange(400).reshape((20, 20)) o = np.ones((20, 20)) d = {'x': x, 'o': o} getx = getem('x', (5, 5), shape=(20, 20)) geto = getem('o', (5, 5), shape=(20, 20)) result = top(dotmany, 'out', 'ik', 'x', 'ij', 'o', 'jk', numblocks={'x': (4, 4), 'o': (4, 4)}) dsk = merge(d, getx, geto, result) out = dask.get(dsk, [[('out', i, j) for j in range(4)] for i in range(4)]) assert_eq(np.dot(x, o), concatenate3(out)) def test_chunked_transpose_plus_one(): x = np.arange(400).reshape((20, 20)) d = {'x': x} getx = getem('x', (5, 5), shape=(20, 20)) f = lambda x: x.T + 1 comp = top(f, 'out', 'ij', 'x', 'ji', numblocks={'x': (4, 4)}) dsk = merge(d, getx, comp) out = dask.get(dsk, [[('out', i, j) for j in range(4)] for i in range(4)]) assert_eq(concatenate3(out), x.T + 1) def test_broadcast_dimensions_works_with_singleton_dimensions(): argpairs = [('x', 'i')] numblocks = {'x': ((1,),)} assert broadcast_dimensions(argpairs, numblocks) == {'i': (1,)} def test_broadcast_dimensions(): argpairs = [('x', 'ij'), ('y', 'ij')] d = {'x': ('Hello', 1), 'y': (1, (2, 3))} assert broadcast_dimensions(argpairs, d) == {'i': 'Hello', 'j': (2, 3)} def test_Array(): shape = (1000, 1000) chunks = (100, 100) name = 'x' dsk = merge({name: 'some-array'}, getem(name, chunks, shape=shape)) a = Array(dsk, name, chunks, shape=shape, dtype='f8') assert a.numblocks == (10, 10) assert a.__dask_keys__() == [[('x', i, j) for j in range(10)] for i in range(10)] assert a.chunks == ((100,) * 10, (100,) * 10) assert a.shape == shape assert len(a) == shape[0] def test_uneven_chunks(): a = Array({}, 'x', chunks=(3, 3), shape=(10, 10), dtype='f8') assert a.chunks == ((3, 3, 3, 1), (3, 3, 3, 1)) def test_numblocks_suppoorts_singleton_block_dims(): shape = (100, 10) chunks = (10, 10) name = 'x' dsk = merge({name: 'some-array'}, getem(name, shape=shape, chunks=chunks)) a = Array(dsk, name, chunks, shape=shape, dtype='f8') assert set(concat(a.__dask_keys__())) == {('x', i, 0) for i in range(10)} def test_keys(): dsk = dict((('x', i, j), ()) for i in range(5) for j in range(6)) dx = Array(dsk, 'x', chunks=(10, 10), shape=(50, 60), dtype='f8') assert dx.__dask_keys__() == [[(dx.name, i, j) for j in range(6)] for i in range(5)] # Cache works assert dx.__dask_keys__() is dx.__dask_keys__() # Test mutating names clears key cache dx.dask = {('y', i, j): () for i in range(5) for j in range(6)} dx.name = 'y' assert dx.__dask_keys__() == [[(dx.name, i, j) for j in range(6)] for i in range(5)] d = Array({}, 'x', (), shape=(), dtype='f8') assert d.__dask_keys__() == [('x',)] def test_Array_computation(): a = Array({('x', 0, 0): np.eye(3)}, 'x', shape=(3, 3), chunks=(3, 3), dtype='f8') assert_eq(np.array(a), np.eye(3)) assert isinstance(a.compute(), np.ndarray) assert float(a[0, 0]) == 1 def test_stack(): a, b, c = [Array(getem(name, chunks=(2, 3), shape=(4, 6)), name, chunks=(2, 3), dtype='f8', shape=(4, 6)) for name in 'ABC'] s = stack([a, b, c], axis=0) colon = slice(None, None, None) assert s.shape == (3, 4, 6) assert s.chunks == ((1, 1, 1), (2, 2), (3, 3)) assert s.dask[(s.name, 0, 1, 
0)] == (getitem, ('A', 1, 0), (None, colon, colon)) assert s.dask[(s.name, 2, 1, 0)] == (getitem, ('C', 1, 0), (None, colon, colon)) assert same_keys(s, stack([a, b, c], axis=0)) s2 = stack([a, b, c], axis=1) assert s2.shape == (4, 3, 6) assert s2.chunks == ((2, 2), (1, 1, 1), (3, 3)) assert s2.dask[(s2.name, 0, 1, 0)] == (getitem, ('B', 0, 0), (colon, None, colon)) assert s2.dask[(s2.name, 1, 1, 0)] == (getitem, ('B', 1, 0), (colon, None, colon)) assert same_keys(s2, stack([a, b, c], axis=1)) s2 = stack([a, b, c], axis=2) assert s2.shape == (4, 6, 3) assert s2.chunks == ((2, 2), (3, 3), (1, 1, 1)) assert s2.dask[(s2.name, 0, 1, 0)] == (getitem, ('A', 0, 1), (colon, colon, None)) assert s2.dask[(s2.name, 1, 1, 2)] == (getitem, ('C', 1, 1), (colon, colon, None)) assert same_keys(s2, stack([a, b, c], axis=2)) pytest.raises(ValueError, lambda: stack([a, b, c], axis=3)) assert set(b.dask.keys()).issubset(s2.dask.keys()) assert stack([a, b, c], axis=-1).chunks == stack([a, b, c], axis=2).chunks def test_short_stack(): x = np.array([1]) d = da.from_array(x, chunks=(1,)) s = da.stack([d]) assert s.shape == (1, 1) chunks = compute_as_if_collection(Array, s.dask, s.__dask_keys__()) assert chunks[0][0].shape == (1, 1) def test_stack_scalars(): d = da.arange(4, chunks=2) s = da.stack([d.mean(), d.sum()]) assert s.compute().tolist() == [np.arange(4).mean(), np.arange(4).sum()] def test_stack_promote_type(): i = np.arange(10, dtype='i4') f = np.arange(10, dtype='f4') di = da.from_array(i, chunks=5) df = da.from_array(f, chunks=5) res = da.stack([di, df]) assert_eq(res, np.stack([i, f])) @pytest.mark.skipif(LooseVersion(np.__version__) < '1.10.0', reason="NumPy doesn't yet support stack") def test_stack_rechunk(): x = da.random.random(10, chunks=5) y = da.random.random(10, chunks=4) z = da.stack([x, y], axis=0) assert z.shape == (2, 10) assert z.chunks == ((1, 1), (4, 1, 3, 2)) assert_eq(z, np.stack([x.compute(), y.compute()], axis=0)) def test_concatenate(): a, b, c = [Array(getem(name, chunks=(2, 3), shape=(4, 6)), name, chunks=(2, 3), dtype='f8', shape=(4, 6)) for name in 'ABC'] x = concatenate([a, b, c], axis=0) assert x.shape == (12, 6) assert x.chunks == ((2, 2, 2, 2, 2, 2), (3, 3)) assert x.dask[(x.name, 0, 1)] == ('A', 0, 1) assert x.dask[(x.name, 5, 0)] == ('C', 1, 0) assert same_keys(x, concatenate([a, b, c], axis=0)) y = concatenate([a, b, c], axis=1) assert y.shape == (4, 18) assert y.chunks == ((2, 2), (3, 3, 3, 3, 3, 3)) assert y.dask[(y.name, 1, 0)] == ('A', 1, 0) assert y.dask[(y.name, 1, 5)] == ('C', 1, 1) assert same_keys(y, concatenate([a, b, c], axis=1)) assert set(b.dask.keys()).issubset(y.dask.keys()) z = concatenate([a], axis=0) assert z.shape == a.shape assert z.chunks == a.chunks assert z.dask == a.dask assert z is a assert (concatenate([a, b, c], axis=-1).chunks == concatenate([a, b, c], axis=1).chunks) pytest.raises(ValueError, lambda: concatenate([a, b, c], axis=2)) def test_concatenate_unknown_axes(): dd = pytest.importorskip('dask.dataframe') pd = pytest.importorskip('pandas') a_df = pd.DataFrame({'x': np.arange(12)}) b_df = pd.DataFrame({'y': np.arange(12) * 10}) a_ddf = dd.from_pandas(a_df, sort=False, npartitions=3) b_ddf = dd.from_pandas(b_df, sort=False, npartitions=3) a_x = a_ddf.values b_x = b_ddf.values assert np.isnan(a_x.shape[0]) assert np.isnan(b_x.shape[0]) da.concatenate([a_x, b_x], axis=0) # works fine with pytest.raises(ValueError) as exc_info: da.concatenate([a_x, b_x], axis=1) # unknown chunks assert 'nan' in str(exc_info.value) assert 
'allow_unknown_chunksize' in str(exc_info.value) c_x = da.concatenate([a_x, b_x], axis=1, allow_unknown_chunksizes=True) # unknown chunks assert_eq(c_x, np.concatenate([a_df.values, b_df.values], axis=1)) def test_concatenate_rechunk(): x = da.random.random((6, 6), chunks=(3, 3)) y = da.random.random((6, 6), chunks=(2, 2)) z = da.concatenate([x, y], axis=0) assert z.shape == (12, 6) assert z.chunks == ((3, 3, 2, 2, 2), (2, 1, 1, 2)) assert_eq(z, np.concatenate([x.compute(), y.compute()], axis=0)) z = da.concatenate([x, y], axis=1) assert z.shape == (6, 12) assert z.chunks == ((2, 1, 1, 2), (3, 3, 2, 2, 2)) assert_eq(z, np.concatenate([x.compute(), y.compute()], axis=1)) def test_concatenate_fixlen_strings(): x = np.array(['a', 'b', 'c']) y = np.array(['aa', 'bb', 'cc']) a = da.from_array(x, chunks=(2,)) b = da.from_array(y, chunks=(2,)) assert_eq(np.concatenate([x, y]), da.concatenate([a, b])) def test_binops(): a = Array(dict((('a', i), np.array([0])) for i in range(3)), 'a', chunks=((1, 1, 1),), dtype='i8') b = Array(dict((('b', i), np.array([0])) for i in range(3)), 'b', chunks=((1, 1, 1),), dtype='i8') result = elemwise(add, a, b, name='c') assert result.dask == merge(a.dask, b.dask, dict((('c', i), (add, ('a', i), ('b', i))) for i in range(3))) result = elemwise(pow, a, 2, name='c') assert "'a', 0" in str(result.dask[('c', 0)]) assert "2" in str(result.dask[('c', 0)]) def test_broadcast_shapes(): assert (0, 5) == broadcast_shapes((0, 1), (1, 5)) assert (3, 4, 5) == broadcast_shapes((3, 4, 5), (4, 1), ()) assert (3, 4) == broadcast_shapes((3, 1), (1, 4), (4,)) assert (5, 6, 7, 3, 4) == broadcast_shapes((3, 1), (), (5, 6, 7, 1, 4)) pytest.raises(ValueError, lambda: broadcast_shapes((3,), (3, 4))) pytest.raises(ValueError, lambda: broadcast_shapes((2, 3), (2, 3, 1))) def test_elemwise_on_scalars(): x = np.arange(10, dtype=np.int64) a = from_array(x, chunks=(5,)) assert len(a.__dask_keys__()) == 2 assert_eq(a.sum()**2, x.sum()**2) y = np.arange(10, dtype=np.int32) b = from_array(y, chunks=(5,)) result = a.sum() * b # Dask 0-d arrays do not behave like numpy scalars for type promotion assert result.dtype == np.int64 assert result.compute().dtype == np.int64 assert (x.sum() * y).dtype == np.int32 assert_eq((x.sum() * y).astype(np.int64), result) def test_elemwise_with_ndarrays(): x = np.arange(3) y = np.arange(12).reshape(4, 3) a = from_array(x, chunks=(3,)) b = from_array(y, chunks=(2, 3)) assert_eq(x + a, 2 * x) assert_eq(a + x, 2 * x) assert_eq(x + b, x + y) assert_eq(b + x, x + y) assert_eq(a + y, x + y) assert_eq(y + a, x + y) # Error on shape mismatch pytest.raises(ValueError, lambda: a + y.T) pytest.raises(ValueError, lambda: a + np.arange(2)) def test_elemwise_differently_chunked(): x = np.arange(3) y = np.arange(12).reshape(4, 3) a = from_array(x, chunks=(3,)) b = from_array(y, chunks=(2, 2)) assert_eq(a + b, x + y) assert_eq(b + a, x + y) def test_elemwise_dtype(): values = [ da.from_array(np.ones(5, np.float32), chunks=3), da.from_array(np.ones(5, np.int16), chunks=3), da.from_array(np.ones(5, np.int64), chunks=3), da.from_array(np.ones((), np.float64), chunks=()) * 1e200, np.ones(5, np.float32), 1, 1.0, 1e200, np.int64(1), np.ones((), np.int64), ] for x in values: for y in values: assert da.maximum(x, y).dtype == da.result_type(x, y) def test_operators(): x = np.arange(10) y = np.arange(10).reshape((10, 1)) a = from_array(x, chunks=(5,)) b = from_array(y, chunks=(5, 1)) c = a + 1 assert_eq(c, x + 1) c = a + b assert_eq(c, x + x.reshape((10, 1))) expr = (3 / a * b)**2 > 5 with 
pytest.warns(None): # ZeroDivisionWarning assert_eq(expr, (3 / x * y)**2 > 5) with pytest.warns(None): # OverflowWarning c = da.exp(a) assert_eq(c, np.exp(x)) assert_eq(abs(-a), a) assert_eq(a, +x) def test_operator_dtype_promotion(): x = np.arange(10, dtype=np.float32) y = np.array([1]) a = from_array(x, chunks=(5,)) assert_eq(x + 1, a + 1) # still float32 assert_eq(x + 1e50, a + 1e50) # now float64 assert_eq(x + y, a + y) # also float64 def test_field_access(): x = np.array([(1, 1.0), (2, 2.0)], dtype=[('a', 'i4'), ('b', 'f4')]) y = from_array(x, chunks=(1,)) assert_eq(y['a'], x['a']) assert_eq(y[['b', 'a']], x[['b', 'a']]) assert same_keys(y[['b', 'a']], y[['b', 'a']]) def test_field_access_with_shape(): dtype = [('col1', ('f4', (3, 2))), ('col2', ('f4', 3))] data = np.ones((100, 50), dtype=dtype) x = da.from_array(data, 10) assert_eq(x['col1'], data['col1']) assert_eq(x[['col1']], data[['col1']]) assert_eq(x['col2'], data['col2']) assert_eq(x[['col1', 'col2']], data[['col1', 'col2']]) @pytest.mark.skipif(sys.version_info < (3, 5), reason="Matrix multiplication operator only after Py3.5") def test_matmul(): x = np.random.random((5, 5)) y = np.random.random((5, 2)) a = from_array(x, chunks=(1, 5)) b = from_array(y, chunks=(5, 1)) assert_eq(operator.matmul(a, b), a.dot(b)) assert_eq(operator.matmul(a, b), operator.matmul(x, y)) assert_eq(operator.matmul(a, y), operator.matmul(x, b)) list_vec = list(range(1, 6)) assert_eq(operator.matmul(list_vec, b), operator.matmul(list_vec, y)) assert_eq(operator.matmul(x, list_vec), operator.matmul(a, list_vec)) z = np.random.random((5, 5, 5)) c = from_array(z, chunks=(1, 5, 1)) with pytest.raises(NotImplementedError): operator.matmul(a, z) assert_eq(operator.matmul(z, a), operator.matmul(c, x)) def test_T(): x = np.arange(400).reshape((20, 20)) a = from_array(x, chunks=(5, 5)) assert_eq(x.T, a.T) def test_norm(): a = np.arange(200, dtype='f8').reshape((20, 10)) a = a + (a.max() - a) * 1j b = from_array(a, chunks=(5, 5)) # TODO: Deprecated method, remove test when method removed with pytest.warns(UserWarning): assert_eq(b.vnorm(), np.linalg.norm(a)) assert_eq(b.vnorm(ord=1), np.linalg.norm(a.flatten(), ord=1)) assert_eq(b.vnorm(ord=4, axis=0), np.linalg.norm(a, ord=4, axis=0)) assert b.vnorm(ord=4, axis=0, keepdims=True).ndim == b.ndim split_every = {0: 3, 1: 3} assert_eq(b.vnorm(ord=1, axis=0, split_every=split_every), np.linalg.norm(a, ord=1, axis=0)) assert_eq(b.vnorm(ord=np.inf, axis=0, split_every=split_every), np.linalg.norm(a, ord=np.inf, axis=0)) assert_eq(b.vnorm(ord=np.inf, split_every=split_every), np.linalg.norm(a.flatten(), ord=np.inf)) def test_broadcast_to(): x = np.random.randint(10, size=(5, 1, 6)) a = from_array(x, chunks=(3, 1, 3)) for shape in [a.shape, (5, 0, 6), (5, 4, 6), (2, 5, 1, 6), (3, 4, 5, 4, 6)]: xb = chunk.broadcast_to(x, shape) ab = broadcast_to(a, shape) assert_eq(xb, ab) if a.shape == ab.shape: assert a is ab pytest.raises(ValueError, lambda: broadcast_to(a, (2, 1, 6))) pytest.raises(ValueError, lambda: broadcast_to(a, (3,))) def test_broadcast_to_array(): x = np.random.randint(10, size=(5, 1, 6)) for shape in [(5, 0, 6), (5, 4, 6), (2, 5, 1, 6), (3, 4, 5, 4, 6)]: a = np.broadcast_to(x, shape) d = broadcast_to(x, shape) assert_eq(a, d) def test_broadcast_to_scalar(): x = 5 for shape in [tuple(), (0,), (2, 3), (5, 4, 6), (2, 5, 1, 6), (3, 4, 5, 4, 6)]: a = np.broadcast_to(x, shape) d = broadcast_to(x, shape) assert_eq(a, d) @pytest.mark.parametrize('u_shape, v_shape', [ [tuple(), (2, 3)], [(1,), (2, 3)], [(1, 1), (2, 
3)], [(0, 3), (1, 3)], [(2, 0), (2, 1)], [(1, 0), (2, 1)], [(0, 1), (1, 3)], ]) def test_broadcast_operator(u_shape, v_shape): u = np.random.random(u_shape) v = np.random.random(v_shape) d_u = from_array(u, chunks=1) d_v = from_array(v, chunks=1) w = u * v d_w = d_u * d_v assert_eq(w, d_w) @pytest.mark.parametrize('original_shape,new_shape,chunks', [ ((10,), (10,), (3, 3, 4)), ((10,), (10, 1, 1), 5), ((10,), (1, 10,), 5), ((24,), (2, 3, 4), 12), ((1, 24,), (2, 3, 4), 12), ((2, 3, 4), (24,), (1, 3, 4)), ((2, 3, 4), (24,), 4), ((2, 3, 4), (24, 1), 4), ((2, 3, 4), (1, 24), 4), ((4, 4, 1), (4, 4), 2), ((4, 4), (4, 4, 1), 2), ((1, 4, 4), (4, 4), 2), ((1, 4, 4), (4, 4, 1), 2), ((1, 4, 4), (1, 1, 4, 4), 2), ((4, 4), (1, 4, 4, 1), 2), ((4, 4), (1, 4, 4), 2), ((2, 3), (2, 3), (1, 2)), ((2, 3), (3, 2), 3), ((4, 2, 3), (4, 6), 4), ((3, 4, 5, 6), (3, 4, 5, 6), (2, 3, 4, 5)), ((), (1,), 1), ((1,), (), 1), ((24,), (3, 8), 24), ((24,), (4, 6), 6), ((24,), (4, 3, 2), 6), ((24,), (4, 6, 1), 6), ((24,), (4, 6), (6, 12, 6)), ((64, 4), (8, 8, 4), (16, 2)), ((4, 64), (4, 8, 4, 2), (2, 16)), ((4, 8, 4, 2), (2, 1, 2, 32, 2), (2, 4, 2, 2)), ((4, 1, 4), (4, 4), (2, 1, 2)), ((0, 10), (0, 5, 2), (5, 5)), ((5, 0, 2), (0, 10), (5, 2, 2)), ((0,), (2, 0, 2), (4,)), ((2, 0, 2), (0,), (4, 4, 4)), ]) def test_reshape(original_shape, new_shape, chunks): x = np.random.randint(10, size=original_shape) a = from_array(x, chunks=chunks) xr = x.reshape(new_shape) ar = a.reshape(new_shape) if a.shape == new_shape: assert a is ar assert_eq(xr, ar) def test_reshape_exceptions(): x = np.random.randint(10, size=(5,)) a = from_array(x, chunks=(2,)) with pytest.raises(ValueError): da.reshape(a, (100,)) def test_reshape_splat(): x = da.ones((5, 5), chunks=(2, 2)) assert_eq(x.reshape((25,)), x.reshape(25)) def test_reshape_fails_for_dask_only(): cases = [ ((3, 4), (4, 3), 2), ] for original_shape, new_shape, chunks in cases: x = np.random.randint(10, size=original_shape) a = from_array(x, chunks=chunks) assert x.reshape(new_shape).shape == new_shape with pytest.raises(ValueError): da.reshape(a, new_shape) def test_reshape_unknown_dimensions(): for original_shape in [(24,), (2, 12), (2, 3, 4)]: for new_shape in [(-1,), (2, -1), (-1, 3, 4)]: x = np.random.randint(10, size=original_shape) a = from_array(x, 24) assert_eq(x.reshape(new_shape), a.reshape(new_shape)) pytest.raises(ValueError, lambda: da.reshape(a, (-1, -1))) def test_full(): d = da.full((3, 4), 2, chunks=((2, 1), (2, 2))) assert d.chunks == ((2, 1), (2, 2)) assert_eq(d, np.full((3, 4), 2)) def test_map_blocks(): x = np.arange(400).reshape((20, 20)) d = from_array(x, chunks=(7, 7)) e = d.map_blocks(inc, dtype=d.dtype) assert d.chunks == e.chunks assert_eq(e, x + 1) e = d.map_blocks(inc, name='increment') assert e.name == 'increment' e = d.map_blocks(inc, token='increment') assert e.name != 'increment' assert e.name.startswith('increment') d = from_array(x, chunks=(10, 10)) e = d.map_blocks(lambda x: x[::2, ::2], chunks=(5, 5), dtype=d.dtype) assert e.chunks == ((5, 5), (5, 5)) assert_eq(e, x[::2, ::2]) d = from_array(x, chunks=(8, 8)) e = d.map_blocks(lambda x: x[::2, ::2], chunks=((4, 4, 2), (4, 4, 2)), dtype=d.dtype) assert_eq(e, x[::2, ::2]) def test_map_blocks2(): x = np.arange(10, dtype='i8') d = from_array(x, chunks=(2,)) def func(block, block_id=None, c=0): return np.ones_like(block) * sum(block_id) + c out = d.map_blocks(func, dtype='i8') expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype='i8') assert_eq(out, expected) assert same_keys(d.map_blocks(func, 
dtype='i8'), out) out = d.map_blocks(func, dtype='i8', c=1) expected = expected + 1 assert_eq(out, expected) assert same_keys(d.map_blocks(func, dtype='i8', c=1), out) def test_map_blocks_with_constants(): d = da.arange(10, chunks=3) e = d.map_blocks(add, 100, dtype=d.dtype) assert_eq(e, np.arange(10) + 100) assert_eq(da.map_blocks(sub, d, 10, dtype=d.dtype), np.arange(10) - 10) assert_eq(da.map_blocks(sub, 10, d, dtype=d.dtype), 10 - np.arange(10)) def test_map_blocks_with_kwargs(): d = da.arange(10, chunks=5) result = d.map_blocks(np.max, axis=0, keepdims=True, dtype=d.dtype, chunks=(1,)) assert_eq(result, np.array([4, 9])) def test_map_blocks_with_chunks(): dx = da.ones((5, 3), chunks=(2, 2)) dy = da.ones((5, 3), chunks=(2, 2)) dz = da.map_blocks(np.add, dx, dy, chunks=dx.chunks) assert_eq(dz, np.ones((5, 3)) * 2) def test_map_blocks_dtype_inference(): x = np.arange(50).reshape((5, 10)) y = np.arange(10) dx = da.from_array(x, chunks=5) dy = da.from_array(y, chunks=5) def foo(x, *args, **kwargs): cast = kwargs.pop('cast', 'i8') return (x + sum(args)).astype(cast) assert_eq(dx.map_blocks(foo, dy, 1), foo(dx, dy, 1)) assert_eq(dx.map_blocks(foo, dy, 1, cast='f8'), foo(dx, dy, 1, cast='f8')) assert_eq(dx.map_blocks(foo, dy, 1, cast='f8', dtype='f8'), foo(dx, dy, 1, cast='f8', dtype='f8')) def foo(x): raise RuntimeError("Woops") try: dx.map_blocks(foo) except Exception as e: assert e.args[0].startswith("`dtype` inference failed") assert "Please specify the dtype explicitly" in e.args[0] assert 'RuntimeError' in e.args[0] else: assert False, "Should have errored" def test_from_function_requires_block_args(): x = np.arange(10) pytest.raises(Exception, lambda: from_array(x)) def test_repr(): d = da.ones((4, 4), chunks=(2, 2)) assert d.name[:5] in repr(d) assert str(d.shape) in repr(d) assert str(d.dtype) in repr(d) d = da.ones((4000, 4), chunks=(4, 2)) assert len(str(d)) < 1000 def test_slicing_with_ellipsis(): x = np.arange(256).reshape((4, 4, 4, 4)) d = da.from_array(x, chunks=((2, 2, 2, 2))) assert_eq(d[..., 1], x[..., 1]) assert_eq(d[0, ..., 1], x[0, ..., 1]) def test_slicing_with_ndarray(): x = np.arange(64).reshape((8, 8)) d = da.from_array(x, chunks=((4, 4))) assert_eq(d[np.arange(8)], x) assert_eq(d[np.ones(8, dtype=bool)], x) assert_eq(d[np.array([1])], x[[1]]) assert_eq(d[np.array([True])], x[[0]]) def test_dtype(): d = da.ones((4, 4), chunks=(2, 2)) assert d.dtype == d.compute().dtype assert (d * 1.0).dtype == (d + 1.0).compute().dtype assert d.sum().dtype == d.sum().compute().dtype # no shape def test_blockdims_from_blockshape(): assert blockdims_from_blockshape((10, 10), (4, 3)) == ((4, 4, 2), (3, 3, 3, 1)) pytest.raises(TypeError, lambda: blockdims_from_blockshape((10,), None)) assert blockdims_from_blockshape((1e2, 3), [1e1, 3]) == ((10, ) * 10, (3, )) assert blockdims_from_blockshape((np.int8(10), ), (5, )) == ((5, 5), ) def test_coerce(): d = da.from_array(np.array([1]), chunks=(1,)) with dask.set_options(get=dask.get): assert bool(d) assert int(d) assert float(d) assert complex(d) def test_store_delayed_target(): from dask.delayed import delayed d = da.ones((4, 4), chunks=(2, 2)) a, b = d + 1, d + 2 # empty buffers to be used as targets targs = {} def make_target(key): a = np.empty((4, 4)) targs[key] = a return a # delayed calls to these targets atd = delayed(make_target)('at') btd = delayed(make_target)('bt') store([a, b], [atd, btd]) at = targs['at'] bt = targs['bt'] assert_eq(at, a) assert_eq(bt, b) pytest.raises(ValueError, lambda: store([a], [at, bt])) 
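    # The ValueError checks around this point exercise store()'s argument
    # validation: sources must be dask arrays, and sources and targets must
    # pair up one-to-one.  A rough sketch of the expected calling convention,
    # where t1 and t2 stand in for hypothetical writable targets (NumPy
    # buffers, h5py datasets, etc.):
    #
    #     store(a, t1)               # single source, single target
    #     store([a, b], [t1, t2])    # or equal-length lists of each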
pytest.raises(ValueError, lambda: store(at, at)) pytest.raises(ValueError, lambda: store([at, bt], [at, bt])) def test_store(): d = da.ones((4, 4), chunks=(2, 2)) a, b = d + 1, d + 2 at = np.empty(shape=(4, 4)) bt = np.empty(shape=(4, 4)) store([a, b], [at, bt]) assert (at == 2).all() assert (bt == 3).all() pytest.raises(ValueError, lambda: store([a], [at, bt])) pytest.raises(ValueError, lambda: store(at, at)) pytest.raises(ValueError, lambda: store([at, bt], [at, bt])) def test_store_regions(): d = da.ones((4, 4, 4), chunks=(2, 2, 2)) a, b = d + 1, d + 2 at = np.zeros(shape=(8, 4, 6)) bt = np.zeros(shape=(8, 4, 6)) region = (slice(None,None,2), slice(None), [1, 2, 4, 5]) # Single region: v = store([a, b], [at, bt], regions=region, compute=False) assert (at == 0).all() and (bt[region] == 0).all() v.compute() assert (at[region] == 2).all() and (bt[region] == 3).all() assert not (bt == 3).all() and not ( bt == 0 ).all() assert not (at == 3).all() and not ( at == 0 ).all() # Multiple regions: at = np.zeros(shape=(8, 4, 6)) bt = np.zeros(shape=(8, 4, 6)) v = store([a, b], [at, bt], regions=[region, region], compute=False) assert (at == 0).all() and (bt[region] == 0).all() v.compute() assert (at[region] == 2).all() and (bt[region] == 3).all() assert not (bt == 3).all() and not ( bt == 0 ).all() assert not (at == 3).all() and not ( at == 0 ).all() def test_store_compute_false(): d = da.ones((4, 4), chunks=(2, 2)) a, b = d + 1, d + 2 at = np.zeros(shape=(4, 4)) bt = np.zeros(shape=(4, 4)) v = store([a, b], [at, bt], compute=False) assert (at == 0).all() and (bt == 0).all() v.compute() assert (at == 2).all() and (bt == 3).all() class ThreadSafetyError(Exception): pass class NonthreadSafeStore(object): def __init__(self): self.in_use = False def __setitem__(self, key, value): if self.in_use: raise ThreadSafetyError() self.in_use = True time.sleep(0.001) self.in_use = False class ThreadSafeStore(object): def __init__(self): self.concurrent_uses = 0 self.max_concurrent_uses = 0 def __setitem__(self, key, value): self.concurrent_uses += 1 self.max_concurrent_uses = max(self.concurrent_uses, self.max_concurrent_uses) time.sleep(0.01) self.concurrent_uses -= 1 def test_store_locks(): _Lock = type(Lock()) d = da.ones((10, 10), chunks=(2, 2)) a, b = d + 1, d + 2 at = np.zeros(shape=(10, 10)) bt = np.zeros(shape=(10, 10)) lock = Lock() v = store([a, b], [at, bt], compute=False, lock=lock) dsk = v.dask locks = set(vv for v in dsk.values() for vv in v if isinstance(vv, _Lock)) assert locks == set([lock]) # Ensure same lock applies over multiple stores at = NonthreadSafeStore() v = store([a, b], [at, at], lock=lock, get=dask.threaded.get, num_workers=10) # Don't assume thread safety by default at = NonthreadSafeStore() store(a, at, get=dask.threaded.get, num_workers=10) a.store(at, get=dask.threaded.get, num_workers=10) # Ensure locks can be removed at = ThreadSafeStore() for i in range(10): a.store(at, lock=False, get=dask.threaded.get, num_workers=10) if at.max_concurrent_uses > 1: break if i == 9: assert False @pytest.mark.xfail(reason="can't lock with multiprocessing") def test_store_multiprocessing_lock(): d = da.ones((10, 10), chunks=(2, 2)) a = d + 1 at = np.zeros(shape=(10, 10)) a.store(at, get=dask.multiprocessing.get, num_workers=10) def test_to_hdf5(): h5py = pytest.importorskip('h5py') x = da.ones((4, 4), chunks=(2, 2)) y = da.ones(4, chunks=2, dtype='i4') with tmpfile('.hdf5') as fn: x.to_hdf5(fn, '/x') with h5py.File(fn) as f: d = f['/x'] assert_eq(d[:], x) assert d.chunks == (2, 2) with 
tmpfile('.hdf5') as fn: x.to_hdf5(fn, '/x', chunks=None) with h5py.File(fn) as f: d = f['/x'] assert_eq(d[:], x) assert d.chunks is None with tmpfile('.hdf5') as fn: x.to_hdf5(fn, '/x', chunks=(1, 1)) with h5py.File(fn) as f: d = f['/x'] assert_eq(d[:], x) assert d.chunks == (1, 1) with tmpfile('.hdf5') as fn: da.to_hdf5(fn, {'/x': x, '/y': y}) with h5py.File(fn) as f: assert_eq(f['/x'][:], x) assert f['/x'].chunks == (2, 2) assert_eq(f['/y'][:], y) assert f['/y'].chunks == (2,) def test_to_dask_dataframe(): dd = pytest.importorskip('dask.dataframe') a = da.ones((4,), chunks=(2,)) d = a.to_dask_dataframe() assert isinstance(d, dd.Series) a = da.ones((4, 4), chunks=(2, 2)) d = a.to_dask_dataframe() assert isinstance(d, dd.DataFrame) def test_np_array_with_zero_dimensions(): d = da.ones((4, 4), chunks=(2, 2)) assert_eq(np.array(d.sum()), np.array(d.compute().sum())) def test_dtype_complex(): x = np.arange(24).reshape((4, 6)).astype('f4') y = np.arange(24).reshape((4, 6)).astype('i8') z = np.arange(24).reshape((4, 6)).astype('i2') a = da.from_array(x, chunks=(2, 3)) b = da.from_array(y, chunks=(2, 3)) c = da.from_array(z, chunks=(2, 3)) def assert_eq(a, b): return (isinstance(a, np.dtype) and isinstance(b, np.dtype) and str(a) == str(b)) assert_eq(a.dtype, x.dtype) assert_eq(b.dtype, y.dtype) assert_eq((a + 1).dtype, (x + 1).dtype) assert_eq((a + b).dtype, (x + y).dtype) assert_eq(a.T.dtype, x.T.dtype) assert_eq(a[:3].dtype, x[:3].dtype) assert_eq((a.dot(b.T)).dtype, (x.dot(y.T)).dtype) assert_eq(stack([a, b]).dtype, np.vstack([x, y]).dtype) assert_eq(concatenate([a, b]).dtype, np.concatenate([x, y]).dtype) assert_eq(b.std().dtype, y.std().dtype) assert_eq(c.sum().dtype, z.sum().dtype) assert_eq(a.min().dtype, a.min().dtype) assert_eq(b.std().dtype, b.std().dtype) assert_eq(a.argmin(axis=0).dtype, a.argmin(axis=0).dtype) assert_eq(da.sin(c).dtype, np.sin(z).dtype) assert_eq(da.exp(b).dtype, np.exp(y).dtype) assert_eq(da.floor(a).dtype, np.floor(x).dtype) assert_eq(da.isnan(b).dtype, np.isnan(y).dtype) with ignoring(ImportError): assert da.isnull(b).dtype == 'bool' assert da.notnull(b).dtype == 'bool' x = np.array([('a', 1)], dtype=[('text', 'S1'), ('numbers', 'i4')]) d = da.from_array(x, chunks=(1,)) assert_eq(d['text'].dtype, x['text'].dtype) assert_eq(d[['numbers', 'text']].dtype, x[['numbers', 'text']].dtype) def test_astype(): x = np.ones((5, 5), dtype='f8') d = da.from_array(x, chunks=(2,2)) assert d.astype('i8').dtype == 'i8' assert_eq(d.astype('i8'), x.astype('i8')) assert same_keys(d.astype('i8'), d.astype('i8')) with pytest.raises(TypeError): d.astype('i8', casting='safe') with pytest.raises(TypeError): d.astype('i8', not_a_real_kwarg='foo') # smoketest with kwargs assert_eq(d.astype('i8', copy=False), x.astype('i8', copy=False)) # Check it's a noop assert d.astype('f8') is d def test_arithmetic(): x = np.arange(5).astype('f4') + 2 y = np.arange(5).astype('i8') + 2 z = np.arange(5).astype('i4') + 2 a = da.from_array(x, chunks=(2,)) b = da.from_array(y, chunks=(2,)) c = da.from_array(z, chunks=(2,)) assert_eq(a + b, x + y) assert_eq(a * b, x * y) assert_eq(a - b, x - y) assert_eq(a / b, x / y) assert_eq(b & b, y & y) assert_eq(b | b, y | y) assert_eq(b ^ b, y ^ y) assert_eq(a // b, x // y) assert_eq(a ** b, x ** y) assert_eq(a % b, x % y) assert_eq(a > b, x > y) assert_eq(a < b, x < y) assert_eq(a >= b, x >= y) assert_eq(a <= b, x <= y) assert_eq(a == b, x == y) assert_eq(a != b, x != y) assert_eq(a + 2, x + 2) assert_eq(a * 2, x * 2) assert_eq(a - 2, x - 2) assert_eq(a / 2, x / 2) 
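    # Scalar operands broadcast blockwise, just as in NumPy, and the result
    # keeps the chunking of the dask operand; the checks further down repeat
    # the same operations with the scalar on the left to cover the reflected
    # (__radd__ and friends) protocol.  Roughly, for a (the dask wrapper of x
    # defined at the top of this test):
    #
    #     assert_eq(a + a, 2 * x)    # lazy elementwise ops compose freely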
assert_eq(b & True, y & True) assert_eq(b | True, y | True) assert_eq(b ^ True, y ^ True) assert_eq(a // 2, x // 2) assert_eq(a ** 2, x ** 2) assert_eq(a % 2, x % 2) assert_eq(a > 2, x > 2) assert_eq(a < 2, x < 2) assert_eq(a >= 2, x >= 2) assert_eq(a <= 2, x <= 2) assert_eq(a == 2, x == 2) assert_eq(a != 2, x != 2) assert_eq(2 + b, 2 + y) assert_eq(2 * b, 2 * y) assert_eq(2 - b, 2 - y) assert_eq(2 / b, 2 / y) assert_eq(True & b, True & y) assert_eq(True | b, True | y) assert_eq(True ^ b, True ^ y) assert_eq(2 // b, 2 // y) assert_eq(2 ** b, 2 ** y) assert_eq(2 % b, 2 % y) assert_eq(2 > b, 2 > y) assert_eq(2 < b, 2 < y) assert_eq(2 >= b, 2 >= y) assert_eq(2 <= b, 2 <= y) assert_eq(2 == b, 2 == y) assert_eq(2 != b, 2 != y) assert_eq(-a, -x) assert_eq(abs(a), abs(x)) assert_eq(~(a == b), ~(x == y)) assert_eq(~(a == b), ~(x == y)) assert_eq(da.logaddexp(a, b), np.logaddexp(x, y)) assert_eq(da.logaddexp2(a, b), np.logaddexp2(x, y)) with pytest.warns(None): # Overflow warning assert_eq(da.exp(b), np.exp(y)) assert_eq(da.log(a), np.log(x)) assert_eq(da.log10(a), np.log10(x)) assert_eq(da.log1p(a), np.log1p(x)) with pytest.warns(None): # Overflow warning assert_eq(da.expm1(b), np.expm1(y)) assert_eq(da.sqrt(a), np.sqrt(x)) assert_eq(da.square(a), np.square(x)) assert_eq(da.sin(a), np.sin(x)) assert_eq(da.cos(b), np.cos(y)) assert_eq(da.tan(a), np.tan(x)) assert_eq(da.arcsin(b / 10), np.arcsin(y / 10)) assert_eq(da.arccos(b / 10), np.arccos(y / 10)) assert_eq(da.arctan(b / 10), np.arctan(y / 10)) assert_eq(da.arctan2(b * 10, a), np.arctan2(y * 10, x)) assert_eq(da.hypot(b, a), np.hypot(y, x)) assert_eq(da.sinh(a), np.sinh(x)) with pytest.warns(None): # Overflow warning assert_eq(da.cosh(b), np.cosh(y)) assert_eq(da.tanh(a), np.tanh(x)) assert_eq(da.arcsinh(b * 10), np.arcsinh(y * 10)) assert_eq(da.arccosh(b * 10), np.arccosh(y * 10)) assert_eq(da.arctanh(b / 10), np.arctanh(y / 10)) assert_eq(da.deg2rad(a), np.deg2rad(x)) assert_eq(da.rad2deg(a), np.rad2deg(x)) assert_eq(da.logical_and(a < 1, b < 4), np.logical_and(x < 1, y < 4)) assert_eq(da.logical_or(a < 1, b < 4), np.logical_or(x < 1, y < 4)) assert_eq(da.logical_xor(a < 1, b < 4), np.logical_xor(x < 1, y < 4)) assert_eq(da.logical_not(a < 1), np.logical_not(x < 1)) assert_eq(da.maximum(a, 5 - a), np.maximum(a, 5 - a)) assert_eq(da.minimum(a, 5 - a), np.minimum(a, 5 - a)) assert_eq(da.fmax(a, 5 - a), np.fmax(a, 5 - a)) assert_eq(da.fmin(a, 5 - a), np.fmin(a, 5 - a)) assert_eq(da.isreal(a + 1j * b), np.isreal(x + 1j * y)) assert_eq(da.iscomplex(a + 1j * b), np.iscomplex(x + 1j * y)) assert_eq(da.isfinite(a), np.isfinite(x)) assert_eq(da.isinf(a), np.isinf(x)) assert_eq(da.isnan(a), np.isnan(x)) assert_eq(da.signbit(a - 3), np.signbit(x - 3)) assert_eq(da.copysign(a - 3, b), np.copysign(x - 3, y)) assert_eq(da.nextafter(a - 3, b), np.nextafter(x - 3, y)) with pytest.warns(None): # overflow warning assert_eq(da.ldexp(c, c), np.ldexp(z, z)) assert_eq(da.fmod(a * 12, b), np.fmod(x * 12, y)) assert_eq(da.floor(a * 0.5), np.floor(x * 0.5)) assert_eq(da.ceil(a), np.ceil(x)) assert_eq(da.trunc(a / 2), np.trunc(x / 2)) assert_eq(da.degrees(b), np.degrees(y)) assert_eq(da.radians(a), np.radians(x)) assert_eq(da.rint(a + 0.3), np.rint(x + 0.3)) assert_eq(da.fix(a - 2.5), np.fix(x - 2.5)) assert_eq(da.angle(a + 1j), np.angle(x + 1j)) assert_eq(da.real(a + 1j), np.real(x + 1j)) assert_eq((a + 1j).real, np.real(x + 1j)) assert_eq(da.imag(a + 1j), np.imag(x + 1j)) assert_eq((a + 1j).imag, np.imag(x + 1j)) assert_eq(da.conj(a + 1j * b), np.conj(x + 1j * y)) 
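    # Each da.* call above mirrors the NumPy function of the same name and is
    # applied blockwise, so the output keeps the input chunking.  The checks
    # that follow also cover the two-output functions (frexp, modf), which
    # unpack into a pair of dask arrays; roughly, for the float array a above:
    #
    #     mantissa, exponent = da.frexp(a)        # both lazy dask arrays
    #     assert_eq(mantissa * 2 ** exponent, x)  # reconstructs the input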
assert_eq((a + 1j * b).conj(), (x + 1j * y).conj()) assert_eq(da.clip(b, 1, 4), np.clip(y, 1, 4)) assert_eq(b.clip(1, 4), y.clip(1, 4)) assert_eq(da.fabs(b), np.fabs(y)) assert_eq(da.sign(b - 2), np.sign(y - 2)) assert_eq(da.absolute(b - 2), np.absolute(y - 2)) assert_eq(da.absolute(b - 2 + 1j), np.absolute(y - 2 + 1j)) l1, l2 = da.frexp(a) r1, r2 = np.frexp(x) assert_eq(l1, r1) assert_eq(l2, r2) l1, l2 = da.modf(a) r1, r2 = np.modf(x) assert_eq(l1, r1) assert_eq(l2, r2) assert_eq(da.around(a, -1), np.around(x, -1)) def test_elemwise_consistent_names(): a = da.from_array(np.arange(5, dtype='f4'), chunks=(2,)) b = da.from_array(np.arange(5, dtype='f4'), chunks=(2,)) assert same_keys(a + b, a + b) assert same_keys(a + 2, a + 2) assert same_keys(da.exp(a), da.exp(a)) assert same_keys(da.exp(a, dtype='f8'), da.exp(a, dtype='f8')) assert same_keys(da.maximum(a, b), da.maximum(a, b)) def test_optimize(): x = np.arange(5).astype('f4') a = da.from_array(x, chunks=(2,)) expr = a[1:4] + 1 result = optimize(expr.dask, expr.__dask_keys__()) assert isinstance(result, dict) assert all(key in result for key in expr.__dask_keys__()) def test_slicing_with_non_ndarrays(): class ARangeSlice(object): def __init__(self, start, stop): self.start = start self.stop = stop def __array__(self): return np.arange(self.start, self.stop) class ARangeSlicable(object): dtype = 'i8' def __init__(self, n): self.n = n @property def shape(self): return (self.n,) def __getitem__(self, key): return ARangeSlice(key[0].start, key[0].stop) x = da.from_array(ARangeSlicable(10), chunks=(4,)) assert_eq((x + 1).sum(), (np.arange(10, dtype=x.dtype) + 1).sum()) def test_getter(): assert type(getter(np.matrix([[1]]), 0)) is np.ndarray assert type(getter(np.matrix([[1]]), 0, asarray=False)) is np.matrix assert_eq(getter([1, 2, 3, 4, 5], slice(1, 4)), np.array([2, 3, 4])) assert_eq(getter(np.arange(5), (None, slice(None, None))), np.arange(5)[None, :]) def test_size(): x = da.ones((10, 2), chunks=(3, 1)) assert x.size == np.array(x).size assert isinstance(x.size, int) def test_nbytes(): x = da.ones((10, 2), chunks=(3, 1)) assert x.nbytes == np.array(x).nbytes def test_itemsize(): x = da.ones((10, 2), chunks=(3, 1)) assert x.itemsize == 8 def test_Array_normalizes_dtype(): x = da.ones((3,), chunks=(1,), dtype=int) assert isinstance(x.dtype, np.dtype) def test_from_array_with_lock(): x = np.arange(10) d = da.from_array(x, chunks=5, lock=True) tasks = [v for k, v in d.dask.items() if k[0] == d.name] assert hasattr(tasks[0][4], 'acquire') assert len(set(task[4] for task in tasks)) == 1 assert_eq(d, x) lock = Lock() e = da.from_array(x, chunks=5, lock=lock) f = da.from_array(x, chunks=5, lock=lock) assert_eq(e + f, x + x) class MyArray(object): def __init__(self, x): self.x = x self.dtype = x.dtype self.shape = x.shape def __getitem__(self, i): return self.x[i] def test_from_array_tasks_always_call_getter(): x1 = np.arange(25).reshape((5, 5)) x2 = np.array([[1]]) x3 = np.array(1)[()] dx1a = da.from_array(MyArray(x1), chunks=(5, 5), asarray=False) dx1b = da.from_array(MyArray(x1), chunks=x1.shape, asarray=False) dx2 = da.from_array(MyArray(x2), chunks=1, asarray=False) dx3 = da.from_array(MyArray(x3), chunks=1, asarray=False) for res, sol in [(dx1a, x1), (dx1b, x1), (dx2, x2), (dx3, x3)]: assert_eq(res, sol) def test_from_array_no_asarray(): def assert_chunks_are_of_type(x, cls): chunks = compute_as_if_collection(Array, x.dask, x.__dask_keys__()) for c in concat(chunks): assert type(c) is cls x = np.matrix(np.arange(100).reshape((10, 10))) for 
asarray, cls in [(True, np.ndarray), (False, np.matrix)]: dx = da.from_array(x, chunks=(5, 5), asarray=asarray) assert_chunks_are_of_type(dx, cls) assert_chunks_are_of_type(dx[0:5], cls) assert_chunks_are_of_type(dx[0:5][:, 0], cls) def test_from_array_getitem(): x = np.arange(10) def my_getitem(x, ind): return x[ind] y = da.from_array(x, chunks=(5,), getitem=my_getitem) for k, v in y.dask.items(): if isinstance(v, tuple): assert v[0] is my_getitem assert_eq(x, y) def test_from_array_minus_one(): x = np.arange(10) y = da.from_array(x, -1) assert y.chunks == ((10,),) assert_eq(x, y) def test_asarray(): assert_eq(da.asarray([1, 2, 3]), np.asarray([1, 2, 3])) x = da.asarray([1, 2, 3]) assert da.asarray(x) is x def test_asarray_h5py(): h5py = pytest.importorskip('h5py') with tmpfile('.hdf5') as fn: with h5py.File(fn) as f: d = f.create_dataset('/x', shape=(2, 2), dtype=float) x = da.asarray(d) assert d in x.dask.values() assert not any(isinstance(v, np.ndarray) for v in x.dask.values()) def test_asanyarray(): x = np.matrix([1, 2, 3]) dx = da.asanyarray(x) assert dx.numblocks == (1, 1) chunks = compute_as_if_collection(Array, dx.dask, dx.__dask_keys__()) assert isinstance(chunks[0][0], np.matrix) assert da.asanyarray(dx) is dx def test_from_func(): x = np.arange(10) f = lambda n: n * x d = from_func(f, (10,), x.dtype, kwargs={'n': 2}) assert d.shape == x.shape assert d.dtype == x.dtype assert_eq(d.compute(), 2 * x) assert same_keys(d, from_func(f, (10,), x.dtype, kwargs={'n': 2})) def test_concatenate3_2(): x = np.array([1, 2]) assert_eq(concatenate3([x, x, x]), np.array([1, 2, 1, 2, 1, 2])) x = np.array([[1, 2]]) assert (concatenate3([[x, x, x], [x, x, x]]) == np.array([[1, 2, 1, 2, 1, 2], [1, 2, 1, 2, 1, 2]])).all() assert (concatenate3([[x, x], [x, x], [x, x]]) == np.array([[1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2]])).all() x = np.arange(12).reshape((2, 2, 3)) assert_eq(concatenate3([[[x, x, x], [x, x, x]], [[x, x, x], [x, x, x]]]), np.array([[[ 0, 1, 2, 0, 1, 2, 0, 1, 2], [ 3, 4, 5, 3, 4, 5, 3, 4, 5], [ 0, 1, 2, 0, 1, 2, 0, 1, 2], [ 3, 4, 5, 3, 4, 5, 3, 4, 5]], [[ 6, 7, 8, 6, 7, 8, 6, 7, 8], [ 9, 10, 11, 9, 10, 11, 9, 10, 11], [ 6, 7, 8, 6, 7, 8, 6, 7, 8], [ 9, 10, 11, 9, 10, 11, 9, 10, 11]], [[ 0, 1, 2, 0, 1, 2, 0, 1, 2], [ 3, 4, 5, 3, 4, 5, 3, 4, 5], [ 0, 1, 2, 0, 1, 2, 0, 1, 2], [ 3, 4, 5, 3, 4, 5, 3, 4, 5]], [[ 6, 7, 8, 6, 7, 8, 6, 7, 8], [ 9, 10, 11, 9, 10, 11, 9, 10, 11], [ 6, 7, 8, 6, 7, 8, 6, 7, 8], [ 9, 10, 11, 9, 10, 11, 9, 10, 11]]])) def test_map_blocks3(): x = np.arange(10) y = np.arange(10) * 2 d = da.from_array(x, chunks=5) e = da.from_array(y, chunks=5) assert_eq(da.core.map_blocks(lambda a, b: a + 2 * b, d, e, dtype=d.dtype), x + 2 * y) z = np.arange(100).reshape((10, 10)) f = da.from_array(z, chunks=5) func = lambda a, b: a + 2 * b res = da.core.map_blocks(func, d, f, dtype=d.dtype) assert_eq(res, x + 2 * z) assert same_keys(da.core.map_blocks(func, d, f, dtype=d.dtype), res) assert_eq(da.map_blocks(func, f, d, dtype=d.dtype), z + 2 * x) def test_from_array_with_missing_chunks(): x = np.random.randn(2, 4, 3) d = da.from_array(x, chunks=(None, 2, None)) assert d.chunks == da.from_array(x, chunks=(2, 2, 3)).chunks def test_normalize_chunks(): assert normalize_chunks(3, (4, 6)) == ((3, 1), (3, 3)) def test_raise_on_no_chunks(): x = da.ones(6, chunks=3) try: Array(x.dask, x.name, chunks=None, dtype=x.dtype, shape=None) assert False except ValueError as e: assert "dask.pydata.org" in str(e) pytest.raises(ValueError, lambda: da.ones(6)) def test_chunks_is_immutable(): x = 
da.ones(6, chunks=3) try: x.chunks = 2 assert False except TypeError as e: assert 'rechunk(2)' in str(e) def test_raise_on_bad_kwargs(): x = da.ones(5, chunks=3) try: da.minimum(x, foo=None) except TypeError as e: assert 'minimum' in str(e) assert 'foo' in str(e) def test_long_slice(): x = np.arange(10000) d = da.from_array(x, chunks=1) assert_eq(d[8000:8200], x[8000:8200]) def test_h5py_newaxis(): h5py = pytest.importorskip('h5py') with tmpfile('h5') as fn: with h5py.File(fn) as f: x = f.create_dataset('/x', shape=(10, 10), dtype='f8') d = da.from_array(x, chunks=(5, 5)) assert d[None, :, :].compute(get=get_sync).shape == (1, 10, 10) assert d[:, None, :].compute(get=get_sync).shape == (10, 1, 10) assert d[:, :, None].compute(get=get_sync).shape == (10, 10, 1) assert same_keys(d[:, :, None], d[:, :, None]) def test_ellipsis_slicing(): assert_eq(da.ones(4, chunks=2)[...], np.ones(4)) def test_point_slicing(): x = np.arange(56).reshape((7, 8)) d = da.from_array(x, chunks=(3, 4)) result = d.vindex[[1, 2, 5, 5], [3, 1, 6, 1]] assert_eq(result, x[[1, 2, 5, 5], [3, 1, 6, 1]]) result = d.vindex[[0, 1, 6, 0], [0, 1, 0, 7]] assert_eq(result, x[[0, 1, 6, 0], [0, 1, 0, 7]]) assert same_keys(result, d.vindex[[0, 1, 6, 0], [0, 1, 0, 7]]) def test_point_slicing_with_full_slice(): from dask.array.core import _vindex_transpose, _get_axis x = np.arange(4 * 5 * 6 * 7).reshape((4, 5, 6, 7)) d = da.from_array(x, chunks=(2, 3, 3, 4)) inds = [[[1, 2, 3], None, [3, 2, 1], [5, 3, 4]], [[1, 2, 3], None, [4, 3, 2], None], [[1, 2, 3], [3, 2, 1]], [[1, 2, 3], [3, 2, 1], [3, 2, 1], [5, 3, 4]], [[], [], [], None], [np.array([1, 2, 3]), None, np.array([4, 3, 2]), None], [None, None, [1, 2, 3], [4, 3, 2]], [None, [0, 2, 3], None, [0, 3, 2]]] for ind in inds: slc = [i if isinstance(i, (np.ndarray, list)) else slice(None, None) for i in ind] result = d.vindex[tuple(slc)] # Rotate the expected result accordingly axis = _get_axis(ind) expected = _vindex_transpose(x[tuple(slc)], axis) assert_eq(result, expected) # Always have the first axis be the length of the points k = len(next(i for i in ind if isinstance(i, (np.ndarray, list)))) assert result.shape[0] == k def test_slice_with_floats(): d = da.ones((5,), chunks=(3,)) with pytest.raises(IndexError): d[1.5] with pytest.raises(IndexError): d[0:1.5] with pytest.raises(IndexError): d[[1, 1.5]] def test_slice_with_integer_types(): x = np.arange(10) dx = da.from_array(x, chunks=5) inds = np.array([0, 3, 6], dtype='u8') assert_eq(dx[inds], x[inds]) assert_eq(dx[inds.astype('u4')], x[inds.astype('u4')]) inds = np.array([0, 3, 6], dtype=np.int64) assert_eq(dx[inds], x[inds]) assert_eq(dx[inds.astype('u4')], x[inds.astype('u4')]) def test_index_with_integer_types(): x = np.arange(10) dx = da.from_array(x, chunks=5) inds = int(3) assert_eq(dx[inds], x[inds]) inds = np.int64(3) assert_eq(dx[inds], x[inds]) def test_vindex_basic(): x = np.arange(56).reshape((7, 8)) d = da.from_array(x, chunks=(3, 4)) # cases where basic and advanced indexing coincide result = d.vindex[0] assert_eq(result, x[0]) result = d.vindex[0, 1] assert_eq(result, x[0, 1]) result = d.vindex[[0, 1], ::-1] # slices last assert_eq(result, x[:2, ::-1]) def test_vindex_nd(): x = np.arange(56).reshape((7, 8)) d = da.from_array(x, chunks=(3, 4)) result = d.vindex[[[0, 1], [6, 0]], [[0, 1], [0, 7]]] assert_eq(result, x[[[0, 1], [6, 0]], [[0, 1], [0, 7]]]) result = d.vindex[np.arange(7)[:, None], np.arange(8)[None, :]] assert_eq(result, x) result = d.vindex[np.arange(7)[None, :], np.arange(8)[:, None]] assert_eq(result, 
x.T) def test_vindex_errors(): d = da.ones((5, 5, 5), chunks=(3, 3, 3)) pytest.raises(IndexError, lambda: d.vindex[np.newaxis]) pytest.raises(IndexError, lambda: d.vindex[:5]) pytest.raises(IndexError, lambda: d.vindex[[1, 2], [1, 2, 3]]) pytest.raises(IndexError, lambda: d.vindex[[True] * 5]) def test_vindex_merge(): from dask.array.core import _vindex_merge locations = [1], [2, 0] values = [np.array([[1, 2, 3]]), np.array([[10, 20, 30], [40, 50, 60]])] assert (_vindex_merge(locations, values) == np.array([[40, 50, 60], [1, 2, 3], [10, 20, 30]])).all() def test_empty_array(): assert_eq(np.arange(0), da.arange(0, chunks=5)) def test_memmap(): with tmpfile('npy') as fn_1: with tmpfile('npy') as fn_2: try: x = da.arange(100, chunks=15) target = np.memmap(fn_1, shape=x.shape, mode='w+', dtype=x.dtype) x.store(target) assert_eq(target, x) np.save(fn_2, target) assert_eq(np.load(fn_2, mmap_mode='r'), x) finally: target._mmap.close() def test_to_npy_stack(): x = np.arange(5 * 10 * 10).reshape((5, 10, 10)) d = da.from_array(x, chunks=(2, 4, 4)) with tmpdir() as dirname: stackdir = os.path.join(dirname, 'test') da.to_npy_stack(stackdir, d, axis=0) assert os.path.exists(os.path.join(stackdir, '0.npy')) assert (np.load(os.path.join(stackdir, '1.npy')) == x[2:4]).all() e = da.from_npy_stack(stackdir) assert_eq(d, e) def test_view(): x = np.arange(56).reshape((7, 8)) d = da.from_array(x, chunks=(2, 3)) assert_eq(x.view('i4'), d.view('i4')) assert_eq(x.view('i2'), d.view('i2')) assert all(isinstance(s, int) for s in d.shape) x = np.arange(8, dtype='i1') d = da.from_array(x, chunks=(4,)) assert_eq(x.view('i4'), d.view('i4')) with pytest.raises(ValueError): x = np.arange(8, dtype='i1') d = da.from_array(x, chunks=(3,)) d.view('i4') with pytest.raises(ValueError): d.view('i4', order='asdf') def test_view_fortran(): x = np.asfortranarray(np.arange(64).reshape((8, 8))) d = da.from_array(x, chunks=(2, 3)) assert_eq(x.T.view('i4').T, d.view('i4', order='F')) assert_eq(x.T.view('i2').T, d.view('i2', order='F')) def test_h5py_tokenize(): h5py = pytest.importorskip('h5py') with tmpfile('hdf5') as fn1: with tmpfile('hdf5') as fn2: f = h5py.File(fn1) g = h5py.File(fn2) f['x'] = np.arange(10).astype(float) g['x'] = np.ones(10).astype(float) x1 = f['x'] x2 = g['x'] assert tokenize(x1) != tokenize(x2) def test_map_blocks_with_changed_dimension(): x = np.arange(56).reshape((7, 8)) d = da.from_array(x, chunks=(7, 4)) e = d.map_blocks(lambda b: b.sum(axis=0), chunks=(4,), drop_axis=0, dtype=d.dtype) assert e.chunks == ((4, 4),) assert_eq(e, x.sum(axis=0)) # Provided chunks have wrong shape with pytest.raises(ValueError): d.map_blocks(lambda b: b.sum(axis=0), chunks=(7, 4), drop_axis=0) with pytest.raises(ValueError): d.map_blocks(lambda b: b.sum(axis=0), chunks=((4, 4, 4),), drop_axis=0) # Can't drop axis with more than 1 block with pytest.raises(ValueError): d.map_blocks(lambda b: b.sum(axis=1), drop_axis=1, dtype=d.dtype) # Adding axis with a gap with pytest.raises(ValueError): d.map_blocks(lambda b: b, new_axis=(3, 4)) d = da.from_array(x, chunks=(4, 8)) e = d.map_blocks(lambda b: b.sum(axis=1), drop_axis=1, dtype=d.dtype) assert e.chunks == ((4, 3),) assert_eq(e, x.sum(axis=1)) x = np.arange(64).reshape((8, 8)) d = da.from_array(x, chunks=(4, 4)) e = d.map_blocks(lambda b: b[None, :, :, None], chunks=(1, 4, 4, 1), new_axis=[0, 3], dtype=d.dtype) assert e.chunks == ((1,), (4, 4), (4, 4), (1,)) assert_eq(e, x[None, :, :, None]) e = d.map_blocks(lambda b: b[None, :, :, None], new_axis=[0, 3], dtype=d.dtype) assert 
e.chunks == ((1,), (4, 4), (4, 4), (1,)) assert_eq(e, x[None, :, :, None]) # Both new_axis and drop_axis d = da.from_array(x, chunks=(8, 4)) e = d.map_blocks(lambda b: b.sum(axis=0)[:, None, None], drop_axis=0, new_axis=(1, 2), dtype=d.dtype) assert e.chunks == ((4, 4), (1,), (1,)) assert_eq(e, x.sum(axis=0)[:, None, None]) d = da.from_array(x, chunks=(4, 8)) e = d.map_blocks(lambda b: b.sum(axis=1)[:, None, None], drop_axis=1, new_axis=(1, 2), dtype=d.dtype) assert e.chunks == ((4, 4), (1,), (1,)) assert_eq(e, x.sum(axis=1)[:, None, None]) def test_broadcast_chunks(): assert broadcast_chunks(((5, 5),), ((5, 5),)) == ((5, 5),) a = ((10, 10, 10), (5, 5),) b = ((5, 5),) assert broadcast_chunks(a, b) == ((10, 10, 10), (5, 5),) assert broadcast_chunks(b, a) == ((10, 10, 10), (5, 5),) a = ((10, 10, 10), (5, 5),) b = ((1,), (5, 5),) assert broadcast_chunks(a, b) == ((10, 10, 10), (5, 5),) a = ((10, 10, 10), (5, 5),) b = ((3, 3,), (5, 5),) with pytest.raises(ValueError): broadcast_chunks(a, b) a = ((1,), (5, 5),) b = ((1,), (5, 5),) assert broadcast_chunks(a, b) == a def test_chunks_error(): x = np.ones((10, 10)) with pytest.raises(ValueError): da.from_array(x, chunks=(5,)) def test_array_compute_forward_kwargs(): x = da.arange(10, chunks=2).sum() x.compute(bogus_keyword=10) def test_dont_fuse_outputs(): dsk = {('x', 0): np.array([1, 2]), ('x', 1): (inc, ('x', 0))} a = da.Array(dsk, 'x', chunks=(2,), shape=(4,), dtype=np.array([1]).dtype) assert_eq(a, np.array([1, 2, 2, 3], dtype=a.dtype)) def test_dont_dealias_outputs(): dsk = {('x', 0, 0): np.ones((2, 2)), ('x', 0, 1): np.ones((2, 2)), ('x', 1, 0): np.ones((2, 2)), ('x', 1, 1): ('x', 0, 0)} a = da.Array(dsk, 'x', chunks=(2, 2), shape=(4, 4), dtype=np.ones(1).dtype) assert_eq(a, np.ones((4, 4))) def test_timedelta_op(): x = np.array([np.timedelta64(10, 'h')]) y = np.timedelta64(1, 'h') a = da.from_array(x, chunks=(1,)) / y assert a.compute() == x / y def test_to_delayed(): x = da.random.random((4, 4), chunks=(2, 2)) y = x + 10 [[a, b], [c, d]] = y.to_delayed() assert_eq(a.compute(), y[:2, :2]) s = 2 x = da.from_array(np.array(s), chunks=0) a = x.to_delayed()[tuple()] assert a.compute() == s def test_to_delayed_optimizes(): x = da.ones((4, 4), chunks=(2, 2)) y = x[1:][1:][1:][:, 1:][:, 1:][:, 1:] d = y.to_delayed().flatten().tolist()[0] assert len([k for k in d.dask if k[0].startswith('getitem')]) == 1 def test_cumulative(): x = da.arange(20, chunks=5) assert_eq(x.cumsum(axis=0), np.arange(20).cumsum()) assert_eq(x.cumprod(axis=0), np.arange(20).cumprod()) assert_eq(da.nancumsum(x, axis=0), nancumsum(np.arange(20))) assert_eq(da.nancumprod(x, axis=0), nancumprod(np.arange(20))) a = np.random.random((20)) rs = np.random.RandomState(0) a[rs.rand(*a.shape) < 0.5] = np.nan x = da.from_array(a, chunks=5) assert_eq(da.nancumsum(x, axis=0), nancumsum(a)) assert_eq(da.nancumprod(x, axis=0), nancumprod(a)) a = np.random.random((20, 24)) x = da.from_array(a, chunks=(6, 5)) assert_eq(x.cumsum(axis=0), a.cumsum(axis=0)) assert_eq(x.cumsum(axis=1), a.cumsum(axis=1)) assert_eq(x.cumprod(axis=0), a.cumprod(axis=0)) assert_eq(x.cumprod(axis=1), a.cumprod(axis=1)) assert_eq(da.nancumsum(x, axis=0), nancumsum(a, axis=0)) assert_eq(da.nancumsum(x, axis=1), nancumsum(a, axis=1)) assert_eq(da.nancumprod(x, axis=0), nancumprod(a, axis=0)) assert_eq(da.nancumprod(x, axis=1), nancumprod(a, axis=1)) a = np.random.random((20, 24)) rs = np.random.RandomState(0) a[rs.rand(*a.shape) < 0.5] = np.nan x = da.from_array(a, chunks=(6, 5)) assert_eq(da.nancumsum(x, axis=0), 
nancumsum(a, axis=0)) assert_eq(da.nancumsum(x, axis=1), nancumsum(a, axis=1)) assert_eq(da.nancumprod(x, axis=0), nancumprod(a, axis=0)) assert_eq(da.nancumprod(x, axis=1), nancumprod(a, axis=1)) a = np.random.random((20, 24, 13)) x = da.from_array(a, chunks=(6, 5, 4)) for axis in [0, 1, 2, -1, -2, -3]: assert_eq(x.cumsum(axis=axis), a.cumsum(axis=axis)) assert_eq(x.cumprod(axis=axis), a.cumprod(axis=axis)) assert_eq(da.nancumsum(x, axis=axis), nancumsum(a, axis=axis)) assert_eq(da.nancumprod(x, axis=axis), nancumprod(a, axis=axis)) a = np.random.random((20, 24, 13)) rs = np.random.RandomState(0) a[rs.rand(*a.shape) < 0.5] = np.nan x = da.from_array(a, chunks=(6, 5, 4)) for axis in [0, 1, 2, -1, -2, -3]: assert_eq(da.nancumsum(x, axis=axis), nancumsum(a, axis=axis)) assert_eq(da.nancumprod(x, axis=axis), nancumprod(a, axis=axis)) with pytest.raises(ValueError): x.cumsum(axis=3) with pytest.raises(ValueError): x.cumsum(axis=-4) def test_atop_names(): x = da.ones(5, chunks=(2,)) y = atop(add, 'i', x, 'i', dtype=x.dtype) assert y.name.startswith('add') def test_atop_new_axes(): def f(x): return x[:, None] * np.ones((1, 7)) x = da.ones(5, chunks=2) y = atop(f, 'aq', x, 'a', new_axes={'q': 7}, concatenate=True, dtype=x.dtype) assert y.chunks == ((2, 2, 1), (7,)) assert_eq(y, np.ones((5, 7))) def f(x): return x[None, :] * np.ones((7, 1)) x = da.ones(5, chunks=2) y = atop(f, 'qa', x, 'a', new_axes={'q': 7}, concatenate=True, dtype=x.dtype) assert y.chunks == ((7,), (2, 2, 1)) assert_eq(y, np.ones((7, 5))) def f(x): y = x.sum(axis=1) return y[:, None] * np.ones((1, 5)) x = da.ones((4, 6), chunks=(2, 2)) y = atop(f, 'aq', x, 'ab', new_axes={'q': 5}, concatenate=True, dtype=x.dtype) assert y.chunks == ((2, 2), (5,)) assert_eq(y, np.ones((4, 5)) * 6) def test_atop_kwargs(): def f(a, b=0): return a + b x = da.ones(5, chunks=(2,)) y = atop(f, 'i', x, 'i', b=10, dtype=x.dtype) assert_eq(y, np.ones(5) + 10) def test_atop_chunks(): x = da.ones((5, 5), chunks=((2, 1, 2), (3, 2))) def double(a, axis=0): return np.concatenate([a, a], axis=axis) y = atop(double, 'ij', x, 'ij', adjust_chunks={'i': lambda n: 2 * n}, axis=0, dtype=x.dtype) assert y.chunks == ((4, 2, 4), (3, 2)) assert_eq(y, np.ones((10, 5))) y = atop(double, 'ij', x, 'ij', adjust_chunks={'j': lambda n: 2 * n}, axis=1, dtype=x.dtype) assert y.chunks == ((2, 1, 2), (6, 4)) assert_eq(y, np.ones((5, 10))) x = da.ones((10, 10), chunks=(5, 5)) y = atop(double, 'ij', x, 'ij', axis=0, adjust_chunks={'i': 10}, dtype=x.dtype) assert y.chunks == ((10, 10), (5, 5)) assert_eq(y, np.ones((20, 10))) y = atop(double, 'ij', x, 'ij', axis=0, adjust_chunks={'i': (10, 10)}, dtype=x.dtype) assert y.chunks == ((10, 10), (5, 5)) assert_eq(y, np.ones((20, 10))) def test_from_delayed(): v = delayed(np.ones)((5, 3)) x = from_delayed(v, shape=(5, 3), dtype=np.ones(0).dtype) assert isinstance(x, Array) assert_eq(x, np.ones((5, 3))) def test_A_property(): x = da.ones(5, chunks=(2,)) assert x.A is x def test_copy_mutate(): x = da.arange(5, chunks=(2,)) y = x.copy() memo = {} y2 = copy.deepcopy(x, memo=memo) x[x % 2 == 0] = -1 xx = np.arange(5) xx[xx % 2 == 0] = -1 assert_eq(x, xx) assert_eq(y, np.arange(5)) assert_eq(y2, np.arange(5)) assert memo[id(x)] is y2 def test_npartitions(): assert da.ones(5, chunks=(2,)).npartitions == 3 assert da.ones((5, 5), chunks=(2, 3)).npartitions == 6 def test_astype_gh1151(): a = np.arange(5).astype(np.int32) b = da.from_array(a, (1,)) assert_eq(a.astype(np.int16), b.astype(np.int16)) def test_elemwise_name(): assert (da.ones(5, chunks=2) + 
1).name.startswith('add-') def test_map_blocks_name(): assert da.ones(5, chunks=2).map_blocks(inc).name.startswith('inc-') def test_from_array_names(): pytest.importorskip('distributed') from distributed.utils import key_split x = np.ones(10) d = da.from_array(x, chunks=2) names = countby(key_split, d.dask) assert set(names.values()) == set([1, 5]) def test_array_picklable(): from pickle import loads, dumps a = da.arange(100, chunks=25) a2 = loads(dumps(a)) assert_eq(a, a2) def test_from_array_raises_on_bad_chunks(): x = np.ones(10) with pytest.raises(ValueError): da.from_array(x, chunks=(5, 5, 5)) # with pytest.raises(ValueError): # da.from_array(x, chunks=100) with pytest.raises(ValueError): da.from_array(x, chunks=((5, 5, 5),)) def test_concatenate_axes(): x = np.ones((2, 2, 2)) assert_eq(concatenate_axes([x, x], axes=[0]), np.ones((4, 2, 2))) assert_eq(concatenate_axes([x, x, x], axes=[0]), np.ones((6, 2, 2))) assert_eq(concatenate_axes([x, x], axes=[1]), np.ones((2, 4, 2))) assert_eq(concatenate_axes([[x, x], [x, x]], axes=[0, 1]), np.ones((4, 4, 2))) assert_eq(concatenate_axes([[x, x], [x, x]], axes=[0, 2]), np.ones((4, 2, 4))) assert_eq(concatenate_axes([[x, x, x], [x, x, x]], axes=[1, 2]), np.ones((2, 4, 6))) with pytest.raises(ValueError): concatenate_axes([[x, x], [x, x]], axes=[0]) # not all nested lists accounted for with pytest.raises(ValueError): concatenate_axes([x, x], axes=[0, 1, 2, 3]) # too many axes def test_atop_concatenate(): x = da.ones((4, 4, 4), chunks=(2, 2, 2)) y = da.ones((4, 4), chunks=(2, 2)) def f(a, b): assert isinstance(a, np.ndarray) assert isinstance(b, np.ndarray) assert a.shape == (2, 4, 4) assert b.shape == (4, 4) return (a + b).sum(axis=(1, 2)) z = atop(f, 'i', x, 'ijk', y, 'jk', concatenate=True, dtype=x.dtype) assert_eq(z, np.ones(4) * 32) z = atop(add, 'ij', y, 'ij', y, 'ij', concatenate=True, dtype=x.dtype) assert_eq(z, np.ones((4, 4)) * 2) def f(a, b, c): assert isinstance(a, np.ndarray) assert isinstance(b, np.ndarray) assert isinstance(c, np.ndarray) assert a.shape == (4, 2, 4) assert b.shape == (4, 4) assert c.shape == (4, 2) return np.ones(5) z = atop(f, 'j', x, 'ijk', y, 'ki', y, 'ij', concatenate=True, dtype=x.dtype) assert_eq(z, np.ones(10), check_shape=False) def test_common_blockdim(): assert common_blockdim([(5,), (5,)]) == (5,) assert common_blockdim([(5,), (2, 3,)]) == (2, 3) assert common_blockdim([(5, 5), (2, 3, 5)]) == (2, 3, 5) assert common_blockdim([(5, 5), (2, 3, 5)]) == (2, 3, 5) assert common_blockdim([(5, 2, 3), (2, 3, 5)]) == (2, 3, 2, 3) assert common_blockdim([(1, 2), (2, 1)]) == (1, 1, 1) assert common_blockdim([(1, 2, 2), (2, 1, 2), (2, 2, 1)]) == (1, 1, 1, 1, 1) def test_uneven_chunks_that_fit_neatly(): x = da.arange(10, chunks=((5, 5),)) y = da.ones(10, chunks=((5, 2, 3),)) assert_eq(x + y, np.arange(10) + np.ones(10)) z = x + y assert z.chunks == ((5, 2, 3),) def test_elemwise_uneven_chunks(): x = da.arange(10, chunks=((4, 6),)) y = da.ones(10, chunks=((6, 4),)) assert_eq(x + y, np.arange(10) + np.ones(10)) z = x + y assert z.chunks == ((4, 2, 4),) x = da.random.random((10, 10), chunks=((4, 6), (5, 2, 3))) y = da.random.random((4, 10, 10), chunks=((2, 2), (6, 4), (2, 3, 5))) z = x + y assert_eq(x + y, x.compute() + y.compute()) assert z.chunks == ((2, 2), (4, 2, 4), (2, 3, 2, 3)) def test_uneven_chunks_atop(): x = da.random.random((10, 10), chunks=((2, 3, 2, 3), (5, 5))) y = da.random.random((10, 10), chunks=((4, 4, 2), (4, 2, 4))) z = atop(np.dot, 'ik', x, 'ij', y, 'jk', dtype=x.dtype, concatenate=True) assert 
z.chunks == (x.chunks[0], y.chunks[1]) assert_eq(z, x.compute().dot(y)) def test_warn_bad_rechunking(): x = da.ones((20, 20), chunks=(20, 1)) y = da.ones((20, 20), chunks=(1, 20)) with warnings.catch_warnings(record=True) as record: x + y assert record assert '20' in record[0].message.args[0] def test_optimize_fuse_keys(): x = da.ones(10, chunks=(5,)) y = x + 1 z = y + 1 dsk = z.__dask_optimize__(z.dask, z.__dask_keys__()) assert not set(y.dask) & set(dsk) dsk = z.__dask_optimize__(z.dask, z.__dask_keys__(), fuse_keys=y.__dask_keys__()) assert all(k in dsk for k in y.__dask_keys__()) def test_concatenate_stack_dont_warn(): with warnings.catch_warnings(record=True) as record: da.concatenate([da.ones(2, chunks=1)] * 62) assert not record with warnings.catch_warnings(record=True) as record: da.stack([da.ones(2, chunks=1)] * 62) assert not record def test_map_blocks_delayed(): x = da.ones((10, 10), chunks=(5, 5)) y = np.ones((5, 5)) z = x.map_blocks(add, y, dtype=x.dtype) yy = delayed(y) zz = x.map_blocks(add, yy, dtype=x.dtype) assert_eq(z, zz) assert yy.key in zz.dask def test_no_chunks(): X = np.arange(11) dsk = {('x', 0): np.arange(5), ('x', 1): np.arange(5, 11)} x = Array(dsk, 'x', ((np.nan, np.nan,),), np.arange(1).dtype) assert_eq(x + 1, X + 1) assert_eq(x.sum(), X.sum()) assert_eq((x + 1).std(), (X + 1).std()) assert_eq((x + x).std(), (X + X).std()) assert_eq((x + x).std(keepdims=True), (X + X).std(keepdims=True)) def test_no_chunks_2d(): X = np.arange(24).reshape((4, 6)) x = da.from_array(X, chunks=(2, 2)) x._chunks = ((np.nan, np.nan), (np.nan, np.nan, np.nan)) with pytest.warns(None): # zero division warning assert_eq(da.log(x), np.log(X)) assert_eq(x.T, X.T) assert_eq(x.sum(axis=0, keepdims=True), X.sum(axis=0, keepdims=True)) assert_eq(x.sum(axis=1, keepdims=True), X.sum(axis=1, keepdims=True)) assert_eq(x.dot(x.T + 1), X.dot(X.T + 1)) def test_no_chunks_yes_chunks(): X = np.arange(24).reshape((4, 6)) x = da.from_array(X, chunks=(2, 2)) x._chunks = ((2, 2), (np.nan, np.nan, np.nan)) assert (x + 1).chunks == ((2, 2), (np.nan, np.nan, np.nan)) assert (x.T).chunks == ((np.nan, np.nan, np.nan), (2, 2)) assert (x.dot(x.T)).chunks == ((2, 2), (2, 2)) def test_raise_informative_errors_no_chunks(): X = np.arange(10) a = da.from_array(X, chunks=(5, 5)) a._chunks = ((np.nan, np.nan),) b = da.from_array(X, chunks=(4, 4, 2)) b._chunks = ((np.nan, np.nan, np.nan),) for op in [lambda: a + b, lambda: a[1], lambda: a[::2], lambda: a[-5], lambda: a.rechunk(3), lambda: a.reshape(2, 5)]: with pytest.raises(ValueError) as e: op() if 'chunk' not in str(e) or 'unknown' not in str(e): op() def test_no_chunks_slicing_2d(): X = np.arange(24).reshape((4, 6)) x = da.from_array(X, chunks=(2, 2)) x._chunks = ((2, 2), (np.nan, np.nan, np.nan)) assert_eq(x[0], X[0]) for op in [lambda: x[:, 4], lambda: x[:, ::2], lambda: x[0, 2:4]]: with pytest.raises(ValueError) as e: op() assert 'chunk' in str(e) and 'unknown' in str(e) def test_index_array_with_array_1d(): x = np.arange(10) dx = da.from_array(x, chunks=(5,)) dx._chunks = ((np.nan, np.nan),) assert_eq(x[x > 6], dx[dx > 6]) assert_eq(x[x % 2 == 0], dx[dx % 2 == 0]) dy = da.ones(11, chunks=(3,)) with pytest.raises(ValueError): dx[dy > 5] def test_index_array_with_array_2d(): x = np.arange(24).reshape((4, 6)) dx = da.from_array(x, chunks=(2, 2)) dx._chunks = ((2, 2), (np.nan, np.nan, np.nan)) assert (sorted(x[x % 2 == 0].tolist()) == sorted(dx[dx % 2 == 0].compute().tolist())) assert (sorted(x[x > 6].tolist()) == sorted(dx[dx > 6].compute().tolist())) 
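# A minimal, self-contained sketch of the pattern the indexing tests above
# exercise: selecting elements of a dask array with a lazily computed boolean
# mask.  The helper name is illustrative only (no test_ prefix, so pytest does
# not collect it) and it reuses the module-level np, da and assert_eq imports.
def _boolean_mask_indexing_sketch():
    x = np.arange(10)
    dx = da.from_array(x, chunks=5)
    # The mask is itself a dask array, so the number of selected elements,
    # and with it the output chunk sizes, is only known once computed.
    masked = dx[dx > 6]
    assert_eq(masked, x[x > 6])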
@pytest.mark.xfail(reason='Chunking does not align well') def test_index_array_with_array_3d_2d(): x = np.arange(4**3).reshape((4, 4, 4)) dx = da.from_array(x, chunks=(2, 2, 2)) ind = np.random.random((4, 4)) > 0.5 ind = np.arange(4 ** 2).reshape((4, 4)) % 2 == 0 dind = da.from_array(ind, (2, 2)) assert_eq(x[ind], dx[dind]) assert_eq(x[:, ind], dx[:, dind]) def test_setitem_1d(): x = np.arange(10) dx = da.from_array(x.copy(), chunks=(5,)) x[x > 6] = -1 x[x % 2 == 0] = -2 dx[dx > 6] = -1 dx[dx % 2 == 0] = -2 assert_eq(x, dx) def test_setitem_2d(): x = np.arange(24).reshape((4, 6)) dx = da.from_array(x.copy(), chunks=(2, 2)) x[x > 6] = -1 x[x % 2 == 0] = -2 dx[dx > 6] = -1 dx[dx % 2 == 0] = -2 assert_eq(x, dx) @pytest.mark.skipif(np.__version__ >= '1.13.0', reason='boolean slicing rules changed') def test_setitem_mixed_d(): x = np.arange(24).reshape((4, 6)) dx = da.from_array(x, chunks=(2, 2)) x[x[0, None] > 2] = -1 dx[dx[0, None] > 2] = -1 assert_eq(x, dx) x[x[None, 0] > 2] = -1 dx[dx[None, 0] > 2] = -1 assert_eq(x, dx) def test_setitem_errs(): x = da.ones((4, 4), chunks=(2, 2)) with pytest.raises(ValueError): x[x > 1] = x def test_zero_slice_dtypes(): x = da.arange(5, chunks=1) y = x[[]] assert y.dtype == x.dtype assert y.shape == (0,) assert_eq(x[[]], np.arange(5)[[]]) def test_zero_sized_array_rechunk(): x = da.arange(5, chunks=1)[:0] y = da.atop(identity, 'i', x, 'i', dtype=x.dtype) assert_eq(x, y) def test_atop_zero_shape(): da.atop(lambda x: x, 'i', da.arange(10, chunks=10), 'i', da.from_array(np.ones((0, 2)), ((0,), 2)), 'ab', da.from_array(np.ones((0,)), ((0,),)), 'a', dtype='float64') def test_atop_zero_shape_new_axes(): da.atop(lambda x: np.ones(42), 'i', da.from_array(np.ones((0, 2)), ((0,), 2)), 'ab', da.from_array(np.ones((0,)), ((0,),)), 'a', dtype='float64', new_axes={'i': 42}) def test_broadcast_against_zero_shape(): assert_eq(da.arange(1, chunks=1)[:0] + 0, np.arange(1)[:0] + 0) assert_eq(da.arange(1, chunks=1)[:0] + 0.1, np.arange(1)[:0] + 0.1) assert_eq(da.ones((5, 5), chunks=(2, 3))[:0] + 0, np.ones((5, 5))[:0] + 0) assert_eq(da.ones((5, 5), chunks=(2, 3))[:0] + 0.1, np.ones((5, 5))[:0] + 0.1) assert_eq(da.ones((5, 5), chunks=(2, 3))[:, :0] + 0, np.ones((5, 5))[:, :0] + 0) assert_eq(da.ones((5, 5), chunks=(2, 3))[:, :0] + 0.1, np.ones((5, 5))[:, :0] + 0.1) def test_from_array_name(): x = np.array([1, 2, 3, 4, 5]) chunks = x.shape # Default is tokenize the array dx = da.from_array(x, chunks=chunks) hashed_name = dx.name assert da.from_array(x, chunks=chunks).name == hashed_name # Specify name directly assert da.from_array(x, chunks=chunks, name='x').name == 'x' # False gives a random name dx2 = da.from_array(x, chunks=chunks, name=False) dx3 = da.from_array(x, chunks=chunks, name=False) assert dx2.name != hashed_name assert dx3.name != hashed_name assert dx2.name != dx3.name def test_concatenate_errs(): with pytest.raises(ValueError) as e: da.concatenate([da.zeros((2, 1), chunks=(2, 1)), da.zeros((2, 3), chunks=(2, 3))]) assert 'shape' in str(e).lower() assert '(2, 1)' in str(e) with pytest.raises(ValueError): da.concatenate([da.zeros((1, 2), chunks=(1, 2)), da.zeros((3, 2), chunks=(3, 2))], axis=1) def test_stack_errs(): with pytest.raises(ValueError) as e: da.stack([da.zeros((2), chunks=(2)), da.zeros((3), chunks=(3))]) assert 'shape' in str(e).lower() assert '(2,)' in str(e) def test_atop_with_numpy_arrays(): x = np.ones(10) y = da.ones(10, chunks=(5,)) assert_eq(x + y, x + x) s = da.sum(x) assert any(x is v for v in s.dask.values()) @pytest.mark.parametrize('chunks', 
(100, 6)) @pytest.mark.parametrize('other', [[0, 0, 1], [2, 1, 3], (0, 0, 1)]) def test_elemwise_with_lists(chunks, other): x = np.arange(12).reshape((4, 3)) d = da.arange(12, chunks=chunks).reshape((4, 3)) x2 = np.vstack([x[:, 0], x[:, 1], x[:, 2]]).T d2 = da.vstack([d[:, 0], d[:, 1], d[:, 2]]).T assert_eq(x2, d2) x3 = x2 * other d3 = d2 * other assert_eq(x3, d3) def test_constructor_plugin(): L = [] L2 = [] with dask.set_options(array_plugins=[L.append, L2.append]): x = da.ones(10, chunks=5) y = x + 1 assert L == L2 == [x, y] with dask.set_options(array_plugins=[lambda x: x.compute()]): x = da.ones(10, chunks=5) y = x + 1 assert isinstance(y, np.ndarray) assert len(L) == 2 def test_no_warnings_on_metadata(): x = da.ones(5, chunks=3) with warnings.catch_warnings(record=True) as record: da.arccos(x) assert not record def test_delayed_array_key_hygeine(): a = da.zeros((1,), chunks=(1,)) d = delayed(identity)(a) b = da.from_delayed(d, shape=a.shape, dtype=a.dtype) assert_eq(a, b) dask-0.16.0/dask/array/tests/test_chunk.py000066400000000000000000000056141320364734500204600ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import pytest pytest.importorskip('numpy') import numpy as np from dask.array.chunk import coarsen, keepdims_wrapper import dask.array as da def test_keepdims_wrapper_no_axis(): def summer(a, axis=None): return a.sum(axis=axis) summer_wrapped = keepdims_wrapper(summer) assert summer_wrapped != summer assert summer_wrapped == keepdims_wrapper(summer_wrapped) a = np.arange(24).reshape(1, 2, 3, 4) r = summer(a) rw = summer_wrapped(a, keepdims=True) rwf = summer_wrapped(a, keepdims=False) assert r.ndim == 0 assert r.shape == tuple() assert r == 276 assert rw.ndim == 4 assert rw.shape == (1, 1, 1, 1) assert (rw == 276).all() assert rwf.ndim == 0 assert rwf.shape == tuple() assert rwf == 276 def test_keepdims_wrapper_one_axis(): def summer(a, axis=None): return a.sum(axis=axis) summer_wrapped = keepdims_wrapper(summer) assert summer_wrapped != summer assert summer_wrapped == keepdims_wrapper(summer_wrapped) a = np.arange(24).reshape(1, 2, 3, 4) r = summer(a, axis=2) rw = summer_wrapped(a, axis=2, keepdims=True) rwf = summer_wrapped(a, axis=2, keepdims=False) assert r.ndim == 3 assert r.shape == (1, 2, 4) assert (r == np.array([[[12, 15, 18, 21], [48, 51, 54, 57]]])).all() assert rw.ndim == 4 assert rw.shape == (1, 2, 1, 4) assert (rw == np.array([[[[12, 15, 18, 21]], [[48, 51, 54, 57]]]])).all() assert rwf.ndim == 3 assert rwf.shape == (1, 2, 4) assert (rwf == np.array([[[12, 15, 18, 21], [48, 51, 54, 57]]])).all() def test_keepdims_wrapper_two_axes(): def summer(a, axis=None): return a.sum(axis=axis) summer_wrapped = keepdims_wrapper(summer) assert summer_wrapped != summer assert summer_wrapped == keepdims_wrapper(summer_wrapped) a = np.arange(24).reshape(1, 2, 3, 4) r = summer(a, axis=(1, 3)) rw = summer_wrapped(a, axis=(1, 3), keepdims=True) rwf = summer_wrapped(a, axis=(1, 3), keepdims=False) assert r.ndim == 2 assert r.shape == (1, 3) assert (r == np.array([[60, 92, 124]])).all() assert rw.ndim == 4 assert rw.shape == (1, 1, 3, 1) assert (rw == np.array([[[[60], [92], [124]]]])).all() assert rwf.ndim == 2 assert rwf.shape == (1, 3) assert (rwf == np.array([[60, 92, 124]])).all() def test_coarsen(): x = np.random.randint(10, size=(24, 24)) y = coarsen(np.sum, x, {0: 2, 1: 4}) assert y.shape == (12, 6) assert y[0, 0] == np.sum(x[:2, :4]) """ def test_coarsen_on_uneven_shape(): x = np.random.randint(10, size=(23, 24)) y = 
coarsen(np.sum, x, {0: 2, 1: 4}) assert y.shape == (12, 6) assert y[0, 0] == np.sum(x[:2, :4]) assert eq(y[11, :], x[23, :]) """ def test_integer_input(): assert da.zeros((4, 6), chunks=2).rechunk(3).chunks == ((3, 1), (3, 3)) dask-0.16.0/dask/array/tests/test_creation.py000066400000000000000000000231171320364734500211520ustar00rootroot00000000000000import pytest pytest.importorskip('numpy') import numpy as np import pytest from toolz import concat import dask.array as da from dask.array.utils import assert_eq, same_keys @pytest.mark.parametrize( "funcname", [ "empty_like", "ones_like", "zeros_like", "full_like", ] ) @pytest.mark.parametrize( "shape, chunks", [ ((10, 10), (4, 4)), ] ) @pytest.mark.parametrize( "dtype", [ "i4", ] ) def test_arr_like(funcname, shape, dtype, chunks): np_func = getattr(np, funcname) da_func = getattr(da, funcname) if funcname == "full_like": old_np_func = np_func old_da_func = da_func np_func = lambda *a, **k: old_np_func(*a, fill_value=5, **k) da_func = lambda *a, **k: old_da_func(*a, fill_value=5, **k) dtype = np.dtype(dtype) a = np.random.randint(0, 10, shape).astype(dtype) np_r = np_func(a) da_r = da_func(a, chunks=chunks) assert np_r.shape == da_r.shape assert np_r.dtype == da_r.dtype if funcname != "empty_like": assert (np_r == np.asarray(da_r)).all() def test_linspace(): darr = da.linspace(6, 49, chunks=5) nparr = np.linspace(6, 49) assert_eq(darr, nparr) darr = da.linspace(1.4, 4.9, chunks=5, num=13) nparr = np.linspace(1.4, 4.9, num=13) assert_eq(darr, nparr) darr = da.linspace(6, 49, chunks=5, dtype=float) nparr = np.linspace(6, 49, dtype=float) assert_eq(darr, nparr) darr = da.linspace(1.4, 4.9, chunks=5, num=13, dtype=int) nparr = np.linspace(1.4, 4.9, num=13, dtype=int) assert_eq(darr, nparr) assert (sorted(da.linspace(1.4, 4.9, chunks=5, num=13).dask) == sorted(da.linspace(1.4, 4.9, chunks=5, num=13).dask)) assert (sorted(da.linspace(6, 49, chunks=5, dtype=float).dask) == sorted(da.linspace(6, 49, chunks=5, dtype=float).dask)) def test_arange(): darr = da.arange(77, chunks=13) nparr = np.arange(77) assert_eq(darr, nparr) darr = da.arange(2, 13, chunks=5) nparr = np.arange(2, 13) assert_eq(darr, nparr) darr = da.arange(4, 21, 9, chunks=13) nparr = np.arange(4, 21, 9) assert_eq(darr, nparr) # negative steps darr = da.arange(53, 5, -3, chunks=5) nparr = np.arange(53, 5, -3) assert_eq(darr, nparr) darr = da.arange(77, chunks=13, dtype=float) nparr = np.arange(77, dtype=float) assert_eq(darr, nparr) darr = da.arange(2, 13, chunks=5, dtype=int) nparr = np.arange(2, 13, dtype=int) assert_eq(darr, nparr) assert (sorted(da.arange(2, 13, chunks=5).dask) == sorted(da.arange(2, 13, chunks=5).dask)) assert (sorted(da.arange(77, chunks=13, dtype=float).dask) == sorted(da.arange(77, chunks=13, dtype=float).dask)) # 0 size output darr = da.arange(0, 1, -0.5, chunks=20) nparr = np.arange(0, 1, -0.5) assert_eq(darr, nparr) darr = da.arange(0, -1, 0.5, chunks=20) nparr = np.arange(0, -1, 0.5) assert_eq(darr, nparr) def test_arange_has_dtype(): assert da.arange(5, chunks=2).dtype == np.arange(5).dtype @pytest.mark.xfail(reason="Casting floats to ints is not supported since edge" "behavior is not specified or guaranteed by NumPy.") def test_arange_cast_float_int_step(): darr = da.arange(3.3, -9.1, -.25, chunks=3, dtype='i8') nparr = np.arange(3.3, -9.1, -.25, dtype='i8') assert_eq(darr, nparr) def test_arange_float_step(): darr = da.arange(2., 13., .3, chunks=4) nparr = np.arange(2., 13., .3) assert_eq(darr, nparr) darr = da.arange(7.7, 1.5, -.8, chunks=3) nparr = 
np.arange(7.7, 1.5, -.8) assert_eq(darr, nparr) darr = da.arange(0, 1, 0.01, chunks=20) nparr = np.arange(0, 1, 0.01) assert_eq(darr, nparr) darr = da.arange(0, 1, 0.03, chunks=20) nparr = np.arange(0, 1, 0.03) assert_eq(darr, nparr) def test_indices_no_chunks(): with pytest.raises(ValueError): da.indices((1,)) def test_indices_wrong_chunks(): with pytest.raises(ValueError): da.indices((1,), chunks=tuple()) def test_empty_indicies(): darr = da.indices(tuple(), chunks=tuple()) nparr = np.indices(tuple()) assert darr.shape == nparr.shape assert darr.dtype == nparr.dtype assert_eq(darr, nparr) darr = da.indices(tuple(), float, chunks=tuple()) nparr = np.indices(tuple(), float) assert darr.shape == nparr.shape assert darr.dtype == nparr.dtype assert_eq(darr, nparr) darr = da.indices((0,), float, chunks=(1,)) nparr = np.indices((0,), float) assert darr.shape == nparr.shape assert darr.dtype == nparr.dtype assert_eq(darr, nparr) darr = da.indices((0, 1, 2), float, chunks=(1, 1, 2)) nparr = np.indices((0, 1, 2), float) assert darr.shape == nparr.shape assert darr.dtype == nparr.dtype assert_eq(darr, nparr) def test_indicies(): darr = da.indices((1,), chunks=(1,)) nparr = np.indices((1,)) assert_eq(darr, nparr) darr = da.indices((1,), float, chunks=(1,)) nparr = np.indices((1,), float) assert_eq(darr, nparr) darr = da.indices((2, 1), chunks=(2, 1)) nparr = np.indices((2, 1)) assert_eq(darr, nparr) darr = da.indices((2, 3), chunks=(1, 2)) nparr = np.indices((2, 3)) assert_eq(darr, nparr) def test_tril_triu(): A = np.random.randn(20, 20) for chk in [5, 4]: dA = da.from_array(A, (chk, chk)) assert np.allclose(da.triu(dA).compute(), np.triu(A)) assert np.allclose(da.tril(dA).compute(), np.tril(A)) for k in [-25, -20, -19, -15, -14, -9, -8, -6, -5, -1, 1, 4, 5, 6, 8, 10, 11, 15, 16, 19, 20, 21]: assert np.allclose(da.triu(dA, k).compute(), np.triu(A, k)) assert np.allclose(da.tril(dA, k).compute(), np.tril(A, k)) def test_tril_triu_errors(): A = np.random.randint(0, 11, (10, 10, 10)) dA = da.from_array(A, chunks=(5, 5, 5)) pytest.raises(ValueError, lambda: da.triu(dA)) A = np.random.randint(0, 11, (30, 35)) dA = da.from_array(A, chunks=(5, 5)) pytest.raises(NotImplementedError, lambda: da.triu(dA)) def test_eye(): assert_eq(da.eye(9, chunks=3), np.eye(9)) assert_eq(da.eye(10, chunks=3), np.eye(10)) assert_eq(da.eye(9, chunks=3, M=11), np.eye(9, M=11)) assert_eq(da.eye(11, chunks=3, M=9), np.eye(11, M=9)) assert_eq(da.eye(7, chunks=3, M=11), np.eye(7, M=11)) assert_eq(da.eye(11, chunks=3, M=7), np.eye(11, M=7)) assert_eq(da.eye(9, chunks=3, k=2), np.eye(9, k=2)) assert_eq(da.eye(9, chunks=3, k=-2), np.eye(9, k=-2)) assert_eq(da.eye(7, chunks=3, M=11, k=5), np.eye(7, M=11, k=5)) assert_eq(da.eye(11, chunks=3, M=7, k=-6), np.eye(11, M=7, k=-6)) assert_eq(da.eye(6, chunks=3, M=9, k=7), np.eye(6, M=9, k=7)) assert_eq(da.eye(12, chunks=3, M=6, k=-3), np.eye(12, M=6, k=-3)) assert_eq(da.eye(9, chunks=3, dtype=int), np.eye(9, dtype=int)) assert_eq(da.eye(10, chunks=3, dtype=int), np.eye(10, dtype=int)) def test_diag(): v = np.arange(11) assert_eq(da.diag(v), np.diag(v)) v = da.arange(11, chunks=3) darr = da.diag(v) nparr = np.diag(v) assert_eq(darr, nparr) assert sorted(da.diag(v).dask) == sorted(da.diag(v).dask) v = v + v + 3 darr = da.diag(v) nparr = np.diag(v) assert_eq(darr, nparr) v = da.arange(11, chunks=11) darr = da.diag(v) nparr = np.diag(v) assert_eq(darr, nparr) assert sorted(da.diag(v).dask) == sorted(da.diag(v).dask) x = np.arange(64).reshape((8, 8)) assert_eq(da.diag(x), np.diag(x)) d = 
da.from_array(x, chunks=(4, 4)) assert_eq(da.diag(d), np.diag(x)) def test_fromfunction(): def f(x, y): return x + y d = da.fromfunction(f, shape=(5, 5), chunks=(2, 2), dtype='f8') assert_eq(d, np.fromfunction(f, shape=(5, 5))) assert same_keys(d, da.fromfunction(f, shape=(5, 5), chunks=(2, 2), dtype='f8')) def test_repeat(): x = np.random.random((10, 11, 13)) d = da.from_array(x, chunks=(4, 5, 3)) repeats = [1, 2, 5] axes = [-3, -2, -1, 0, 1, 2] for r in repeats: for a in axes: assert_eq(x.repeat(r, axis=a), d.repeat(r, axis=a)) assert_eq(d.repeat(2, 0), da.repeat(d, 2, 0)) with pytest.raises(NotImplementedError): da.repeat(d, np.arange(10)) with pytest.raises(NotImplementedError): da.repeat(d, 2, None) with pytest.raises(NotImplementedError): da.repeat(d, 2) for invalid_axis in [3, -4]: with pytest.raises(ValueError): da.repeat(d, 2, axis=invalid_axis) x = np.arange(5) d = da.arange(5, chunks=(2,)) assert_eq(x.repeat(3), d.repeat(3)) for r in [1, 2, 3, 4]: assert all(concat(d.repeat(r).chunks)) @pytest.mark.parametrize('shape, chunks', [ ((10,), (1,)), ((10, 11, 13), (4, 5, 3)), ]) @pytest.mark.parametrize('reps', [0, 1, 2, 3, 5]) def test_tile(shape, chunks, reps): x = np.random.random(shape) d = da.from_array(x, chunks=chunks) assert_eq(np.tile(x, reps), da.tile(d, reps)) @pytest.mark.parametrize('shape, chunks', [ ((10,), (1,)), ((10, 11, 13), (4, 5, 3)), ]) @pytest.mark.parametrize('reps', [-1, -5]) def test_tile_neg_reps(shape, chunks, reps): x = np.random.random(shape) d = da.from_array(x, chunks=chunks) with pytest.raises(ValueError): da.tile(d, reps) @pytest.mark.parametrize('shape, chunks', [ ((10,), (1,)), ((10, 11, 13), (4, 5, 3)), ]) @pytest.mark.parametrize('reps', [[1], [1, 2]]) def test_tile_array_reps(shape, chunks, reps): x = np.random.random(shape) d = da.from_array(x, chunks=chunks) with pytest.raises(NotImplementedError): da.tile(d, reps) dask-0.16.0/dask/array/tests/test_fft.py000066400000000000000000000172121320364734500201240ustar00rootroot00000000000000from itertools import combinations_with_replacement import numpy as np import pytest import dask.array as da import dask.array.fft from dask.array.fft import fft_wrap from dask.array.utils import assert_eq, same_keys from dask.array.core import ( normalize_chunks as _normalize_chunks, ) all_1d_funcnames = [ "fft", "ifft", "rfft", "irfft", "hfft", "ihfft", ] all_nd_funcnames = [ "fft2", "ifft2", "fftn", "ifftn", "rfft2", "irfft2", "rfftn", "irfftn", ] nparr = np.arange(100).reshape(10, 10) darr = da.from_array(nparr, chunks=(1, 10)) darr2 = da.from_array(nparr, chunks=(10, 1)) darr3 = da.from_array(nparr, chunks=(10, 10)) @pytest.mark.parametrize("funcname", all_1d_funcnames) def test_cant_fft_chunked_axis(funcname): da_fft = getattr(da.fft, funcname) bad_darr = da.from_array(nparr, chunks=(5, 5)) for i in range(bad_darr.ndim): with pytest.raises(ValueError): da_fft(bad_darr, axis=i) @pytest.mark.parametrize("funcname", all_1d_funcnames) def test_fft(funcname): da_fft = getattr(da.fft, funcname) np_fft = getattr(np.fft, funcname) assert_eq(da_fft(darr), np_fft(nparr)) @pytest.mark.parametrize("funcname", all_nd_funcnames) def test_fft2n_shapes(funcname): da_fft = getattr(dask.array.fft, funcname) np_fft = getattr(np.fft, funcname) assert_eq(da_fft(darr3), np_fft(nparr)) assert_eq(da_fft(darr3, (8, 9)), np_fft(nparr, (8, 9))) assert_eq(da_fft(darr3, (8, 9), axes=(1, 0)), np_fft(nparr, (8, 9), axes=(1, 0))) assert_eq(da_fft(darr3, (12, 11), axes=(1, 0)), np_fft(nparr, (12, 11), axes=(1, 0))) 
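# Note (descriptive comment, not in the original source): the n/axis combinations below
# are checked against numpy.fft. darr (chunked along axis 0) exercises the default last
# axis, while darr2 (chunked along axis 1) exercises axis=0, since the transformed axis
# must lie within a single chunk (see test_cant_fft_chunked_axis above).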
@pytest.mark.parametrize("funcname", all_1d_funcnames) def test_fft_n_kwarg(funcname): da_fft = getattr(da.fft, funcname) np_fft = getattr(np.fft, funcname) assert_eq(da_fft(darr, 5), np_fft(nparr, 5)) assert_eq(da_fft(darr, 13), np_fft(nparr, 13)) assert_eq(da_fft(darr2, axis=0), np_fft(nparr, axis=0)) assert_eq(da_fft(darr2, 5, axis=0), np_fft(nparr, 5, axis=0)) assert_eq(da_fft(darr2, 13, axis=0), np_fft(nparr, 13, axis=0)) assert_eq(da_fft(darr2, 12, axis=0), np_fft(nparr, 12, axis=0)) @pytest.mark.parametrize("funcname", all_1d_funcnames) def test_fft_consistent_names(funcname): da_fft = getattr(da.fft, funcname) assert same_keys(da_fft(darr, 5), da_fft(darr, 5)) assert same_keys(da_fft(darr2, 5, axis=0), da_fft(darr2, 5, axis=0)) assert not same_keys(da_fft(darr, 5), da_fft(darr, 13)) def test_wrap_bad_kind(): with pytest.raises(ValueError): fft_wrap(np.ones) @pytest.mark.parametrize("funcname", all_nd_funcnames) @pytest.mark.parametrize("dtype", ["float32", "float64"]) def test_nd_ffts_axes(funcname, dtype): np_fft = getattr(np.fft, funcname) da_fft = getattr(da.fft, funcname) shape = (7, 8, 9) chunk_size = (3, 3, 3) a = np.arange(np.prod(shape), dtype=dtype).reshape(shape) d = da.from_array(a, chunks=chunk_size) for num_axes in range(1, d.ndim): for axes in combinations_with_replacement(range(d.ndim), num_axes): cs = list(chunk_size) for i in axes: cs[i] = shape[i] d2 = d.rechunk(cs) if len(set(axes)) < len(axes): with pytest.raises(ValueError): da_fft(d2, axes=axes) else: r = da_fft(d2, axes=axes) er = np_fft(a, axes=axes) assert r.dtype == er.dtype assert r.shape == er.shape assert_eq(r, er) @pytest.mark.parametrize("modname", ["numpy.fft", "scipy.fftpack"]) @pytest.mark.parametrize("funcname", all_1d_funcnames) @pytest.mark.parametrize("dtype", ["float32", "float64"]) def test_wrap_ffts(modname, funcname, dtype): fft_mod = pytest.importorskip(modname) try: func = getattr(fft_mod, funcname) except AttributeError: pytest.skip("`%s` missing function `%s`." % (modname, funcname)) darrc = darr.astype(dtype) darr2c = darr2.astype(dtype) nparrc = nparr.astype(dtype) if modname == "scipy.fftpack" and "rfft" in funcname: with pytest.raises(ValueError): fft_wrap(func) else: wfunc = fft_wrap(func) assert wfunc(darrc).dtype == func(nparrc).dtype assert wfunc(darrc).shape == func(nparrc).shape assert_eq(wfunc(darrc), func(nparrc)) assert_eq(wfunc(darrc, axis=1), func(nparrc, axis=1)) assert_eq(wfunc(darr2c, axis=0), func(nparrc, axis=0)) assert_eq(wfunc(darrc, n=len(darrc) - 1), func(nparrc, n=len(darrc) - 1)) assert_eq(wfunc(darrc, axis=1, n=darrc.shape[1] - 1), func(nparrc, n=darrc.shape[1] - 1)) assert_eq(wfunc(darr2c, axis=0, n=darr2c.shape[0] - 1), func(nparrc, axis=0, n=darr2c.shape[0] - 1)) @pytest.mark.parametrize("modname", ["numpy.fft", "scipy.fftpack"]) @pytest.mark.parametrize("funcname", all_nd_funcnames) @pytest.mark.parametrize("dtype", ["float32", "float64"]) def test_wrap_fftns(modname, funcname, dtype): fft_mod = pytest.importorskip(modname) try: func = getattr(fft_mod, funcname) except AttributeError: pytest.skip("`%s` missing function `%s`." 
% (modname, funcname)) darrc = darr.astype(dtype).rechunk(darr.shape) darr2c = darr2.astype(dtype).rechunk(darr2.shape) nparrc = nparr.astype(dtype) wfunc = fft_wrap(func) assert wfunc(darrc).dtype == func(nparrc).dtype assert wfunc(darrc).shape == func(nparrc).shape assert_eq(wfunc(darrc), func(nparrc)) assert_eq(wfunc(darrc, axes=(1, 0)), func(nparrc, axes=(1, 0))) assert_eq(wfunc(darr2c, axes=(0, 1)), func(nparrc, axes=(0, 1))) assert_eq( wfunc(darr2c, (darr2c.shape[0] - 1, darr2c.shape[1] - 1), (0, 1)), func(nparrc, (nparrc.shape[0] - 1, nparrc.shape[1] - 1), (0, 1)) ) @pytest.mark.parametrize("n", [1, 2, 3, 6, 7]) @pytest.mark.parametrize("d", [1.0, 0.5, 2 * np.pi]) @pytest.mark.parametrize("c", [lambda m: m, lambda m: (1, m - 1)]) def test_fftfreq(n, d, c): c = c(n) r1 = np.fft.fftfreq(n, d) r2 = da.fft.fftfreq(n, d, chunks=c) assert _normalize_chunks(c, r2.shape) == r2.chunks assert_eq(r1, r2) @pytest.mark.parametrize("n", [1, 2, 3, 6, 7]) @pytest.mark.parametrize("d", [1.0, 0.5, 2 * np.pi]) @pytest.mark.parametrize("c", [lambda m: m // 2 + 1, lambda m: (1, m // 2)]) def test_rfftfreq(n, d, c): c = c(n) r1 = np.fft.rfftfreq(n, d) r2 = da.fft.rfftfreq(n, d, chunks=c) assert _normalize_chunks(c, r2.shape) == r2.chunks assert_eq(r1, r2) @pytest.mark.parametrize("funcname", ["fftshift", "ifftshift"]) @pytest.mark.parametrize("axes", [ None, 0, 1, 2, (0, 1), (1, 2), (0, 2), (0, 1, 2), ]) def test_fftshift(funcname, axes): np_func = getattr(np.fft, funcname) da_func = getattr(da.fft, funcname) s = (5, 6, 7) a = np.arange(np.prod(s)).reshape(s) d = da.from_array(a, chunks=(2, 3, 4)) assert_eq(da_func(d, axes), np_func(a, axes)) @pytest.mark.parametrize("funcname1, funcname2", [ ("fftshift", "ifftshift"), ("ifftshift", "fftshift"), ]) @pytest.mark.parametrize("axes", [ None, 0, 1, 2, (0, 1), (1, 2), (0, 2), (0, 1, 2), ]) def test_fftshift_identity(funcname1, funcname2, axes): da_func1 = getattr(da.fft, funcname1) da_func2 = getattr(da.fft, funcname2) s = (5, 6, 7) a = np.arange(np.prod(s)).reshape(s) d = da.from_array(a, chunks=(2, 3, 4)) assert_eq(d, da_func1(da_func2(d, axes), axes)) dask-0.16.0/dask/array/tests/test_ghost.py000066400000000000000000000277521320364734500205030ustar00rootroot00000000000000import pytest pytest.importorskip('numpy') import numpy as np from numpy.testing import assert_array_almost_equal, assert_array_equal import dask.array as da from dask.array.ghost import (fractional_slice, getitem, trim_internal, ghost_internal, nearest, constant, boundaries, reflect, periodic, ghost) from dask.core import get from dask.array.utils import assert_eq, same_keys def test_fractional_slice(): assert (fractional_slice(('x', 4.9), {0: 2}) == (getitem, ('x', 5), (slice(0, 2), ))) assert (fractional_slice(('x', 3, 5.1), {0: 2, 1: 3}) == (getitem, ('x', 3, 5), (slice(None, None, None), slice(-3, None)))) assert (fractional_slice(('x', 2.9, 5.1), {0: 2, 1: 3}) == (getitem, ('x', 3, 5), (slice(0, 2), slice(-3, None)))) fs = fractional_slice(('x', 4.9), {0: 2}) assert isinstance(fs[1][1], int) def test_ghost_internal(): x = np.arange(64).reshape((8, 8)) d = da.from_array(x, chunks=(4, 4)) g = ghost_internal(d, {0: 2, 1: 1}) result = g.compute(get=get) assert g.chunks == ((6, 6), (5, 5)) expected = np.array([ [ 0, 1, 2, 3, 4, 3, 4, 5, 6, 7], [ 8, 9, 10, 11, 12, 11, 12, 13, 14, 15], [16, 17, 18, 19, 20, 19, 20, 21, 22, 23], [24, 25, 26, 27, 28, 27, 28, 29, 30, 31], [32, 33, 34, 35, 36, 35, 36, 37, 38, 39], [40, 41, 42, 43, 44, 43, 44, 45, 46, 47], [16, 17, 18, 19, 20, 19, 20, 21, 22, 
23], [24, 25, 26, 27, 28, 27, 28, 29, 30, 31], [32, 33, 34, 35, 36, 35, 36, 37, 38, 39], [40, 41, 42, 43, 44, 43, 44, 45, 46, 47], [48, 49, 50, 51, 52, 51, 52, 53, 54, 55], [56, 57, 58, 59, 60, 59, 60, 61, 62, 63]]) assert_eq(result, expected) assert same_keys(ghost_internal(d, {0: 2, 1: 1}), g) def test_trim_internal(): d = da.ones((40, 60), chunks=(10, 10)) e = trim_internal(d, axes={0: 1, 1: 2}) assert e.chunks == ((8, 8, 8, 8), (6, 6, 6, 6, 6, 6)) def test_periodic(): x = np.arange(64).reshape((8, 8)) d = da.from_array(x, chunks=(4, 4)) e = periodic(d, axis=0, depth=2) assert e.shape[0] == d.shape[0] + 4 assert e.shape[1] == d.shape[1] assert_eq(e[1, :], d[-1, :]) assert_eq(e[0, :], d[-2, :]) def test_reflect(): x = np.arange(10) d = da.from_array(x, chunks=(5, 5)) e = reflect(d, axis=0, depth=2) expected = np.array([1, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 8]) assert_eq(e, expected) e = reflect(d, axis=0, depth=1) expected = np.array([0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9]) assert_eq(e, expected) def test_nearest(): x = np.arange(10) d = da.from_array(x, chunks=(5, 5)) e = nearest(d, axis=0, depth=2) expected = np.array([0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9]) assert_eq(e, expected) e = nearest(d, axis=0, depth=1) expected = np.array([0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9]) assert_eq(e, expected) def test_constant(): x = np.arange(64).reshape((8, 8)) d = da.from_array(x, chunks=(4, 4)) e = constant(d, axis=0, depth=2, value=10) assert e.shape[0] == d.shape[0] + 4 assert e.shape[1] == d.shape[1] assert_eq(e[1, :], np.ones(8, dtype=x.dtype) * 10) assert_eq(e[-1, :], np.ones(8, dtype=x.dtype) * 10) def test_boundaries(): x = np.arange(64).reshape((8, 8)) d = da.from_array(x, chunks=(4, 4)) e = boundaries(d, {0: 2, 1: 1}, {0: 0, 1: 'periodic'}) expected = np.array( [[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 7, 0, 1, 2, 3, 4, 5, 6, 7, 0], [15, 8, 9,10,11,12,13,14,15, 8], [23,16,17,18,19,20,21,22,23,16], [31,24,25,26,27,28,29,30,31,24], [39,32,33,34,35,36,37,38,39,32], [47,40,41,42,43,44,45,46,47,40], [55,48,49,50,51,52,53,54,55,48], [63,56,57,58,59,60,61,62,63,56], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]) assert_eq(e, expected) def test_ghost(): x = np.arange(64).reshape((8, 8)) d = da.from_array(x, chunks=(4, 4)) g = ghost(d, depth={0: 2, 1: 1}, boundary={0: 100, 1: 'reflect'}) assert g.chunks == ((8, 8), (6, 6)) expected = np.array( [[100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100], [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100], [ 0, 0, 1, 2, 3, 4, 3, 4, 5, 6, 7, 7], [ 8, 8, 9, 10, 11, 12, 11, 12, 13, 14, 15, 15], [ 16, 16, 17, 18, 19, 20, 19, 20, 21, 22, 23, 23], [ 24, 24, 25, 26, 27, 28, 27, 28, 29, 30, 31, 31], [ 32, 32, 33, 34, 35, 36, 35, 36, 37, 38, 39, 39], [ 40, 40, 41, 42, 43, 44, 43, 44, 45, 46, 47, 47], [ 16, 16, 17, 18, 19, 20, 19, 20, 21, 22, 23, 23], [ 24, 24, 25, 26, 27, 28, 27, 28, 29, 30, 31, 31], [ 32, 32, 33, 34, 35, 36, 35, 36, 37, 38, 39, 39], [ 40, 40, 41, 42, 43, 44, 43, 44, 45, 46, 47, 47], [ 48, 48, 49, 50, 51, 52, 51, 52, 53, 54, 55, 55], [ 56, 56, 57, 58, 59, 60, 59, 60, 61, 62, 63, 63], [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100], [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100]]) assert_eq(g, expected) assert same_keys(g, ghost(d, depth={0: 2, 1: 1}, boundary={0: 100, 1: 'reflect'})) g = ghost(d, depth={0: 2, 1: 1}, boundary={0: 100, 1: 'none'}) expected = np.array( [[100, 100, 100, 100, 100, 100, 100, 100, 100, 100], [100, 100, 100, 100, 100, 100, 100, 100, 
100, 100], [ 0, 1, 2, 3, 4, 3, 4, 5, 6, 7], [ 8, 9, 10, 11, 12, 11, 12, 13, 14, 15], [ 16, 17, 18, 19, 20, 19, 20, 21, 22, 23], [ 24, 25, 26, 27, 28, 27, 28, 29, 30, 31], [ 32, 33, 34, 35, 36, 35, 36, 37, 38, 39], [ 40, 41, 42, 43, 44, 43, 44, 45, 46, 47], [ 16, 17, 18, 19, 20, 19, 20, 21, 22, 23], [ 24, 25, 26, 27, 28, 27, 28, 29, 30, 31], [ 32, 33, 34, 35, 36, 35, 36, 37, 38, 39], [ 40, 41, 42, 43, 44, 43, 44, 45, 46, 47], [ 48, 49, 50, 51, 52, 51, 52, 53, 54, 55], [ 56, 57, 58, 59, 60, 59, 60, 61, 62, 63], [100, 100, 100, 100, 100, 100, 100, 100, 100, 100], [100, 100, 100, 100, 100, 100, 100, 100, 100, 100]]) assert_eq(g, expected) assert g.chunks == ((8, 8), (5, 5)) def test_map_overlap(): x = da.arange(10, chunks=5) y = x.map_overlap(lambda x: x + len(x), depth=2, dtype=x.dtype) assert_eq(y, np.arange(10) + 5 + 2 + 2) x = da.arange(10, chunks=5) y = x.map_overlap(lambda x: x + len(x), depth=np.int64(2), dtype=x.dtype) assert all([(type(s) is int) for s in y.shape]) assert_eq(y, np.arange(10) + 5 + 2 + 2) x = np.arange(16).reshape((4, 4)) d = da.from_array(x, chunks=(2, 2)) exp1 = d.map_overlap(lambda x: x + x.size, depth=1, dtype=d.dtype) exp2 = d.map_overlap(lambda x: x + x.size, depth={0: 1, 1: 1}, boundary={0: 'reflect', 1: 'none'}, dtype=d.dtype) assert_eq(exp1, x + 16) assert_eq(exp2, x + 12) @pytest.mark.parametrize("boundary", [ None, "reflect", "periodic", "nearest", "none", 0 ]) def test_map_overlap_no_depth(boundary): x = da.arange(10, chunks=5) y = x.map_overlap(lambda i: i, depth=0, boundary=boundary, dtype=x.dtype) assert_eq(y, x) def test_nearest_ghost(): a = np.arange(144).reshape(12, 12).astype(float) darr = da.from_array(a, chunks=(6, 6)) garr = ghost(darr, depth={0: 5, 1: 5}, boundary={0: 'nearest', 1: 'nearest'}) tarr = trim_internal(garr, {0: 5, 1: 5}) assert_array_almost_equal(tarr, a) def test_0_depth(): expected = np.arange(100).reshape(10, 10) darr = da.from_array(expected, chunks=(5, 2)) depth = {0: 0, 1: 0} reflected = ghost(darr, depth=depth, boundary='reflect') nearest = ghost(darr, depth=depth, boundary='nearest') periodic = ghost(darr, depth=depth, boundary='periodic') constant = ghost(darr, depth=depth, boundary=42) result = trim_internal(reflected, depth) assert_array_equal(result, expected) result = trim_internal(nearest, depth) assert_array_equal(result, expected) result = trim_internal(periodic, depth) assert_array_equal(result, expected) result = trim_internal(constant, depth) assert_array_equal(result, expected) def test_some_0_depth(): expected = np.arange(100).reshape(10, 10) darr = da.from_array(expected, chunks=(5, 5)) depth = {0: 4, 1: 0} reflected = ghost(darr, depth=depth, boundary='reflect') nearest = ghost(darr, depth=depth, boundary='nearest') periodic = ghost(darr, depth=depth, boundary='periodic') constant = ghost(darr, depth=depth, boundary=42) result = trim_internal(reflected, depth) assert_array_equal(result, expected) result = trim_internal(nearest, depth) assert_array_equal(result, expected) result = trim_internal(periodic, depth) assert_array_equal(result, expected) result = trim_internal(constant, depth) assert_array_equal(result, expected) def test_one_chunk_along_axis(): a = np.arange(2 * 9).reshape(2, 9) darr = da.from_array(a, chunks=((2,), (2, 2, 2, 3))) g = ghost(darr, depth=0, boundary=0) assert a.shape == g.shape def test_constant_boundaries(): a = np.arange(1 * 9).reshape(1, 9) darr = da.from_array(a, chunks=((1,), (2, 2, 2, 3))) b = boundaries(darr, {0: 0, 1: 0}, {0: 0, 1: 0}) assert b.chunks == darr.chunks def 
test_depth_equals_boundary_length(): expected = np.arange(100).reshape(10, 10) darr = da.from_array(expected, chunks=(5, 5)) depth = {0: 5, 1: 5} reflected = ghost(darr, depth=depth, boundary='reflect') nearest = ghost(darr, depth=depth, boundary='nearest') periodic = ghost(darr, depth=depth, boundary='periodic') constant = ghost(darr, depth=depth, boundary=42) result = trim_internal(reflected, depth) assert_array_equal(result, expected) result = trim_internal(nearest, depth) assert_array_equal(result, expected) result = trim_internal(periodic, depth) assert_array_equal(result, expected) result = trim_internal(constant, depth) assert_array_equal(result, expected) @pytest.mark.xfail def test_depth_greater_than_boundary_length(): expected = np.arange(100).reshape(10, 10) darr = da.from_array(expected, chunks=(5, 5)) depth = {0: 8, 1: 7} reflected = ghost(darr, depth=depth, boundary='reflect') nearest = ghost(darr, depth=depth, boundary='nearest') periodic = ghost(darr, depth=depth, boundary='periodic') constant = ghost(darr, depth=depth, boundary=42) result = trim_internal(reflected, depth) assert_array_equal(result, expected) result = trim_internal(nearest, depth) assert_array_equal(result, expected) result = trim_internal(periodic, depth) assert_array_equal(result, expected) result = trim_internal(constant, depth) assert_array_equal(result, expected) def test_bad_depth_raises(): expected = np.arange(144).reshape(12, 12) darr = da.from_array(expected, chunks=(5, 5)) depth = {0: 4, 1: 2} pytest.raises(ValueError, ghost, darr, depth=depth, boundary=1) def test_none_boundaries(): x = da.from_array(np.arange(16).reshape(4, 4), chunks=(2, 2)) exp = boundaries(x, 2, {0: 'none', 1: 33}) res = np.array( [[33, 33, 0, 1, 2, 3, 33, 33], [33, 33, 4, 5, 6, 7, 33, 33], [33, 33, 8, 9, 10, 11, 33, 33], [33, 33, 12, 13, 14, 15, 33, 33]]) assert_eq(exp, res) def test_ghost_small(): x = da.ones((10, 10), chunks=(5, 5)) y = x.map_overlap(lambda x: x, depth=1) assert len(y.dask) < 200 y = x.map_overlap(lambda x: x, depth=1, boundary='none') assert len(y.dask) < 100 dask-0.16.0/dask/array/tests/test_image.py000066400000000000000000000026151320364734500204300ustar00rootroot00000000000000from contextlib import contextmanager import os import pytest pytest.importorskip('skimage') from dask.array.image import imread as da_imread import numpy as np from skimage.io import imsave from dask.utils import tmpdir @contextmanager def random_images(n, shape): with tmpdir() as dirname: for i in range(n): fn = os.path.join(dirname, 'image.%d.png' % i) x = np.random.randint(0, 255, size=shape).astype('i1') imsave(fn, x) yield os.path.join(dirname, '*.png') def test_imread(): with random_images(4, (5, 6, 3)) as globstring: im = da_imread(globstring) assert im.shape == (4, 5, 6, 3) assert im.chunks == ((1, 1, 1, 1), (5,), (6,), (3,)) assert im.dtype == 'uint8' assert im.compute().shape == (4, 5, 6, 3) assert im.compute().dtype == 'uint8' def test_imread_with_custom_function(): def imread2(fn): return np.ones((2, 3, 4), dtype='i1') with random_images(4, (5, 6, 3)) as globstring: im = da_imread(globstring, imread=imread2) assert (im.compute() == np.ones((4, 2, 3, 4), dtype='i1')).all() def test_preprocess(): def preprocess(x): x[:] = 1 return x[:, :, 0] with random_images(4, (2, 3, 4)) as globstring: im = da_imread(globstring, preprocess=preprocess) assert (im.compute() == np.ones((4, 2, 3), dtype='i1')).all() 
dask-0.16.0/dask/array/tests/test_learn.py000066400000000000000000000016041320364734500204440ustar00rootroot00000000000000import pytest pytest.importorskip('sklearn') from sklearn.linear_model import SGDClassifier import dask.array as da import numpy as np import dask x = np.array([[1, 0], [2, 0], [3, 0], [4, 0], [0, 1], [0, 2], [3, 3], [4, 4]]) y = np.array([1, 1, 1, 1, -1, -1, 0, 0]) z = np.array([[1, -1], [-1, 1], [10, -10], [-10, 10]]) X = da.from_array(x, chunks=(3, 2)) Y = da.from_array(y, chunks=(3,)) Z = da.from_array(z, chunks=(2, 2)) @pytest.mark.skipif(reason="Hangs") def test_fit(): sgd = SGDClassifier() sgd = da.learn.fit(sgd, X, Y, get=dask.get, classes=np.array([-1, 0, 1])) sol = sgd.predict(z) result = da.learn.predict(sgd, Z) assert result.chunks == ((2, 2),) assert result.compute(get=dask.get).tolist() == sol.tolist() dask-0.16.0/dask/array/tests/test_linalg.py000066400000000000000000000362041320364734500206150ustar00rootroot00000000000000from __future__ import absolute_import import pytest pytest.importorskip('numpy') pytest.importorskip('scipy') import numpy as np import scipy.linalg import dask.array as da from dask.array.linalg import tsqr, svd_compressed, qr, svd from dask.array.utils import assert_eq, same_keys def test_tsqr_regular_blocks(): m, n = 20, 10 mat = np.random.rand(m, n) data = da.from_array(mat, chunks=(10, n), name='A') q, r = tsqr(data) q = np.array(q) r = np.array(r) assert_eq(mat, np.dot(q, r)) # accuracy check assert_eq(np.eye(n, n), np.dot(q.T, q)) # q must be orthonormal assert_eq(r, np.triu(r)) # r must be upper triangular def test_tsqr_irregular_blocks(): m, n = 20, 10 mat = np.random.rand(m, n) data = da.from_array(mat, chunks=(3, n), name='A')[1:] mat2 = mat[1:, :] q, r = tsqr(data) q = np.array(q) r = np.array(r) assert_eq(mat2, np.dot(q, r)) # accuracy check assert_eq(np.eye(n, n), np.dot(q.T, q)) # q must be orthonormal assert_eq(r, np.triu(r)) # r must be upper triangular def test_tsqr_svd_regular_blocks(): m, n = 20, 10 mat = np.random.rand(m, n) data = da.from_array(mat, chunks=(10, n), name='A') u, s, vt = tsqr(data, compute_svd=True) u = np.array(u) s = np.array(s) vt = np.array(vt) usvt = np.dot(u, np.dot(np.diag(s), vt)) s_exact = np.linalg.svd(mat)[1] assert_eq(mat, usvt) # accuracy check assert_eq(np.eye(n, n), np.dot(u.T, u)) # u must be orthonormal assert_eq(np.eye(n, n), np.dot(vt, vt.T)) # v must be orthonormal assert_eq(s, s_exact) # s must contain the singular values def test_tsqr_svd_irregular_blocks(): m, n = 20, 10 mat = np.random.rand(m, n) data = da.from_array(mat, chunks=(3, n), name='A')[1:] mat2 = mat[1:, :] u, s, vt = tsqr(data, compute_svd=True) u = np.array(u) s = np.array(s) vt = np.array(vt) usvt = np.dot(u, np.dot(np.diag(s), vt)) s_exact = np.linalg.svd(mat2)[1] assert_eq(mat2, usvt) # accuracy check assert_eq(np.eye(n, n), np.dot(u.T, u)) # u must be orthonormal assert_eq(np.eye(n, n), np.dot(vt, vt.T)) # v must be orthonormal assert_eq(s, s_exact) # s must contain the singular values def test_linalg_consistent_names(): m, n = 20, 10 mat = np.random.rand(m, n) data = da.from_array(mat, chunks=(10, n), name='A') q1, r1 = qr(data) q2, r2 = qr(data) assert same_keys(q1, q2) assert same_keys(r1, r2) u1, s1, v1 = svd(data) u2, s2, v2 = svd(data) assert same_keys(u1, u2) assert same_keys(s1, s2) assert same_keys(v1, v2) @pytest.mark.slow def test_svd_compressed(): m, n = 2000, 250 r = 10 np.random.seed(4321) mat1 = np.random.randn(m, r) mat2 = np.random.randn(r, n) mat = mat1.dot(mat2) data = da.from_array(mat, 
chunks=(500, 50)) u, s, vt = svd_compressed(data, r, seed=4321, n_power_iter=2) u, s, vt = da.compute(u, s, vt) usvt = np.dot(u, np.dot(np.diag(s), vt)) tol = 0.2 assert_eq(np.linalg.norm(usvt), np.linalg.norm(mat), rtol=tol, atol=tol) # average accuracy check u = u[:, :r] s = s[:r] vt = vt[:r, :] s_exact = np.linalg.svd(mat)[1] s_exact = s_exact[:r] assert_eq(np.eye(r, r), np.dot(u.T, u)) # u must be orthonormal assert_eq(np.eye(r, r), np.dot(vt, vt.T)) # v must be orthonormal assert_eq(s, s_exact) # s must contain the singular values def test_svd_compressed_deterministic(): m, n = 30, 25 x = da.random.RandomState(1234).random_sample(size=(m, n), chunks=(5, 5)) u, s, vt = svd_compressed(x, 3, seed=1234) u2, s2, vt2 = svd_compressed(x, 3, seed=1234) assert all(da.compute((u == u2).all(), (s == s2).all(), (vt == vt2).all())) def _check_lu_result(p, l, u, A): assert np.allclose(p.dot(l).dot(u), A) # check triangulars assert np.allclose(l, np.tril(l.compute())) assert np.allclose(u, np.triu(u.compute())) def test_lu_1(): A1 = np.array([[7, 3, -1, 2], [3, 8, 1, -4], [-1, 1, 4, -1], [2, -4, -1, 6] ]) A2 = np.array([[7, 0, 0, 0, 0, 0], [0, 8, 0, 0, 0, 0], [0, 0, 4, 0, 0, 0], [0, 0, 0, 6, 0, 0], [0, 0, 0, 0, 3, 0], [0, 0, 0, 0, 0, 5]]) # without shuffle for A, chunk in zip([A1, A2], [2, 2]): dA = da.from_array(A, chunks=(chunk, chunk)) p, l, u = scipy.linalg.lu(A) dp, dl, du = da.linalg.lu(dA) assert_eq(p, dp) assert_eq(l, dl) assert_eq(u, du) _check_lu_result(dp, dl, du, A) A3 = np.array([[ 7, 3, 2, 1, 4, 1], [ 7, 11, 5, 2, 5, 2], [21, 25, 16, 10, 16, 5], [21, 41, 18, 13, 16, 11], [14, 46, 23, 24, 21, 22], [ 0, 56, 29, 17, 14, 8]]) # with shuffle for A, chunk in zip([A3], [2]): dA = da.from_array(A, chunks=(chunk, chunk)) p, l, u = scipy.linalg.lu(A) dp, dl, du = da.linalg.lu(dA) _check_lu_result(dp, dl, du, A) @pytest.mark.slow @pytest.mark.parametrize('size', [10, 20, 30, 50]) def test_lu_2(size): np.random.seed(10) A = np.random.randint(0, 10, (size, size)) dA = da.from_array(A, chunks=(5, 5)) dp, dl, du = da.linalg.lu(dA) _check_lu_result(dp, dl, du, A) @pytest.mark.slow @pytest.mark.parametrize('size', [50, 100, 200]) def test_lu_3(size): np.random.seed(10) A = np.random.randint(0, 10, (size, size)) dA = da.from_array(A, chunks=(25, 25)) dp, dl, du = da.linalg.lu(dA) _check_lu_result(dp, dl, du, A) def test_lu_errors(): A = np.random.randint(0, 11, (10, 10, 10)) dA = da.from_array(A, chunks=(5, 5, 5)) pytest.raises(ValueError, lambda: da.linalg.lu(dA)) A = np.random.randint(0, 11, (10, 8)) dA = da.from_array(A, chunks=(5, 4)) pytest.raises(ValueError, lambda: da.linalg.lu(dA)) A = np.random.randint(0, 11, (20, 20)) dA = da.from_array(A, chunks=(5, 4)) pytest.raises(ValueError, lambda: da.linalg.lu(dA)) @pytest.mark.parametrize(('shape', 'chunk'), [(20, 10), (50, 10), (70, 20)]) def test_solve_triangular_vector(shape, chunk): np.random.seed(1) A = np.random.randint(1, 11, (shape, shape)) b = np.random.randint(1, 11, shape) # upper Au = np.triu(A) dAu = da.from_array(Au, (chunk, chunk)) db = da.from_array(b, chunk) res = da.linalg.solve_triangular(dAu, db) assert_eq(res, scipy.linalg.solve_triangular(Au, b)) assert_eq(dAu.dot(res), b.astype(float)) # lower Al = np.tril(A) dAl = da.from_array(Al, (chunk, chunk)) db = da.from_array(b, chunk) res = da.linalg.solve_triangular(dAl, db, lower=True) assert_eq(res, scipy.linalg.solve_triangular(Al, b, lower=True)) assert_eq(dAl.dot(res), b.astype(float)) @pytest.mark.parametrize(('shape', 'chunk'), [(20, 10), (50, 10), (50, 20)]) def 
test_solve_triangular_matrix(shape, chunk): np.random.seed(1) A = np.random.randint(1, 10, (shape, shape)) b = np.random.randint(1, 10, (shape, 5)) # upper Au = np.triu(A) dAu = da.from_array(Au, (chunk, chunk)) db = da.from_array(b, (chunk, 5)) res = da.linalg.solve_triangular(dAu, db) assert_eq(res, scipy.linalg.solve_triangular(Au, b)) assert_eq(dAu.dot(res), b.astype(float)) # lower Al = np.tril(A) dAl = da.from_array(Al, (chunk, chunk)) db = da.from_array(b, (chunk, 5)) res = da.linalg.solve_triangular(dAl, db, lower=True) assert_eq(res, scipy.linalg.solve_triangular(Al, b, lower=True)) assert_eq(dAl.dot(res), b.astype(float)) @pytest.mark.parametrize(('shape', 'chunk'), [(20, 10), (50, 10), (50, 20)]) def test_solve_triangular_matrix2(shape, chunk): np.random.seed(1) A = np.random.randint(1, 10, (shape, shape)) b = np.random.randint(1, 10, (shape, shape)) # upper Au = np.triu(A) dAu = da.from_array(Au, (chunk, chunk)) db = da.from_array(b, (chunk, chunk)) res = da.linalg.solve_triangular(dAu, db) assert_eq(res, scipy.linalg.solve_triangular(Au, b)) assert_eq(dAu.dot(res), b.astype(float)) # lower Al = np.tril(A) dAl = da.from_array(Al, (chunk, chunk)) db = da.from_array(b, (chunk, chunk)) res = da.linalg.solve_triangular(dAl, db, lower=True) assert_eq(res, scipy.linalg.solve_triangular(Al, b, lower=True)) assert_eq(dAl.dot(res), b.astype(float)) def test_solve_triangular_errors(): A = np.random.randint(0, 10, (10, 10, 10)) b = np.random.randint(1, 10, 10) dA = da.from_array(A, chunks=(5, 5, 5)) db = da.from_array(b, chunks=5) pytest.raises(ValueError, lambda: da.linalg.solve_triangular(dA, db)) A = np.random.randint(0, 10, (10, 10)) b = np.random.randint(1, 10, 10) dA = da.from_array(A, chunks=(3, 3)) db = da.from_array(b, chunks=5) pytest.raises(ValueError, lambda: da.linalg.solve_triangular(dA, db)) @pytest.mark.parametrize(('shape', 'chunk'), [(20, 10), (50, 10)]) def test_solve(shape, chunk): np.random.seed(1) A = np.random.randint(1, 10, (shape, shape)) dA = da.from_array(A, (chunk, chunk)) # vector b = np.random.randint(1, 10, shape) db = da.from_array(b, chunk) res = da.linalg.solve(dA, db) assert_eq(res, scipy.linalg.solve(A, b)) assert_eq(dA.dot(res), b.astype(float)) # tall-and-skinny matrix b = np.random.randint(1, 10, (shape, 5)) db = da.from_array(b, (chunk, 5)) res = da.linalg.solve(dA, db) assert_eq(res, scipy.linalg.solve(A, b)) assert_eq(dA.dot(res), b.astype(float)) # matrix b = np.random.randint(1, 10, (shape, shape)) db = da.from_array(b, (chunk, chunk)) res = da.linalg.solve(dA, db) assert_eq(res, scipy.linalg.solve(A, b)) assert_eq(dA.dot(res), b.astype(float)) @pytest.mark.parametrize(('shape', 'chunk'), [(20, 10), (50, 10)]) def test_inv(shape, chunk): np.random.seed(1) A = np.random.randint(1, 10, (shape, shape)) dA = da.from_array(A, (chunk, chunk)) res = da.linalg.inv(dA) assert_eq(res, scipy.linalg.inv(A)) assert_eq(dA.dot(res), np.eye(shape, dtype=float)) def _get_symmat(size): np.random.seed(1) A = np.random.randint(1, 21, (size, size)) lA = np.tril(A) return lA.dot(lA.T) @pytest.mark.parametrize(('shape', 'chunk'), [(20, 10), (30, 6)]) def test_solve_sym_pos(shape, chunk): np.random.seed(1) A = _get_symmat(shape) dA = da.from_array(A, (chunk, chunk)) # vector b = np.random.randint(1, 10, shape) db = da.from_array(b, chunk) res = da.linalg.solve(dA, db, sym_pos=True) assert_eq(res, scipy.linalg.solve(A, b, sym_pos=True)) assert_eq(dA.dot(res), b.astype(float)) # tall-and-skinny matrix b = np.random.randint(1, 10, (shape, 5)) db = da.from_array(b, (chunk, 
5)) res = da.linalg.solve(dA, db, sym_pos=True) assert_eq(res, scipy.linalg.solve(A, b, sym_pos=True)) assert_eq(dA.dot(res), b.astype(float)) # matrix b = np.random.randint(1, 10, (shape, shape)) db = da.from_array(b, (chunk, chunk)) res = da.linalg.solve(dA, db, sym_pos=True) assert_eq(res, scipy.linalg.solve(A, b, sym_pos=True)) assert_eq(dA.dot(res), b.astype(float)) @pytest.mark.parametrize(('shape', 'chunk'), [(20, 10), (12, 3), (30, 3), (30, 6)]) def test_cholesky(shape, chunk): A = _get_symmat(shape) dA = da.from_array(A, (chunk, chunk)) assert_eq(da.linalg.cholesky(dA), scipy.linalg.cholesky(A)) assert_eq(da.linalg.cholesky(dA, lower=True), scipy.linalg.cholesky(A, lower=True)) @pytest.mark.parametrize(("nrow", "ncol", "chunk"), [(20, 10, 5), (100, 10, 10)]) def test_lstsq(nrow, ncol, chunk): np.random.seed(1) A = np.random.randint(1, 20, (nrow, ncol)) b = np.random.randint(1, 20, nrow) dA = da.from_array(A, (chunk, ncol)) db = da.from_array(b, chunk) x, r, rank, s = np.linalg.lstsq(A, b) dx, dr, drank, ds = da.linalg.lstsq(dA, db) assert_eq(dx, x) assert_eq(dr, r) assert drank.compute() == rank assert_eq(ds, s) # reduce rank causes multicollinearity, only compare rank A[:, 1] = A[:, 2] dA = da.from_array(A, (chunk, ncol)) db = da.from_array(b, chunk) x, r, rank, s = np.linalg.lstsq(A, b, rcond=np.finfo(np.double).eps * max(nrow, ncol)) assert rank == ncol - 1 dx, dr, drank, ds = da.linalg.lstsq(dA, db) assert drank.compute() == rank def test_no_chunks_svd(): x = np.random.random((100, 10)) u, s, v = np.linalg.svd(x, full_matrices=0) for chunks in [((np.nan,) * 10, (10,)), ((np.nan,) * 10, (np.nan,))]: dx = da.from_array(x, chunks=(10, 10)) dx._chunks = chunks du, ds, dv = da.linalg.svd(dx) assert_eq(s, ds) assert_eq(u.dot(np.diag(s)).dot(v), du.dot(da.diag(ds)).dot(dv)) assert_eq(du.T.dot(du), np.eye(10)) assert_eq(dv.T.dot(dv), np.eye(10)) dx = da.from_array(x, chunks=(10, 10)) dx._chunks = ((np.nan,) * 10, (np.nan,)) assert_eq(abs(v), abs(dv)) assert_eq(abs(u), abs(du)) @pytest.mark.parametrize("shape, chunks, axis", [ [(5,), (2,), None], [(5,), (2,), 0], [(5,), (2,), (0,)], [(5, 6), (2, 2), None], [(5, 6), (2, 2), 0], [(5, 6), (2, 2), 1], [(5, 6), (2, 2), (0, 1)], [(5, 6), (2, 2), (1, 0)], ]) @pytest.mark.parametrize("norm", [ None, 1, -1, np.inf, -np.inf, ]) @pytest.mark.parametrize("keepdims", [ False, True, ]) def test_norm_any_ndim(shape, chunks, axis, norm, keepdims): a = np.random.random(shape) d = da.from_array(a, chunks=chunks) a_r = np.linalg.norm(a, ord=norm, axis=axis, keepdims=keepdims) d_r = da.linalg.norm(d, ord=norm, axis=axis, keepdims=keepdims) assert_eq(a_r, d_r) @pytest.mark.parametrize("shape, chunks, axis", [ [(5,), (2,), None], [(5,), (2,), 0], [(5,), (2,), (0,)], ]) @pytest.mark.parametrize("norm", [ 0, 2, -2, 0.5, ]) @pytest.mark.parametrize("keepdims", [ False, True, ]) def test_norm_1dim(shape, chunks, axis, norm, keepdims): a = np.random.random(shape) d = da.from_array(a, chunks=chunks) a_r = np.linalg.norm(a, ord=norm, axis=axis, keepdims=keepdims) d_r = da.linalg.norm(d, ord=norm, axis=axis, keepdims=keepdims) # Fix a type mismatch on NumPy 1.10. 
a_r = a_r.astype(float) assert_eq(a_r, d_r) @pytest.mark.parametrize("shape, chunks, axis", [ [(5, 6), (2, 2), None], [(5, 6), (2, 2), (0, 1)], [(5, 6), (2, 2), (1, 0)], ]) @pytest.mark.parametrize("norm", [ "fro", "nuc", 2, -2 ]) @pytest.mark.parametrize("keepdims", [ False, True, ]) def test_norm_2dim(shape, chunks, axis, norm, keepdims): a = np.random.random(shape) d = da.from_array(a, chunks=chunks) # Need one chunk on last dimension for svd. if norm == "nuc" or norm == 2 or norm == -2: d = d.rechunk((d.chunks[0], d.shape[1])) a_r = np.linalg.norm(a, ord=norm, axis=axis, keepdims=keepdims) d_r = da.linalg.norm(d, ord=norm, axis=axis, keepdims=keepdims) assert_eq(a_r, d_r) dask-0.16.0/dask/array/tests/test_linearoperator.py000066400000000000000000000016471320364734500224000ustar00rootroot00000000000000import pytest pytest.importorskip('scipy') import numpy as np import dask.array as da import scipy.sparse.linalg def test_LinearOperator(): X = np.random.random(size=(3, 2)) y = np.random.random(size=(2, 1)) w = np.random.random(size=(3, 1)) square = np.random.random(size=(2, 2)) dX = da.from_array(X, chunks=(2, 1)) npLO = scipy.sparse.linalg.aslinearoperator(X) daLO = scipy.sparse.linalg.interface.MatrixLinearOperator(dX) functions = [lambda x, y: x.matvec(y), lambda x, y: x * y, lambda x, y: x.dot(y)] for func in functions: assert np.allclose(func(npLO, y), func(daLO, y)) assert np.allclose(npLO.matmat(square), daLO.matmat(square)) assert np.allclose(npLO.rmatvec(w), daLO.rmatvec(w)) assert npLO.dtype == daLO.dtype assert npLO.shape == daLO.shape dask-0.16.0/dask/array/tests/test_masked.py000066400000000000000000000235441320364734500206160ustar00rootroot00000000000000import random from itertools import product import numpy as np import pytest import dask.array as da from dask.base import tokenize from dask.array.utils import assert_eq pytest.importorskip("dask.array.ma") def test_tokenize_masked_array(): m = np.ma.masked_array([1, 2, 3], mask=[True, True, False], fill_value=10) m2 = np.ma.masked_array([1, 2, 3], mask=[True, True, False], fill_value=0) m3 = np.ma.masked_array([1, 2, 3], mask=False, fill_value=10) assert tokenize(m) == tokenize(m) assert tokenize(m2) == tokenize(m2) assert tokenize(m3) == tokenize(m3) assert tokenize(m) != tokenize(m2) assert tokenize(m) != tokenize(m3) def test_from_array_masked_array(): m = np.ma.masked_array([1, 2, 3], mask=[True, True, False], fill_value=10) dm = da.from_array(m, chunks=(2,), asarray=False) assert_eq(dm, m) functions = [ lambda x: x, lambda x: da.expm1(x), lambda x: 2 * x, lambda x: x / 2, lambda x: x**2, lambda x: x + x, lambda x: x * x, lambda x: x[0], lambda x: x[:, 1], lambda x: x[:1, None, 1:3], lambda x: x.T, lambda x: da.transpose(x, (1, 2, 0)), lambda x: x.sum(), lambda x: x.dot(np.arange(x.shape[-1])), lambda x: x.dot(np.eye(x.shape[-1])), lambda x: da.tensordot(x, np.ones(x.shape[:2]), axes=[(0, 1), (0, 1)]), lambda x: x.sum(axis=0), lambda x: x.max(axis=0), lambda x: x.sum(axis=(1, 2)), lambda x: x.astype(np.complex128), lambda x: x.map_blocks(lambda x: x * 2), lambda x: x.round(1), lambda x: x.reshape((x.shape[0] * x.shape[1], x.shape[2])), lambda x: abs(x), lambda x: x > 0.5, lambda x: x.rechunk((4, 4, 4)), lambda x: x.rechunk((2, 2, 1)), ] @pytest.mark.parametrize('func', functions) def test_basic(func): x = da.random.random((2, 3, 4), chunks=(1, 2, 2)) x[x < 0.4] = 0 y = da.ma.masked_equal(x, 0) xx = func(x) yy = func(y) assert_eq(xx, da.ma.filled(yy, 0)) if yy.shape: zz = yy.compute() assert isinstance(zz, 
np.ma.masked_array) def test_tensordot(): x = da.random.random((2, 3, 4), chunks=(1, 2, 2)) x[x < 0.4] = 0 y = da.random.random((4, 3, 2), chunks=(2, 2, 1)) y[y < 0.4] = 0 xx = da.ma.masked_equal(x, 0) yy = da.ma.masked_equal(y, 0) assert_eq(da.tensordot(x, y, axes=(2, 0)), da.ma.filled(da.tensordot(xx, yy, axes=(2, 0)), 0)) assert_eq(da.tensordot(x, y, axes=(1, 1)), da.ma.filled(da.tensordot(xx, yy, axes=(1, 1)), 0)) assert_eq(da.tensordot(x, y, axes=((1, 2), (1, 0))), da.ma.filled(da.tensordot(xx, yy, axes=((1, 2), (1, 0))), 0)) @pytest.mark.parametrize('func', functions) def test_mixed_concatenate(func): x = da.random.random((2, 3, 4), chunks=(1, 2, 2)) y = da.random.random((2, 3, 4), chunks=(1, 2, 2)) y[y < 0.4] = 0 yy = da.ma.masked_equal(y, 0) d = da.concatenate([x, y], axis=0) s = da.concatenate([x, yy], axis=0) dd = func(d) ss = func(s) assert_eq(dd, ss) @pytest.mark.parametrize('func', functions) def test_mixed_random(func): d = da.random.random((4, 3, 4), chunks=(1, 2, 2)) d[d < 0.4] = 0 fn = lambda x: np.ma.masked_equal(x, 0) if random.random() < 0.5 else x s = d.map_blocks(fn) dd = func(d) ss = func(s) assert_eq(dd, ss) def test_mixed_output_type(): y = da.random.random((10, 10), chunks=(5, 5)) y[y < 0.4] = 0 y = da.ma.masked_equal(y, 0) x = da.zeros((10, 1), chunks=(5, 1)) z = da.concatenate([x, y], axis=1) assert z.shape == (10, 11) zz = z.compute() assert isinstance(zz, np.ma.masked_array) def test_creation_functions(): x = np.array([-2, -1, 0, 1, 2] * 20).reshape((10, 10)) y = np.array([-2, 0, 1, 1, 0] * 2) dx = da.from_array(x, chunks=5) dy = da.from_array(y, chunks=4) sol = np.ma.masked_greater(x, y) for (a, b) in product([dx, x], [dy, y]): assert_eq(da.ma.masked_greater(a, b), sol) # These are all the same as masked_greater, just check for correct op assert_eq(da.ma.masked_greater(dx, 0), np.ma.masked_greater(x, 0)) assert_eq(da.ma.masked_greater_equal(dx, 0), np.ma.masked_greater_equal(x, 0)) assert_eq(da.ma.masked_less(dx, 0), np.ma.masked_less(x, 0)) assert_eq(da.ma.masked_less_equal(dx, 0), np.ma.masked_less_equal(x, 0)) assert_eq(da.ma.masked_equal(dx, 0), np.ma.masked_equal(x, 0)) assert_eq(da.ma.masked_not_equal(dx, 0), np.ma.masked_not_equal(x, 0)) # masked_where assert_eq(da.ma.masked_where(False, dx), np.ma.masked_where(False, x)) assert_eq(da.ma.masked_where(dx > 2, dx), np.ma.masked_where(x > 2, x)) with pytest.raises(IndexError): da.ma.masked_where((dx > 2)[:, 0], dx) assert_eq(da.ma.masked_inside(dx, -1, 1), np.ma.masked_inside(x, -1, 1)) assert_eq(da.ma.masked_outside(dx, -1, 1), np.ma.masked_outside(x, -1, 1)) assert_eq(da.ma.masked_values(dx, -1), np.ma.masked_values(x, -1)) # masked_equal and masked_values in numpy sets the fill_value to `value`, # which can sometimes be an array. This is hard to support in dask, so we # forbid it. 
Check that this isn't supported: with pytest.raises(ValueError): da.ma.masked_equal(dx, dy) with pytest.raises(ValueError): da.ma.masked_values(dx, dy) y = x.astype('f8') y[0, 0] = y[7, 5] = np.nan dy = da.from_array(y, chunks=5) assert_eq(da.ma.masked_invalid(dy), np.ma.masked_invalid(y)) my = np.ma.masked_greater(y, 0) dmy = da.ma.masked_greater(dy, 0) assert_eq(da.ma.fix_invalid(dmy, fill_value=0), np.ma.fix_invalid(my, fill_value=0)) def test_filled(): x = np.array([-2, -1, 0, 1, 2] * 20).reshape((10, 10)) dx = da.from_array(x, chunks=5) mx = np.ma.masked_equal(x, 0) mdx = da.ma.masked_equal(dx, 0) assert_eq(da.ma.filled(mdx), np.ma.filled(mx)) assert_eq(da.ma.filled(mdx, -5), np.ma.filled(mx, -5)) def assert_eq_ma(a, b): res = a.compute() assert type(res) == type(b) if hasattr(res, 'mask'): np.testing.assert_equal(res.mask, b.mask) a = da.ma.filled(a) b = np.ma.filled(b) assert_eq(a, b, equal_nan=True) @pytest.mark.parametrize('dtype', ('i8', 'f8')) @pytest.mark.parametrize('reduction', ['sum', 'prod', 'mean', 'var', 'std', 'min', 'max', 'any', 'all']) def test_reductions(dtype, reduction): x = (np.random.RandomState(42).rand(11, 11) * 10).astype(dtype) dx = da.from_array(x, chunks=(4, 4)) mx = np.ma.masked_greater(x, 5) mdx = da.ma.masked_greater(dx, 5) dfunc = getattr(da, reduction) func = getattr(np, reduction) assert_eq_ma(dfunc(mdx), func(mx)) assert_eq_ma(dfunc(mdx, axis=0), func(mx, axis=0)) assert_eq_ma(dfunc(mdx, keepdims=True, split_every=4), func(mx, keepdims=True)) assert_eq_ma(dfunc(mdx, axis=0, split_every=2), func(mx, axis=0)) assert_eq_ma(dfunc(mdx, axis=0, keepdims=True, split_every=2), func(mx, axis=0, keepdims=True)) assert_eq_ma(dfunc(mdx, axis=1, split_every=2), func(mx, axis=1)) assert_eq_ma(dfunc(mdx, axis=1, keepdims=True, split_every=2), func(mx, axis=1, keepdims=True)) @pytest.mark.parametrize('reduction', ['argmin', 'argmax']) def test_arg_reductions(reduction): x = np.random.random((10, 10, 10)) dx = da.from_array(x, chunks=(3, 4, 5)) mx = np.ma.masked_greater(x, 0.4) dmx = da.ma.masked_greater(dx, 0.4) dfunc = getattr(da, reduction) func = getattr(np, reduction) assert_eq_ma(dfunc(dmx), func(mx)) assert_eq_ma(dfunc(dmx, 0), func(mx, 0)) assert_eq_ma(dfunc(dmx, 1), func(mx, 1)) assert_eq_ma(dfunc(dmx, 2), func(mx, 2)) def test_cumulative(): x = np.random.RandomState(0).rand(20, 24, 13) dx = da.from_array(x, chunks=(6, 5, 4)) mx = np.ma.masked_greater(x, 0.4) dmx = da.ma.masked_greater(dx, 0.4) for axis in [0, 1, 2]: assert_eq_ma(dmx.cumsum(axis=axis), mx.cumsum(axis=axis)) assert_eq_ma(dmx.cumprod(axis=axis), mx.cumprod(axis=axis)) def test_accessors(): x = np.random.random((10, 10)) dx = da.from_array(x, chunks=(3, 4)) mx = np.ma.masked_greater(x, 0.4) dmx = da.ma.masked_greater(dx, 0.4) assert_eq(da.ma.getmaskarray(dmx), np.ma.getmaskarray(mx)) assert_eq(da.ma.getmaskarray(dx), np.ma.getmaskarray(x)) assert_eq(da.ma.getdata(dmx), np.ma.getdata(mx)) assert_eq(da.ma.getdata(dx), np.ma.getdata(x)) def test_masked_array(): x = np.random.random((10, 10)).astype('f4') dx = da.from_array(x, chunks=(3, 4)) f1 = da.from_array(np.array(1), chunks=()) fill_values = [(None, None), (0.5, 0.5), (1, f1)] for data, (df, f) in product([x, dx], fill_values): assert_eq(da.ma.masked_array(data, fill_value=df), np.ma.masked_array(x, fill_value=f)) assert_eq(da.ma.masked_array(data, mask=data > 0.4, fill_value=df), np.ma.masked_array(x, mask=x > 0.4, fill_value=f)) assert_eq(da.ma.masked_array(data, mask=data > 0.4, fill_value=df), np.ma.masked_array(x, mask=x > 0.4, 
fill_value=f)) assert_eq(da.ma.masked_array(data, fill_value=df, dtype='f8'), np.ma.masked_array(x, fill_value=f, dtype='f8')) with pytest.raises(ValueError): da.ma.masked_array(dx, fill_value=dx) with pytest.raises(np.ma.MaskError): da.ma.masked_array(dx, mask=dx[:3, :3]) def test_set_fill_value(): x = np.random.randint(0, 10, (10, 10)) dx = da.from_array(x, chunks=(3, 4)) mx = np.ma.masked_greater(x, 3) dmx = da.ma.masked_greater(dx, 3) da.ma.set_fill_value(dmx, -10) np.ma.set_fill_value(mx, -10) assert_eq_ma(dmx, mx) da.ma.set_fill_value(dx, -10) np.ma.set_fill_value(x, -10) assert_eq_ma(dx, x) with pytest.raises(TypeError): da.ma.set_fill_value(dmx, 1e20) with pytest.raises(ValueError): da.ma.set_fill_value(dmx, dx) dask-0.16.0/dask/array/tests/test_optimization.py000066400000000000000000000243421320364734500220750ustar00rootroot00000000000000import pytest pytest.importorskip('numpy') import numpy as np import dask import dask.array as da from dask.optimize import fuse from dask.utils import SerializableLock from dask.array.core import getter, getter_nofancy from dask.array.optimization import (getitem, optimize, optimize_slices, fuse_slice) from dask.array.utils import assert_eq def test_fuse_getitem(): pairs = [((getter, (getter, 'x', slice(1000, 2000)), slice(15, 20)), (getter, 'x', slice(1015, 1020))), ((getitem, (getter, 'x', (slice(1000, 2000), slice(100, 200))), (slice(15, 20), slice(50, 60))), (getter, 'x', (slice(1015, 1020), slice(150, 160)))), ((getitem, (getter_nofancy, 'x', (slice(1000, 2000), slice(100, 200))), (slice(15, 20), slice(50, 60))), (getter_nofancy, 'x', (slice(1015, 1020), slice(150, 160)))), ((getter, (getter, 'x', slice(1000, 2000)), 10), (getter, 'x', 1010)), ((getitem, (getter, 'x', (slice(1000, 2000), 10)), (slice(15, 20),)), (getter, 'x', (slice(1015, 1020), 10))), ((getitem, (getter_nofancy, 'x', (slice(1000, 2000), 10)), (slice(15, 20),)), (getter_nofancy, 'x', (slice(1015, 1020), 10))), ((getter, (getter, 'x', (10, slice(1000, 2000))), (slice(15, 20), )), (getter, 'x', (10, slice(1015, 1020)))), ((getter, (getter, 'x', (slice(1000, 2000), slice(100, 200))), (slice(None, None), slice(50, 60))), (getter, 'x', (slice(1000, 2000), slice(150, 160)))), ((getter, (getter, 'x', (None, slice(None, None))), (slice(None, None), 5)), (getter, 'x', (None, 5))), ((getter, (getter, 'x', (slice(1000, 2000), slice(10, 20))), (slice(5, 10),)), (getter, 'x', (slice(1005, 1010), slice(10, 20)))), ((getitem, (getitem, 'x', (slice(1000, 2000),)), (slice(5, 10), slice(10, 20))), (getitem, 'x', (slice(1005, 1010), slice(10, 20)))), ((getter, (getter, 'x', slice(1000, 2000), False, False), slice(15, 20)), (getter, 'x', slice(1015, 1020))), ((getter, (getter, 'x', slice(1000, 2000)), slice(15, 20), False, False), (getter, 'x', slice(1015, 1020))), ((getter, (getter_nofancy, 'x', slice(1000, 2000), False, False), slice(15, 20), False, False), (getter_nofancy, 'x', slice(1015, 1020), False, False)), ] for inp, expected in pairs: result = optimize_slices({'y': inp}) assert result == {'y': expected} def test_fuse_getitem_lock(): lock1 = SerializableLock() lock2 = SerializableLock() pairs = [((getter, (getter, 'x', slice(1000, 2000), True, lock1), slice(15, 20)), (getter, 'x', slice(1015, 1020), True, lock1)), ((getitem, (getter, 'x', (slice(1000, 2000), slice(100, 200)), True, lock1), (slice(15, 20), slice(50, 60))), (getter, 'x', (slice(1015, 1020), slice(150, 160)), True, lock1)), ((getitem, (getter_nofancy, 'x', (slice(1000, 2000), slice(100, 200)), True, lock1), (slice(15, 
20), slice(50, 60))), (getter_nofancy, 'x', (slice(1015, 1020), slice(150, 160)), True, lock1)), ((getter, (getter, 'x', slice(1000, 2000), True, lock1), slice(15, 20), True, lock2), (getter, (getter, 'x', slice(1000, 2000), True, lock1), slice(15, 20), True, lock2))] for inp, expected in pairs: result = optimize_slices({'y': inp}) assert result == {'y': expected} def test_optimize_with_getitem_fusion(): dsk = {'a': 'some-array', 'b': (getter, 'a', (slice(10, 20), slice(100, 200))), 'c': (getter, 'b', (5, slice(50, 60)))} result = optimize(dsk, ['c']) expected_task = (getter, 'some-array', (15, slice(150, 160))) assert any(v == expected_task for v in result.values()) assert len(result) < len(dsk) def test_optimize_slicing(): dsk = {'a': (range, 10), 'b': (getter, 'a', (slice(None, None, None),)), 'c': (getter, 'b', (slice(None, None, None),)), 'd': (getter, 'c', (slice(0, 5, None),)), 'e': (getter, 'd', (slice(None, None, None),))} expected = {'e': (getter, (range, 10), (slice(0, 5, None),))} result = optimize_slices(fuse(dsk, [], rename_keys=False)[0]) assert result == expected # protect output keys expected = {'c': (getter, (range, 10), (slice(0, None, None),)), 'd': (getter, 'c', (slice(0, 5, None),)), 'e': (getter, 'd', (slice(None, None, None),))} result = optimize_slices(fuse(dsk, ['c', 'd', 'e'], rename_keys=False)[0]) assert result == expected def test_fuse_slice(): assert fuse_slice(slice(10, 15), slice(0, 5, 2)) == slice(10, 15, 2) assert (fuse_slice((slice(100, 200),), (None, slice(10, 20))) == (None, slice(110, 120))) assert (fuse_slice((slice(100, 200),), (slice(10, 20), None)) == (slice(110, 120), None)) assert (fuse_slice((1,), (None,)) == (1, None)) assert (fuse_slice((1, slice(10, 20)), (None, None, 3, None)) == (1, None, None, 13, None)) with pytest.raises(NotImplementedError): fuse_slice(slice(10, 15, 2), -1) def test_fuse_slice_with_lists(): assert fuse_slice(slice(10, 20, 2), [1, 2, 3]) == [12, 14, 16] assert fuse_slice([10, 20, 30, 40, 50], [3, 1, 2]) == [40, 20, 30] assert fuse_slice([10, 20, 30, 40, 50], 3) == 40 assert fuse_slice([10, 20, 30, 40, 50], -1) == 50 assert fuse_slice([10, 20, 30, 40, 50], slice(1, None, 2)) == [20, 40] assert fuse_slice((slice(None), slice(0, 10), [1, 2, 3]), (slice(None), slice(1, 5), slice(None))) == (slice(0, None), slice(1, 5), [1, 2, 3]) assert fuse_slice((slice(None), slice(None), [1, 2, 3]), (slice(None), slice(1, 5), 1)) == (slice(0, None), slice(1, 5), 2) def test_nonfusible_fancy_indexing(): nil = slice(None) cases = [# x[:, list, :][int, :, :] ((nil, [1, 2, 3], nil), (0, nil, nil)), # x[int, :, :][:, list, :] ((0, nil, nil), (nil, [1, 2, 3], nil)), # x[:, list, :, :][:, :, :, int] ((nil, [1, 2], nil, nil), (nil, nil, nil, 0))] for a, b in cases: with pytest.raises(NotImplementedError): fuse_slice(a, b) def test_hard_fuse_slice_cases(): dsk = {'x': (getter, (getter, 'x', (None, slice(None, None))), (slice(None, None), 5))} assert optimize_slices(dsk) == {'x': (getter, 'x', (None, 5))} def test_dont_fuse_numpy_arrays(): x = np.ones(10) for chunks in [(5,), (10,)]: y = da.from_array(x, chunks=(10,)) dsk = y.__dask_optimize__(y.dask, y.__dask_keys__()) assert sum(isinstance(v, np.ndarray) for v in dsk.values()) == 1 def test_minimize_data_transfer(): x = np.ones(100) y = da.from_array(x, chunks=25) z = y + 1 dsk = z.__dask_optimize__(z.dask, z.__dask_keys__()) keys = list(dsk) results = dask.get(dsk, keys) big_key = [k for k, r in zip(keys, results) if r is x][0] dependencies, dependents = dask.core.get_deps(dsk) deps = 
dependents[big_key] assert len(deps) == 4 for dep in deps: assert dsk[dep][0] in (getitem, getter) assert dsk[dep][1] == big_key def test_fuse_slices_with_alias(): dsk = {'x': np.arange(16).reshape((4, 4)), ('dx', 0, 0): (getter, 'x', (slice(0, 4), slice(0, 4))), ('alias', 0, 0): ('dx', 0, 0), ('dx2', 0): (getitem, ('alias', 0, 0), (slice(None), 0))} keys = [('dx2', 0)] dsk2 = optimize(dsk, keys) assert len(dsk2) == 3 fused_key = set(dsk2).difference(['x', ('dx2', 0)]).pop() assert dsk2[fused_key] == (getter, 'x', (slice(0, 4), 0)) def test_dont_fuse_fancy_indexing_in_getter_nofancy(): dsk = {'a': (getitem, (getter_nofancy, 'x', (slice(10, 20, None), slice(100, 200, None))), ([1, 3], slice(50, 60, None)))} assert optimize_slices(dsk) == dsk dsk = {'a': (getitem, (getter_nofancy, 'x', [1, 2, 3]), 0)} assert optimize_slices(dsk) == dsk @pytest.mark.parametrize('chunks', [10, 5, 3]) def test_fuse_getter_with_asarray(chunks): x = np.ones(10) * 1234567890 y = da.ones(10, chunks=chunks) z = x + y dsk = z.__dask_optimize__(z.dask, z.__dask_keys__()) assert any(v is x for v in dsk.values()) for v in dsk.values(): s = str(v) assert s.count('getitem') + s.count('getter') <= 1 if v is not x: assert '1234567890' not in s n_getters = len([v for v in dsk.values() if v[0] in (getitem, getter)]) if y.npartitions > 1: assert n_getters == y.npartitions else: assert n_getters == 0 assert_eq(z, x + 1) @pytest.mark.parametrize('get,remove', [(getter, False), (getter_nofancy, False), (getitem, True)]) def test_remove_no_op_slices_if_get_is_not_getter_or_getter_nofancy(get, remove): # Test that no-op slices are removed as long as get is not getter or # getter_nofancy. This ensures that `get` calls are always made in all # tasks created by `from_array`, even after optimization null = slice(0,None) opts = [((get, 'x', null, False, False), 'x' if remove else (get, 'x', null, False, False)), ((getitem, (get, 'x', null, False, False), null), 'x' if remove else (get, 'x', null, False, False)), ((getitem, (get, 'x', (null, null), False, False), ()), 'x' if remove else (get, 'x', (null, null), False, False))] for orig, final in opts: assert optimize_slices({'a': orig}) == {'a': final} def test_turn_off_fusion(): x = da.ones(10, chunks=(5,)) y = da.sum(x + 1 + 2 + 3) a = y.__dask_optimize__(y.dask, y.__dask_keys__()) with dask.set_options(fuse_ave_width=0): b = y.__dask_optimize__(y.dask, y.__dask_keys__()) assert dask.get(a, y.__dask_keys__()) == dask.get(b, y.__dask_keys__()) assert len(a) < len(b) dask-0.16.0/dask/array/tests/test_percentiles.py000066400000000000000000000032531320364734500216620ustar00rootroot00000000000000import pytest pytest.importorskip('numpy') import numpy as np import dask.array as da from dask.array.utils import assert_eq, same_keys def test_percentile(): d = da.ones((16,), chunks=(4,)) assert_eq(da.percentile(d, [0, 50, 100]), np.array([1, 1, 1], dtype=d.dtype)) x = np.array([0, 0, 5, 5, 5, 5, 20, 20]) d = da.from_array(x, chunks=(3,)) result = da.percentile(d, [0, 50, 100]) assert_eq(da.percentile(d, [0, 50, 100]), np.array([0, 5, 20], dtype=result.dtype)) assert same_keys(da.percentile(d, [0, 50, 100]), da.percentile(d, [0, 50, 100])) assert not same_keys(da.percentile(d, [0, 50, 100]), da.percentile(d, [0, 50])) x = np.array(['a', 'a', 'd', 'd', 'd', 'e']) d = da.from_array(x, chunks=(3,)) assert_eq(da.percentile(d, [0, 50, 100]), np.array(['a', 'd', 'e'], dtype=x.dtype)) @pytest.mark.skip def test_percentile_with_categoricals(): try: import pandas as pd except ImportError: return x0 = 
pd.Categorical(['Alice', 'Bob', 'Charlie', 'Dennis', 'Alice', 'Alice']) x1 = pd.Categorical(['Alice', 'Bob', 'Charlie', 'Dennis', 'Alice', 'Alice']) dsk = {('x', 0): x0, ('x', 1): x1} x = da.Array(dsk, 'x', chunks=((6, 6),)) p = da.percentile(x, [50]) assert (p.compute().categories == x0.categories).all() assert (p.compute().codes == [0]).all() assert same_keys(da.percentile(x, [50]), da.percentile(x, [50])) def test_percentiles_with_empty_arrays(): x = da.ones(10, chunks=((5, 0, 5),)) assert_eq(da.percentile(x, [10, 50, 90]), np.array([1, 1, 1], dtype=x.dtype)) dask-0.16.0/dask/array/tests/test_random.py000066400000000000000000000211741320364734500206270ustar00rootroot00000000000000import pytest pytest.importorskip('numpy') import numpy as np import dask.array as da from dask.array.core import Array from dask.array.random import random, exponential, normal from dask.array.utils import assert_eq from dask.multiprocessing import get as mpget from dask.multiprocessing import _dumps, _loads def test_RandomState(): state = da.random.RandomState(5) x = state.normal(10, 1, size=10, chunks=5) assert_eq(x, x) state = da.random.RandomState(5) y = state.normal(10, 1, size=10, chunks=5) assert_eq(x, y) def test_concurrency(): state = da.random.RandomState(5) x = state.normal(10, 1, size=10, chunks=2) state = da.random.RandomState(5) y = state.normal(10, 1, size=10, chunks=2) assert (x.compute(get=mpget) == y.compute(get=mpget)).all() def test_doc_randomstate(): assert 'mean' in da.random.RandomState(5).normal.__doc__ def test_serializability(): state = da.random.RandomState(5) x = state.normal(10, 1, size=10, chunks=5) y = _loads(_dumps(x)) assert_eq(x, y) def test_determinisim_through_dask_values(): samples_1 = da.random.RandomState(42).normal(size=1000, chunks=10) samples_2 = da.random.RandomState(42).normal(size=1000, chunks=10) assert set(samples_1.dask) == set(samples_2.dask) assert_eq(samples_1, samples_2) def test_randomstate_consistent_names(): state1 = da.random.RandomState(42) state2 = da.random.RandomState(42) assert (sorted(state1.normal(size=(100, 100), chunks=(10, 10)).dask) == sorted(state2.normal(size=(100, 100), chunks=(10, 10)).dask)) assert (sorted(state1.normal(size=100, loc=4.5, scale=5.0, chunks=10).dask) == sorted(state2.normal(size=100, loc=4.5, scale=5.0, chunks=10).dask)) def test_random(): a = random((10, 10), chunks=(5, 5)) assert isinstance(a, Array) assert isinstance(a.name, str) and a.name assert a.shape == (10, 10) assert a.chunks == ((5, 5), (5, 5)) x = set(np.array(a).flat) assert len(x) > 90 def test_parametrized_random_function(): a = exponential(1000, (10, 10), chunks=(5, 5)) assert isinstance(a, Array) assert isinstance(a.name, str) and a.name assert a.shape == (10, 10) assert a.chunks == ((5, 5), (5, 5)) x = np.array(a) assert 10 < x.mean() < 100000 y = set(x.flat) assert len(y) > 90 def test_kwargs(): a = normal(loc=10.0, scale=0.1, size=(10, 10), chunks=(5, 5)) assert isinstance(a, Array) x = np.array(a) assert 8 < x.mean() < 12 def test_unique_names(): a = random((10, 10), chunks=(5, 5)) b = random((10, 10), chunks=(5, 5)) assert a.name != b.name def test_docs(): assert 'exponential' in exponential.__doc__ assert 'exponential' in exponential.__name__ def test_can_make_really_big_random_array(): normal(10, 1, (1000000, 1000000), chunks=(100000, 100000)) def test_random_seed(): da.random.seed(123) x = da.random.normal(size=10, chunks=5) y = da.random.normal(size=10, chunks=5) da.random.seed(123) a = da.random.normal(size=10, chunks=5) b = 
da.random.normal(size=10, chunks=5) assert_eq(x, a) assert_eq(y, b) def test_consistent_across_sizes(): x1 = da.random.RandomState(123).random(20, chunks=20) x2 = da.random.RandomState(123).random(100, chunks=20)[:20] x3 = da.random.RandomState(123).random(200, chunks=20)[:20] assert_eq(x1, x2) assert_eq(x1, x3) def test_random_all(): da.random.beta(1, 2, size=5, chunks=3).compute() da.random.binomial(10, 0.5, size=5, chunks=3).compute() da.random.chisquare(1, size=5, chunks=3).compute() da.random.exponential(1, size=5, chunks=3).compute() da.random.f(1, 2, size=5, chunks=3).compute() da.random.gamma(5, 1, size=5, chunks=3).compute() da.random.geometric(1, size=5, chunks=3).compute() da.random.gumbel(1, size=5, chunks=3).compute() da.random.hypergeometric(1, 2, 3, size=5, chunks=3).compute() da.random.laplace(size=5, chunks=3).compute() da.random.logistic(size=5, chunks=3).compute() da.random.lognormal(size=5, chunks=3).compute() da.random.logseries(0.5, size=5, chunks=3).compute() da.random.multinomial(20, [1 / 6.] * 6, size=5, chunks=3).compute() da.random.negative_binomial(5, 0.5, size=5, chunks=3).compute() da.random.noncentral_chisquare(2, 2, size=5, chunks=3).compute() da.random.noncentral_f(2, 2, 3, size=5, chunks=3).compute() da.random.normal(2, 2, size=5, chunks=3).compute() da.random.pareto(1, size=5, chunks=3).compute() da.random.poisson(size=5, chunks=3).compute() da.random.power(1, size=5, chunks=3).compute() da.random.rayleigh(size=5, chunks=3).compute() da.random.random_sample(size=5, chunks=3).compute() da.random.triangular(1, 2, 3, size=5, chunks=3).compute() da.random.uniform(size=5, chunks=3).compute() da.random.vonmises(2, 3, size=5, chunks=3).compute() da.random.wald(1, 2, size=5, chunks=3).compute() da.random.weibull(2, size=5, chunks=3).compute() da.random.zipf(2, size=5, chunks=3).compute() da.random.standard_cauchy(size=5, chunks=3).compute() da.random.standard_exponential(size=5, chunks=3).compute() da.random.standard_gamma(2, size=5, chunks=3).compute() da.random.standard_normal(size=5, chunks=3).compute() da.random.standard_t(2, size=5, chunks=3).compute() @pytest.mark.skipif(not hasattr(np,'broadcast_to'), reason='requires numpy 1.10 method "broadcast_to"') def test_array_broadcasting(): arr = np.arange(6).reshape((2, 3)) daones = da.ones((2, 3, 4), chunks=3) assert da.random.poisson(arr, chunks=3).compute().shape == (2, 3) for x in (arr, daones): y = da.random.normal(x, 2, chunks=3) assert y.shape == x.shape assert y.compute().shape == x.shape y = da.random.normal(daones, 2, chunks=3) assert set(daones.dask).issubset(set(y.dask)) assert da.random.normal(np.ones((1, 4)), da.ones((2, 3, 4), chunks=(2, 3, 4)), chunks=(2, 3, 4)).compute().shape == (2, 3, 4) assert da.random.normal(scale=np.ones((1, 4)), loc=da.ones((2, 3, 4), chunks=(2, 3, 4)), size=(2, 2, 3, 4), chunks=(2, 2, 3, 4)).compute().shape == (2, 2, 3, 4) with pytest.raises(ValueError): da.random.normal(arr, np.ones((3, 1)), size=(2, 3, 4), chunks=3) for o in (np.ones(100), da.ones(100, chunks=(50,)), 1): a = da.random.normal(1000 * o, 0.01, chunks=(50,)) assert 800 < a.mean().compute() < 1200 # ensure that mis-matched chunks align well x = np.arange(10)**3 y = da.from_array(x, chunks=(1,)) z = da.random.normal(y, 0.01, chunks=(10,)) assert 0.8 < z.mean().compute() / x.mean() < 1.2 def test_multinomial(): for size, chunks in [(5, 3), ((5, 4), (2, 3))]: x = da.random.multinomial(20, [1 / 6.] * 6, size=size, chunks=chunks) y = np.random.multinomial(20, [1 / 6.] 
* 6, size=size) assert x.shape == y.shape == x.compute().shape def test_choice(): np_dtype = np.random.choice(1, size=()).dtype size = (10, 3) chunks = 4 x = da.random.choice(3, size=size, chunks=chunks) assert x.dtype == np_dtype assert x.shape == size res = x.compute() assert res.dtype == np_dtype assert res.shape == size np_a = np.array([1, 3, 5, 7, 9], dtype='f8') da_a = da.from_array(np_a, chunks=2) for a in [np_a, da_a]: x = da.random.choice(a, size=size, chunks=chunks) res = x.compute() assert x.dtype == np_a.dtype assert res.dtype == np_a.dtype assert set(np.unique(res)).issubset(np_a) np_p = np.array([0, 0.2, 0.2, 0.3, 0.3]) da_p = da.from_array(np_p, chunks=2) for a, p in [(da_a, np_p), (np_a, da_p)]: x = da.random.choice(a, size=size, chunks=chunks, p=p) res = x.compute() assert x.dtype == np_a.dtype assert res.dtype == np_a.dtype assert set(np.unique(res)).issubset(np_a[1:]) np_dtype = np.random.choice(1, size=(), p=np.array([1])).dtype x = da.random.choice(5, size=size, chunks=chunks, p=np_p) res = x.compute() assert x.dtype == np_dtype assert res.dtype == np_dtype errs = [(-1, None), # negative a (np_a[:, None], None), # a must be 1D (np_a, np_p[:, None]), # p must be 1D (np_a, np_p[:-2]), # a and p must match (3, np_p), # a and p must match (4, [0.2, 0.2, 0.3])] # p must sum to 1 for (a, p) in errs: with pytest.raises(ValueError): da.random.choice(a, size=size, chunks=chunks, p=p) dask-0.16.0/dask/array/tests/test_rechunk.py000066400000000000000000000456421320364734500210140ustar00rootroot00000000000000from itertools import product import warnings import pytest np = pytest.importorskip('numpy') import dask from dask.utils import funcname from dask.array.utils import assert_eq from dask.array.rechunk import intersect_chunks, rechunk, normalize_chunks from dask.array.rechunk import cumdims_label, _breakpoints, _intersect_1d, _old_to_new from dask.array.rechunk import plan_rechunk, divide_to_width, merge_to_number import dask.array as da def test_rechunk_internals_1(): """ Test the cumdims_label and _breakpoints and _intersect_1d internal funcs to rechunk.""" new = cumdims_label(((1,1,2),(1,5,1)),'n') old = cumdims_label(((4, ),(1,) * 5),'o') breaks = tuple(_breakpoints(o, n) for o, n in zip(old, new)) answer = (('o', 0), ('n', 0), ('n', 1), ('n', 2), ('o', 4), ('n', 4)) assert breaks[0] == answer answer2 = (('o', 0), ('n', 0), ('o', 1), ('n', 1), ('o', 2), ('o', 3), ('o', 4), ('o', 5), ('n', 6), ('n', 7)) assert breaks[1] == answer2 i1d = [_intersect_1d(b) for b in breaks] answer3 = [[(0, slice(0, 1))], [(0, slice(1, 2))], [(0, slice(2, 4))]] assert i1d[0] == answer3 answer4 = [[(0, slice(0, 1))], [(1, slice(0, 1)), (2, slice(0, 1)), (3, slice(0, 1)), (4, slice(0, 1)), (5, slice(0, 1))], [(5, slice(1, 2))]] assert i1d[1] == answer4 def test_intersect_1(): """ Convert 1 D chunks""" old = ((10, 10, 10, 10, 10), ) new = ((25, 5, 20), ) answer = [(((0, slice(0, 10)), ), ((1, slice(0, 10)), ), ((2, slice(0, 5)), )), (((2, slice(5, 10)), ), ), (((3, slice(0, 10)), ), ((4, slice(0, 10)), )) ] cross = list(intersect_chunks(old_chunks=old, new_chunks=new)) assert answer == cross def test_intersect_2(): """ Convert 1 D chunks""" old = ((20, 20, 20, 20, 20), ) new = ((58, 4, 20, 18),) answer = [(((0, slice(0, 20)), ), ((1, slice(0, 20)), ), ((2, slice(0, 18)), )), (((2, slice(18, 20)), ), ((3, slice(0, 2)), )), (((3, slice(2, 20)), ), ((4, slice(0, 2)), )), (((4, slice(2, 20)), ), ) ] cross = list(intersect_chunks(old_chunks=old, new_chunks=new)) assert answer == cross def 
test_rechunk_1d(): """Try rechunking a random 1d matrix""" a = np.random.uniform(0, 1, 300) x = da.from_array(a, chunks=((100, ) * 3, )) new = ((50, ) * 6,) x2 = rechunk(x, chunks=new) assert x2.chunks == new assert np.all(x2.compute() == a) def test_rechunk_2d(): """Try rechunking a random 2d matrix""" a = np.random.uniform(0, 1, 300).reshape((10, 30)) x = da.from_array(a, chunks=((1, 2, 3, 4), (5, ) * 6)) new = ((5, 5), (15, ) * 2) x2 = rechunk(x, chunks=new) assert x2.chunks == new assert np.all(x2.compute() == a) def test_rechunk_4d(): """Try rechunking a random 4d matrix""" old = ((5, 5), ) * 4 a = np.random.uniform(0, 1, 10000).reshape((10, ) * 4) x = da.from_array(a, chunks=old) new = ((10, ), ) * 4 x2 = rechunk(x, chunks=new) assert x2.chunks == new assert np.all(x2.compute() == a) def test_rechunk_expand(): a = np.random.uniform(0, 1, 100).reshape((10, 10)) x = da.from_array(a, chunks=(5, 5)) y = x.rechunk(chunks=((3, 3, 3, 1), (3, 3, 3, 1))) assert np.all(y.compute() == a) def test_rechunk_expand2(): (a, b) = (3, 2) orig = np.random.uniform(0, 1, a ** b).reshape((a,) * b) for off, off2 in product(range(1, a - 1), range(1, a - 1)): old = ((a - off, off), ) * b x = da.from_array(orig, chunks=old) new = ((a - off2, off2), ) * b assert np.all(x.rechunk(chunks=new).compute() == orig) if a - off - off2 > 0: new = ((off, a - off2 - off, off2), ) * b y = x.rechunk(chunks=new).compute() assert np.all(y == orig) def test_rechunk_method(): """ Test rechunking can be done as a method of dask array.""" old = ((5, 2, 3), ) * 4 new = ((3, 3, 3, 1), ) * 4 a = np.random.uniform(0, 1, 10000).reshape((10, ) * 4) x = da.from_array(a, chunks=old) x2 = x.rechunk(chunks=new) assert x2.chunks == new assert np.all(x2.compute() == a) def test_rechunk_blockshape(): """ Test that blockshape can be used.""" new_shape, new_chunks = (10, 10), (4, 3) new_blockdims = normalize_chunks(new_chunks, new_shape) old_chunks = ((4, 4, 2), (3, 3, 3, 1)) a = np.random.uniform(0,1,100).reshape((10, 10)) x = da.from_array(a, chunks=old_chunks) check1 = rechunk(x, chunks=new_chunks) assert check1.chunks == new_blockdims assert np.all(check1.compute() == a) def test_dtype(): x = da.ones(5, chunks=(2,)) assert x.rechunk(chunks=(1,)).dtype == x.dtype def test_rechunk_with_dict(): x = da.ones((24, 24), chunks=(4, 8)) y = x.rechunk(chunks={0: 12}) assert y.chunks == ((12, 12), (8, 8, 8)) x = da.ones((24, 24), chunks=(4, 8)) y = x.rechunk(chunks={0: (12, 12)}) assert y.chunks == ((12, 12), (8, 8, 8)) def test_rechunk_with_empty_input(): x = da.ones((24, 24), chunks=(4, 8)) assert x.rechunk(chunks={}).chunks == x.chunks pytest.raises(ValueError, lambda: x.rechunk(chunks=())) def test_rechunk_with_null_dimensions(): x = da.from_array(np.ones((24, 24)), chunks=(4, 8)) assert (x.rechunk(chunks=(None, 4)).chunks == da.ones((24, 24), chunks=(4, 4)).chunks) def test_rechunk_with_integer(): x = da.from_array(np.arange(5), chunks=4) y = x.rechunk(3) assert y.chunks == ((3, 2),) assert (x.compute() == y.compute()).all() def test_rechunk_0d(): a = np.array(42) x = da.from_array(a, chunks=()) y = x.rechunk(()) assert y.chunks == () assert y.compute() == a def test_rechunk_empty(): x = da.ones((0, 10), chunks=(5, 5)) y = x.rechunk((2, 2)) assert y.chunks == ((0,), (2,) * 5) assert_eq(x, y) def test_rechunk_same(): x = da.ones((24, 24), chunks=(4, 8)) y = x.rechunk(x.chunks) assert x is y def test_rechunk_minus_one(): x = da.ones((24, 24), chunks=(4, 8)) y = x.rechunk((-1, 8)) assert y.chunks == ((24,), (8, 8, 8)) assert_eq(x, y) def 
test_rechunk_intermediates(): x = da.random.normal(10, 0.1, (10, 10), chunks=(10, 1)) y = x.rechunk((1, 10)) assert len(y.dask) > 30 def test_divide_to_width(): chunks = divide_to_width((8, 9, 10), 10) assert chunks == (8, 9, 10) chunks = divide_to_width((8, 2, 9, 10, 11, 12), 4) # Note how 9 gives (3, 3, 3), not (4, 4, 1) or whatever assert chunks == (4, 4, 2, 3, 3, 3, 3, 3, 4, 3, 4, 4, 4, 4, 4, ) def test_merge_to_number(): chunks = merge_to_number((10,) * 4, 5) assert chunks == (10, 10, 10, 10) chunks = merge_to_number((10,) * 4, 4) assert chunks == (10, 10, 10, 10) chunks = merge_to_number((10,) * 4, 3) assert chunks == (20, 10, 10) chunks = merge_to_number((10,) * 4, 2) assert chunks == (20, 20) chunks = merge_to_number((10,) * 4, 1) assert chunks == (40,) chunks = merge_to_number((10,) * 10, 2) assert chunks == (50,) * 2 chunks = merge_to_number((10,) * 10, 3) assert chunks == (40, 30, 30) chunks = merge_to_number((5, 1, 1, 15, 10), 4) assert chunks == (5, 2, 15, 10) chunks = merge_to_number((5, 1, 1, 15, 10), 3) assert chunks == (7, 15, 10) chunks = merge_to_number((5, 1, 1, 15, 10), 2) assert chunks == (22, 10) chunks = merge_to_number((5, 1, 1, 15, 10), 1) assert chunks == (32,) chunks = merge_to_number((1, 1, 1, 1, 3, 1, 1), 6) assert chunks == (2, 1, 1, 3, 1, 1) chunks = merge_to_number((1, 1, 1, 1, 3, 1, 1), 5) assert chunks == (2, 2, 3, 1, 1) chunks = merge_to_number((1, 1, 1, 1, 3, 1, 1), 4) assert chunks == (2, 2, 3, 2) chunks = merge_to_number((1, 1, 1, 1, 3, 1, 1), 3) assert chunks == (4, 3, 2) chunks = merge_to_number((1, 1, 1, 1, 3, 1, 1), 2) assert chunks == (4, 5) chunks = merge_to_number((1, 1, 1, 1, 3, 1, 1), 1) assert chunks == (9,) def _plan(old_chunks, new_chunks, itemsize=1, block_size_limit=1e7): return plan_rechunk(old_chunks, new_chunks, itemsize=itemsize, block_size_limit=block_size_limit) def _assert_steps(steps, expected): assert len(steps) == len(expected) assert steps == expected def test_plan_rechunk(): c = ((20,) * 2) # coarse f = ((2,) * 20) # fine nc = ((float('nan'),) * 2) # nan-coarse nf = ((float('nan'),) * 20) # nan-fine # Trivial cases steps = _plan((), ()) _assert_steps(steps, [()]) steps = _plan((c, ()), (f, ())) _assert_steps(steps, [(f, ())]) # No intermediate required steps = _plan((c,), (f,)) _assert_steps(steps, [(f,)]) steps = _plan((f,), (c,)) _assert_steps(steps, [(c,)]) steps = _plan((c, c), (f, f)) _assert_steps(steps, [(f, f)]) steps = _plan((f, f), (c, c)) _assert_steps(steps, [(c, c)]) steps = _plan((f, c), (c, c)) _assert_steps(steps, [(c, c)]) # An intermediate is used to reduce graph size steps = _plan((f, c), (c, f)) _assert_steps(steps, [(c, c), (c, f)]) steps = _plan((c + c, c + f), (f + f, c + c)) _assert_steps(steps, [(c + c, c + c), (f + f, c + c)]) # Same, with unknown dim steps = _plan((nc + nf, c + c, c + f), (nc + nf, f + f, c + c)) _assert_steps(steps, steps) # Just at the memory limit => an intermediate is used steps = _plan((f, c), (c, f), block_size_limit=400) _assert_steps(steps, [(c, c), (c, f)]) # Hitting the memory limit => partial merge m = ((10,) * 4) # mid steps = _plan((f, c), (c, f), block_size_limit=399) _assert_steps(steps, [(m, c), (c, f)]) steps2 = _plan((f, c), (c, f), block_size_limit=3999, itemsize=10) _assert_steps(steps2, steps) # Larger problem size => more intermediates c = ((1000,) * 2) # coarse f = ((2,) * 1000) # fine steps = _plan((f, c), (c, f), block_size_limit=99999) assert len(steps) == 3 assert steps[-1] == (c, f) for i in range(len(steps) - 1): prev = steps[i] succ = steps[i + 1] # 
Merging on the first dim, splitting on the second dim assert len(succ[0]) <= len(prev[0]) / 2.0 assert len(succ[1]) >= len(prev[1]) * 2.0 def test_plan_rechunk_5d(): # 5d problem c = ((10,) * 1) # coarse f = ((1,) * 10) # fine steps = _plan((c, c, c, c, c), (f, f, f, f, f)) _assert_steps(steps, [(f, f, f, f, f)]) steps = _plan((f, f, f, f, c), (c, c, c, f, f)) _assert_steps(steps, [(c, c, c, f, c), (c, c, c, f, f)]) # Only 1 dim can be merged at first steps = _plan((c, c, f, f, c), (c, c, c, f, f), block_size_limit=2e4) _assert_steps(steps, [(c, c, c, f, c), (c, c, c, f, f)]) def test_plan_rechunk_heterogenous(): c = ((10,) * 1) # coarse f = ((1,) * 10) # fine cf = c + f cc = c + c ff = f + f fc = f + c # No intermediate required steps = _plan((cc, cf), (ff, ff)) _assert_steps(steps, [(ff, ff)]) steps = _plan((cf, fc), (ff, cf)) _assert_steps(steps, [(ff, cf)]) # An intermediate is used to reduce graph size steps = _plan((cc, cf), (ff, cc)) _assert_steps(steps, [(cc, cc), (ff, cc)]) steps = _plan((cc, cf, cc), (ff, cc, cf)) _assert_steps(steps, [(cc, cc, cc), (ff, cc, cf)]) # Imposing a memory limit => the first intermediate is constrained: # * cc -> ff would increase the graph size: no # * ff -> cf would increase the block size too much: no # * cf -> cc fits the bill (graph size /= 10, block size neutral) # * cf -> fc also fits the bill (graph size and block size neutral) steps = _plan((cc, ff, cf), (ff, cf, cc), block_size_limit=100) _assert_steps(steps, [(cc, ff, cc), (ff, cf, cc)]) def test_plan_rechunk_asymmetric(): a = ((1,) * 1000, (80000000,)) b = ((1000,), (80000,) * 1000) steps = plan_rechunk(a, b, itemsize=8) assert len(steps) > 1 x = da.ones((1000, 80000000), chunks=(1, 80000000)) y = x.rechunk((1000, x.shape[1] // 1000)) assert len(y.dask) < 100000 def test_rechunk_warning(): N = 20 x = da.random.normal(size=(N, N, 100), chunks=(1, N, 100)) with warnings.catch_warnings(record=True) as w: x = x.rechunk((N, 1, 100)) assert not w @pytest.mark.parametrize('shape,chunks', [[(4,), (2,)], [(4, 4), (2, 2)], [(4, 4), (4, 2)]]) def test_dont_concatenate_single_chunks(shape, chunks): x = da.ones(shape, chunks=shape) y = x.rechunk(chunks) dsk = dict(y.dask) assert not any(funcname(task[0]).startswith('concat') for task in dsk.values() if dask.istask(task)) def test_intersect_nan(): old_chunks = ((float('nan'), float('nan')), (8,)) new_chunks = ((float('nan'), float('nan')), (4, 4)) result = list(intersect_chunks(old_chunks, new_chunks)) expected = [ (((0, slice(0, None, None)), (0, slice(0, 4, None))),), (((0, slice(0, None, None)), (0, slice(4, 8, None))),), (((1, slice(0, None, None)), (0, slice(0, 4, None))),), (((1, slice(0, None, None)), (0, slice(4, 8, None))),) ] assert result == expected def test_intersect_nan_single(): old_chunks = ((float('nan'),), (10,)) new_chunks = ((float('nan'),), (5, 5)) result = list(intersect_chunks(old_chunks, new_chunks)) expected = [(((0, slice(0, None, None)), (0, slice(0, 5, None))),), (((0, slice(0, None, None)), (0, slice(5, 10, None))),)] assert result == expected def test_intersect_nan_long(): old_chunks = (tuple([float('nan')] * 4), (10,)) new_chunks = (tuple([float('nan')] * 4), (5, 5)) result = list(intersect_chunks(old_chunks, new_chunks)) expected = [ (((0, slice(0, None, None)), (0, slice(0, 5, None))),), (((0, slice(0, None, None)), (0, slice(5, 10, None))),), (((1, slice(0, None, None)), (0, slice(0, 5, None))),), (((1, slice(0, None, None)), (0, slice(5, 10, None))),), (((2, slice(0, None, None)), (0, slice(0, 5, None))),), (((2, 
slice(0, None, None)), (0, slice(5, 10, None))),), (((3, slice(0, None, None)), (0, slice(0, 5, None))),), (((3, slice(0, None, None)), (0, slice(5, 10, None))),) ] assert result == expected def test_rechunk_unknown_from_pandas(): dd = pytest.importorskip('dask.dataframe') pd = pytest.importorskip('pandas') arr = np.random.randn(50, 10) x = dd.from_pandas(pd.DataFrame(arr), 2).values result = x.rechunk((None, (5, 5))) assert np.isnan(x.chunks[0]).all() assert np.isnan(result.chunks[0]).all() assert result.chunks[1] == (5, 5) expected = da.from_array(arr, chunks=((25, 25), (10,))).rechunk((None, (5, 5))) assert_eq(result, expected) def test_rechunk_unknown_from_array(): dd = pytest.importorskip('dask.dataframe') # pd = pytest.importorskip('pandas') x = dd.from_array(da.ones(shape=(4, 4), chunks=(2, 2))).values # result = x.rechunk({1: 5}) result = x.rechunk((None, 4)) assert np.isnan(x.chunks[0]).all() assert np.isnan(result.chunks[0]).all() assert x.chunks[1] == (4,) assert_eq(x, result) @pytest.mark.parametrize('x, chunks', [ (da.ones(shape=(50, 10), chunks=(25, 10)), (None, 5)), (da.ones(shape=(50, 10), chunks=(25, 10)), {1: 5}), (da.ones(shape=(50, 10), chunks=(25, 10)), (None, (5, 5))), (da.ones(shape=(1000, 10), chunks=(5, 10)), (None, 5)), (da.ones(shape=(1000, 10), chunks=(5, 10)), {1: 5}), (da.ones(shape=(1000, 10), chunks=(5, 10)), (None, (5, 5))), (da.ones(shape=(10, 10), chunks=(10, 10)), (None, 5)), (da.ones(shape=(10, 10), chunks=(10, 10)), {1: 5}), (da.ones(shape=(10, 10), chunks=(10, 10)), (None, (5, 5))), (da.ones(shape=(10, 10), chunks=(10, 2)), (None, 5)), (da.ones(shape=(10, 10), chunks=(10, 2)), {1: 5}), (da.ones(shape=(10, 10), chunks=(10, 2)), (None, (5, 5))), ]) def test_rechunk_unknown(x, chunks): dd = pytest.importorskip('dask.dataframe') y = dd.from_array(x).values result = y.rechunk(chunks) expected = x.rechunk(chunks) assert_chunks_match(result.chunks, expected.chunks) assert_eq(result, expected) def test_rechunk_unknown_explicit(): dd = pytest.importorskip('dask.dataframe') x = da.ones(shape=(10, 10), chunks=(5, 2)) y = dd.from_array(x).values result = y.rechunk(((float('nan'), float('nan')), (5, 5))) expected = x.rechunk((None, (5, 5))) assert_chunks_match(result.chunks, expected.chunks) assert_eq(result, expected) def assert_chunks_match(left, right): for x, y in zip(left, right): if np.isnan(x).any(): assert np.isnan(x).all() else: assert x == y def test_rechunk_unknown_raises(): dd = pytest.importorskip('dask.dataframe') x = dd.from_array(da.ones(shape=(10, 10), chunks=(5, 5))).values with pytest.raises(ValueError): x.rechunk((None, (5, 5, 5))) def test_old_to_new_single(): old = ((float('nan'), float('nan')), (8,)) new = ((float('nan'), float('nan')), (4, 4)) result = _old_to_new(old, new) expected = [[[(0, slice(0, None, None))], [(1, slice(0, None, None))]], [[(0, slice(0, 4, None))], [(0, slice(4, 8, None))]]] assert result == expected def test_old_to_new(): old = ((float('nan'),), (10,)) new = ((float('nan'),), (5, 5)) result = _old_to_new(old, new) expected = [[[(0, slice(0, None, None))]], [[(0, slice(0, 5, None))], [(0, slice(5, 10, None))]]] assert result == expected def test_old_to_new_large(): old = (tuple([float('nan')] * 4), (10,)) new = (tuple([float('nan')] * 4), (5, 5)) result = _old_to_new(old, new) expected = [[[(0, slice(0, None, None))], [(1, slice(0, None, None))], [(2, slice(0, None, None))], [(3, slice(0, None, None))]], [[(0, slice(0, 5, None))], [(0, slice(5, 10, None))]]] assert result == expected def test_changing_raises(): nan = 
float('nan') with pytest.raises(ValueError) as record: _old_to_new(((nan, nan), (4, 4)), ((nan, nan, nan), (4, 4))) assert 'unchanging' in str(record.value) def test_old_to_new_known(): old = ((10, 10, 10, 10, 10), ) new = ((25, 5, 20), ) result = _old_to_new(old, new) expected = [[[(0, slice(0, 10, None)), (1, slice(0, 10, None)), (2, slice(0, 5, None))], [(2, slice(5, 10, None))], [(3, slice(0, 10, None)), (4, slice(0, 10, None))]]] assert result == expected def test_rechunk_zero_dim(): da = pytest.importorskip('dask.array') x = da.ones((0, 10, 100), chunks=(0, 10, 10)).rechunk((0, 10, 50)) assert len(x.compute()) == 0 dask-0.16.0/dask/array/tests/test_reductions.py000066400000000000000000000370751320364734500215350ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import pytest pytest.importorskip('numpy') import dask.array as da from dask.array.utils import assert_eq as _assert_eq, same_keys from dask.core import get_deps from dask.context import set_options import numpy as np # temporary until numpy functions migrated try: from numpy import nanprod except ImportError: # pragma: no cover import dask.array.numpy_compat as npcompat nanprod = npcompat.nanprod def assert_eq(a, b): _assert_eq(a, b, equal_nan=True) def reduction_1d_test(da_func, darr, np_func, narr, use_dtype=True, split_every=True): assert_eq(da_func(darr), np_func(narr)) assert_eq(da_func(darr, keepdims=True), np_func(narr, keepdims=True)) assert same_keys(da_func(darr), da_func(darr)) assert same_keys(da_func(darr, keepdims=True), da_func(darr, keepdims=True)) if use_dtype: assert_eq(da_func(darr, dtype='f8'), np_func(narr, dtype='f8')) assert_eq(da_func(darr, dtype='i8'), np_func(narr, dtype='i8')) assert same_keys(da_func(darr, dtype='i8'), da_func(darr, dtype='i8')) if split_every: a1 = da_func(darr, split_every=2) a2 = da_func(darr, split_every={0: 2}) assert same_keys(a1, a2) assert_eq(a1, np_func(narr)) assert_eq(a2, np_func(narr)) assert_eq(da_func(darr, keepdims=True, split_every=2), np_func(narr, keepdims=True)) @pytest.mark.parametrize('dtype', ['f4', 'i4']) def test_reductions_1D(dtype): x = np.arange(5).astype(dtype) a = da.from_array(x, chunks=(2,)) reduction_1d_test(da.sum, a, np.sum, x) reduction_1d_test(da.prod, a, np.prod, x) reduction_1d_test(da.mean, a, np.mean, x) reduction_1d_test(da.var, a, np.var, x) reduction_1d_test(da.std, a, np.std, x) reduction_1d_test(da.min, a, np.min, x, False) reduction_1d_test(da.max, a, np.max, x, False) reduction_1d_test(da.any, a, np.any, x, False) reduction_1d_test(da.all, a, np.all, x, False) reduction_1d_test(da.nansum, a, np.nansum, x) reduction_1d_test(da.nanprod, a, nanprod, x) reduction_1d_test(da.nanmean, a, np.mean, x) reduction_1d_test(da.nanvar, a, np.var, x) reduction_1d_test(da.nanstd, a, np.std, x) reduction_1d_test(da.nanmin, a, np.nanmin, x, False) reduction_1d_test(da.nanmax, a, np.nanmax, x, False) def reduction_2d_test(da_func, darr, np_func, narr, use_dtype=True, split_every=True): assert_eq(da_func(darr), np_func(narr)) assert_eq(da_func(darr, keepdims=True), np_func(narr, keepdims=True)) assert_eq(da_func(darr, axis=0), np_func(narr, axis=0)) assert_eq(da_func(darr, axis=1), np_func(narr, axis=1)) assert_eq(da_func(darr, axis=-1), np_func(narr, axis=-1)) assert_eq(da_func(darr, axis=-2), np_func(narr, axis=-2)) assert_eq(da_func(darr, axis=1, keepdims=True), np_func(narr, axis=1, keepdims=True)) assert_eq(da_func(darr, axis=(1, 0)), np_func(narr, axis=(1, 0))) assert same_keys(da_func(darr, axis=1), 
da_func(darr, axis=1)) assert same_keys(da_func(darr, axis=(1, 0)), da_func(darr, axis=(1, 0))) if use_dtype: assert_eq(da_func(darr, dtype='f8'), np_func(narr, dtype='f8')) assert_eq(da_func(darr, dtype='i8'), np_func(narr, dtype='i8')) if split_every: a1 = da_func(darr, split_every=4) a2 = da_func(darr, split_every={0: 2, 1: 2}) assert same_keys(a1, a2) assert_eq(a1, np_func(narr)) assert_eq(a2, np_func(narr)) assert_eq(da_func(darr, keepdims=True, split_every=4), np_func(narr, keepdims=True)) assert_eq(da_func(darr, axis=0, split_every=2), np_func(narr, axis=0)) assert_eq(da_func(darr, axis=0, keepdims=True, split_every=2), np_func(narr, axis=0, keepdims=True)) assert_eq(da_func(darr, axis=1, split_every=2), np_func(narr, axis=1)) assert_eq(da_func(darr, axis=1, keepdims=True, split_every=2), np_func(narr, axis=1, keepdims=True)) def test_reduction_errors(): x = da.ones((5, 5), chunks=(3, 3)) with pytest.raises(ValueError): x.sum(axis=2) with pytest.raises(ValueError): x.sum(axis=-3) @pytest.mark.slow @pytest.mark.parametrize('dtype', ['f4', 'i4']) def test_reductions_2D(dtype): x = np.arange(1, 122).reshape((11, 11)).astype(dtype) a = da.from_array(x, chunks=(4, 4)) b = a.sum(keepdims=True) assert b.__dask_keys__() == [[(b.name, 0, 0)]] reduction_2d_test(da.sum, a, np.sum, x) reduction_2d_test(da.prod, a, np.prod, x) reduction_2d_test(da.mean, a, np.mean, x) reduction_2d_test(da.var, a, np.var, x, False) # Difference in dtype algo reduction_2d_test(da.std, a, np.std, x, False) # Difference in dtype algo reduction_2d_test(da.min, a, np.min, x, False) reduction_2d_test(da.max, a, np.max, x, False) reduction_2d_test(da.any, a, np.any, x, False) reduction_2d_test(da.all, a, np.all, x, False) reduction_2d_test(da.nansum, a, np.nansum, x) reduction_2d_test(da.nanprod, a, nanprod, x) reduction_2d_test(da.nanmean, a, np.mean, x) reduction_2d_test(da.nanvar, a, np.nanvar, x, False) # Difference in dtype algo reduction_2d_test(da.nanstd, a, np.nanstd, x, False) # Difference in dtype algo reduction_2d_test(da.nanmin, a, np.nanmin, x, False) reduction_2d_test(da.nanmax, a, np.nanmax, x, False) @pytest.mark.parametrize(['dfunc', 'func'], [(da.argmin, np.argmin), (da.argmax, np.argmax), (da.nanargmin, np.nanargmin), (da.nanargmax, np.nanargmax)]) def test_arg_reductions(dfunc, func): x = np.random.random((10, 10, 10)) a = da.from_array(x, chunks=(3, 4, 5)) assert_eq(dfunc(a), func(x)) assert_eq(dfunc(a, 0), func(x, 0)) assert_eq(dfunc(a, 1), func(x, 1)) assert_eq(dfunc(a, 2), func(x, 2)) with set_options(split_every=2): assert_eq(dfunc(a), func(x)) assert_eq(dfunc(a, 0), func(x, 0)) assert_eq(dfunc(a, 1), func(x, 1)) assert_eq(dfunc(a, 2), func(x, 2)) pytest.raises(ValueError, lambda: dfunc(a, 3)) pytest.raises(TypeError, lambda: dfunc(a, (0, 1))) x2 = np.arange(10) a2 = da.from_array(x2, chunks=3) assert_eq(dfunc(a2), func(x2)) assert_eq(dfunc(a2, 0), func(x2, 0)) assert_eq(dfunc(a2, 0, split_every=2), func(x2, 0)) @pytest.mark.parametrize(['dfunc', 'func'], [(da.nanargmin, np.nanargmin), (da.nanargmax, np.nanargmax)]) def test_nanarg_reductions(dfunc, func): x = np.random.random((10, 10, 10)) x[5] = np.nan a = da.from_array(x, chunks=(3, 4, 5)) assert_eq(dfunc(a), func(x)) assert_eq(dfunc(a, 0), func(x, 0)) with pytest.raises(ValueError): with pytest.warns(None): # All NaN axis dfunc(a, 1).compute() with pytest.raises(ValueError): with pytest.warns(None): # All NaN axis dfunc(a, 2).compute() x[:] = np.nan a = da.from_array(x, chunks=(3, 4, 5)) with pytest.raises(ValueError): with 
pytest.warns(None): # All NaN axis dfunc(a).compute() def test_reductions_2D_nans(): # chunks are a mix of some/all/no NaNs x = np.full((4, 4), np.nan) x[:2, :2] = np.array([[1, 2], [3, 4]]) x[2, 2] = 5 x[3, 3] = 6 a = da.from_array(x, chunks=(2, 2)) reduction_2d_test(da.sum, a, np.sum, x, False, False) reduction_2d_test(da.prod, a, np.prod, x, False, False) reduction_2d_test(da.mean, a, np.mean, x, False, False) reduction_2d_test(da.var, a, np.var, x, False, False) reduction_2d_test(da.std, a, np.std, x, False, False) reduction_2d_test(da.min, a, np.min, x, False, False) reduction_2d_test(da.max, a, np.max, x, False, False) reduction_2d_test(da.any, a, np.any, x, False, False) reduction_2d_test(da.all, a, np.all, x, False, False) reduction_2d_test(da.nansum, a, np.nansum, x, False, False) reduction_2d_test(da.nanprod, a, nanprod, x, False, False) reduction_2d_test(da.nanmean, a, np.nanmean, x, False, False) with pytest.warns(None): # division by 0 warning reduction_2d_test(da.nanvar, a, np.nanvar, x, False, False) with pytest.warns(None): # division by 0 warning reduction_2d_test(da.nanstd, a, np.nanstd, x, False, False) with pytest.warns(None): # all NaN axis warning reduction_2d_test(da.nanmin, a, np.nanmin, x, False, False) with pytest.warns(None): # all NaN axis warning reduction_2d_test(da.nanmax, a, np.nanmax, x, False, False) assert_eq(da.argmax(a), np.argmax(x)) assert_eq(da.argmin(a), np.argmin(x)) with pytest.warns(None): # all NaN axis warning assert_eq(da.nanargmax(a), np.nanargmax(x)) with pytest.warns(None): # all NaN axis warning assert_eq(da.nanargmin(a), np.nanargmin(x)) assert_eq(da.argmax(a, axis=0), np.argmax(x, axis=0)) assert_eq(da.argmin(a, axis=0), np.argmin(x, axis=0)) with pytest.warns(None): # all NaN axis warning assert_eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0)) with pytest.warns(None): # all NaN axis warning assert_eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0)) assert_eq(da.argmax(a, axis=1), np.argmax(x, axis=1)) assert_eq(da.argmin(a, axis=1), np.argmin(x, axis=1)) with pytest.warns(None): # all NaN axis warning assert_eq(da.nanargmax(a, axis=1), np.nanargmax(x, axis=1)) with pytest.warns(None): # all NaN axis warning assert_eq(da.nanargmin(a, axis=1), np.nanargmin(x, axis=1)) def test_moment(): def moment(x, n, axis=None): return (((x - x.mean(axis=axis, keepdims=True)) ** n).sum(axis=axis) / np.ones_like(x).sum(axis=axis)) # Poorly conditioned x = np.array([1., 2., 3.] 
* 10).reshape((3, 10)) + 1e8 a = da.from_array(x, chunks=5) assert_eq(a.moment(2), moment(x, 2)) assert_eq(a.moment(3), moment(x, 3)) assert_eq(a.moment(4), moment(x, 4)) x = np.arange(1, 122).reshape((11, 11)).astype('f8') a = da.from_array(x, chunks=(4, 4)) assert_eq(a.moment(4, axis=1), moment(x, 4, axis=1)) assert_eq(a.moment(4, axis=(1, 0)), moment(x, 4, axis=(1, 0))) # Tree reduction assert_eq(a.moment(order=4, split_every=4), moment(x, 4)) assert_eq(a.moment(order=4, axis=0, split_every=4), moment(x, 4, axis=0)) assert_eq(a.moment(order=4, axis=1, split_every=4), moment(x, 4, axis=1)) def test_reductions_with_negative_axes(): x = np.random.random((4, 4, 4)) a = da.from_array(x, chunks=2) assert_eq(a.argmin(axis=-1), x.argmin(axis=-1)) assert_eq(a.argmin(axis=-1, split_every=2), x.argmin(axis=-1)) assert_eq(a.sum(axis=-1), x.sum(axis=-1)) assert_eq(a.sum(axis=(0, -1)), x.sum(axis=(0, -1))) def test_nan(): x = np.array([[1, np.nan, 3, 4], [5, 6, 7, np.nan], [9, 10, 11, 12]]) d = da.from_array(x, chunks=(2, 2)) assert_eq(np.nansum(x), da.nansum(d)) assert_eq(np.nansum(x, axis=0), da.nansum(d, axis=0)) assert_eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1)) assert_eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1)) assert_eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1))) assert_eq(np.nanvar(x), da.nanvar(d)) assert_eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0)) assert_eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0)) assert_eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0)) assert_eq(nanprod(x), da.nanprod(d)) def test_0d_array(): x = da.mean(da.ones(4, chunks=4), axis=0).compute() y = np.mean(np.ones(4)) assert type(x) == type(y) x = da.sum(da.zeros(4, chunks=1)).compute() y = np.sum(np.zeros(4)) assert type(x) == type(y) def test_reduction_on_scalar(): x = da.from_array(np.array(1.0), chunks=()) assert (x == x).all() def test_reductions_with_empty_array(): dx1 = da.ones((10, 0, 5), chunks=4) x1 = dx1.compute() dx2 = da.ones((0, 0, 0), chunks=4) x2 = dx2.compute() for dx, x in [(dx1, x1), (dx2, x2)]: with pytest.warns(None): # empty slice warning assert_eq(dx.mean(), x.mean()) assert_eq(dx.mean(axis=0), x.mean(axis=0)) assert_eq(dx.mean(axis=1), x.mean(axis=1)) assert_eq(dx.mean(axis=2), x.mean(axis=2)) def assert_max_deps(x, n, eq=True): dependencies, dependents = get_deps(x.dask) if eq: assert max(map(len, dependencies.values())) == n else: assert max(map(len, dependencies.values())) <= n def test_tree_reduce_depth(): # 2D x = da.from_array(np.arange(242).reshape((11, 22)), chunks=(3, 4)) thresh = {0: 2, 1: 3} assert_max_deps(x.sum(split_every=thresh), 2 * 3) assert_max_deps(x.sum(axis=0, split_every=thresh), 2) assert_max_deps(x.sum(axis=1, split_every=thresh), 3) assert_max_deps(x.sum(split_every=20), 20, False) assert_max_deps(x.sum(axis=0, split_every=20), 4) assert_max_deps(x.sum(axis=1, split_every=20), 6) # 3D x = da.from_array(np.arange(11 * 22 * 29).reshape((11, 22, 29)), chunks=(3, 4, 5)) thresh = {0: 2, 1: 3, 2: 4} assert_max_deps(x.sum(split_every=thresh), 2 * 3 * 4) assert_max_deps(x.sum(axis=0, split_every=thresh), 2) assert_max_deps(x.sum(axis=1, split_every=thresh), 3) assert_max_deps(x.sum(axis=2, split_every=thresh), 4) assert_max_deps(x.sum(axis=(0, 1), split_every=thresh), 2 * 3) assert_max_deps(x.sum(axis=(0, 2), split_every=thresh), 2 * 4) assert_max_deps(x.sum(axis=(1, 2), split_every=thresh), 3 * 4) assert_max_deps(x.sum(split_every=20), 20, False) assert_max_deps(x.sum(axis=0, split_every=20), 4) assert_max_deps(x.sum(axis=1, split_every=20), 6) 
assert_max_deps(x.sum(axis=2, split_every=20), 6) assert_max_deps(x.sum(axis=(0, 1), split_every=20), 20, False) assert_max_deps(x.sum(axis=(0, 2), split_every=20), 20, False) assert_max_deps(x.sum(axis=(1, 2), split_every=20), 20, False) assert_max_deps(x.sum(axis=(0, 1), split_every=40), 4 * 6) assert_max_deps(x.sum(axis=(0, 2), split_every=40), 4 * 6) assert_max_deps(x.sum(axis=(1, 2), split_every=40), 6 * 6) def test_tree_reduce_set_options(): x = da.from_array(np.arange(242).reshape((11, 22)), chunks=(3, 4)) with set_options(split_every={0: 2, 1: 3}): assert_max_deps(x.sum(), 2 * 3) assert_max_deps(x.sum(axis=0), 2) def test_reduction_names(): x = da.ones(5, chunks=(2,)) assert x.sum().name.startswith('sum') assert 'max' in x.max().name.split('-')[0] assert x.var().name.startswith('var') assert x.all().name.startswith('all') assert any(k[0].startswith('nansum') for k in da.nansum(x).dask) assert x.mean().name.startswith('mean') @pytest.mark.skipif(np.__version__ < '1.12.0', reason='argmax out parameter') @pytest.mark.parametrize('func', [np.sum, np.argmax]) def test_array_reduction_out(func): x = da.arange(10, chunks=(5,)) y = da.ones((10, 10), chunks=(4, 4)) func(y, axis=0, out=x) assert_eq(x, func(np.ones((10, 10)), axis=0)) @pytest.mark.parametrize("func", ["cumsum", "cumprod"]) @pytest.mark.parametrize("axis", [None, 0, 1, -1]) def test_array_cumreduction_axis(func, axis): np_func = getattr(np, func) da_func = getattr(da, func) s = (10, 11, 12) a = np.arange(np.prod(s)).reshape(s) d = da.from_array(a, chunks=(4, 5, 6)) a_r = np_func(a, axis=axis) d_r = da_func(d, axis=axis) assert_eq(a_r, d_r) @pytest.mark.parametrize('func', [np.cumsum, np.cumprod]) def test_array_cumreduction_out(func): x = da.ones((10, 10), chunks=(4, 4)) func(x, axis=0, out=x) assert_eq(x, func(np.ones((10, 10)), axis=0)) dask-0.16.0/dask/array/tests/test_reshape.py000066400000000000000000000041231320364734500207710ustar00rootroot00000000000000import pytest import numpy as np from dask.array.reshape import reshape_rechunk, expand_tuple, contract_tuple @pytest.mark.parametrize('inshape,outshape,prechunks,inchunks,outchunks', [ ((4,), (4,), ((2, 2),), ((2, 2),), ((2, 2),)), ((4,), (2, 2), ((2, 2),), ((2, 2),), ((1, 1), (2,))), ((4,), (4, 1), ((2, 2),), ((2, 2),), ((2, 2), (1,))), ((4,), (1, 4), ((2, 2),), ((2, 2),), ((1,), (2, 2))), ((1, 4), (4,), ((1,), (2, 2),), ((1,), (2, 2),), ((2, 2),)), ((4, 1), (4,), ((2, 2), (1,)), ((2, 2), (1,)), ((2, 2),)), ((4, 1, 4), (4, 4), ((2, 2), (1,), (2, 2),), ((2, 2), (1,), (2, 2),), ((2, 2), (2, 2))), ((4, 4), (4, 1, 4), ((2, 2), (2, 2),), ((2, 2), (2, 2),), ((2, 2), (1,), (2, 2))), ((2, 2), (4,), ((2,), (2,)), ((2,), (2,)), ((4,),)), ((2, 2), (4,), ((1, 1), (2,)), ((1, 1,), (2,)), ((2, 2),)), ((2, 2), (4,), ((2,), (1, 1)), ((1, 1,), (2,)), ((2, 2),)), ((64,), (4, 4, 4), ((8, 8, 8, 8, 8, 8, 8, 8),), ((16, 16, 16, 16),), ((1, 1, 1, 1), (4,), (4,))), ((64,), (4, 4, 4), ((32, 32),), ((32, 32),), ((2, 2), (4,), (4,))), ((64,), (4, 4, 4), ((16, 48),), ((16, 48),), ((1, 3), (4,), (4,))), ((64,), (4, 4, 4), ((20, 44),), ((16, 48),), ((1, 3), (4,), (4,))), ((64, 4), (8, 8, 4), ((16, 16, 16, 16), (2, 2)), ((16, 16, 16, 16), (2, 2)), ((2, 2, 2, 2), (8,), (2, 2))), ]) def test_reshape_rechunk(inshape, outshape, prechunks, inchunks, outchunks): result_in, result_out = reshape_rechunk(inshape, outshape, prechunks) assert result_in == inchunks assert result_out == outchunks assert np.prod(list(map(len, result_in))) == np.prod(list(map(len, result_out))) def test_expand_tuple(): assert 
expand_tuple((2, 4), 2) == (1, 1, 2, 2) assert expand_tuple((2, 4), 3) == (1, 1, 1, 1, 2) assert expand_tuple((3, 4), 2) == (1, 2, 2, 2) assert expand_tuple((7, 4), 3) == (2, 2, 3, 1, 1, 2) def test_contract_tuple(): assert contract_tuple((1, 1, 2, 3, 1), 2) == (2, 2, 2, 2) assert contract_tuple((1, 1, 2, 5, 1), 2) == (2, 2, 4, 2) assert contract_tuple((2, 4), 2) == (2, 4) assert contract_tuple((2, 4), 3) == (6,) dask-0.16.0/dask/array/tests/test_routines.py000066400000000000000000000732721320364734500212250ustar00rootroot00000000000000from __future__ import division, print_function, absolute_import import itertools import pytest from distutils.version import LooseVersion np = pytest.importorskip('numpy') import dask.array as da from dask.utils import ignoring from dask.array.utils import assert_eq, same_keys def test_array(): x = np.ones(5, dtype='i4') d = da.ones(5, chunks=3, dtype='i4') assert_eq(da.array(d, ndmin=3, dtype='i8'), np.array(x, ndmin=3, dtype='i8')) # regression #1847 this shall not raise an exception. x = da.ones((100,3), chunks=10) y = da.array(x) assert isinstance(y, da.Array) @pytest.mark.parametrize("funcname", [ "atleast_1d", "atleast_2d", "atleast_3d", ]) def test_atleast_nd_no_args(funcname): np_func = getattr(np, funcname) da_func = getattr(da, funcname) np_r_n = np_func() da_r_n = da_func() assert np_r_n == da_r_n @pytest.mark.parametrize("funcname", [ "atleast_1d", "atleast_2d", "atleast_3d", ]) @pytest.mark.parametrize("shape, chunks", [ (tuple(), tuple()), ((4,), (2,)), ((4, 6), (2, 3)), ((4, 6, 8), (2, 3, 4)), ((4, 6, 8, 10), (2, 3, 4, 5)), ]) def test_atleast_nd_one_arg(funcname, shape, chunks): np_a = np.random.random(shape) da_a = da.from_array(np_a, chunks=chunks) np_func = getattr(np, funcname) da_func = getattr(da, funcname) np_r = np_func(np_a) da_r = da_func(da_a) assert_eq(np_r, da_r) @pytest.mark.parametrize("funcname", [ "atleast_1d", "atleast_2d", "atleast_3d", ]) @pytest.mark.parametrize("shape1, shape2", list( itertools.combinations_with_replacement( [ tuple(), (4,), (4, 6), (4, 6, 8), (4, 6, 8, 10), ], 2 ) )) def test_atleast_nd_two_args(funcname, shape1, shape2): np_a_1 = np.random.random(shape1) da_a_1 = da.from_array(np_a_1, chunks=tuple(c // 2 for c in shape1)) np_a_2 = np.random.random(shape2) da_a_2 = da.from_array(np_a_2, chunks=tuple(c // 2 for c in shape2)) np_a_n = [np_a_1, np_a_2] da_a_n = [da_a_1, da_a_2] np_func = getattr(np, funcname) da_func = getattr(da, funcname) np_r_n = np_func(*np_a_n) da_r_n = da_func(*da_a_n) assert type(np_r_n) is type(da_r_n) assert len(np_r_n) == len(da_r_n) for np_r, da_r in zip(np_r_n, da_r_n): assert_eq(np_r, da_r) def test_transpose(): x = np.arange(240).reshape((4, 6, 10)) d = da.from_array(x, (2, 3, 4)) assert_eq(d.transpose((2, 0, 1)), x.transpose((2, 0, 1))) assert same_keys(d.transpose((2, 0, 1)), d.transpose((2, 0, 1))) assert_eq(d.transpose(2, 0, 1), x.transpose(2, 0, 1)) assert same_keys(d.transpose(2, 0, 1), d.transpose(2, 0, 1)) with pytest.raises(ValueError): d.transpose(1, 2) with pytest.raises(ValueError): d.transpose((1, 2)) def test_transpose_negative_axes(): x = np.ones((2, 3, 4, 5)) y = da.ones((2, 3, 4, 5), chunks=3) assert_eq(x.transpose([-1, -2, 0, 1]), y.transpose([-1, -2, 0, 1])) def test_swapaxes(): x = np.random.normal(0, 10, size=(10, 12, 7)) d = da.from_array(x, chunks=(4, 5, 2)) assert_eq(np.swapaxes(x, 0, 1), da.swapaxes(d, 0, 1)) assert_eq(np.swapaxes(x, 2, 1), da.swapaxes(d, 2, 1)) assert_eq(x.swapaxes(2, 1), d.swapaxes(2, 1)) assert_eq(x.swapaxes(0, 0), d.swapaxes(0, 
0)) assert_eq(x.swapaxes(1, 2), d.swapaxes(1, 2)) assert_eq(x.swapaxes(0, -1), d.swapaxes(0, -1)) assert_eq(x.swapaxes(-1, 1), d.swapaxes(-1, 1)) assert d.swapaxes(0, 1).name == d.swapaxes(0, 1).name assert d.swapaxes(0, 1).name != d.swapaxes(1, 0).name def test_tensordot(): x = np.arange(400).reshape((20, 20)) a = da.from_array(x, chunks=(5, 4)) y = np.arange(200).reshape((20, 10)) b = da.from_array(y, chunks=(4, 5)) for axes in [1, (1, 0)]: assert_eq(da.tensordot(a, b, axes=axes), np.tensordot(x, y, axes=axes)) assert_eq(da.tensordot(x, b, axes=axes), np.tensordot(x, y, axes=axes)) assert_eq(da.tensordot(a, y, axes=axes), np.tensordot(x, y, axes=axes)) assert same_keys(da.tensordot(a, b, axes=(1, 0)), da.tensordot(a, b, axes=(1, 0))) with pytest.warns(None): # Increasing number of chunks warning assert not same_keys(da.tensordot(a, b, axes=0), da.tensordot(a, b, axes=1)) @pytest.mark.parametrize('axes', [ 0, 1, (0, 1), (1, 0), ((1, 0), (2, 1)), ((1, 2), (2, 0)), ((2, 0), (1, 2)) ]) def test_tensordot_2(axes): x = np.arange(4 * 4 * 4).reshape((4, 4, 4)) y = da.from_array(x, chunks=2) assert_eq(da.tensordot(y, y, axes=axes), np.tensordot(x, x, axes=axes)) def test_dot_method(): x = np.arange(400).reshape((20, 20)) a = da.from_array(x, chunks=(5, 5)) y = np.arange(200).reshape((20, 10)) b = da.from_array(y, chunks=(5, 5)) assert_eq(a.dot(b), x.dot(y)) @pytest.mark.parametrize('func1d_name, func1d', [ ["ndim", lambda x: x.ndim], ["sum", lambda x: x.sum()], ["range", lambda x: [x.min(), x.max()]], ["range2", lambda x: [[x.min(), x.max()], [x.max(), x.min()]]], ]) @pytest.mark.parametrize('shape, axis', [ [(10, 15, 20), 0], [(10, 15, 20), 1], [(10, 15, 20), 2], [(10, 15, 20), -1], ]) def test_apply_along_axis(func1d_name, func1d, shape, axis): a = np.random.randint(0, 10, shape) d = da.from_array(a, chunks=(len(shape) * (5,))) if (func1d_name == "range2" and LooseVersion(np.__version__) < LooseVersion("1.13.0")): with pytest.raises(ValueError): da.apply_along_axis(func1d, axis, d) else: assert_eq( da.apply_along_axis(func1d, axis, d), np.apply_along_axis(func1d, axis, a) ) @pytest.mark.parametrize('func_name, func', [ ["sum0", lambda x, axis: x.sum(axis=axis)], ["sum1", lambda x, axis: x.sum(axis=axis, keepdims=True)], [ "range", lambda x, axis: np.concatenate( [ x.min(axis=axis, keepdims=True), x.max(axis=axis, keepdims=True) ], axis=axis ) ], ]) @pytest.mark.parametrize('shape, axes', [ [(10, 15, 20), tuple()], [(10, 15, 20), 0], [(10, 15, 20), (1,)], [(10, 15, 20), (-1, 1)], [(10, 15, 20), (2, 0, 1)], ]) def test_apply_over_axes(func_name, func, shape, axes): a = np.random.randint(0, 10, shape) d = da.from_array(a, chunks=(len(shape) * (5,))) assert_eq( da.apply_over_axes(func, d, axes), np.apply_over_axes(func, a, axes) ) @pytest.mark.parametrize('shape, axis', [ [(10, 15, 20), None], [(10, 15, 20), 0], [(10, 15, 20), 1], [(10, 15, 20), 2], [(10, 15, 20), -1], ]) def test_ptp(shape, axis): a = np.random.randint(0, 10, shape) d = da.from_array(a, chunks=(len(shape) * (5,))) assert_eq(da.ptp(d, axis), np.ptp(a, axis)) @pytest.mark.parametrize('shape, axis', [ [(10, 15, 20), 0], [(10, 15, 20), 1], [(10, 15, 20), 2], [(10, 15, 20), -1], ]) @pytest.mark.parametrize('n', [ 0, 1, 2, ]) def test_diff(shape, n, axis): x = np.random.randint(0, 10, shape) a = da.from_array(x, chunks=(len(shape) * (5,))) assert_eq(da.diff(a, n, axis), np.diff(x, n, axis)) @pytest.mark.parametrize('shape', [ (10,), (10, 15), ]) @pytest.mark.parametrize('to_end, to_begin', [ [None, None], [0, 0], [[1, 2], [3, 4]], ]) 
def test_ediff1d(shape, to_end, to_begin): x = np.random.randint(0, 10, shape) a = da.from_array(x, chunks=(len(shape) * (5,))) assert_eq(da.ediff1d(a, to_end, to_begin), np.ediff1d(x, to_end, to_begin)) def test_topk(): x = np.array([5, 2, 1, 6]) d = da.from_array(x, chunks=2) e = da.topk(2, d) assert e.chunks == ((2,),) assert_eq(e, np.sort(x)[-1:-3:-1]) assert same_keys(da.topk(2, d), e) def test_topk_k_bigger_than_chunk(): x = np.array([5, 2, 1, 6]) d = da.from_array(x, chunks=2) e = da.topk(3, d) assert e.chunks == ((3,),) assert_eq(e, np.array([6, 5, 2])) def test_bincount(): x = np.array([2, 1, 5, 2, 1]) d = da.from_array(x, chunks=2) e = da.bincount(d, minlength=6) assert_eq(e, np.bincount(x, minlength=6)) assert same_keys(da.bincount(d, minlength=6), e) def test_bincount_with_weights(): x = np.array([2, 1, 5, 2, 1]) d = da.from_array(x, chunks=2) weights = np.array([1, 2, 1, 0.5, 1]) dweights = da.from_array(weights, chunks=2) e = da.bincount(d, weights=dweights, minlength=6) assert_eq(e, np.bincount(x, weights=dweights, minlength=6)) assert same_keys(da.bincount(d, weights=dweights, minlength=6), e) def test_bincount_raises_informative_error_on_missing_minlength_kwarg(): x = np.array([2, 1, 5, 2, 1]) d = da.from_array(x, chunks=2) try: da.bincount(d) except Exception as e: assert 'minlength' in str(e) else: assert False @pytest.mark.skipif(LooseVersion(np.__version__) < '1.10.0', reason="NumPy doesn't yet support nd digitize") def test_digitize(): x = np.array([2, 4, 5, 6, 1]) bins = np.array([1, 2, 3, 4, 5]) for chunks in [2, 4]: for right in [False, True]: d = da.from_array(x, chunks=chunks) assert_eq(da.digitize(d, bins, right=right), np.digitize(x, bins, right=right)) x = np.random.random(size=(100, 100)) bins = np.random.random(size=13) bins.sort() for chunks in [(10, 10), (10, 20), (13, 17), (87, 54)]: for right in [False, True]: d = da.from_array(x, chunks=chunks) assert_eq(da.digitize(d, bins, right=right), np.digitize(x, bins, right=right)) def test_histogram(): # Test for normal, flattened input n = 100 v = da.random.random(n, chunks=10) bins = np.arange(0, 1.01, 0.01) (a1, b1) = da.histogram(v, bins=bins) (a2, b2) = np.histogram(v, bins=bins) # Check if the sum of the bins equals the number of samples assert a2.sum(axis=0) == n assert a1.sum(axis=0) == n assert_eq(a1, a2) assert same_keys(da.histogram(v, bins=bins)[0], a1) def test_histogram_alternative_bins_range(): v = da.random.random(100, chunks=10) (a1, b1) = da.histogram(v, bins=10, range=(0, 1)) (a2, b2) = np.histogram(v, bins=10, range=(0, 1)) assert_eq(a1, a2) assert_eq(b1, b2) def test_histogram_return_type(): v = da.random.random(100, chunks=10) bins = np.arange(0, 1.01, 0.01) # Check if return type is same as hist bins = np.arange(0, 11, 1, dtype='i4') assert_eq(da.histogram(v * 10, bins=bins)[0], np.histogram(v * 10, bins=bins)[0]) def test_histogram_extra_args_and_shapes(): # Check for extra args and shapes bins = np.arange(0, 1.01, 0.01) v = da.random.random(100, chunks=10) data = [(v, bins, da.ones(100, chunks=v.chunks) * 5), (da.random.random((50, 50), chunks=10), bins, da.ones((50, 50), chunks=10) * 5)] for v, bins, w in data: # density assert_eq(da.histogram(v, bins=bins, normed=True)[0], np.histogram(v, bins=bins, normed=True)[0]) # normed assert_eq(da.histogram(v, bins=bins, density=True)[0], np.histogram(v, bins=bins, density=True)[0]) # weights assert_eq(da.histogram(v, bins=bins, weights=w)[0], np.histogram(v, bins=bins, weights=w)[0]) assert_eq(da.histogram(v, bins=bins, weights=w, 
density=True)[0], da.histogram(v, bins=bins, weights=w, density=True)[0]) def test_cov(): x = np.arange(56).reshape((7, 8)) d = da.from_array(x, chunks=(4, 4)) assert_eq(da.cov(d), np.cov(x)) assert_eq(da.cov(d, rowvar=0), np.cov(x, rowvar=0)) with pytest.warns(None): # warning dof <= 0 for slice assert_eq(da.cov(d, ddof=10), np.cov(x, ddof=10)) assert_eq(da.cov(d, bias=1), np.cov(x, bias=1)) assert_eq(da.cov(d, d), np.cov(x, x)) y = np.arange(8) e = da.from_array(y, chunks=(4,)) assert_eq(da.cov(d, e), np.cov(x, y)) assert_eq(da.cov(e, d), np.cov(y, x)) with pytest.raises(ValueError): da.cov(d, ddof=1.5) def test_corrcoef(): x = np.arange(56).reshape((7, 8)) d = da.from_array(x, chunks=(4, 4)) assert_eq(da.corrcoef(d), np.corrcoef(x)) assert_eq(da.corrcoef(d, rowvar=0), np.corrcoef(x, rowvar=0)) assert_eq(da.corrcoef(d, d), np.corrcoef(x, x)) y = np.arange(8) e = da.from_array(y, chunks=(4,)) assert_eq(da.corrcoef(d, e), np.corrcoef(x, y)) assert_eq(da.corrcoef(e, d), np.corrcoef(y, x)) def test_round(): x = np.random.random(10) d = da.from_array(x, chunks=4) for i in (0, 1, 4, 5): assert_eq(x.round(i), d.round(i)) assert_eq(d.round(2), da.round(d, 2)) @pytest.mark.parametrize("return_index", [False, True]) @pytest.mark.parametrize("return_inverse", [False, True]) @pytest.mark.parametrize("return_counts", [False, True]) def test_unique_kwargs(return_index, return_inverse, return_counts): kwargs = dict( return_index=return_index, return_inverse=return_inverse, return_counts=return_counts ) a = np.array([1, 2, 4, 4, 5, 2]) d = da.from_array(a, chunks=(3,)) r_a = np.unique(a, **kwargs) r_d = da.unique(d, **kwargs) if not any([return_index, return_inverse, return_counts]): assert isinstance(r_a, np.ndarray) assert isinstance(r_d, da.Array) r_a = (r_a,) r_d = (r_d,) assert len(r_a) == len(r_d) if return_inverse: i = 1 + int(return_index) assert (d.size,) == r_d[i].shape for e_r_a, e_r_d in zip(r_a, r_d): assert_eq(e_r_d, e_r_a) @pytest.mark.parametrize("seed", [23, 796]) @pytest.mark.parametrize("low, high", [ [0, 10] ]) @pytest.mark.parametrize("shape, chunks", [ [(10,), (5,)], [(10,), (3,)], [(4, 5), (3, 2)], [(20, 20), (4, 5)], ]) def test_unique_rand(seed, low, high, shape, chunks): np.random.seed(seed) a = np.random.randint(low, high, size=shape) d = da.from_array(a, chunks=chunks) kwargs = dict( return_index=True, return_inverse=True, return_counts=True ) r_a = np.unique(a, **kwargs) r_d = da.unique(d, **kwargs) assert len(r_a) == len(r_d) assert (d.size,) == r_d[2].shape for e_r_a, e_r_d in zip(r_a, r_d): assert_eq(e_r_d, e_r_a) def _maybe_len(l): try: return len(l) except TypeError: return 0 @pytest.mark.parametrize('chunks', [(4, 6), (2, 6)]) @pytest.mark.parametrize('shift', [3, 7, 9, (3, 9), (7, 2)]) @pytest.mark.parametrize('axis', [None, 0, 1, -1, (0, 1), (1, 0)]) def test_roll(chunks, shift, axis): x = np.random.randint(10, size=(4, 6)) a = da.from_array(x, chunks=chunks) if _maybe_len(shift) != _maybe_len(axis): with pytest.raises(TypeError if axis is None else ValueError): da.roll(a, shift, axis) else: if (_maybe_len(shift) > 1 and LooseVersion(np.__version__) < LooseVersion("1.12.0")): pytest.skip( "NumPy %s doesn't support multiple axes with `roll`." " Need NumPy 1.12.0 or greater." 
% np.__version__ ) assert_eq(np.roll(x, shift, axis), da.roll(a, shift, axis)) def test_ravel(): x = np.random.randint(10, size=(4, 6)) # 2d for chunks in [(4, 6), (2, 6)]: a = da.from_array(x, chunks=chunks) assert_eq(x.ravel(), a.ravel()) assert len(a.ravel().dask) == len(a.dask) + len(a.chunks[0]) # 0d assert_eq(x[0, 0].ravel(), a[0, 0].ravel()) # 1d a_flat = a.ravel() assert_eq(a_flat.ravel(), a_flat) # 3d x = np.random.randint(10, size=(2, 3, 4)) for chunks in [4, (1, 3, 4)]: a = da.from_array(x, chunks=chunks) assert_eq(x.ravel(), a.ravel()) assert_eq(x.flatten(), a.flatten()) assert_eq(np.ravel(x), da.ravel(a)) def test_squeeze(): x = da.ones((10, 1), chunks=(3, 1)) assert_eq(x.squeeze(), x.compute().squeeze()) assert x.squeeze().chunks == ((3, 3, 3, 1),) assert same_keys(x.squeeze(), x.squeeze()) def test_vstack(): x = np.arange(5) y = np.ones(5) a = da.arange(5, chunks=2) b = da.ones(5, chunks=2) assert_eq(np.vstack((x, y)), da.vstack((a, b))) assert_eq(np.vstack((x, y[None, :])), da.vstack((a, b[None, :]))) def test_hstack(): x = np.arange(5) y = np.ones(5) a = da.arange(5, chunks=2) b = da.ones(5, chunks=2) assert_eq(np.hstack((x[None, :], y[None, :])), da.hstack((a[None, :], b[None, :]))) assert_eq(np.hstack((x, y)), da.hstack((a, b))) def test_dstack(): x = np.arange(5) y = np.ones(5) a = da.arange(5, chunks=2) b = da.ones(5, chunks=2) assert_eq(np.dstack((x[None, None, :], y[None, None, :])), da.dstack((a[None, None, :], b[None, None, :]))) assert_eq(np.dstack((x[None, :], y[None, :])), da.dstack((a[None, :], b[None, :]))) assert_eq(np.dstack((x, y)), da.dstack((a, b))) def test_take(): x = np.arange(400).reshape((20, 20)) a = da.from_array(x, chunks=(5, 5)) assert_eq(np.take(x, 3, axis=0), da.take(a, 3, axis=0)) assert_eq(np.take(x, [3, 4, 5], axis=-1), da.take(a, [3, 4, 5], axis=-1)) with pytest.raises(ValueError): da.take(a, 3, axis=2) assert same_keys(da.take(a, [3, 4, 5], axis=-1), da.take(a, [3, 4, 5], axis=-1)) def test_take_dask_from_numpy(): x = np.arange(5).astype('f8') y = da.from_array(np.array([1, 2, 3, 3, 2 ,1]), chunks=3) z = da.take(x * 2, y) assert z.chunks == y.chunks assert_eq(z, np.array([2., 4., 6., 6., 4., 2.])) def test_compress(): x = np.arange(25).reshape((5, 5)) a = da.from_array(x, chunks=(2, 2)) c1 = np.array([True, False, True, False, True]) c2 = np.array([True, False]) c3 = [True, False] dc1 = da.from_array(c1, chunks=3) dc2 = da.from_array(c2, chunks=2) for c, dc in [(c1, c1), (c2, c2), (c3, c3), (c1, dc1), (c2, dc2), (c3, dc2)]: for axis in [None, 0, 1]: res = da.compress(dc, a, axis=axis) assert_eq(np.compress(c, x, axis=axis), res) if isinstance(dc, da.Array): axis = axis or 0 assert np.isnan(res.chunks[axis]).all() with pytest.raises(ValueError): da.compress([True, False], a, axis=100) with pytest.raises(ValueError): da.compress([[True], [False]], a, axis=100) def test_extract(): x = np.arange(25).reshape((5, 5)) a = da.from_array(x, chunks=(2, 2)) c1 = np.array([True, False, True, False, True]) c2 = np.array([[True, False], [True, False]]) c3 = np.array([True, False]) dc1 = da.from_array(c1, chunks=3) dc2 = da.from_array(c2, chunks=(2, 1)) dc3 = da.from_array(c3, chunks=2) for c, dc in [(c1, c1), (c2, c2), (c3, c3), (c1, dc1), (c2, dc2), (c3, dc3)]: res = da.extract(dc, a) assert_eq(np.extract(c, x), res) if isinstance(dc, da.Array): assert np.isnan(res.chunks[0]).all() def test_isnull(): x = np.array([1, np.nan]) a = da.from_array(x, chunks=(2,)) with ignoring(ImportError): assert_eq(da.isnull(a), np.isnan(x)) assert_eq(da.notnull(a), 
~np.isnan(x)) def test_isclose(): x = np.array([0, np.nan, 1, 1.5]) y = np.array([1e-9, np.nan, 1, 2]) a = da.from_array(x, chunks=(2,)) b = da.from_array(y, chunks=(2,)) assert_eq(da.isclose(a, b, equal_nan=True), np.isclose(x, y, equal_nan=True)) def test_allclose(): n_a = np.array([0, np.nan, 1, 1.5]) n_b = np.array([1e-9, np.nan, 1, 2]) d_a = da.from_array(n_a, chunks=(2,)) d_b = da.from_array(n_b, chunks=(2,)) n_r = np.allclose(n_a, n_b, equal_nan=True) d_r = da.allclose(d_a, d_b, equal_nan=True) assert_eq(np.array(n_r)[()], d_r) def test_choose(): # test choose function x = np.random.randint(10, size=(15, 16)) d = da.from_array(x, chunks=(4, 5)) assert_eq(da.choose(d > 5, [0, d]), np.choose(x > 5, [0, x])) assert_eq(da.choose(d > 5, [-d, d]), np.choose(x > 5, [-x, x])) # test choose method index_dask = d > 5 index_numpy = x > 5 assert_eq(index_dask.choose([0, d]), index_numpy.choose([0, x])) assert_eq(index_dask.choose([-d, d]), index_numpy.choose([-x, x])) def test_argwhere(): for shape, chunks in [(0, ()), ((0, 0), (0, 0)), ((15, 16), (4, 5))]: x = np.random.randint(10, size=shape) d = da.from_array(x, chunks=chunks) x_nz = np.argwhere(x) d_nz = da.argwhere(d) assert_eq(d_nz, x_nz) def test_argwhere_obj(): x = np.random.randint(10, size=(15, 16)).astype(object) d = da.from_array(x, chunks=(4, 5)) x_nz = np.argwhere(x) d_nz = da.argwhere(d) assert_eq(d_nz, x_nz) def test_argwhere_str(): x = np.array(list("Hello world")) d = da.from_array(x, chunks=(4,)) x_nz = np.argwhere(x) d_nz = da.argwhere(d) assert_eq(d_nz, x_nz) def test_where(): x = np.random.randint(10, size=(15, 14)) x[5, 5] = x[4, 4] = 0 # Ensure some false elements d = da.from_array(x, chunks=(4, 5)) y = np.random.randint(10, size=15).astype(np.uint8) e = da.from_array(y, chunks=(4,)) for c1, c2 in [(d > 5, x > 5), (d, x), (1, 1), (0, 0), (5, 5), (True, True), (np.True_, np.True_), (False, False), (np.False_, np.False_)]: for b1, b2 in [(0, 0), (-e[:, None], -y[:, None]), (e[:14], y[:14])]: w1 = da.where(c1, d, b1) w2 = np.where(c2, x, b2) assert_eq(w1, w2) def test_where_scalar_dtype(): x = np.int32(3) y1 = np.array([4, 5, 6], dtype=np.int16) c1 = np.array([1, 0, 1]) y2 = da.from_array(y1, chunks=2) c2 = da.from_array(c1, chunks=2) w1 = np.where(c1, x, y1) w2 = da.where(c2, x, y2) assert_eq(w1, w2) # Test again for the bool optimization w3 = np.where(True, x, y1) w4 = da.where(True, x, y1) assert_eq(w3, w4) def test_where_bool_optimization(): x = np.random.randint(10, size=(15, 16)) d = da.from_array(x, chunks=(4, 5)) y = np.random.randint(10, size=(15, 16)) e = da.from_array(y, chunks=(4, 5)) for c in [True, False, np.True_, np.False_, 1, 0]: w1 = da.where(c, d, e) w2 = np.where(c, x, y) assert_eq(w1, w2) ex_w1 = d if c else e assert w1 is ex_w1 def test_where_nonzero(): for shape, chunks in [(0, ()), ((0, 0), (0, 0)), ((15, 16), (4, 5))]: x = np.random.randint(10, size=shape) d = da.from_array(x, chunks=chunks) x_w = np.where(x) d_w = da.where(d) assert isinstance(d_w, type(x_w)) assert len(d_w) == len(x_w) for i in range(len(x_w)): assert_eq(d_w[i], x_w[i]) def test_where_incorrect_args(): a = da.ones(5, chunks=3) for kwd in ["x", "y"]: kwargs = {kwd: a} try: da.where(a > 0, **kwargs) except ValueError as e: assert 'either both or neither of x and y should be given' in str(e) def test_count_nonzero(): for shape, chunks in [(0, ()), ((0, 0), (0, 0)), ((15, 16), (4, 5))]: x = np.random.randint(10, size=shape) d = da.from_array(x, chunks=chunks) x_c = np.count_nonzero(x) d_c = da.count_nonzero(d) if d_c.shape == tuple(): 
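# Counting non-zeros over the whole array yields a 0-d dask result, which is
# compared as a plain scalar after compute(); array-shaped results fall
# through to the elementwise assert_eq branch below.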
assert x_c == d_c.compute() else: assert_eq(x_c, d_c) @pytest.mark.skipif(LooseVersion(np.__version__) < '1.12.0', reason="NumPy's count_nonzero doesn't yet support axis") @pytest.mark.parametrize('axis', [None, 0, (1,), (0, 1)]) def test_count_nonzero_axis(axis): for shape, chunks in [((0, 0), (0, 0)), ((15, 16), (4, 5))]: x = np.random.randint(10, size=shape) d = da.from_array(x, chunks=chunks) x_c = np.count_nonzero(x, axis) d_c = da.count_nonzero(d, axis) if d_c.shape == tuple(): assert x_c == d_c.compute() else: assert_eq(x_c, d_c) def test_count_nonzero_obj(): x = np.random.randint(10, size=(15, 16)).astype(object) d = da.from_array(x, chunks=(4, 5)) x_c = np.count_nonzero(x) d_c = da.count_nonzero(d) if d_c.shape == tuple(): assert x_c == d_c.compute() else: assert_eq(x_c, d_c) @pytest.mark.skipif(LooseVersion(np.__version__) < '1.12.0', reason="NumPy's count_nonzero doesn't yet support axis") @pytest.mark.parametrize('axis', [None, 0, (1,), (0, 1)]) def test_count_nonzero_obj_axis(axis): x = np.random.randint(10, size=(15, 16)).astype(object) d = da.from_array(x, chunks=(4, 5)) x_c = np.count_nonzero(x, axis) d_c = da.count_nonzero(d, axis) if d_c.shape == tuple(): assert x_c == d_c.compute() else: ####################################################### # Workaround oddness with Windows and object arrays. # # # # xref: https://github.com/numpy/numpy/issues/9468 # ####################################################### assert_eq(x_c.astype(np.int64), d_c) def test_count_nonzero_str(): x = np.array(list("Hello world")) d = da.from_array(x, chunks=(4,)) x_c = np.count_nonzero(x) d_c = da.count_nonzero(d) assert x_c == d_c.compute() def test_flatnonzero(): for shape, chunks in [(0, ()), ((0, 0), (0, 0)), ((15, 16), (4, 5))]: x = np.random.randint(10, size=shape) d = da.from_array(x, chunks=chunks) x_fnz = np.flatnonzero(x) d_fnz = da.flatnonzero(d) assert_eq(d_fnz, x_fnz) def test_nonzero(): for shape, chunks in [(0, ()), ((0, 0), (0, 0)), ((15, 16), (4, 5))]: x = np.random.randint(10, size=shape) d = da.from_array(x, chunks=chunks) x_nz = np.nonzero(x) d_nz = da.nonzero(d) assert isinstance(d_nz, type(x_nz)) assert len(d_nz) == len(x_nz) for i in range(len(x_nz)): assert_eq(d_nz[i], x_nz[i]) def test_nonzero_method(): for shape, chunks in [(0, ()), ((0, 0), (0, 0)), ((15, 16), (4, 5))]: x = np.random.randint(10, size=shape) d = da.from_array(x, chunks=chunks) x_nz = x.nonzero() d_nz = d.nonzero() assert isinstance(d_nz, type(x_nz)) assert len(d_nz) == len(x_nz) for i in range(len(x_nz)): assert_eq(d_nz[i], x_nz[i]) def test_coarsen(): x = np.random.randint(10, size=(24, 24)) d = da.from_array(x, chunks=(4, 8)) assert_eq(da.chunk.coarsen(np.sum, x, {0: 2, 1: 4}), da.coarsen(np.sum, d, {0: 2, 1: 4})) assert_eq(da.chunk.coarsen(np.sum, x, {0: 2, 1: 4}), da.coarsen(da.sum, d, {0: 2, 1: 4})) def test_coarsen_with_excess(): x = da.arange(10, chunks=5) assert_eq(da.coarsen(np.min, x, {0: 3}, trim_excess=True), np.array([0, 5])) assert_eq(da.coarsen(np.sum, x, {0: 3}, trim_excess=True), np.array([0 + 1 + 2, 5 + 6 + 7])) def test_insert(): x = np.random.randint(10, size=(10, 10)) a = da.from_array(x, chunks=(5, 5)) y = np.random.randint(10, size=(5, 10)) b = da.from_array(y, chunks=(4, 4)) assert_eq(np.insert(x, 0, -1, axis=0), da.insert(a, 0, -1, axis=0)) assert_eq(np.insert(x, 3, -1, axis=-1), da.insert(a, 3, -1, axis=-1)) assert_eq(np.insert(x, 5, -1, axis=1), da.insert(a, 5, -1, axis=1)) assert_eq(np.insert(x, -1, -1, axis=-2), da.insert(a, -1, -1, axis=-2)) assert_eq(np.insert(x, [2, 3, 
3], -1, axis=1), da.insert(a, [2, 3, 3], -1, axis=1)) assert_eq(np.insert(x, [2, 3, 8, 8, -2, -2], -1, axis=0), da.insert(a, [2, 3, 8, 8, -2, -2], -1, axis=0)) assert_eq(np.insert(x, slice(1, 4), -1, axis=1), da.insert(a, slice(1, 4), -1, axis=1)) assert_eq(np.insert(x, [2] * 3 + [5] * 2, y, axis=0), da.insert(a, [2] * 3 + [5] * 2, b, axis=0)) assert_eq(np.insert(x, 0, y[0], axis=1), da.insert(a, 0, b[0], axis=1)) assert same_keys(da.insert(a, [2, 3, 8, 8, -2, -2], -1, axis=0), da.insert(a, [2, 3, 8, 8, -2, -2], -1, axis=0)) with pytest.raises(NotImplementedError): da.insert(a, [4, 2], -1, axis=0) with pytest.raises(IndexError): da.insert(a, [3], -1, axis=2) with pytest.raises(IndexError): da.insert(a, [3], -1, axis=-3) def test_multi_insert(): z = np.random.randint(10, size=(1, 2)) c = da.from_array(z, chunks=(1, 2)) assert_eq(np.insert(np.insert(z, [0, 1], -1, axis=0), [1], -1, axis=1), da.insert(da.insert(c, [0, 1], -1, axis=0), [1], -1, axis=1)) def test_result_type(): a = da.from_array(np.ones(5, np.float32), chunks=(3,)) b = da.from_array(np.ones(5, np.int16), chunks=(3,)) c = da.from_array(np.ones(5, np.int64), chunks=(3,)) x = np.ones(5, np.float32) assert da.result_type(b, c) == np.int64 assert da.result_type(a, b, c) == np.float64 assert da.result_type(b, np.float32) == np.float32 assert da.result_type(b, np.dtype(np.float32)) == np.float32 assert da.result_type(b, x) == np.float32 # Effect of scalars depends on their value assert da.result_type(1, b) == np.int16 assert da.result_type(1.0, a) == np.float32 assert da.result_type(np.int64(1), b) == np.int16 assert da.result_type(np.ones((), np.int64), b) == np.int16 # 0d array assert da.result_type(1e200, a) == np.float64 # 1e200 is too big for float32 # dask 0d-arrays are NOT treated like scalars c = da.from_array(np.ones((), np.float64), chunks=()) assert da.result_type(a, c) == np.float64 dask-0.16.0/dask/array/tests/test_slicing.py000066400000000000000000000527231320364734500210030ustar00rootroot00000000000000import itertools from operator import getitem import pytest from toolz import merge np = pytest.importorskip('numpy') import dask import dask.array as da from dask.array.slicing import (_sanitize_index_element, _slice_1d, new_blockdim, sanitize_index, slice_array, take, normalize_index) from dask.array.utils import assert_eq, same_keys def test_slice_1d(): expected = {0: slice(10, 25, 1), 1: slice(None, None, None), 2: slice(0, 1, 1)} result = _slice_1d(100, [25] * 4, slice(10, 51, None)) assert expected == result # x[100:12:-3] expected = {0: slice(-2, -8, -3), 1: slice(-1, -21, -3), 2: slice(-3, -21, -3), 3: slice(-2, -21, -3), 4: slice(-1, -21, -3)} result = _slice_1d(100, [20] * 5, slice(100, 12, -3)) assert expected == result # x[102::-3] expected = {0: slice(-2, -21, -3), 1: slice(-1, -21, -3), 2: slice(-3, -21, -3), 3: slice(-2, -21, -3), 4: slice(-1, -21, -3)} result = _slice_1d(100, [20] * 5, slice(102, None, -3)) assert expected == result # x[::-4] expected = {0: slice(-1, -21, -4), 1: slice(-1, -21, -4), 2: slice(-1, -21, -4), 3: slice(-1, -21, -4), 4: slice(-1, -21, -4)} result = _slice_1d(100, [20] * 5, slice(None, None, -4)) assert expected == result # x[::-7] expected = {0: slice(-5, -21, -7), 1: slice(-4, -21, -7), 2: slice(-3, -21, -7), 3: slice(-2, -21, -7), 4: slice(-1, -21, -7)} result = _slice_1d(100, [20] * 5, slice(None, None, -7)) assert expected == result # x=range(115) # x[::-7] expected = {0: slice(-7, -24, -7), 1: slice(-2, -24, -7), 2: slice(-4, -24, -7), 3: slice(-6, -24, -7), 4: slice(-1, 
-24, -7)} result = _slice_1d(115, [23] * 5, slice(None, None, -7)) assert expected == result # x[79::-3] expected = {0: slice(-1, -21, -3), 1: slice(-3, -21, -3), 2: slice(-2, -21, -3), 3: slice(-1, -21, -3)} result = _slice_1d(100, [20] * 5, slice(79, None, -3)) assert expected == result # x[-1:-8:-1] expected = {4: slice(-1, -8, -1)} result = _slice_1d(100, [20, 20, 20, 20, 20], slice(-1, 92, -1)) assert expected == result # x[20:0:-1] expected = {0: slice(-1, -20, -1), 1: slice(-20, -21, -1)} result = _slice_1d(100, [20, 20, 20, 20, 20], slice(20, 0, -1)) assert expected == result # x[:0] expected = {} result = _slice_1d(100, [20, 20, 20, 20, 20], slice(0)) assert result # x=range(99) expected = {0: slice(-3, -21, -3), 1: slice(-2, -21, -3), 2: slice(-1, -21, -3), 3: slice(-2, -20, -3), 4: slice(-1, -21, -3)} # This array has non-uniformly sized blocks result = _slice_1d(99, [20, 20, 20, 19, 20], slice(100, None, -3)) assert expected == result # x=range(104) # x[::-3] expected = {0: slice(-1, -21, -3), 1: slice(-3, -24, -3), 2: slice(-3, -28, -3), 3: slice(-1, -14, -3), 4: slice(-1, -22, -3)} # This array has non-uniformly sized blocks result = _slice_1d(104, [20, 23, 27, 13, 21], slice(None, None, -3)) assert expected == result # x=range(104) # x[:27:-3] expected = {1: slice(-3, -16, -3), 2: slice(-3, -28, -3), 3: slice(-1, -14, -3), 4: slice(-1, -22, -3)} # This array has non-uniformly sized blocks result = _slice_1d(104, [20, 23, 27, 13, 21], slice(None, 27, -3)) assert expected == result # x=range(104) # x[100:27:-3] expected = {1: slice(-3, -16, -3), 2: slice(-3, -28, -3), 3: slice(-1, -14, -3), 4: slice(-4, -22, -3)} # This array has non-uniformly sized blocks result = _slice_1d(104, [20, 23, 27, 13, 21], slice(100, 27, -3)) assert expected == result def test_slice_singleton_value_on_boundary(): assert _slice_1d(15, [5, 5, 5], 10) == {2: 0} assert _slice_1d(30, (5, 5, 5, 5, 5, 5), 10) == {2: 0} def test_slice_array_1d(): #x[24::2] expected = {('y', 0): (getitem, ('x', 0), (slice(24, 25, 2),)), ('y', 1): (getitem, ('x', 1), (slice(1, 25, 2),)), ('y', 2): (getitem, ('x', 2), (slice(0, 25, 2),)), ('y', 3): (getitem, ('x', 3), (slice(1, 25, 2),))} result, chunks = slice_array('y', 'x', [[25] * 4], [slice(24, None, 2)]) assert expected == result #x[26::2] expected = {('y', 0): (getitem, ('x', 1), (slice(1, 25, 2),)), ('y', 1): (getitem, ('x', 2), (slice(0, 25, 2),)), ('y', 2): (getitem, ('x', 3), (slice(1, 25, 2),))} result, chunks = slice_array('y', 'x', [[25] * 4], [slice(26, None, 2)]) assert expected == result #x[24::2] expected = {('y', 0): (getitem, ('x', 0), (slice(24, 25, 2),)), ('y', 1): (getitem, ('x', 1), (slice(1, 25, 2),)), ('y', 2): (getitem, ('x', 2), (slice(0, 25, 2),)), ('y', 3): (getitem, ('x', 3), (slice(1, 25, 2),))} result, chunks = slice_array('y', 'x', [(25, ) * 4], (slice(24, None, 2), )) assert expected == result #x[26::2] expected = {('y', 0): (getitem, ('x', 1), (slice(1, 25, 2),)), ('y', 1): (getitem, ('x', 2), (slice(0, 25, 2),)), ('y', 2): (getitem, ('x', 3), (slice(1, 25, 2),))} result, chunks = slice_array('y', 'x', [(25, ) * 4], (slice(26, None, 2), )) assert expected == result def test_slice_array_2d(): #2d slices: x[13::2,10::1] expected = {('y', 0, 0): (getitem, ('x', 0, 0), (slice(13, 20, 2), slice(10, 20, 1))), ('y', 0, 1): (getitem, ('x', 0, 1), (slice(13, 20, 2), slice(None, None, None))), ('y', 0, 2): (getitem, ('x', 0, 2), (slice(13, 20, 2), slice(None, None, None)))} result, chunks = slice_array('y', 'x', [[20], [20, 20, 5]], [slice(13, None, 
2), slice(10, None, 1)]) assert expected == result #2d slices with one dimension: x[5,10::1] expected = {('y', 0): (getitem, ('x', 0, 0), (5, slice(10, 20, 1))), ('y', 1): (getitem, ('x', 0, 1), (5, slice(None, None, None))), ('y', 2): (getitem, ('x', 0, 2), (5, slice(None, None, None)))} result, chunks = slice_array('y', 'x', ([20], [20, 20, 5]), [5, slice(10, None, 1)]) assert expected == result def test_slice_optimizations(): #bar[:] expected = {('foo', 0): ('bar', 0)} result, chunks = slice_array('foo', 'bar', [[100]], (slice(None, None, None),)) assert expected == result #bar[:,:,:] expected = {('foo', 0): ('bar', 0), ('foo', 1): ('bar', 1), ('foo', 2): ('bar', 2)} result, chunks = slice_array('foo', 'bar', [(100, 1000, 10000)], (slice(None, None, None), slice(None, None, None), slice(None, None, None))) assert expected == result def test_slicing_with_singleton_indices(): result, chunks = slice_array('y', 'x', ([5, 5], [5, 5]), (slice(0, 5), 8)) expected = {('y', 0): (getitem, ('x', 0, 1), (slice(None, None, None), 3))} assert expected == result def test_slicing_with_newaxis(): result, chunks = slice_array('y', 'x', ([5, 5], [5, 5]), (slice(0, 3), None, slice(None, None, None))) expected = { ('y', 0, 0, 0): (getitem, ('x', 0, 0), (slice(0, 3, 1), None, slice(None, None, None))), ('y', 0, 0, 1): (getitem, ('x', 0, 1), (slice(0, 3, 1), None, slice(None, None, None)))} assert expected == result assert chunks == ((3,), (1,), (5, 5)) def test_take(): chunks, dsk = take('y', 'x', [(20, 20, 20, 20)], [5, 1, 47, 3], axis=0) expected = {('y', 0): (getitem, (np.concatenate, [(getitem, ('x', 0), (np.array([1, 3, 5]),)), (getitem, ('x', 2), (np.array([7]),))], 0), (np.array([2, 0, 3, 1]), ))} np.testing.assert_equal(sorted(dsk.items()), sorted(expected.items())) assert chunks == ((4,),) chunks, dsk = take('y', 'x', [(20, 20, 20, 20), (20, 20)], [ 5, 1, 47, 3], axis=0) expected = {('y', 0, j): (getitem, (np.concatenate, [(getitem, ('x', 0, j), ([1, 3, 5], slice(None, None, None))), (getitem, ('x', 2, j), ([7], slice(None, None, None)))], 0), ([2, 0, 3, 1], slice(None, None, None))) for j in range(2)} np.testing.assert_equal(sorted(dsk.items()), sorted(expected.items())) assert chunks == ((4,), (20, 20)) chunks, dsk = take('y', 'x', [(20, 20, 20, 20), (20, 20)], [ 5, 1, 37, 3], axis=1) expected = {('y', i, 0): (getitem, (np.concatenate, [(getitem, ('x', i, 0), (slice(None, None, None), [1, 3, 5])), (getitem, ('x', i, 1), (slice(None, None, None), [17]))], 1), (slice(None, None, None), [2, 0, 3, 1])) for i in range(4)} np.testing.assert_equal(sorted(dsk.items()), sorted(expected.items())) assert chunks == ((20, 20, 20, 20), (4,)) def test_take_sorted(): chunks, dsk = take('y', 'x', [(20, 20, 20, 20)], [1, 3, 5, 47], axis=0) expected = {('y', 0): (getitem, ('x', 0), ([1, 3, 5],)), ('y', 1): (getitem, ('x', 2), ([7],))} np.testing.assert_equal(dsk, expected) assert chunks == ((3, 1),) chunks, dsk = take('y', 'x', [(20, 20, 20, 20), (20, 20)], [1, 3, 5, 37], axis=1) expected = merge(dict((('y', i, 0), (getitem, ('x', i, 0), (slice(None, None, None), [1, 3, 5]))) for i in range(4)), dict((('y', i, 1), (getitem, ('x', i, 1), (slice(None, None, None), [17]))) for i in range(4))) np.testing.assert_equal(dsk, expected) assert chunks == ((20, 20, 20, 20), (3, 1)) def test_slice_lists(): y, chunks = slice_array('y', 'x', ((3, 3, 3, 1), (3, 3, 3, 1)), (np.array([2, 1, 9]), slice(None, None, None))) exp = {('y', 0, i): (getitem, (np.concatenate, [(getitem, ('x', 0, i), ([1, 2], slice(None, None, None))), 
(getitem, ('x', 3, i), ([0], slice(None, None, None)))], 0), ([1, 0, 2], slice(None, None, None))) for i in range(4)} np.testing.assert_equal(y, exp) assert chunks == ((3,), (3, 3, 3, 1)) def test_slicing_chunks(): result, chunks = slice_array('y', 'x', ([5, 5], [5, 5]), (1, np.array([2, 0, 3]))) assert chunks == ((3,), ) result, chunks = slice_array('y', 'x', ([5, 5], [5, 5]), (slice(0, 7), np.array([2, 0, 3]))) assert chunks == ((5, 2), (3, )) result, chunks = slice_array('y', 'x', ([5, 5], [5, 5]), (slice(0, 7), 1)) assert chunks == ((5, 2), ) def test_slicing_with_numpy_arrays(): a, bd1 = slice_array('y', 'x', ((3, 3, 3, 1), (3, 3, 3, 1)), (np.array([1, 2, 9]), slice(None, None, None))) b, bd2 = slice_array('y', 'x', ((3, 3, 3, 1), (3, 3, 3, 1)), (np.array([1, 2, 9]), slice(None, None, None))) assert bd1 == bd2 np.testing.assert_equal(a, b) i = [False, True, True, False, False, False, False, False, False, True, False] index = (i, slice(None, None, None)) index = normalize_index(index, (10, 10)) c, bd3 = slice_array('y', 'x', ((3, 3, 3, 1), (3, 3, 3, 1)), index) assert bd1 == bd3 np.testing.assert_equal(a, c) def test_slicing_and_chunks(): o = da.ones((24, 16), chunks=((4, 8, 8, 4), (2, 6, 6, 2))) t = o[4:-4, 2:-2] assert t.chunks == ((8, 8), (6, 6)) def test_slice_stop_0(): # from gh-125 a = da.ones(10, chunks=(10,))[:0].compute() b = np.ones(10)[:0] assert_eq(a, b) def test_slice_list_then_None(): x = da.zeros(shape=(5, 5), chunks=(3, 3)) y = x[[2, 1]][None] assert_eq(y, np.zeros((1, 2, 5))) class ReturnItem(object): def __getitem__(self, key): return key @pytest.mark.skip(reason='really long test') def test_slicing_exhaustively(): x = np.random.rand(6, 7, 8) a = da.from_array(x, chunks=(3, 3, 3)) I = ReturnItem() # independent indexing along different axes indexers = [0, -2, I[:], I[:5], [0, 1], [0, 1, 2], [4, 2], I[::-1], None, I[:0], []] for i in indexers: assert_eq(x[i], a[i]), i for j in indexers: assert_eq(x[i][:, j], a[i][:, j]), (i, j) assert_eq(x[:, i][j], a[:, i][j]), (i, j) for k in indexers: assert_eq(x[..., i][:, j][k], a[..., i][:, j][k]), (i, j, k) # repeated indexing along the first axis first_indexers = [I[:], I[:5], np.arange(5), [3, 1, 4, 5, 0], np.arange(6) < 6] second_indexers = [0, -1, 3, I[:], I[:3], I[2:-1], [2, 4], [], I[:0]] for i in first_indexers: for j in second_indexers: assert_eq(x[i][j], a[i][j]), (i, j) def test_slicing_with_negative_step_flops_keys(): x = da.arange(10, chunks=5) y = x[:1:-1] assert (x.name, 1) in y.dask[(y.name, 0)] assert (x.name, 0) in y.dask[(y.name, 1)] assert_eq(y, np.arange(10)[:1:-1]) assert y.chunks == ((5, 3),) assert y.dask[(y.name, 0)] == (getitem, (x.name, 1), (slice(-1, -6, -1),)) assert y.dask[(y.name, 1)] == (getitem, (x.name, 0), (slice(-1, -4, -1),)) def test_empty_slice(): x = da.ones((5, 5), chunks=(2, 2), dtype='i4') y = x[:0] assert_eq(y, np.ones((5, 5), dtype='i4')[:0]) def test_multiple_list_slicing(): x = np.random.rand(6, 7, 8) a = da.from_array(x, chunks=(3, 3, 3)) assert_eq(x[:, [0, 1, 2]][[0, 1]], a[:, [0, 1, 2]][[0, 1]]) def test_empty_list(): x = np.ones((5, 5, 5), dtype='i4') dx = da.from_array(x, chunks=2) assert_eq(dx[[], :3, :2], x[[], :3, :2]) assert_eq(dx[:3, [], :2], x[:3, [], :2]) assert_eq(dx[:3, :2, []], x[:3, :2, []]) def test_uneven_chunks(): assert da.ones(20, chunks=5)[::2].chunks == ((3, 2, 3, 2),) def test_new_blockdim(): assert new_blockdim(20, [5, 5, 5, 5], slice(0, None, 2)) == [3, 2, 3, 2] def test_slicing_consistent_names(): x = np.arange(100).reshape((10, 10)) a = da.from_array(x, 
chunks=(5, 5)) assert same_keys(a[0], a[0]) assert same_keys(a[:, [1, 2, 3]], a[:, [1, 2, 3]]) assert same_keys(a[:, 5:2:-1], a[:, 5:2:-1]) assert same_keys(a[0, ...], a[0, ...]) assert same_keys(a[...], a[...]) assert same_keys(a[[1, 3, 5]], a[[1, 3, 5]]) assert same_keys(a[-11:11], a[:]) assert same_keys(a[-11:-9], a[:1]) assert same_keys(a[-1], a[9]) def test_slicing_consistent_names_after_normalization(): x = da.zeros(10, chunks=(5,)) assert same_keys(x[0:], x[:10]) assert same_keys(x[0:], x[0:10]) assert same_keys(x[0:], x[0:10:1]) assert same_keys(x[:], x[0:10:1]) def test_sanitize_index_element(): with pytest.raises(TypeError): _sanitize_index_element('Hello!') def test_sanitize_index(): pd = pytest.importorskip('pandas') with pytest.raises(TypeError): sanitize_index('Hello!') np.testing.assert_equal(sanitize_index(pd.Series([1, 2, 3])), [1, 2, 3]) np.testing.assert_equal(sanitize_index((1, 2, 3)), [1, 2, 3]) def test_uneven_blockdims(): blockdims = ((31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30), (100,)) index = (slice(240, 270), slice(None)) dsk_out, bd_out = slice_array('in', 'out', blockdims, index) sol = {('in', 0, 0): (getitem, ('out', 7, 0), (slice(28, 31, 1), slice(None))), ('in', 1, 0): (getitem, ('out', 8, 0), (slice(0, 27, 1), slice(None)))} assert dsk_out == sol assert bd_out == ((3, 27), (100,)) blockdims = ((31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30),) * 2 index = (slice(240, 270), slice(180, 230)) dsk_out, bd_out = slice_array('in', 'out', blockdims, index) sol = {('in', 0, 0): (getitem, ('out', 7, 5), (slice(28, 31, 1), slice(29, 30, 1))), ('in', 0, 1): (getitem, ('out', 7, 6), (slice(28, 31, 1), slice(None))), ('in', 0, 2): (getitem, ('out', 7, 7), (slice(28, 31, 1), slice(0, 18, 1))), ('in', 1, 0): (getitem, ('out', 8, 5), (slice(0, 27, 1), slice(29, 30, 1))), ('in', 1, 1): (getitem, ('out', 8, 6), (slice(0, 27, 1), slice(None))), ('in', 1, 2): (getitem, ('out', 8, 7), (slice(0, 27, 1), slice(0, 18, 1)))} assert dsk_out == sol assert bd_out == ((3, 27), (1, 31, 18)) def test_oob_check(): x = da.ones(5, chunks=(2,)) with pytest.raises(IndexError): x[6] with pytest.raises(IndexError): x[[6]] with pytest.raises(IndexError): x[-10] with pytest.raises(IndexError): x[[-10]] with pytest.raises(IndexError): x[0, 0] def test_index_with_dask_array(): x = np.arange(36).reshape((6, 6)) d = da.from_array(x, chunks=(3, 3)) ind = np.asarray([True, True, False, True, False, False], dtype=bool) ind = da.from_array(ind, chunks=2) for index in [ind, (slice(1, 9, 2), ind), (ind, slice(2, 8, 1))]: x_index = dask.compute(index)[0] assert_eq(x[x_index], d[index]) def test_index_with_dask_array_2(): x = np.random.random((10, 10, 10)) ind = np.random.random(10) > 0.5 d = da.from_array(x, chunks=(3, 4, 5)) dind = da.from_array(ind, chunks=4) index = [slice(1, 9, 1), slice(None)] for i in range(x.ndim): index2 = index[:] index2.insert(i, dind) index3 = index[:] index3.insert(i, ind) assert_eq(x[tuple(index3)], d[tuple(index2)]) @pytest.mark.xfail def test_cull(): x = da.ones(1000, chunks=(10,)) for slc in [1, slice(0, 30), slice(0, None, 100)]: y = x[slc] assert len(y.dask) < len(x.dask) @pytest.mark.parametrize('shape', [(2,), (2, 3), (2, 3, 5)]) @pytest.mark.parametrize('index', [(Ellipsis,), (None, Ellipsis), (Ellipsis, None), (None, Ellipsis, None)]) def test_slicing_with_Nones(shape, index): x = np.random.random(shape) d = da.from_array(x, chunks=shape) assert_eq(x[index], d[index]) indexers = [Ellipsis, slice(2), 0, 1, -2, -1, slice(-2, None), None] """ # We comment this out because it 
is 4096 tests @pytest.mark.parametrize('a', indexers) @pytest.mark.parametrize('b', indexers) @pytest.mark.parametrize('c', indexers) @pytest.mark.parametrize('d', indexers) def test_slicing_none_int_ellipses(a, b, c, d): if (a, b, c, d).count(Ellipsis) > 1: return shape = (2,3,5,7,11) x = np.arange(np.prod(shape)).reshape(shape) y = da.core.asarray(x) xx = x[a, b, c, d] yy = y[a, b, c, d] assert_eq(xx, yy) """ def test_slicing_integer_no_warnings(): # https://github.com/dask/dask/pull/2457/ X = da.random.random((100, 2), (2, 2)) idx = np.array([0, 0, 1, 1]) with pytest.warns(None) as rec: X[idx].compute() assert len(rec) == 0 @pytest.mark.slow def test_slicing_none_int_ellipes(): shape = (2, 3, 5, 7, 11) x = np.arange(np.prod(shape)).reshape(shape) y = da.core.asarray(x) for ind in itertools.product(indexers, indexers, indexers, indexers): if ind.count(Ellipsis) > 1: continue assert_eq(x[ind], y[ind]) def test_None_overlap_int(): a, b, c, d = (0, slice(None, 2, None), None, Ellipsis) shape = (2, 3, 5, 7, 11) x = np.arange(np.prod(shape)).reshape(shape) y = da.core.asarray(x) xx = x[a, b, c, d] yy = y[a, b, c, d] assert_eq(xx, yy) def test_negative_n_slicing(): assert_eq(da.ones(2, chunks=2)[-2], np.ones(2)[-2]) def test_negative_list_slicing(): x = np.arange(5) dx = da.from_array(x, chunks=2) assert_eq(dx[[0, -5]], x[[0, -5]]) assert_eq(dx[[4, -1]], x[[4, -1]]) def test_permit_oob_slices(): x = np.arange(5) dx = da.from_array(x, chunks=2) assert_eq(x[-102:], dx[-102:]) assert_eq(x[102:], dx[102:]) assert_eq(x[:102], dx[:102]) assert_eq(x[:-102], dx[:-102]) def test_normalize_index(): assert normalize_index((Ellipsis, None), (10,)) == (slice(None), None) assert normalize_index(5, (np.nan,)) == (5,) assert normalize_index(-5, (np.nan,)) == (-5,) (result,) = normalize_index([-5, -2, 1], (np.nan,)) assert result.tolist() == [-5, -2, 1] assert normalize_index(slice(-5, -2), (np.nan,)) == (slice(-5, -2),) dask-0.16.0/dask/array/tests/test_sparse.py000066400000000000000000000061501320364734500206410ustar00rootroot00000000000000import random from distutils.version import LooseVersion import numpy as np import pytest import dask.array as da from dask.array.utils import assert_eq sparse = pytest.importorskip('sparse') if LooseVersion(np.__version__) < '1.11.0': pytestmark = pytest.mark.skip functions = [ lambda x: x, lambda x: da.expm1(x), lambda x: 2 * x, lambda x: x / 2, lambda x: x**2, lambda x: x + x, lambda x: x * x, lambda x: x[0], lambda x: x[:, 1], lambda x: x[:1, None, 1:3], lambda x: x.T, lambda x: da.transpose(x, (1, 2, 0)), lambda x: x.sum(), lambda x: x.dot(np.arange(x.shape[-1])), lambda x: x.dot(np.eye(x.shape[-1])), lambda x: da.tensordot(x, np.ones(x.shape[:2]), axes=[(0, 1), (0, 1)]), lambda x: x.sum(axis=0), lambda x: x.max(axis=0), lambda x: x.sum(axis=(1, 2)), lambda x: x.astype(np.complex128), lambda x: x.map_blocks(lambda x: x * 2), lambda x: x.round(1), lambda x: x.reshape((x.shape[0] * x.shape[1], x.shape[2])), lambda x: abs(x), lambda x: x > 0.5, lambda x: x.rechunk((4, 4, 4)), lambda x: x.rechunk((2, 2, 1)), ] @pytest.mark.parametrize('func', functions) def test_basic(func): x = da.random.random((2, 3, 4), chunks=(1, 2, 2)) x[x < 0.8] = 0 y = x.map_blocks(sparse.COO.from_numpy) xx = func(x) yy = func(y) assert_eq(xx, yy) if yy.shape: zz = yy.compute() if not isinstance(zz, sparse.COO): assert (zz != 1).sum() > np.prod(zz.shape) / 2 # mostly dense def test_tensordot(): x = da.random.random((2, 3, 4), chunks=(1, 2, 2)) x[x < 0.8] = 0 y = da.random.random((4, 3, 2), 
chunks=(2, 2, 1)) y[y < 0.8] = 0 xx = x.map_blocks(sparse.COO.from_numpy) yy = y.map_blocks(sparse.COO.from_numpy) assert_eq(da.tensordot(x, y, axes=(2, 0)), da.tensordot(xx, yy, axes=(2, 0))) assert_eq(da.tensordot(x, y, axes=(1, 1)), da.tensordot(xx, yy, axes=(1, 1))) assert_eq(da.tensordot(x, y, axes=((1, 2), (1, 0))), da.tensordot(xx, yy, axes=((1, 2), (1, 0)))) @pytest.mark.parametrize('func', functions) def test_mixed_concatenate(func): x = da.random.random((2, 3, 4), chunks=(1, 2, 2)) y = da.random.random((2, 3, 4), chunks=(1, 2, 2)) y[y < 0.8] = 0 yy = y.map_blocks(sparse.COO.from_numpy) d = da.concatenate([x, y], axis=0) s = da.concatenate([x, yy], axis=0) dd = func(d) ss = func(s) assert_eq(dd, ss) @pytest.mark.parametrize('func', functions) def test_mixed_random(func): d = da.random.random((4, 3, 4), chunks=(1, 2, 2)) d[d < 0.7] = 0 fn = lambda x: sparse.COO.from_numpy(x) if random.random() < 0.5 else x s = d.map_blocks(fn) dd = func(d) ss = func(s) assert_eq(dd, ss) def test_mixed_output_type(): y = da.random.random((10, 10), chunks=(5, 5)) y[y < 0.8] = 0 y = y.map_blocks(sparse.COO.from_numpy) x = da.zeros((10, 1), chunks=(5, 1)) z = da.concatenate([x, y], axis=1) assert z.shape == (10, 11) zz = z.compute() assert isinstance(zz, sparse.COO) assert zz.nnz == y.compute().nnz dask-0.16.0/dask/array/tests/test_stats.py000066400000000000000000000072761320364734500205140ustar00rootroot00000000000000import pytest scipy = pytest.importorskip('scipy') import numpy as np import dask.array as da from dask.array.utils import assert_eq from dask.delayed import Delayed import dask.array.stats from dask.array.utils import allclose @pytest.mark.parametrize('kind, kwargs', [ ('skew', {}), ('kurtosis', {}), ('kurtosis', {'fisher': False}), ]) def test_measures(kind, kwargs): x = np.random.random(size=(30, 2)) y = da.from_array(x, 3) dfunc = getattr(dask.array.stats, kind) sfunc = getattr(scipy.stats, kind) expected = sfunc(x, **kwargs) result = dfunc(y, **kwargs) assert_eq(result, expected) assert isinstance(result, da.Array) def test_bias_raises(): x = np.random.random(size=(30, 2)) y = da.from_array(x, 3) with pytest.raises(NotImplementedError): dask.array.stats.skew(y, bias=False) with pytest.raises(NotImplementedError): dask.array.stats.kurtosis(y, bias=False) @pytest.mark.parametrize('kind', [ 'chisquare', 'power_divergence', 'normaltest', 'skewtest', 'kurtosistest', ]) def test_one(kind): a = np.random.random(size=30,) a_ = da.from_array(a, 3) dask_test = getattr(dask.array.stats, kind) scipy_test = getattr(scipy.stats, kind) result = dask_test(a_) expected = scipy_test(a) assert isinstance(result, Delayed) assert allclose(result.compute(), expected) @pytest.mark.parametrize('kind, kwargs', [ ('ttest_ind', {}), ('ttest_ind', {'equal_var': False}), ('ttest_1samp', {}), ('ttest_rel', {}), ('chisquare', {}), ('power_divergence', {}), ('power_divergence', {'lambda_': 0}), ('power_divergence', {'lambda_': -1}), ('power_divergence', {'lambda_': 'neyman'}), ]) def test_two(kind, kwargs): a = np.random.random(size=30,) b = np.random.random(size=30,) a_ = da.from_array(a, 3) b_ = da.from_array(b, 3) dask_test = getattr(dask.array.stats, kind) scipy_test = getattr(scipy.stats, kind) with pytest.warns(None): # maybe overflow warning (powrer_divergence) result = dask_test(a_, b_, **kwargs) expected = scipy_test(a, b, **kwargs) assert isinstance(result, Delayed) assert allclose(result.compute(), expected) # fails occasionally. shouldn't this be exact? 
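# Exact equality is not guaranteed here: dask may reduce chunks in a
# different order than scipy working on the whole array, and IEEE-754
# addition is not associative (for example, (0.1 + 0.2) + 0.3 !=
# 0.1 + (0.2 + 0.3) in double precision), so the commented-out exact check
# below can fail even when both results agree to within rounding; allclose
# is the appropriate comparison.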
# assert dask.compute(*result) == expected @pytest.mark.parametrize('k', range(5)) def test_moments(k): x = np.random.random(size=(30, 2)) y = da.from_array(x, 3) expected = scipy.stats.moment(x, k) result = dask.array.stats.moment(y, k) assert_eq(result, expected) def test_anova(): np_args = [i * np.random.random(size=(30,)) for i in range(4)] da_args = [da.from_array(x, chunks=10) for x in np_args] result = dask.array.stats.f_oneway(*da_args) expected = scipy.stats.f_oneway(*np_args) assert allclose(result.compute(), expected) @pytest.mark.parametrize('func, nargs', [ (dask.array.stats.ttest_1samp, 2), (dask.array.stats.ttest_rel, 2), (dask.array.stats.skewtest, 1), (dask.array.stats.kurtosis, 1), (dask.array.stats.kurtosistest, 1), (dask.array.stats.normaltest, 1), (dask.array.stats.moment, 1), ]) @pytest.mark.parametrize('nan_policy', ['omit', 'raise']) def test_nan_raises(func, nargs, nan_policy): with pytest.raises(NotImplementedError): func(*(None,) * nargs, nan_policy=nan_policy) def test_power_divergence_invalid(): a = np.random.random(size=30,) a_ = da.from_array(a, 3) with pytest.raises(ValueError): dask.array.stats.power_divergence(a_, lambda_='wrong') def test_skew_raises(): a = da.ones((7,), chunks=(7,)) with pytest.raises(ValueError) as rec: dask.array.stats.skewtest(a) assert "7 samples" in str(rec) dask-0.16.0/dask/array/tests/test_testing.py000066400000000000000000000007431320364734500210230ustar00rootroot00000000000000import sys import pytest import numpy as np import dask.array as da from dask.array.utils import assert_eq @pytest.mark.skipif(sys.flags.optimize, reason="Assertions disabled.") def test_assert_eq_checks_scalars(): # https://github.com/dask/dask/issues/2680 with pytest.raises(AssertionError): assert_eq(np.array(0), np.array(1)) a = da.from_array(np.array([0]), 1)[0] b = np.array([1])[0] with pytest.raises(AssertionError): assert_eq(a, b) dask-0.16.0/dask/array/tests/test_ufunc.py000066400000000000000000000247241320364734500204730ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from functools import partial import pytest np = pytest.importorskip('numpy') import dask.array as da from dask.array.utils import assert_eq def test_ufunc_meta(): assert da.log.__name__ == 'log' assert da.log.__doc__.replace(' # doctest: +SKIP', '') == np.log.__doc__ assert da.modf.__name__ == 'modf' assert da.modf.__doc__.replace(' # doctest: +SKIP', '') == np.modf.__doc__ assert da.frexp.__name__ == 'frexp' assert da.frexp.__doc__.replace(' # doctest: +SKIP', '') == np.frexp.__doc__ def test_ufunc(): for attr in ['nin', 'nargs', 'nout', 'ntypes', 'identity', 'signature', 'types']: assert getattr(da.log, attr) == getattr(np.log, attr) with pytest.raises(AttributeError): da.log.not_an_attribute assert repr(da.log) == repr(np.log) assert 'nin' in dir(da.log) assert 'outer' in dir(da.log) binary_ufuncs = ['add', 'arctan2', 'copysign', 'divide', 'equal', 'floor_divide', 'fmax', 'fmin', 'fmod', 'greater', 'greater_equal', 'hypot', 'ldexp', 'less', 'less_equal', 'logaddexp', 'logaddexp2', 'logical_and', 'logical_or', 'logical_xor', 'maximum', 'minimum', 'mod', 'multiply', 'nextafter', 'not_equal', 'power', 'remainder', 'subtract', 'true_divide'] unary_ufuncs = ['absolute', 'arccos', 'arccosh', 'arcsin', 'arcsinh', 'arctan', 'arctanh', 'cbrt', 'ceil', 'conj', 'cos', 'cosh', 'deg2rad', 'degrees', 'exp', 'exp2', 'expm1', 'fabs', 'fix', 'floor', 'i0', 'isfinite', 'isinf', 'isnan', 'log', 'log10', 'log1p', 'log2', 'logical_not', 'nan_to_num', 
'negative', 'rad2deg', 'radians', 'reciprocal', 'rint', 'sign', 'signbit', 'sin', 'sinc', 'sinh', 'spacing', 'sqrt', 'square', 'tan', 'tanh', 'trunc'] @pytest.mark.parametrize('ufunc', unary_ufuncs) def test_unary_ufunc(ufunc): if ufunc == 'fix' and np.__version__ >= '1.13.0': pytest.skip('fix calls floor in a way that we do not yet support') dafunc = getattr(da, ufunc) npfunc = getattr(np, ufunc) arr = np.random.randint(1, 100, size=(20, 20)) darr = da.from_array(arr, 3) with pytest.warns(None): # some invalid values (arccos, arcsin, etc.) # applying Dask ufunc doesn't trigger computation assert isinstance(dafunc(darr), da.Array) assert_eq(dafunc(darr), npfunc(arr), equal_nan=True) with pytest.warns(None): # some invalid values (arccos, arcsin, etc.) # applying NumPy ufunc is lazy if isinstance(npfunc, np.ufunc) and np.__version__ >= '1.13.0': assert isinstance(npfunc(darr), da.Array) else: assert isinstance(npfunc(darr), np.ndarray) assert_eq(npfunc(darr), npfunc(arr), equal_nan=True) with pytest.warns(None): # some invalid values (arccos, arcsin, etc.) # applying Dask ufunc to normal ndarray triggers computation assert isinstance(dafunc(arr), np.ndarray) assert_eq(dafunc(arr), npfunc(arr), equal_nan=True) @pytest.mark.parametrize('ufunc', binary_ufuncs) def test_binary_ufunc(ufunc): dafunc = getattr(da, ufunc) npfunc = getattr(np, ufunc) arr1 = np.random.randint(1, 100, size=(20, 20)) darr1 = da.from_array(arr1, 3) arr2 = np.random.randint(1, 100, size=(20, 20)) darr2 = da.from_array(arr2, 3) # applying Dask ufunc doesn't trigger computation assert isinstance(dafunc(darr1, darr2), da.Array) assert_eq(dafunc(darr1, darr2), npfunc(arr1, arr2)) # applying NumPy ufunc triggers computation or is lazy (np >= 1.13.0) if np.__version__ >= '1.13.0': assert isinstance(npfunc(darr1, darr2), da.Array) else: assert isinstance(npfunc(darr1, darr2), np.ndarray) assert_eq(npfunc(darr1, darr2), npfunc(arr1, arr2)) # applying Dask ufunc to normal ndarray triggers computation assert isinstance(dafunc(arr1, arr2), np.ndarray) assert_eq(dafunc(arr1, arr2), npfunc(arr1, arr2)) # with scalar assert isinstance(dafunc(darr1, 10), da.Array) assert_eq(dafunc(darr1, 10), npfunc(arr1, 10)) with pytest.warns(None): # overflow in ldexp assert isinstance(dafunc(10, darr1), da.Array) assert_eq(dafunc(10, darr1), npfunc(10, arr1)) assert isinstance(dafunc(arr1, 10), np.ndarray) assert_eq(dafunc(arr1, 10), npfunc(arr1, 10)) with pytest.warns(None): # overflow in ldexp assert isinstance(dafunc(10, arr1), np.ndarray) assert_eq(dafunc(10, arr1), npfunc(10, arr1)) def test_ufunc_outer(): arr1 = np.random.randint(1, 100, size=20) darr1 = da.from_array(arr1, 3) arr2 = np.random.randint(1, 100, size=(10, 3)) darr2 = da.from_array(arr2, 3) # Check output types assert isinstance(da.add.outer(darr1, darr2), da.Array) assert isinstance(da.add.outer(arr1, darr2), da.Array) assert isinstance(da.add.outer(darr1, arr2), da.Array) assert isinstance(da.add.outer(arr1, arr2), np.ndarray) # Check mix of dimensions, dtypes, and numpy/dask/object cases = [((darr1, darr2), (arr1, arr2)), ((darr2, darr1), (arr2, arr1)), ((darr2, darr1.astype('f8')), (arr2, arr1.astype('f8'))), ((darr1, arr2), (arr1, arr2)), ((darr1, 1), (arr1, 1)), ((1, darr2), (1, arr2)), ((1.5, darr2), (1.5, arr2)), (([1, 2, 3], darr2), ([1, 2, 3], arr2)), ((darr1.sum(), darr2), (arr1.sum(), arr2)), ((np.array(1), darr2), (np.array(1), arr2))] for (dA, dB), (A, B) in cases: assert_eq(da.add.outer(dA, dB), np.add.outer(A, B)) # Check dtype kwarg works 
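# An explicit dtype is forwarded to np.add.outer within each block, so the
# result dtype should match NumPy's.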
assert_eq(da.add.outer(darr1, darr2, dtype='f8'), np.add.outer(arr1, arr2, dtype='f8')) with pytest.raises(ValueError): da.add.outer(darr1, darr2, out=arr1) with pytest.raises(ValueError): da.sin.outer(darr1, darr2) @pytest.mark.parametrize('ufunc', ['isreal', 'iscomplex', 'real', 'imag']) def test_complex(ufunc): dafunc = getattr(da, ufunc) npfunc = getattr(np, ufunc) real = np.random.randint(1, 100, size=(20, 20)) imag = np.random.randint(1, 100, size=(20, 20)) * 1j comp = real + imag dareal = da.from_array(real, 3) daimag = da.from_array(imag, 3) dacomp = da.from_array(comp, 3) assert_eq(dacomp.real, comp.real) assert_eq(dacomp.imag, comp.imag) assert_eq(dacomp.conj(), comp.conj()) for darr, arr in [(dacomp, comp), (dareal, real), (daimag, imag)]: # applying Dask ufunc doesn't trigger computation assert isinstance(dafunc(darr), da.Array) assert_eq(dafunc(darr), npfunc(arr)) # applying NumPy ufunc triggers computation if np.__version__ < '1.13.0': assert isinstance(npfunc(darr), np.ndarray) assert_eq(npfunc(darr), npfunc(arr)) # applying Dask ufunc to normal ndarray triggers computation assert isinstance(dafunc(arr), np.ndarray) assert_eq(dafunc(arr), npfunc(arr)) @pytest.mark.parametrize('ufunc', ['frexp', 'modf']) def test_ufunc_2results(ufunc): dafunc = getattr(da, ufunc) npfunc = getattr(np, ufunc) arr = np.random.randint(1, 100, size=(20, 20)) darr = da.from_array(arr, 3) # applying Dask ufunc doesn't trigger computation res1, res2 = dafunc(darr) assert isinstance(res1, da.Array) assert isinstance(res2, da.Array) exp1, exp2 = npfunc(arr) assert_eq(res1, exp1) assert_eq(res2, exp2) # applying NumPy ufunc is now lazy res1, res2 = npfunc(darr) if np.__version__ >= '1.13.0': assert isinstance(res1, da.Array) assert isinstance(res2, da.Array) else: assert isinstance(res1, np.ndarray) assert isinstance(res2, np.ndarray) exp1, exp2 = npfunc(arr) assert_eq(res1, exp1) assert_eq(res2, exp2) # applying Dask ufunc to normal ndarray triggers computation res1, res2 = dafunc(arr) assert isinstance(res1, da.Array) assert isinstance(res2, da.Array) exp1, exp2 = npfunc(arr) assert_eq(res1, exp1) assert_eq(res2, exp2) def test_clip(): x = np.random.normal(0, 10, size=(10, 10)) d = da.from_array(x, chunks=(3, 4)) assert_eq(x.clip(5), d.clip(5)) assert_eq(x.clip(1, 5), d.clip(1, 5)) assert_eq(x.clip(min=5), d.clip(min=5)) assert_eq(x.clip(max=5), d.clip(max=5)) assert_eq(x.clip(max=1, min=5), d.clip(max=1, min=5)) assert_eq(x.clip(min=1, max=5), d.clip(min=1, max=5)) def test_angle(): real = np.random.randint(1, 100, size=(20, 20)) imag = np.random.randint(1, 100, size=(20, 20)) * 1j comp = real + imag dacomp = da.from_array(comp, 3) assert_eq(da.angle(dacomp), np.angle(comp)) assert_eq(da.angle(dacomp, deg=True), np.angle(comp, deg=True)) assert isinstance(da.angle(comp), np.ndarray) assert_eq(da.angle(comp), np.angle(comp)) @pytest.mark.skipif(np.__version__ < '1.13.0', reason='array_ufunc not present') def test_array_ufunc(): x = np.arange(24).reshape((4, 6)) d = da.from_array(x, chunks=(2, 3)) for func in [np.sin, np.isreal, np.sum, np.negative, partial(np.prod, axis=0)]: assert isinstance(func(d), da.Array) assert_eq(func(d), func(x)) @pytest.mark.skipif(np.__version__ < '1.13.0', reason='array_ufunc not present') def test_array_ufunc_binop(): x = np.arange(25).reshape((5, 5)) d = da.from_array(x, chunks=(2, 2)) for func in [np.add, np.multiply]: assert isinstance(func(d, d), da.Array) assert_eq(func(d, d), func(x, x)) assert isinstance(func.outer(d, d), da.Array) assert_eq(func.outer(d, d), 
func.outer(x, x)) @pytest.mark.skipif(np.__version__ < '1.13.0', reason='array_ufunc not present') def test_array_ufunc_out(): x = da.arange(10, chunks=(5,)) np.sin(x, out=x) np.add(x, 10, out=x) assert_eq(x, np.sin(np.arange(10)) + 10) @pytest.mark.skipif(np.__version__ < '1.13.0', reason='array_ufunc not present') def test_unsupported_ufunc_methods(): x = da.arange(10, chunks=(5,)) with pytest.raises(TypeError): assert np.add.reduce(x) @pytest.mark.skipif(np.__version__ < '1.13.0', reason='array_ufunc not present') def test_out_numpy(): x = da.arange(10, chunks=(5,)) empty = np.empty(10, dtype=x.dtype) with pytest.raises((TypeError, NotImplementedError)) as info: np.add(x, 1, out=empty) assert 'ndarray' in str(info.value) assert 'Array' in str(info.value) dask-0.16.0/dask/array/tests/test_wrap.py000066400000000000000000000030561320364734500203170ustar00rootroot00000000000000import pytest pytest.importorskip('numpy') from dask.array.wrap import ones import dask.array as da import numpy as np import dask def test_ones(): a = ones((10, 10), dtype='i4', chunks=(4, 4)) x = np.array(a) assert (x == np.ones((10, 10), 'i4')).all() def test_size_as_list(): a = ones([10, 10], dtype='i4', chunks=(4, 4)) x = np.array(a) assert (x == np.ones((10, 10), dtype='i4')).all() def test_singleton_size(): a = ones(10, dtype='i4', chunks=(4,)) x = np.array(a) assert (x == np.ones(10, dtype='i4')).all() def test_kwargs(): a = ones(10, dtype='i4', chunks=(4,)) x = np.array(a) assert (x == np.ones(10, dtype='i4')).all() def test_full(): a = da.full((3, 3), 100, chunks=(2, 2), dtype='i8') assert (a.compute() == 100).all() assert a.dtype == a.compute(get=dask.get).dtype == 'i8' def test_can_make_really_big_array_of_ones(): ones((1000000, 1000000), chunks=(100000, 100000)) ones(shape=(1000000, 1000000), chunks=(100000, 100000)) def test_wrap_consistent_names(): assert (sorted(ones(10, dtype='i4', chunks=(4,)).dask) == sorted(ones(10, dtype='i4', chunks=(4,)).dask)) assert (sorted(ones(10, dtype='i4', chunks=(4,)).dask) != sorted(ones(10, chunks=(4,)).dask)) assert (sorted(da.full((3, 3), 100, chunks=(2, 2), dtype='f8').dask) == sorted(da.full((3, 3), 100, chunks=(2, 2), dtype='f8').dask)) assert (sorted(da.full((3, 3), 100, chunks=(2, 2), dtype='f8').dask) != sorted(da.full((3, 3), 100, chunks=(2, 2)).dask)) dask-0.16.0/dask/array/ufunc.py000066400000000000000000000176761320364734500163020ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from operator import getitem from functools import partial import numpy as np from toolz import curry from .core import Array, elemwise, atop, apply_infer_dtype, asarray from ..base import is_dask_collection from .. 
import core, sharedict from ..utils import skip_doctest def __array_wrap__(numpy_ufunc, x, *args, **kwargs): return x.__array_wrap__(numpy_ufunc(x, *args, **kwargs)) @curry def copy_docstring(target, source=None): target.__doc__ = skip_doctest(source.__doc__) return target def wrap_elemwise(numpy_ufunc, array_wrap=False): """ Wrap up numpy function into dask.array """ def wrapped(*args, **kwargs): dsk = [arg for arg in args if hasattr(arg, '_elemwise')] if len(dsk) > 0: if array_wrap: return dsk[0]._elemwise(__array_wrap__, numpy_ufunc, *args, **kwargs) else: return dsk[0]._elemwise(numpy_ufunc, *args, **kwargs) else: return numpy_ufunc(*args, **kwargs) # functools.wraps cannot wrap ufunc in Python 2.x wrapped.__name__ = numpy_ufunc.__name__ wrapped.__doc__ = skip_doctest(numpy_ufunc.__doc__) return wrapped class ufunc(object): _forward_attrs = {'nin', 'nargs', 'nout', 'ntypes', 'identity', 'signature', 'types'} def __init__(self, ufunc): if not isinstance(ufunc, np.ufunc): raise TypeError("must be an instance of `ufunc`, " "got `%s" % type(ufunc).__name__) self._ufunc = ufunc self.__name__ = ufunc.__name__ copy_docstring(self, ufunc) def __getattr__(self, key): if key in self._forward_attrs: return getattr(self._ufunc, key) raise AttributeError("%r object has no attribute " "%r" % (type(self).__name__, key)) def __dir__(self): return list(self._forward_attrs.union(dir(type(self)), self.__dict__)) def __repr__(self): return repr(self._ufunc) def __call__(self, *args, **kwargs): dsk = [arg for arg in args if hasattr(arg, '_elemwise')] if len(dsk) > 0: return dsk[0]._elemwise(self._ufunc, *args, **kwargs) else: return self._ufunc(*args, **kwargs) @copy_docstring(source=np.ufunc.outer) def outer(self, A, B, **kwargs): if self.nin != 2: raise ValueError("outer product only supported for binary functions") if 'out' in kwargs: raise ValueError("`out` kwarg not supported") A_is_dask = is_dask_collection(A) B_is_dask = is_dask_collection(B) if not A_is_dask and not B_is_dask: return self._ufunc.outer(A, B, **kwargs) elif (A_is_dask and not isinstance(A, Array) or B_is_dask and not isinstance(B, Array)): raise NotImplementedError("Dask objects besides `dask.array.Array` " "are not supported at this time.") A = asarray(A) B = asarray(B) ndim = A.ndim + B.ndim out_inds = tuple(range(ndim)) A_inds = out_inds[:A.ndim] B_inds = out_inds[A.ndim:] dtype = apply_infer_dtype(self._ufunc.outer, [A, B], kwargs, 'ufunc.outer', suggest_dtype=False) if 'dtype' in kwargs: func = partial(self._ufunc.outer, dtype=kwargs.pop('dtype')) else: func = self._ufunc.outer return atop(func, out_inds, A, A_inds, B, B_inds, dtype=dtype, token=self.__name__ + '.outer', **kwargs) # ufuncs, copied from this page: # http://docs.scipy.org/doc/numpy/reference/ufuncs.html # math operations add = ufunc(np.add) subtract = ufunc(np.subtract) multiply = ufunc(np.multiply) divide = ufunc(np.divide) logaddexp = ufunc(np.logaddexp) logaddexp2 = ufunc(np.logaddexp2) true_divide = ufunc(np.true_divide) floor_divide = ufunc(np.floor_divide) negative = ufunc(np.negative) power = ufunc(np.power) remainder = ufunc(np.remainder) mod = ufunc(np.mod) # fmod: see below conj = conjugate = ufunc(np.conjugate) exp = ufunc(np.exp) exp2 = ufunc(np.exp2) log = ufunc(np.log) log2 = ufunc(np.log2) log10 = ufunc(np.log10) log1p = ufunc(np.log1p) expm1 = ufunc(np.expm1) sqrt = ufunc(np.sqrt) square = ufunc(np.square) cbrt = ufunc(np.cbrt) reciprocal = ufunc(np.reciprocal) # trigonometric functions sin = ufunc(np.sin) cos = ufunc(np.cos) tan = ufunc(np.tan) 
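# The wrappers above and below all share the dispatch rule implemented in
# wrap_elemwise / ufunc.__call__: if any argument exposes `_elemwise`
# (i.e. it is a dask.array.Array), the operation is built lazily blockwise;
# otherwise the call falls straight through to the underlying NumPy ufunc.
# A minimal usage sketch follows; it relies only on the public dask.array
# and NumPy APIs, and the helper name `_example_ufunc_dispatch` is purely
# illustrative, not part of this module's API.
def _example_ufunc_dispatch():
    import numpy as np
    import dask.array as da

    x = da.arange(10, chunks=5)
    lazy = da.sin(x)                # dask input -> lazy dask Array, nothing computed yet
    eager = da.sin(np.arange(10))   # plain ndarray input -> computed ndarray
    assert isinstance(lazy, da.Array)
    assert isinstance(eager, np.ndarray)
    return lazy.compute(), eager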
arcsin = ufunc(np.arcsin) arccos = ufunc(np.arccos) arctan = ufunc(np.arctan) arctan2 = ufunc(np.arctan2) hypot = ufunc(np.hypot) sinh = ufunc(np.sinh) cosh = ufunc(np.cosh) tanh = ufunc(np.tanh) arcsinh = ufunc(np.arcsinh) arccosh = ufunc(np.arccosh) arctanh = ufunc(np.arctanh) deg2rad = ufunc(np.deg2rad) rad2deg = ufunc(np.rad2deg) # comparison functions greater = ufunc(np.greater) greater_equal = ufunc(np.greater_equal) less = ufunc(np.less) less_equal = ufunc(np.less_equal) not_equal = ufunc(np.not_equal) equal = ufunc(np.equal) logical_and = ufunc(np.logical_and) logical_or = ufunc(np.logical_or) logical_xor = ufunc(np.logical_xor) logical_not = ufunc(np.logical_not) maximum = ufunc(np.maximum) minimum = ufunc(np.minimum) fmax = ufunc(np.fmax) fmin = ufunc(np.fmin) # floating functions isfinite = ufunc(np.isfinite) isinf = ufunc(np.isinf) isnan = ufunc(np.isnan) signbit = ufunc(np.signbit) copysign = ufunc(np.copysign) nextafter = ufunc(np.nextafter) spacing = ufunc(np.spacing) # modf: see below ldexp = ufunc(np.ldexp) # frexp: see below fmod = ufunc(np.fmod) floor = ufunc(np.floor) ceil = ufunc(np.ceil) trunc = ufunc(np.trunc) # more math routines, from this page: # http://docs.scipy.org/doc/numpy/reference/routines.math.html degrees = ufunc(np.degrees) radians = ufunc(np.radians) rint = ufunc(np.rint) fabs = ufunc(np.fabs) sign = ufunc(np.sign) absolute = ufunc(np.absolute) # non-ufunc elementwise functions clip = wrap_elemwise(np.clip) isreal = wrap_elemwise(np.isreal, array_wrap=True) iscomplex = wrap_elemwise(np.iscomplex, array_wrap=True) real = wrap_elemwise(np.real, array_wrap=True) imag = wrap_elemwise(np.imag, array_wrap=True) fix = wrap_elemwise(np.fix, array_wrap=True) i0 = wrap_elemwise(np.i0, array_wrap=True) sinc = wrap_elemwise(np.sinc, array_wrap=True) nan_to_num = wrap_elemwise(np.nan_to_num, array_wrap=True) @copy_docstring(source=np.angle) def angle(x, deg=0): deg = bool(deg) if hasattr(x, '_elemwise'): return x._elemwise(__array_wrap__, np.angle, x, deg) return np.angle(x, deg=deg) @copy_docstring(source=np.frexp) def frexp(x): # Not actually object dtype, just need to specify something tmp = elemwise(np.frexp, x, dtype=object) left = 'mantissa-' + tmp.name right = 'exponent-' + tmp.name ldsk = {(left,) + key[1:]: (getitem, key, 0) for key in core.flatten(tmp.__dask_keys__())} rdsk = {(right,) + key[1:]: (getitem, key, 1) for key in core.flatten(tmp.__dask_keys__())} a = np.empty((1, ), dtype=x.dtype) l, r = np.frexp(a) ldt = l.dtype rdt = r.dtype L = Array(sharedict.merge(tmp.dask, (left, ldsk)), left, chunks=tmp.chunks, dtype=ldt) R = Array(sharedict.merge(tmp.dask, (right, rdsk)), right, chunks=tmp.chunks, dtype=rdt) return L, R @copy_docstring(source=np.modf) def modf(x): # Not actually object dtype, just need to specify something tmp = elemwise(np.modf, x, dtype=object) left = 'modf1-' + tmp.name right = 'modf2-' + tmp.name ldsk = {(left,) + key[1:]: (getitem, key, 0) for key in core.flatten(tmp.__dask_keys__())} rdsk = {(right,) + key[1:]: (getitem, key, 1) for key in core.flatten(tmp.__dask_keys__())} a = np.empty((1,), dtype=x.dtype) l, r = np.modf(a) ldt = l.dtype rdt = r.dtype L = Array(sharedict.merge(tmp.dask, (left, ldsk)), left, chunks=tmp.chunks, dtype=ldt) R = Array(sharedict.merge(tmp.dask, (right, rdsk)), right, chunks=tmp.chunks, dtype=rdt) return L, R dask-0.16.0/dask/array/utils.py000066400000000000000000000056341320364734500163110ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from 
distutils.version import LooseVersion import difflib import math import os import numpy as np from toolz import frequencies, concat from .core import Array from ..local import get_sync from ..sharedict import ShareDict if LooseVersion(np.__version__) >= '1.10.0': allclose = np.allclose else: def allclose(a, b, **kwargs): if kwargs.pop('equal_nan', False): a_nans = np.isnan(a) b_nans = np.isnan(b) if not (a_nans == b_nans).all(): return False a = a[~a_nans] b = b[~b_nans] return np.allclose(a, b, **kwargs) def same_keys(a, b): def key(k): if isinstance(k, str): return (k, -1, -1, -1) else: return k return sorted(a.dask, key=key) == sorted(b.dask, key=key) def _not_empty(x): return x.shape and 0 not in x.shape def _check_dsk(dsk): """ Check that graph is well named and non-overlapping """ if not isinstance(dsk, ShareDict): return assert all(isinstance(k, (tuple, str)) for k in dsk.dicts) freqs = frequencies(concat(dsk.dicts.values())) non_one = {k: v for k, v in freqs.items() if v != 1} assert not non_one, non_one def assert_eq_shape(a, b, check_nan=True): for aa, bb in zip(a, b): if math.isnan(aa) or math.isnan(bb): if check_nan: assert math.isnan(aa) == math.isnan(bb) else: assert aa == bb def assert_eq(a, b, check_shape=True, **kwargs): a_original = a b_original = b if isinstance(a, Array): assert a.dtype is not None adt = a.dtype _check_dsk(a.dask) a = a.compute(get=get_sync) if hasattr(a, 'todense'): a = a.todense() if _not_empty(a): assert a.dtype == a_original.dtype if check_shape: assert_eq_shape(a_original.shape, a.shape, check_nan=False) else: adt = getattr(a, 'dtype', None) if isinstance(b, Array): assert b.dtype is not None bdt = b.dtype _check_dsk(b.dask) b = b.compute(get=get_sync) if hasattr(b, 'todense'): b = b.todense() if _not_empty(b): assert b.dtype == b_original.dtype if check_shape: assert_eq_shape(b_original.shape, b.shape, check_nan=False) else: bdt = getattr(b, 'dtype', None) if str(adt) != str(bdt): diff = difflib.ndiff(str(adt).splitlines(), str(bdt).splitlines()) raise AssertionError('string repr are different' + os.linesep + os.linesep.join(diff)) try: assert a.shape == b.shape assert allclose(a, b, **kwargs) return True except TypeError: pass c = a == b if isinstance(c, np.ndarray): assert c.all() else: assert c return True dask-0.16.0/dask/array/wrap.py000066400000000000000000000035011320364734500161110ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from functools import partial from itertools import product import numpy as np try: from cytoolz import curry except ImportError: from toolz import curry from ..base import tokenize from .core import Array, normalize_chunks from .numpy_compat import full def wrap_func_shape_as_first_arg(func, *args, **kwargs): """ Transform np creation function into blocked version """ if 'shape' not in kwargs: shape, args = args[0], args[1:] else: shape = kwargs.pop('shape') if not isinstance(shape, (tuple, list)): shape = (shape,) chunks = kwargs.pop('chunks', None) chunks = normalize_chunks(chunks, shape) name = kwargs.pop('name', None) dtype = kwargs.pop('dtype', None) if dtype is None: dtype = func(shape, *args, **kwargs).dtype name = name or 'wrapped-' + tokenize(func, shape, chunks, dtype, args, kwargs) keys = product([name], *[range(len(bd)) for bd in chunks]) shapes = product(*chunks) func = partial(func, dtype=dtype, **kwargs) vals = ((func,) + (s,) + args for s in shapes) dsk = dict(zip(keys, vals)) return Array(dsk, name, chunks, dtype=dtype) @curry def wrap(wrap_func, func, 
**kwargs): f = partial(wrap_func, func, **kwargs) template = """ Blocked variant of %(name)s Follows the signature of %(name)s exactly except that it also requires a keyword argument chunks=(...) Original signature follows below. """ if func.__doc__ is not None: f.__doc__ = template % {'name': func.__name__} + func.__doc__ f.__name__ = 'blocked_' + func.__name__ return f w = wrap(wrap_func_shape_as_first_arg) ones = w(np.ones, dtype='f8') zeros = w(np.zeros, dtype='f8') empty = w(np.empty, dtype='f8') full = w(full) dask-0.16.0/dask/bag/000077500000000000000000000000001320364734500142025ustar00rootroot00000000000000dask-0.16.0/dask/bag/__init__.py000066400000000000000000000005401320364734500163120ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from .core import (Bag, Item, from_sequence, from_url, to_textfiles, concat, from_delayed, map_partitions, bag_range as range, bag_zip as zip, bag_map as map) from .text import read_text from ..context import set_options from ..base import compute dask-0.16.0/dask/bag/core.py000066400000000000000000002066051320364734500155150ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from collections import Iterable, Iterator, defaultdict from functools import wraps, partial import itertools import math from operator import getitem import types import uuid from random import Random from warnings import warn from distutils.version import LooseVersion from toolz import (merge, take, reduce, valmap, map, partition_all, filter, remove, compose, curry, first, second, accumulate, peek) from toolz.compatibility import iteritems, zip import toolz _implement_accumulate = LooseVersion(toolz.__version__) > '0.7.4' try: import cytoolz from cytoolz import (frequencies, merge_with, join, reduceby, count, pluck, groupby, topk) if LooseVersion(cytoolz.__version__) > '0.7.3': from cytoolz import accumulate # noqa: F811 _implement_accumulate = True except ImportError: from toolz import (frequencies, merge_with, join, reduceby, count, pluck, groupby, topk) from ..base import Base, tokenize, dont_optimize, is_dask_collection from ..bytes.core import write_bytes from ..compatibility import apply, urlopen from ..context import _globals, globalmethod from ..core import quote, istask, get_dependencies, reverse_dict from ..delayed import Delayed from ..multiprocessing import get as mpget from ..optimize import fuse, cull, inline from ..utils import (system_encoding, takes_multiple_arguments, funcname, digit, insert, ensure_dict) no_default = '__no__default__' no_result = type('no_result', (object,), {'__slots__': (), '__reduce__': lambda self: 'no_result'}) def lazify_task(task, start=True): """ Given a task, remove unnecessary calls to ``list`` and ``reify``. This traverses tasks and small lists. We choose not to traverse down lists of size >= 50 because it is unlikely that sequences this long contain other sequences in practice. Examples -------- >>> task = (sum, (list, (map, inc, [1, 2, 3]))) # doctest: +SKIP >>> lazify_task(task) # doctest: +SKIP (sum, (map, inc, [1, 2, 3])) """ if type(task) is list and len(task) < 50: return [lazify_task(arg, False) for arg in task] if not istask(task): return task head, tail = task[0], task[1:] if not start and head in (list, reify): task = task[1] return lazify_task(*tail, start=False) else: return (head,) + tuple([lazify_task(arg, False) for arg in tail]) def lazify(dsk): """ Remove unnecessary calls to ``list`` in tasks. 
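    For illustration, this simply applies ``lazify_task`` to every value in
    the graph (``inc`` below is a hypothetical stand-in function, as in the
    ``lazify_task`` example):

    >>> dsk = {'out': (sum, (list, (map, inc, [1, 2, 3])))}  # doctest: +SKIP
    >>> lazify(dsk)                                          # doctest: +SKIP
    {'out': (sum, (map, inc, [1, 2, 3]))}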
See Also -------- ``dask.bag.core.lazify_task`` """ return valmap(lazify_task, dsk) def inline_singleton_lists(dsk, dependencies=None): """ Inline lists that are only used once. >>> d = {'b': (list, 'a'), ... 'c': (f, 'b', 1)} # doctest: +SKIP >>> inline_singleton_lists(d) # doctest: +SKIP {'c': (f, (list, 'a'), 1)} Pairs nicely with lazify afterwards. """ if dependencies is None: dependencies = {k: get_dependencies(dsk, task=v) for k, v in dsk.items()} dependents = reverse_dict(dependencies) keys = [k for k, v in dsk.items() if istask(v) and v and v[0] is list and len(dependents[k]) == 1] dsk = inline(dsk, keys, inline_constants=False) for k in keys: del dsk[k] return dsk def optimize(dsk, keys, fuse_keys=None, rename_fused_keys=True, **kwargs): """ Optimize a dask from a dask Bag. """ dsk2, dependencies = cull(dsk, keys) dsk3, dependencies = fuse(dsk2, keys + (fuse_keys or []), dependencies, rename_keys=rename_fused_keys) dsk4 = inline_singleton_lists(dsk3, dependencies) dsk5 = lazify(dsk4) return dsk5 def to_textfiles(b, path, name_function=None, compression='infer', encoding=system_encoding, compute=True, get=None, storage_options=None): """ Write dask Bag to disk, one filename per partition, one line per element. **Paths**: This will create one file for each partition in your bag. You can specify the filenames in a variety of ways. Use a globstring >>> b.to_textfiles('/path/to/data/*.json.gz') # doctest: +SKIP The * will be replaced by the increasing sequence 1, 2, ... :: /path/to/data/0.json.gz /path/to/data/1.json.gz Use a globstring and a ``name_function=`` keyword argument. The name_function function should expect an integer and produce a string. Strings produced by name_function must preserve the order of their respective partition indices. >>> from datetime import date, timedelta >>> def name(i): ... return str(date(2015, 1, 1) + i * timedelta(days=1)) >>> name(0) '2015-01-01' >>> name(15) '2015-01-16' >>> b.to_textfiles('/path/to/data/*.json.gz', name_function=name) # doctest: +SKIP :: /path/to/data/2015-01-01.json.gz /path/to/data/2015-01-02.json.gz ... You can also provide an explicit list of paths. >>> paths = ['/path/to/data/alice.json.gz', '/path/to/data/bob.json.gz', ...] # doctest: +SKIP >>> b.to_textfiles(paths) # doctest: +SKIP **Compression**: Filenames with extensions corresponding to known compression algorithms (gz, bz2) will be compressed accordingly. **Bag Contents**: The bag calling ``to_textfiles`` must be a bag of text strings. 
For example, a bag of dictionaries could be written to JSON text files by mapping ``json.dumps`` on to the bag first, and then calling ``to_textfiles`` : >>> b_dict.map(json.dumps).to_textfiles("/path/to/data/*.json") # doctest: +SKIP """ from dask import delayed (writes,names) = write_bytes(b.to_delayed(), path, name_function, compression, encoding=encoding, **(storage_options or {})) # Use Bag optimizations on these delayed objects dsk = ensure_dict(delayed(writes).dask) dsk2 = Bag.__dask_optimize__(dsk, [w.key for w in writes]) out = [Delayed(w.key, dsk2) for w in writes] if compute: get = get or _globals.get('get', None) or Bag.__dask_scheduler__ delayed(out).compute(get=get) return names else: return out def finalize(results): if not results: return results if isinstance(results, Iterator): results = list(results) if isinstance(results[0], Iterable) and not isinstance(results[0], str): results = toolz.concat(results) if isinstance(results, Iterator): results = list(results) return results def finalize_item(results): return results[0] class StringAccessor(object): """ String processing functions Examples -------- >>> import dask.bag as db >>> b = db.from_sequence(['Alice Smith', 'Bob Jones', 'Charlie Smith']) >>> list(b.str.lower()) ['alice smith', 'bob jones', 'charlie smith'] >>> list(b.str.match('*Smith')) ['Alice Smith', 'Charlie Smith'] >>> list(b.str.split(' ')) [['Alice', 'Smith'], ['Bob', 'Jones'], ['Charlie', 'Smith']] """ def __init__(self, bag): self._bag = bag def __dir__(self): return sorted(set(dir(type(self)) + dir(str))) def _strmap(self, key, *args, **kwargs): return self._bag.map(lambda s: getattr(s, key)(*args, **kwargs)) def __getattr__(self, key): try: return object.__getattribute__(self, key) except AttributeError: if key in dir(str): func = getattr(str, key) return robust_wraps(func)(partial(self._strmap, key)) else: raise def match(self, pattern): """ Filter strings by those that match a pattern. Examples -------- >>> import dask.bag as db >>> b = db.from_sequence(['Alice Smith', 'Bob Jones', 'Charlie Smith']) >>> list(b.str.match('*Smith')) ['Alice Smith', 'Charlie Smith'] See Also -------- fnmatch.fnmatch """ from fnmatch import fnmatch return self._bag.filter(partial(fnmatch, pat=pattern)) def robust_wraps(wrapper): """ A weak version of wraps that only copies doc. """ def _(wrapped): wrapped.__doc__ = wrapper.__doc__ return wrapped return _ class Item(Base): def __init__(self, dsk, key): self.dask = dsk self.key = key self.name = key def __dask_graph__(self): return self.dask def __dask_keys__(self): return [self.key] def __dask_tokenize__(self): return self.key __dask_optimize__ = globalmethod(optimize, key='bag_optimize', falsey=dont_optimize) __dask_scheduler__ = staticmethod(mpget) def __dask_postcompute__(self): return finalize_item, () def __dask_postpersist__(self): return Item, (self.key,) @staticmethod def from_delayed(value): """ Create bag item from a dask.delayed value. 
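        For example (an illustrative sketch; any ``Delayed`` that evaluates
        to a single value works):

        >>> from dask import delayed            # doctest: +SKIP
        >>> total = delayed(sum)([1, 2, 3])     # doctest: +SKIP
        >>> Item.from_delayed(total).compute()  # doctest: +SKIP
        6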
See ``dask.bag.from_delayed`` for details """ from dask.delayed import Delayed, delayed if not isinstance(value, Delayed) and hasattr(value, 'key'): value = delayed(value) assert isinstance(value, Delayed) return Item(ensure_dict(value.dask), value.key) @property def _args(self): return (self.dask, self.key) def __getstate__(self): return self._args def __setstate__(self, state): self.dask, self.key = state def apply(self, func): name = 'apply-{0}-{1}'.format(funcname(func), tokenize(self, func)) dsk = {name: (func, self.key)} return Item(merge(self.dask, dsk), name) __int__ = __float__ = __complex__ = __bool__ = Base.compute def to_delayed(self): """ Convert bag item to dask.delayed. Returns a single value. """ from dask.delayed import Delayed dsk = self.__dask_optimize__(self.__dask_graph__(), self.__dask_keys__()) return Delayed(self.key, dsk) class Bag(Base): """ Parallel collection of Python objects Examples -------- Create Bag from sequence >>> import dask.bag as db >>> b = db.from_sequence(range(5)) >>> list(b.filter(lambda x: x % 2 == 0).map(lambda x: x * 10)) # doctest: +SKIP [0, 20, 40] Create Bag from filename or globstring of filenames >>> b = db.read_text('/path/to/mydata.*.json.gz').map(json.loads) # doctest: +SKIP Create manually (expert use) >>> dsk = {('x', 0): (range, 5), ... ('x', 1): (range, 5), ... ('x', 2): (range, 5)} >>> b = Bag(dsk, 'x', npartitions=3) >>> sorted(b.map(lambda x: x * 10)) # doctest: +SKIP [0, 0, 0, 10, 10, 10, 20, 20, 20, 30, 30, 30, 40, 40, 40] >>> int(b.fold(lambda x, y: x + y)) # doctest: +SKIP 30 """ def __init__(self, dsk, name, npartitions): self.dask = dsk self.name = name self.npartitions = npartitions def __dask_graph__(self): return self.dask def __dask_keys__(self): return [(self.name, i) for i in range(self.npartitions)] def __dask_tokenize__(self): return self.name __dask_optimize__ = globalmethod(optimize, key='bag_optimize', falsey=dont_optimize) __dask_scheduler__ = staticmethod(mpget) def __dask_postcompute__(self): return finalize, () def __dask_postpersist__(self): return type(self), (self.name, self.npartitions) def __str__(self): name = self.name if len(self.name) < 10 else self.name[:7] + '...' return 'dask.bag<%s, npartitions=%d>' % (name, self.npartitions) __repr__ = __str__ str = property(fget=StringAccessor) def map(self, func, *args, **kwargs): """Apply a function elementwise across one or more bags. Note that all ``Bag`` arguments must be partitioned identically. Parameters ---------- func : callable *args, **kwargs : Bag, Item, or object Extra arguments and keyword arguments to pass to ``func`` *after* the calling bag instance. Non-Bag args/kwargs are broadcasted across all calls to ``func``. Notes ----- For calls with multiple `Bag` arguments, corresponding partitions should have the same length; if they do not, the call will error at compute time. Examples -------- >>> import dask.bag as db >>> b = db.from_sequence(range(5), npartitions=2) >>> b2 = db.from_sequence(range(5, 10), npartitions=2) Apply a function to all elements in a bag: >>> b.map(lambda x: x + 1).compute() [1, 2, 3, 4, 5] Apply a function with arguments from multiple bags: >>> from operator import add >>> b.map(add, b2).compute() [5, 7, 9, 11, 13] Non-bag arguments are broadcast across all calls to the mapped function: >>> b.map(add, 1).compute() [1, 2, 3, 4, 5] Keyword arguments are also supported, and have the same semantics as regular arguments: >>> def myadd(x, y=0): ... 
return x + y >>> b.map(myadd, y=b2).compute() [5, 7, 9, 11, 13] >>> b.map(myadd, y=1).compute() [1, 2, 3, 4, 5] Both arguments and keyword arguments can also be instances of ``dask.bag.Item``. Here we'll add the max value in the bag to each element: >>> b.map(myadd, b.max()).compute() [4, 5, 6, 7, 8] """ return bag_map(func, self, *args, **kwargs) def starmap(self, func, **kwargs): """Apply a function using argument tuples from the given bag. This is similar to ``itertools.starmap``, except it also accepts keyword arguments. In pseudocode, this is could be written as: >>> def starmap(func, bag, **kwargs): ... return (func(*args, **kwargs) for args in bag) Parameters ---------- func : callable **kwargs : Item, Delayed, or object, optional Extra keyword arguments to pass to ``func``. These can either be normal objects, ``dask.bag.Item``, or ``dask.delayed.Delayed``. Examples -------- >>> import dask.bag as db >>> data = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10)] >>> b = db.from_sequence(data, npartitions=2) Apply a function to each argument tuple: >>> from operator import add >>> b.starmap(add).compute() [3, 7, 11, 15, 19] Apply a function to each argument tuple, with additional keyword arguments: >>> def myadd(x, y, z=0): ... return x + y + z >>> b.starmap(myadd, z=10).compute() [13, 17, 21, 25, 29] Keyword arguments can also be instances of ``dask.bag.Item`` or ``dask.delayed.Delayed``: >>> max_second = b.pluck(1).max() >>> max_second.compute() 10 >>> b.starmap(myadd, z=max_second).compute() [13, 17, 21, 25, 29] """ name = 'starmap-{0}-{1}'.format(funcname(func), tokenize(self, func, kwargs)) dsk = self.dask.copy() if kwargs: kw_dsk, kwargs = unpack_scalar_dask_kwargs(kwargs) dsk.update(kw_dsk) dsk.update({(name, i): (reify, (starmap_chunk, func, (self.name, i), kwargs)) for i in range(self.npartitions)}) return type(self)(dsk, name, self.npartitions) @property def _args(self): return (self.dask, self.name, self.npartitions) def __getstate__(self): return self._args def __setstate__(self, state): self.dask, self.name, self.npartitions = state def filter(self, predicate): """ Filter elements in collection by a predicate function. >>> def iseven(x): ... return x % 2 == 0 >>> import dask.bag as db >>> b = db.from_sequence(range(5)) >>> list(b.filter(iseven)) # doctest: +SKIP [0, 2, 4] """ name = 'filter-{0}-{1}'.format(funcname(predicate), tokenize(self, predicate)) dsk = dict(((name, i), (reify, (filter, predicate, (self.name, i)))) for i in range(self.npartitions)) return type(self)(merge(self.dask, dsk), name, self.npartitions) def random_sample(self, prob, random_state=None): """ Return elements from bag with probability of ``prob``. Parameters ---------- prob : float A float between 0 and 1, representing the probability that each element will be returned. random_state : int or random.Random, optional If an integer, will be used to seed a new ``random.Random`` object. If provided, results in deterministic sampling. 
Examples -------- >>> import dask.bag as db >>> b = db.from_sequence(range(5)) >>> list(b.random_sample(0.5, 42)) [1, 4] >>> list(b.random_sample(0.5, 42)) [1, 4] """ if not 0 <= prob <= 1: raise ValueError('prob must be a number in the interval [0, 1]') if not isinstance(random_state, Random): random_state = Random(random_state) name = 'random-sample-%s' % tokenize(self, prob, random_state.getstate()) state_data = random_state_data_python(self.npartitions, random_state) dsk = {(name, i): (reify, (random_sample, (self.name, i), state, prob)) for i, state in zip(range(self.npartitions), state_data)} return type(self)(merge(self.dask, dsk), name, self.npartitions) def remove(self, predicate): """ Remove elements in collection that match predicate. >>> def iseven(x): ... return x % 2 == 0 >>> import dask.bag as db >>> b = db.from_sequence(range(5)) >>> list(b.remove(iseven)) # doctest: +SKIP [1, 3] """ name = 'remove-{0}-{1}'.format(funcname(predicate), tokenize(self, predicate)) dsk = dict(((name, i), (reify, (remove, predicate, (self.name, i)))) for i in range(self.npartitions)) return type(self)(merge(self.dask, dsk), name, self.npartitions) def map_partitions(self, func, *args, **kwargs): """Apply a function to every partition across one or more bags. Note that all ``Bag`` arguments must be partitioned identically. Parameters ---------- func : callable *args, **kwargs : Bag, Item, Delayed, or object Arguments and keyword arguments to pass to ``func``. Partitions from this bag will be the first argument, and these will be passed *after*. Examples -------- >>> import dask.bag as db >>> b = db.from_sequence(range(1, 101), npartitions=10) >>> def div(nums, den=1): ... return [num / den for num in nums] Using a python object: >>> hi = b.max().compute() >>> hi 100 >>> b.map_partitions(div, den=hi).take(5) (0.01, 0.02, 0.03, 0.04, 0.05) Using an ``Item``: >>> b.map_partitions(div, den=b.max()).take(5) (0.01, 0.02, 0.03, 0.04, 0.05) Note that while both versions give the same output, the second forms a single graph, and then computes everything at once, and in some cases may be more efficient. """ return map_partitions(func, self, *args, **kwargs) def pluck(self, key, default=no_default): """ Select item from all tuples/dicts in collection. >>> b = from_sequence([{'name': 'Alice', 'credits': [1, 2, 3]}, ... {'name': 'Bob', 'credits': [10, 20]}]) >>> list(b.pluck('name')) # doctest: +SKIP ['Alice', 'Bob'] >>> list(b.pluck('credits').pluck(0)) # doctest: +SKIP [1, 10] """ name = 'pluck-' + tokenize(self, key, default) key = quote(key) if default == no_default: dsk = dict(((name, i), (list, (pluck, key, (self.name, i)))) for i in range(self.npartitions)) else: dsk = dict(((name, i), (list, (pluck, key, (self.name, i), default))) for i in range(self.npartitions)) return type(self)(merge(self.dask, dsk), name, self.npartitions) def unzip(self, n): """Transform a bag of tuples to ``n`` bags of their elements. 
Examples -------- >>> b = from_sequence([(i, i + 1, i + 2) for i in range(10)]) >>> first, second, third = b.unzip(3) >>> isinstance(first, Bag) True >>> first.compute() [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] Note that this is equivalent to: >>> first, second, third = (b.pluck(i) for i in range(3)) """ return tuple(self.pluck(i) for i in range(n)) @wraps(to_textfiles) def to_textfiles(self, path, name_function=None, compression='infer', encoding=system_encoding, compute=True, get=None, storage_options=None): return to_textfiles(self, path, name_function, compression, encoding, compute, get=get, storage_options=storage_options) def fold(self, binop, combine=None, initial=no_default, split_every=None): """ Parallelizable reduction Fold is like the builtin function ``reduce`` except that it works in parallel. Fold takes two binary operator functions, one to reduce each partition of our dataset and another to combine results between partitions 1. ``binop``: Binary operator to reduce within each partition 2. ``combine``: Binary operator to combine results from binop Sequentially this would look like the following: >>> intermediates = [reduce(binop, part) for part in partitions] # doctest: +SKIP >>> final = reduce(combine, intermediates) # doctest: +SKIP If only one function is given then it is used for both functions ``binop`` and ``combine`` as in the following example to compute the sum: >>> def add(x, y): ... return x + y >>> b = from_sequence(range(5)) >>> b.fold(add).compute() # doctest: +SKIP 10 In full form we provide both binary operators as well as their default arguments >>> b.fold(binop=add, combine=add, initial=0).compute() # doctest: +SKIP 10 More complex binary operators are also doable >>> def add_to_set(acc, x): ... ''' Add new element x to set acc ''' ... return acc | set([x]) >>> b.fold(add_to_set, set.union, initial=set()).compute() # doctest: +SKIP {1, 2, 3, 4, 5} See Also -------- Bag.foldby """ combine = combine or binop if initial is not no_default: return self.reduction(curry(_reduce, binop, initial=initial), curry(_reduce, combine), split_every=split_every) else: from toolz.curried import reduce return self.reduction(reduce(binop), reduce(combine), split_every=split_every) def frequencies(self, split_every=None): """ Count number of occurrences of each distinct element. >>> b = from_sequence(['Alice', 'Bob', 'Alice']) >>> dict(b.frequencies()) # doctest: +SKIP {'Alice': 2, 'Bob', 1} """ return self.reduction(frequencies, merge_frequencies, out_type=Bag, split_every=split_every, name='frequencies').map_partitions(dictitems) def topk(self, k, key=None, split_every=None): """ K largest elements in collection Optionally ordered by some key function >>> b = from_sequence([10, 3, 5, 7, 11, 4]) >>> list(b.topk(2)) # doctest: +SKIP [11, 10] >>> list(b.topk(2, lambda x: -x)) # doctest: +SKIP [3, 4] """ if key: if callable(key) and takes_multiple_arguments(key): key = partial(apply, key) func = partial(topk, k, key=key) else: func = partial(topk, k) return self.reduction(func, compose(func, toolz.concat), out_type=Bag, split_every=split_every, name='topk') def distinct(self): """ Distinct elements of collection Unordered without repeats. >>> b = from_sequence(['Alice', 'Bob', 'Alice']) >>> sorted(b.distinct()) ['Alice', 'Bob'] """ return self.reduction(set, merge_distinct, out_type=Bag, name='distinct') def reduction(self, perpartition, aggregate, split_every=None, out_type=Item, name=None): """ Reduce collection with reduction operators. 
Parameters ---------- perpartition: function reduction to apply to each partition aggregate: function reduction to apply to the results of all partitions split_every: int (optional) Group partitions into groups of this size while performing reduction Defaults to 8 out_type: {Bag, Item} The out type of the result, Item if a single element, Bag if a list of elements. Defaults to Item. Examples -------- >>> b = from_sequence(range(10)) >>> b.reduction(sum, sum).compute() 45 """ if split_every is None: split_every = 8 if split_every is False: split_every = self.npartitions token = tokenize(self, perpartition, aggregate, split_every) a = '%s-part-%s' % (name or funcname(perpartition), token) is_last = self.npartitions == 1 dsk = {(a, i): (empty_safe_apply, perpartition, (self.name, i), is_last) for i in range(self.npartitions)} k = self.npartitions b = a fmt = '%s-aggregate-%s' % (name or funcname(aggregate), token) depth = 0 while k > split_every: c = fmt + str(depth) dsk2 = dict(((c, i), (empty_safe_aggregate, aggregate, [(b, j) for j in inds], False)) for i, inds in enumerate(partition_all(split_every, range(k)))) dsk.update(dsk2) k = len(dsk2) b = c depth += 1 dsk[(fmt, 0)] = (empty_safe_aggregate, aggregate, [(b, j) for j in range(k)], True) if out_type is Item: dsk[fmt] = dsk.pop((fmt, 0)) return Item(merge(self.dask, dsk), fmt) else: return Bag(merge(self.dask, dsk), fmt, 1) def sum(self, split_every=None): """ Sum all elements """ return self.reduction(sum, sum, split_every=split_every) def max(self, split_every=None): """ Maximum element """ return self.reduction(max, max, split_every=split_every) def min(self, split_every=None): """ Minimum element """ return self.reduction(min, min, split_every=split_every) def any(self, split_every=None): """ Are any of the elements truthy? """ return self.reduction(any, any, split_every=split_every) def all(self, split_every=None): """ Are all elements truthy? """ return self.reduction(all, all, split_every=split_every) def count(self, split_every=None): """ Count the number of elements. """ return self.reduction(count, sum, split_every=split_every) def mean(self): """ Arithmetic mean """ def mean_chunk(seq): total, n = 0.0, 0 for x in seq: total += x n += 1 return total, n def mean_aggregate(x): totals, counts = list(zip(*x)) return 1.0 * sum(totals) / sum(counts) return self.reduction(mean_chunk, mean_aggregate, split_every=False) def var(self, ddof=0): """ Variance """ def var_chunk(seq): squares, total, n = 0.0, 0.0, 0 for x in seq: squares += x**2 total += x n += 1 return squares, total, n def var_aggregate(x): squares, totals, counts = list(zip(*x)) x2, x, n = float(sum(squares)), float(sum(totals)), sum(counts) result = (x2 / n) - (x / n)**2 return result * n / (n - ddof) return self.reduction(var_chunk, var_aggregate, split_every=False) def std(self, ddof=0): """ Standard deviation """ return self.var(ddof=ddof).apply(math.sqrt) def join(self, other, on_self, on_other=None): """ Joins collection with another collection. Other collection must be an Iterable, and not a Bag. 
>>> people = from_sequence(['Alice', 'Bob', 'Charlie']) >>> fruit = ['Apple', 'Apricot', 'Banana'] >>> list(people.join(fruit, lambda x: x[0])) # doctest: +SKIP [('Apple', 'Alice'), ('Apricot', 'Alice'), ('Banana', 'Bob')] """ assert isinstance(other, Iterable) assert not isinstance(other, Bag) if on_other is None: on_other = on_self name = 'join-' + tokenize(self, other, on_self, on_other) dsk = dict(((name, i), (list, (join, on_other, other, on_self, (self.name, i)))) for i in range(self.npartitions)) return type(self)(merge(self.dask, dsk), name, self.npartitions) def product(self, other): """ Cartesian product between two bags. """ assert isinstance(other, Bag) name = 'product-' + tokenize(self, other) n, m = self.npartitions, other.npartitions dsk = dict(((name, i * m + j), (list, (itertools.product, (self.name, i), (other.name, j)))) for i in range(n) for j in range(m)) return type(self)(merge(self.dask, other.dask, dsk), name, n * m) def foldby(self, key, binop, initial=no_default, combine=None, combine_initial=no_default, split_every=None): """ Combined reduction and groupby. Foldby provides a combined groupby and reduce for efficient parallel split-apply-combine tasks. The computation >>> b.foldby(key, binop, init) # doctest: +SKIP is equivalent to the following: >>> def reduction(group): # doctest: +SKIP ... return reduce(binop, group, init) # doctest: +SKIP >>> b.groupby(key).map(lambda (k, v): (k, reduction(v)))# doctest: +SKIP But uses minimal communication and so is *much* faster. >>> b = from_sequence(range(10)) >>> iseven = lambda x: x % 2 == 0 >>> add = lambda x, y: x + y >>> dict(b.foldby(iseven, add)) # doctest: +SKIP {True: 20, False: 25} **Key Function** The key function determines how to group the elements in your bag. In the common case where your bag holds dictionaries then the key function often gets out one of those elements. >>> def key(x): ... return x['name'] This case is so common that it is special cased, and if you provide a key that is not a callable function then dask.bag will turn it into one automatically. The following are equivalent: >>> b.foldby(lambda x: x['name'], ...) # doctest: +SKIP >>> b.foldby('name', ...) # doctest: +SKIP **Binops** It can be tricky to construct the right binary operators to perform analytic queries. The ``foldby`` method accepts two binary operators, ``binop`` and ``combine``. Binary operators two inputs and output must have the same type. Binop takes a running total and a new element and produces a new total: >>> def binop(total, x): ... return total + x['amount'] Combine takes two totals and combines them: >>> def combine(total1, total2): ... return total1 + total2 Each of these binary operators may have a default first value for total, before any other value is seen. For addition binary operators like above this is often ``0`` or the identity element for your operation. **split_every** Group partitions into groups of this size while performing reduction. Defaults to 8. 
>>> b.foldby('name', binop, 0, combine, 0) # doctest: +SKIP See Also -------- toolz.reduceby pyspark.combineByKey """ if split_every is None: split_every = 8 if split_every is False: split_every = self.npartitions token = tokenize(self, key, binop, initial, combine, combine_initial) a = 'foldby-a-' + token if combine is None: combine = binop if initial is not no_default: dsk = {(a, i): (reduceby, key, binop, (self.name, i), initial) for i in range(self.npartitions)} else: dsk = {(a, i): (reduceby, key, binop, (self.name, i)) for i in range(self.npartitions)} def combine2(acc, x): return combine(acc, x[1]) depth = 0 k = self.npartitions b = a while k > split_every: c = b + str(depth) if combine_initial is not no_default: dsk2 = {(c, i): (reduceby, 0, combine2, (toolz.concat, (map, dictitems, [(b, j) for j in inds])), combine_initial) for i, inds in enumerate(partition_all(split_every, range(k)))} else: dsk2 = {(c, i): (merge_with, (partial, reduce, combine), [(b, j) for j in inds]) for i, inds in enumerate(partition_all(split_every, range(k)))} dsk.update(dsk2) k = len(dsk2) b = c depth += 1 e = 'foldby-b-' + token if combine_initial is not no_default: dsk[(e, 0)] = (dictitems, (reduceby, 0, combine2, (toolz.concat, (map, dictitems, [(b, j) for j in range(k)])), combine_initial)) else: dsk[(e, 0)] = (dictitems, (merge_with, (partial, reduce, combine), [(b, j) for j in range(k)])) return type(self)(merge(self.dask, dsk), e, 1) def take(self, k, npartitions=1, compute=True): """ Take the first k elements. Parameters ---------- k : int The number of elements to return npartitions : int, optional Elements are only taken from the first ``npartitions``, with a default of 1. If there are fewer than ``k`` rows in the first ``npartitions`` a warning will be raised and any found rows returned. Pass -1 to use all partitions. compute : bool, optional Whether to compute the result, default is True. >>> b = from_sequence(range(10)) >>> b.take(3) # doctest: +SKIP (0, 1, 2) """ if npartitions <= -1: npartitions = self.npartitions if npartitions > self.npartitions: raise ValueError("only {} partitions, take " "received {}".format(self.npartitions, npartitions)) token = tokenize(self, k, npartitions) name = 'take-' + token if npartitions > 1: name_p = 'take-partial-' + token dsk = {} for i in range(npartitions): dsk[(name_p, i)] = (list, (take, k, (self.name, i))) concat = (toolz.concat, ([(name_p, i) for i in range(npartitions)])) dsk[(name, 0)] = (safe_take, k, concat) else: dsk = {(name, 0): (safe_take, k, (self.name, 0))} b = Bag(merge(self.dask, dsk), name, 1) if compute: return tuple(b.compute()) else: return b def flatten(self): """ Concatenate nested lists into one long list. >>> b = from_sequence([[1], [2, 3]]) >>> list(b) [[1], [2, 3]] >>> list(b.flatten()) [1, 2, 3] """ name = 'flatten-' + tokenize(self) dsk = dict(((name, i), (list, (toolz.concat, (self.name, i)))) for i in range(self.npartitions)) return type(self)(merge(self.dask, dsk), name, self.npartitions) def __iter__(self): return iter(self.compute()) def groupby(self, grouper, method=None, npartitions=None, blocksize=2**20, max_branch=None): """ Group collection by key function This requires a full dataset read, serialization and shuffle. This is expensive. If possible you should use ``foldby``. Parameters ---------- grouper: function Function on which to group elements method: str Either 'disk' for an on-disk shuffle or 'tasks' to use the task scheduling framework. 
Use 'disk' if you are on a single machine and 'tasks' if you are on a distributed cluster. npartitions: int If using the disk-based shuffle, the number of output partitions blocksize: int If using the disk-based shuffle, the size of shuffle blocks (bytes) max_branch: int If using the task-based shuffle, the amount of splitting each partition undergoes. Increase this for fewer copies but more scheduler overhead. Examples -------- >>> b = from_sequence(range(10)) >>> iseven = lambda x: x % 2 == 0 >>> dict(b.groupby(iseven)) # doctest: +SKIP {True: [0, 2, 4, 6, 8], False: [1, 3, 5, 7, 9]} See Also -------- Bag.foldby """ if method is None: get = _globals.get('get') if (isinstance(get, types.MethodType) and 'distributed' in get.__func__.__module__): method = 'tasks' else: method = 'disk' if method == 'disk': return groupby_disk(self, grouper, npartitions=npartitions, blocksize=blocksize) elif method == 'tasks': return groupby_tasks(self, grouper, max_branch=max_branch) else: msg = "Shuffle method must be 'disk' or 'tasks'" raise NotImplementedError(msg) def to_dataframe(self, meta=None, columns=None): """ Create Dask Dataframe from a Dask Bag. Bag should contain tuples, dict records, or scalars. Index will not be particularly meaningful. Use ``reindex`` afterwards if necessary. Parameters ---------- meta : pd.DataFrame, dict, iterable, optional An empty ``pd.DataFrame`` that matches the dtypes and column names of the output. This metadata is necessary for many algorithms in dask dataframe to work. For ease of use, some alternative inputs are also available. Instead of a ``DataFrame``, a ``dict`` of ``{name: dtype}`` or iterable of ``(name, dtype)`` can be provided. If not provided or a list, a single element from the first partition will be computed, triggering a potentially expensive call to ``compute``. This may lead to unexpected results, so providing ``meta`` is recommended. For more information, see ``dask.dataframe.utils.make_meta``. columns : sequence, optional Column names to use. If the passed data do not have names associated with them, this argument provides names for the columns. Otherwise this argument indicates the order of the columns in the result (any names not found in the data will become all-NA columns). Note that if ``meta`` is provided, column names will be taken from there and this parameter is invalid. Examples -------- >>> import dask.bag as db >>> b = db.from_sequence([{'name': 'Alice', 'balance': 100}, ... {'name': 'Bob', 'balance': 200}, ... {'name': 'Charlie', 'balance': 300}], ... npartitions=2) >>> df = b.to_dataframe() >>> df.compute() balance name 0 100 Alice 1 200 Bob 0 300 Charlie """ import pandas as pd import dask.dataframe as dd if meta is None: if isinstance(columns, pd.DataFrame): warn("Passing metadata to `columns` is deprecated. 
Please " "use the `meta` keyword instead.") meta = columns else: head = self.take(1)[0] meta = pd.DataFrame([head], columns=columns) elif columns is not None: raise ValueError("Can't specify both `meta` and `columns`") else: meta = dd.utils.make_meta(meta) # Serializing the columns and dtypes is much smaller than serializing # the empty frame cols = list(meta.columns) dtypes = meta.dtypes.to_dict() name = 'to_dataframe-' + tokenize(self, cols, dtypes) dsk = self.__dask_optimize__(self.dask, self.__dask_keys__()) dsk.update({(name, i): (to_dataframe, (self.name, i), cols, dtypes) for i in range(self.npartitions)}) divisions = [None] * (self.npartitions + 1) return dd.DataFrame(dsk, name, meta, divisions) def to_delayed(self): """ Convert bag to list of dask Delayed. Returns list of Delayed, one per partition. """ from dask.delayed import Delayed keys = self.__dask_keys__() dsk = self.__dask_optimize__(self.__dask_graph__(), keys) return [Delayed(k, dsk) for k in keys] def repartition(self, npartitions): """ Coalesce bag into fewer partitions. Examples -------- >>> b.repartition(5) # set to have 5 partitions # doctest: +SKIP """ new_name = 'repartition-%d-%s' % (npartitions, tokenize(self, npartitions)) if npartitions == self.npartitions: return self elif npartitions < self.npartitions: ratio = self.npartitions / npartitions new_partitions_boundaries = [int(old_partition_index * ratio) for old_partition_index in range(npartitions + 1)] dsk = {} for new_partition_index in range(npartitions): value = (list, (toolz.concat, [(self.name, old_partition_index) for old_partition_index in range(new_partitions_boundaries[new_partition_index], new_partitions_boundaries[new_partition_index + 1])])) dsk[new_name, new_partition_index] = value else: # npartitions > self.npartitions ratio = npartitions / self.npartitions split_name = 'split-%s' % tokenize(self, npartitions) dsk = {} last = 0 j = 0 for i in range(self.npartitions): new = last + ratio if i == self.npartitions - 1: k = npartitions - j else: k = int(new - last) dsk[(split_name, i)] = (split, (self.name, i), k) for jj in range(k): dsk[(new_name, j)] = (getitem, (split_name, i), jj) j += 1 last = new return Bag(dsk=merge(self.dask, dsk), name=new_name, npartitions=npartitions) def accumulate(self, binop, initial=no_default): """ Repeatedly apply binary function to a sequence, accumulating results. This assumes that the bag is ordered. While this is typically the case not all Dask.bag functions preserve this property. Examples -------- >>> from operator import add >>> b = from_sequence([1, 2, 3, 4, 5], npartitions=2) >>> b.accumulate(add).compute() # doctest: +SKIP [1, 3, 6, 10, 15] Accumulate also takes an optional argument that will be used as the first value. 
>>> b.accumulate(add, initial=-1) # doctest: +SKIP [-1, 0, 2, 5, 9, 14] """ if not _implement_accumulate: raise NotImplementedError("accumulate requires `toolz` > 0.7.4" " or `cytoolz` > 0.7.3.") token = tokenize(self, binop, initial) binop_name = funcname(binop) a = '%s-part-%s' % (binop_name, token) b = '%s-first-%s' % (binop_name, token) c = '%s-second-%s' % (binop_name, token) dsk = {(a, 0): (accumulate_part, binop, (self.name, 0), initial, True), (b, 0): (first, (a, 0)), (c, 0): (second, (a, 0))} for i in range(1, self.npartitions): dsk[(a, i)] = (accumulate_part, binop, (self.name, i), (c, i - 1)) dsk[(b, i)] = (first, (a, i)) dsk[(c, i)] = (second, (a, i)) return Bag(merge(self.dask, dsk), b, self.npartitions) def accumulate_part(binop, seq, initial, is_first=False): if initial == no_default: res = list(accumulate(binop, seq)) else: res = list(accumulate(binop, seq, initial=initial)) if is_first: return res, res[-1] if res else [], initial return res[1:], res[-1] def partition(grouper, sequence, npartitions, p, nelements=2**20): """ Partition a bag along a grouper, store partitions on disk. """ for block in partition_all(nelements, sequence): d = groupby(grouper, block) d2 = defaultdict(list) for k, v in d.items(): d2[abs(hash(k)) % npartitions].extend(v) p.append(d2, fsync=True) return p def collect(grouper, group, p, barrier_token): """ Collect partitions from disk and yield k,v group pairs. """ d = groupby(grouper, p.get(group, lock=False)) return list(d.items()) def from_sequence(seq, partition_size=None, npartitions=None): """ Create a dask Bag from Python sequence. This sequence should be relatively small in memory. Dask Bag works best when it handles loading your data itself. Commonly we load a sequence of filenames into a Bag and then use ``.map`` to open them. Parameters ---------- seq: Iterable A sequence of elements to put into the dask partition_size: int (optional) The length of each partition npartitions: int (optional) The number of desired partitions It is best to provide either ``partition_size`` or ``npartitions`` (though not both.) Examples -------- >>> b = from_sequence(['Alice', 'Bob', 'Chuck'], partition_size=2) See Also -------- read_text: Create bag from text files """ seq = list(seq) if npartitions and not partition_size: partition_size = int(math.ceil(len(seq) / npartitions)) if npartitions is None and partition_size is None: if len(seq) < 100: partition_size = 1 else: partition_size = int(len(seq) / 100) parts = list(partition_all(partition_size, seq)) name = 'from_sequence-' + tokenize(seq, partition_size) d = dict(((name, i), list(part)) for i, part in enumerate(parts)) return Bag(d, name, len(d)) def from_url(urls): """Create a dask Bag from a url. Examples -------- >>> a = from_url('http://raw.githubusercontent.com/dask/dask/master/README.rst') # doctest: +SKIP >>> a.npartitions # doctest: +SKIP 1 >>> a.take(8) # doctest: +SKIP (b'Dask\\n', b'====\\n', b'\\n', b'|Build Status| |Coverage| |Doc Status| |Gitter| |Version Status|\\n', b'\\n', b'Dask is a flexible parallel computing library for analytics. 
See\\n', b'documentation_ for more information.\\n', b'\\n') >>> b = from_url(['http://github.com', 'http://google.com']) # doctest: +SKIP >>> b.npartitions # doctest: +SKIP 2 """ if isinstance(urls, str): urls = [urls] name = 'from_url-' + uuid.uuid4().hex dsk = {} for i, u in enumerate(urls): dsk[(name, i)] = (list, (urlopen, u)) return Bag(dsk, name, len(urls)) def dictitems(d): """ A pickleable version of dict.items >>> dictitems({'x': 1}) [('x', 1)] """ return list(d.items()) def concat(bags): """ Concatenate many bags together, unioning all elements. >>> import dask.bag as db >>> a = db.from_sequence([1, 2, 3]) >>> b = db.from_sequence([4, 5, 6]) >>> c = db.concat([a, b]) >>> list(c) [1, 2, 3, 4, 5, 6] """ name = 'concat-' + tokenize(*bags) counter = itertools.count(0) dsk = {(name, next(counter)): key for bag in bags for key in bag.__dask_keys__()} return Bag(merge(dsk, *[b.dask for b in bags]), name, len(dsk)) def reify(seq): if isinstance(seq, Iterator): seq = list(seq) if seq and isinstance(seq[0], Iterator): seq = list(map(list, seq)) return seq def from_delayed(values): """ Create bag from many dask Delayed objects. These objects will become the partitions of the resulting Bag. They should evaluate to a ``list`` or some other concrete sequence. Parameters ---------- values: list of delayed values An iterable of dask Delayed objects. Each evaluating to a list. Returns ------- Bag Examples -------- >>> x, y, z = [delayed(load_sequence_from_file)(fn) ... for fn in filenames] # doctest: +SKIP >>> b = from_delayed([x, y, z]) # doctest: +SKIP See also -------- dask.delayed """ from dask.delayed import Delayed, delayed if isinstance(values, Delayed): values = [values] values = [delayed(v) if not isinstance(v, Delayed) and hasattr(v, 'key') else v for v in values] dsk = merge(ensure_dict(v.dask) for v in values) name = 'bag-from-delayed-' + tokenize(*values) names = [(name, i) for i in range(len(values))] values = [(reify, v.key) for v in values] dsk2 = dict(zip(names, values)) return Bag(merge(dsk, dsk2), name, len(values)) def merge_distinct(seqs): return set().union(*seqs) def merge_frequencies(seqs): if isinstance(seqs, Iterable): seqs = list(seqs) if not seqs: return {} first, rest = seqs[0], seqs[1:] if not rest: return first out = defaultdict(int) out.update(first) for d in rest: for k, v in iteritems(d): out[k] += v return out def bag_range(n, npartitions): """ Numbers from zero to n Examples -------- >>> import dask.bag as db >>> b = db.range(5, npartitions=2) >>> list(b) [0, 1, 2, 3, 4] """ size = n // npartitions name = 'range-%d-npartitions-%d' % (n, npartitions) ijs = list(enumerate(take(npartitions, range(0, n, size)))) dsk = dict(((name, i), (reify, (range, j, min(j + size, n)))) for i, j in ijs) if n % npartitions != 0: i, j = ijs[-1] dsk[(name, i)] = (reify, (range, j, n)) return Bag(dsk, name, npartitions) def bag_zip(*bags): """ Partition-wise bag zip All passed bags must have the same number of partitions. NOTE: corresponding partitions should have the same length; if they do not, the "extra" elements from the longer partition(s) will be dropped. If you have this case chances are that what you really need is a data alignment mechanism like pandas's, and not a missing value filler like zip_longest. 
Examples -------- Correct usage: >>> import dask.bag as db >>> evens = db.from_sequence(range(0, 10, 2), partition_size=4) >>> odds = db.from_sequence(range(1, 10, 2), partition_size=4) >>> pairs = db.zip(evens, odds) >>> list(pairs) [(0, 1), (2, 3), (4, 5), (6, 7), (8, 9)] Incorrect usage: >>> numbers = db.range(20) # doctest: +SKIP >>> fizz = numbers.filter(lambda n: n % 3 == 0) # doctest: +SKIP >>> buzz = numbers.filter(lambda n: n % 5 == 0) # doctest: +SKIP >>> fizzbuzz = db.zip(fizz, buzz) # doctest: +SKIP >>> list(fizzbuzzz) # doctest: +SKIP [(0, 0), (3, 5), (6, 10), (9, 15), (12, 20), (15, 25), (18, 30)] When what you really wanted was more along the lines of the following: >>> list(fizzbuzzz) # doctest: +SKIP [(0, 0), (3, None), (None, 5), (6, None), (None 10), (9, None), (12, None), (15, 15), (18, None), (None, 20), (None, 25), (None, 30)] """ npartitions = bags[0].npartitions assert all(bag.npartitions == npartitions for bag in bags) # TODO: do more checks name = 'zip-' + tokenize(*bags) dsk = dict( ((name, i), (reify, (zip,) + tuple((bag.name, i) for bag in bags))) for i in range(npartitions)) bags_dsk = merge(*(bag.dask for bag in bags)) return Bag(merge(bags_dsk, dsk), name, npartitions) def map_chunk(f, args, bag_kwargs, kwargs): if kwargs: f = partial(f, **kwargs) args = [iter(a) for a in args] iters = list(args) if bag_kwargs: keys = list(bag_kwargs) kw_val_iters = [iter(v) for v in bag_kwargs.values()] iters.extend(kw_val_iters) kw_iter = (dict(zip(keys, k)) for k in zip(*kw_val_iters)) if args: for a, k in zip(zip(*args), kw_iter): yield f(*a, **k) else: for k in kw_iter: yield f(**k) else: for a in zip(*args): yield f(*a) # Check that all iterators are fully exhausted if len(iters) > 1: for i in iters: if isinstance(i, itertools.repeat): continue try: next(i) except StopIteration: pass else: msg = ("map called with multiple bags that aren't identically " "partitioned. Please ensure that all bag arguments " "have the same partition lengths") raise ValueError(msg) def starmap_chunk(f, x, kwargs): if kwargs: f = partial(f, **kwargs) return itertools.starmap(f, x) def unpack_scalar_dask_kwargs(kwargs): """Extracts dask values from kwargs. Currently only ``dask.bag.Item`` and ``dask.delayed.Delayed`` are supported. Returns a merged dask graph and a task resulting in a keyword dict. """ dsk = {} kwargs2 = {} for k, v in kwargs.items(): if isinstance(v, (Delayed, Item)): dsk.update(ensure_dict(v.dask)) kwargs2[k] = v.key elif is_dask_collection(v): raise NotImplementedError("dask.bag doesn't support kwargs of " "type %s" % type(v).__name__) else: kwargs2[k] = v if dsk: kwargs = (dict, (zip, list(kwargs2), list(kwargs2.values()))) return dsk, kwargs def bag_map(func, *args, **kwargs): """Apply a function elementwise across one or more bags. Note that all ``Bag`` arguments must be partitioned identically. Parameters ---------- func : callable *args, **kwargs : Bag, Item, Delayed, or object Arguments and keyword arguments to pass to ``func``. Non-Bag args/kwargs are broadcasted across all calls to ``func``. Notes ----- For calls with multiple `Bag` arguments, corresponding partitions should have the same length; if they do not, the call will error at compute time. 
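    For instance, bags with the same number of partitions but mismatched
    partition lengths fail only when computed (illustrative sketch):

    >>> import dask.bag as db                           # doctest: +SKIP
    >>> from operator import add                        # doctest: +SKIP
    >>> b4 = db.from_sequence(range(4), npartitions=2)  # doctest: +SKIP
    >>> b6 = db.from_sequence(range(6), npartitions=2)  # doctest: +SKIP
    >>> db.map(add, b4, b6).compute()                   # doctest: +SKIP
    Traceback (most recent call last):
        ...
    ValueError: map called with multiple bags that aren't identically partitioned. ...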
Examples -------- >>> import dask.bag as db >>> b = db.from_sequence(range(5), npartitions=2) >>> b2 = db.from_sequence(range(5, 10), npartitions=2) Apply a function to all elements in a bag: >>> db.map(lambda x: x + 1, b).compute() [1, 2, 3, 4, 5] Apply a function with arguments from multiple bags: >>> from operator import add >>> db.map(add, b, b2).compute() [5, 7, 9, 11, 13] Non-bag arguments are broadcast across all calls to the mapped function: >>> db.map(add, b, 1).compute() [1, 2, 3, 4, 5] Keyword arguments are also supported, and have the same semantics as regular arguments: >>> def myadd(x, y=0): ... return x + y >>> db.map(myadd, b, y=b2).compute() [5, 7, 9, 11, 13] >>> db.map(myadd, b, y=1).compute() [1, 2, 3, 4, 5] Both arguments and keyword arguments can also be instances of ``dask.bag.Item`` or ``dask.delayed.Delayed``. Here we'll add the max value in the bag to each element: >>> db.map(myadd, b, b.max()).compute() [4, 5, 6, 7, 8] """ name = 'map-%s-%s' % (funcname(func), tokenize(func, args, kwargs)) dsk = {} bags = [] args2 = [] for a in args: if isinstance(a, Bag): bags.append(a) args2.append(a) dsk.update(a.dask) elif isinstance(a, (Item, Delayed)): args2.append((itertools.repeat, a.key)) dsk.update(ensure_dict(a.dask)) else: args2.append((itertools.repeat, a)) bag_kwargs = {} other_kwargs = {} for k, v in kwargs.items(): if isinstance(v, Bag): bag_kwargs[k] = v bags.append(v) dsk.update(v.dask) else: other_kwargs[k] = v kw_dsk, other_kwargs = unpack_scalar_dask_kwargs(other_kwargs) dsk.update(kw_dsk) if not bags: raise ValueError("At least one argument must be a Bag.") npartitions = {b.npartitions for b in bags} if len(npartitions) > 1: raise ValueError("All bags must have the same number of partitions.") npartitions = npartitions.pop() def build_args(n): return [(a.name, n) if isinstance(a, Bag) else a for a in args2] def build_bag_kwargs(n): if not bag_kwargs: return None return (dict, (zip, list(bag_kwargs), [(b.name, n) for b in bag_kwargs.values()])) dsk.update({(name, n): (reify, (map_chunk, func, build_args(n), build_bag_kwargs(n), other_kwargs)) for n in range(npartitions)}) # If all bags are the same type, use that type, otherwise fallback to Bag return_type = set(map(type, bags)) return_type = return_type.pop() if len(return_type) == 1 else Bag return return_type(dsk, name, npartitions) def map_partitions(func, *args, **kwargs): """Apply a function to every partition across one or more bags. Note that all ``Bag`` arguments must be partitioned identically. Parameters ---------- func : callable *args, **kwargs : Bag, Item, Delayed, or object Arguments and keyword arguments to pass to ``func``. Examples -------- >>> import dask.bag as db >>> b = db.from_sequence(range(1, 101), npartitions=10) >>> def div(nums, den=1): ... return [num / den for num in nums] Using a python object: >>> hi = b.max().compute() >>> hi 100 >>> b.map_partitions(div, den=hi).take(5) (0.01, 0.02, 0.03, 0.04, 0.05) Using an ``Item``: >>> b.map_partitions(div, den=b.max()).take(5) (0.01, 0.02, 0.03, 0.04, 0.05) Note that while both versions give the same output, the second forms a single graph, and then computes everything at once, and in some cases may be more efficient. 
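    As a further illustration, the mapped function receives each partition as
    a sequence, so per-partition aggregates are possible (output assumes the
    ten equal-sized partitions created above):

    >>> b.map_partitions(len).compute()  # doctest: +SKIP
    [10, 10, 10, 10, 10, 10, 10, 10, 10, 10]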
""" name = 'map-partitions-%s-%s' % (funcname(func), tokenize(func, args, kwargs)) # Extract bag arguments, build initial graph bags = [] dsk = {} for vals in [args, kwargs.values()]: for a in vals: if isinstance(a, (Bag, Item, Delayed)): dsk.update(ensure_dict(a.dask)) if isinstance(a, Bag): bags.append(a) elif is_dask_collection(a): raise NotImplementedError("dask.bag doesn't support args of " "type %s" % type(a).__name__) if not bags: raise ValueError("At least one argument must be a Bag.") npartitions = {b.npartitions for b in bags} if len(npartitions) > 1: raise ValueError("All bags must have the same number of partitions.") npartitions = npartitions.pop() def build_task(n): args2 = [(a.name, n) if isinstance(a, Bag) else a.key if isinstance(a, (Item, Delayed)) else a for a in args] if any(isinstance(v, (Bag, Item, Delayed)) for v in kwargs.values()): vals = [(v.name, n) if isinstance(v, Bag) else v.key if isinstance(v, (Item, Delayed)) else v for v in kwargs.values()] kwargs2 = (dict, (zip, list(kwargs), vals)) else: kwargs2 = kwargs if kwargs2 or len(args2) > 1: return (apply, func, args2, kwargs2) return (func, args2[0]) dsk.update({(name, n): build_task(n) for n in range(npartitions)}) # If all bags are the same type, use that type, otherwise fallback to Bag return_type = set(map(type, bags)) return_type = return_type.pop() if len(return_type) == 1 else Bag return return_type(dsk, name, npartitions) def _reduce(binop, sequence, initial=no_default): if initial is not no_default: return reduce(binop, sequence, initial) else: return reduce(binop, sequence) def make_group(k, stage): def h(x): return x[0] // k ** stage % k return h def groupby_tasks(b, grouper, hash=hash, max_branch=32): max_branch = max_branch or 32 n = b.npartitions stages = int(math.ceil(math.log(n) / math.log(max_branch))) if stages > 1: k = int(math.ceil(n ** (1 / stages))) else: k = n groups = [] splits = [] joins = [] inputs = [tuple(digit(i, j, k) for j in range(stages)) for i in range(k**stages)] b2 = b.map(lambda x: (hash(grouper(x)), x)) token = tokenize(b, grouper, hash, max_branch) start = dict((('shuffle-join-' + token, 0, inp), (b2.name, i) if i < b.npartitions else []) for i, inp in enumerate(inputs)) for stage in range(1, stages + 1): group = dict((('shuffle-group-' + token, stage, inp), (groupby, (make_group, k, stage - 1), ('shuffle-join-' + token, stage - 1, inp))) for inp in inputs) split = dict((('shuffle-split-' + token, stage, i, inp), (dict.get, ('shuffle-group-' + token, stage, inp), i, {})) for i in range(k) for inp in inputs) join = dict((('shuffle-join-' + token, stage, inp), (list, (toolz.concat, [('shuffle-split-' + token, stage, inp[stage - 1], insert(inp, stage - 1, j)) for j in range(k)]))) for inp in inputs) groups.append(group) splits.append(split) joins.append(join) end = dict((('shuffle-' + token, i), (list, (dict.items, (groupby, grouper, (pluck, 1, j))))) for i, j in enumerate(join)) dsk = merge(b2.dask, start, end, *(groups + splits + joins)) return type(b)(dsk, 'shuffle-' + token, len(inputs)) def groupby_disk(b, grouper, npartitions=None, blocksize=2**20): if npartitions is None: npartitions = b.npartitions token = tokenize(b, grouper, npartitions, blocksize) import partd p = ('partd-' + token,) dirname = _globals.get('temporary_directory', None) if dirname: file = (apply, partd.File, (), {'dir': dirname}) else: file = (partd.File,) try: dsk1 = {p: (partd.Python, (partd.Snappy, file))} except AttributeError: dsk1 = {p: (partd.Python, file)} # Partition data on disk name = 
'groupby-part-{0}-{1}'.format(funcname(grouper), token) dsk2 = dict(((name, i), (partition, grouper, (b.name, i), npartitions, p, blocksize)) for i in range(b.npartitions)) # Barrier barrier_token = 'groupby-barrier-' + token def barrier(args): return 0 dsk3 = {barrier_token: (barrier, list(dsk2))} # Collect groups name = 'groupby-collect-' + token dsk4 = dict(((name, i), (collect, grouper, i, p, barrier_token)) for i in range(npartitions)) return type(b)(merge(b.dask, dsk1, dsk2, dsk3, dsk4), name, npartitions) def empty_safe_apply(func, part, is_last): if isinstance(part, Iterator): try: _, part = peek(part) except StopIteration: if not is_last: return no_result return func(part) elif not is_last and len(part) == 0: return no_result else: return func(part) def empty_safe_aggregate(func, parts, is_last): parts2 = (p for p in parts if p is not no_result) return empty_safe_apply(func, parts2, is_last) def safe_take(n, b): r = list(take(n, b)) if len(r) != n: warn("Insufficient elements for `take`. {0} elements requested, " "only {1} elements available. Try passing larger `npartitions` " "to `take`.".format(n, len(r))) return r def random_sample(x, state_data, prob): """Filter elements of `x` by a probability `prob`. Parameters ---------- x : iterable state_data : tuple A tuple that can be passed to ``random.Random``. prob : float A float between 0 and 1, representing the probability that each element will be yielded. """ random_state = Random(state_data) for i in x: if random_state.random() < prob: yield i def random_state_data_python(n, random_state=None): """Return a list of tuples that can initialize ``random.Random``. Parameters ---------- n : int Number of tuples to return. random_state : int or ``random.Random``, optional If an int, is used to seed a new ``random.Random``. """ if not isinstance(random_state, Random): random_state = Random(random_state) maxuint32 = 1 << 32 return [tuple(random_state.randint(0, maxuint32) for i in range(624)) for i in range(n)] def split(seq, n): """ Split apart a sequence into n roughly equal pieces.
>>> split(range(10), 3) [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]] """ if not isinstance(seq, (list, tuple)): seq = list(seq) part = len(seq) / n L = [seq[int(part * i): int(part * (i + 1))] for i in range(n - 1)] L.append(seq[int(part * (n - 1)):]) return L def to_dataframe(seq, columns, dtypes): import pandas as pd seq = reify(seq) # pd.DataFrame expects lists, only copy if necessary if not isinstance(seq, list): seq = list(seq) res = pd.DataFrame(seq, columns=list(columns)) return res.astype(dtypes, copy=False) dask-0.16.0/dask/bag/tests/000077500000000000000000000000001320364734500153445ustar00rootroot00000000000000dask-0.16.0/dask/bag/tests/__init__.py000066400000000000000000000000001320364734500174430ustar00rootroot00000000000000dask-0.16.0/dask/bag/tests/test_bag.py000066400000000000000000001162421320364734500175140ustar00rootroot00000000000000# coding=utf-8 from __future__ import absolute_import, division, print_function import pytest import math import os import random import sys from collections import Iterator from itertools import repeat import partd from toolz import merge, join, filter, identity, valmap, groupby, pluck import dask import dask.bag as db from dask.bag.core import (Bag, lazify, lazify_task, map, collect, reduceby, reify, partition, inline_singleton_lists, optimize, from_delayed) from dask.compatibility import BZ2File, GzipFile, PY2 from dask.utils import filetexts, tmpfile, tmpdir from dask.utils_test import inc, add dsk = {('x', 0): (range, 5), ('x', 1): (range, 5), ('x', 2): (range, 5)} L = list(range(5)) * 3 b = Bag(dsk, 'x', 3) def iseven(x): return x % 2 == 0 def isodd(x): return x % 2 == 1 def test_Bag(): assert b.name == 'x' assert b.npartitions == 3 def test_keys(): assert b.__dask_keys__() == sorted(dsk.keys()) def test_bag_map(): b = db.from_sequence(range(100), npartitions=10) b2 = db.from_sequence(range(100, 200), npartitions=10) x = b.compute() x2 = b2.compute() def myadd(a=1, b=2, c=3): return a + b + c assert db.map(myadd, b).compute() == list(map(myadd, x)) assert db.map(myadd, a=b).compute() == list(map(myadd, x)) assert db.map(myadd, b, b2).compute() == list(map(myadd, x, x2)) assert db.map(myadd, b, 10).compute() == [myadd(i, 10) for i in x] assert db.map(myadd, 10, b=b).compute() == [myadd(10, b=i) for i in x] sol = [myadd(i, b=j, c=100) for (i, j) in zip(x, x2)] assert db.map(myadd, b, b=b2, c=100).compute() == sol sol = [myadd(i, c=100) for (i, j) in zip(x, x2)] assert db.map(myadd, b, c=100).compute() == sol x_sum = sum(x) sol = [myadd(x_sum, b=i, c=100) for i in x2] assert db.map(myadd, b.sum(), b=b2, c=100).compute() == sol sol = [myadd(i, b=x_sum, c=100) for i in x2] assert db.map(myadd, b2, b.sum(), c=100).compute() == sol sol = [myadd(a=100, b=x_sum, c=i) for i in x2] assert db.map(myadd, a=100, b=b.sum(), c=b2).compute() == sol a = dask.delayed(10) assert db.map(myadd, b, a).compute() == [myadd(i, 10) for i in x] assert db.map(myadd, b, b=a).compute() == [myadd(i, b=10) for i in x] # Mispatched npartitions fewer_parts = db.from_sequence(range(100), npartitions=5) with pytest.raises(ValueError): db.map(myadd, b, fewer_parts) # No bags with pytest.raises(ValueError): db.map(myadd, b.sum(), 1, 2) # Unequal partitioning unequal = db.from_sequence(range(110), npartitions=10) with pytest.raises(ValueError): db.map(myadd, b, unequal, c=b2).compute() with pytest.raises(ValueError): db.map(myadd, b, b=unequal, c=b2).compute() def test_map_method(): b = db.from_sequence(range(100), npartitions=10) b2 = db.from_sequence(range(100, 200), 
npartitions=10) x = b.compute() x2 = b2.compute() def myadd(a, b=2, c=3): return a + b + c assert b.map(myadd).compute() == list(map(myadd, x)) assert b.map(myadd, b2).compute() == list(map(myadd, x, x2)) assert b.map(myadd, 10).compute() == [myadd(i, 10) for i in x] assert b.map(myadd, b=10).compute() == [myadd(i, b=10) for i in x] assert (b.map(myadd, b2, c=10).compute() == [myadd(i, j, 10) for (i, j) in zip(x, x2)]) x_sum = sum(x) assert (b.map(myadd, b.sum(), c=10).compute() == [myadd(i, x_sum, 10) for i in x]) # check that map works with multiarg functions. Can be removed after # deprecated behavior is removed assert b.map(add, b2).compute() == list(map(add, x, x2)) # check that map works with vararg functions. Can be removed after # deprecated behavior is removed def vararg_inc(*args): return inc(*args) assert b.map(vararg_inc).compute(get=dask.get) == list(map(inc, x)) def test_starmap(): data = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10)] b = db.from_sequence(data, npartitions=2) def myadd(a, b, c=0): return a + b + c assert b.starmap(myadd).compute() == [myadd(*a) for a in data] assert b.starmap(myadd, c=10).compute() == [myadd(*a, c=10) for a in data] max_second = b.pluck(1).max() assert (b.starmap(myadd, c=max_second).compute() == [myadd(*a, c=max_second.compute()) for a in data]) c = dask.delayed(10) assert b.starmap(myadd, c=c).compute() == [myadd(*a, c=10) for a in data] def test_filter(): c = b.filter(iseven) expected = merge(dsk, dict(((c.name, i), (reify, (filter, iseven, (b.name, i)))) for i in range(b.npartitions))) assert c.dask == expected assert c.name == b.filter(iseven).name def test_remove(): f = lambda x: x % 2 == 0 c = b.remove(f) assert list(c) == [1, 3] * 3 assert c.name == b.remove(f).name def test_iter(): assert sorted(list(b)) == sorted(L) assert sorted(list(b.map(inc))) == sorted(list(range(1, 6)) * 3) @pytest.mark.parametrize('func', [str, repr]) def test_repr(func): assert str(b.npartitions) in func(b) assert b.name[:5] in func(b) def test_pluck(): d = {('x', 0): [(1, 10), (2, 20)], ('x', 1): [(3, 30), (4, 40)]} b = Bag(d, 'x', 2) assert set(b.pluck(0)) == set([1, 2, 3, 4]) assert set(b.pluck(1)) == set([10, 20, 30, 40]) assert set(b.pluck([1, 0])) == set([(10, 1), (20, 2), (30, 3), (40, 4)]) assert b.pluck([1, 0]).name == b.pluck([1, 0]).name def test_pluck_with_default(): b = db.from_sequence(['Hello', '', 'World']) pytest.raises(IndexError, lambda: list(b.pluck(0))) assert list(b.pluck(0, None)) == ['H', None, 'W'] assert b.pluck(0, None).name == b.pluck(0, None).name assert b.pluck(0).name != b.pluck(0, None).name def test_unzip(): b = db.from_sequence(range(100)).map(lambda x: (x, x + 1, x + 2)) one, two, three = b.unzip(3) assert list(one) == list(range(100)) assert list(three) == [i + 2 for i in range(100)] assert one.name == b.unzip(3)[0].name assert one.name != two.name def test_fold(): c = b.fold(add) assert c.compute() == sum(L) assert c.key == b.fold(add).key c2 = b.fold(add, initial=10) assert c2.key != c.key assert c2.compute() == sum(L) + 10 * b.npartitions assert c2.key == b.fold(add, initial=10).key c = db.from_sequence(range(5), npartitions=3) def binop(acc, x): acc = acc.copy() acc.add(x) return acc d = c.fold(binop, set.union, initial=set()) assert d.compute() == set(c) assert d.key == c.fold(binop, set.union, initial=set()).key d = db.from_sequence('hello') assert set(d.fold(lambda a, b: ''.join([a, b]), initial='').compute()) == set('hello') e = db.from_sequence([[1], [2], [3]], npartitions=2) with dask.set_options(get=dask.get): assert 
set(e.fold(add, initial=[]).compute()) == set([1, 2, 3]) def test_distinct(): assert sorted(b.distinct()) == [0, 1, 2, 3, 4] assert b.distinct().name == b.distinct().name assert 'distinct' in b.distinct().name assert b.distinct().count().compute() == 5 bag = db.from_sequence([0] * 50, npartitions=50) assert bag.filter(None).distinct().compute() == [] def test_frequencies(): c = b.frequencies() assert dict(c) == {0: 3, 1: 3, 2: 3, 3: 3, 4: 3} c2 = b.frequencies(split_every=2) assert dict(c2) == {0: 3, 1: 3, 2: 3, 3: 3, 4: 3} assert c.name == b.frequencies().name assert c.name != c2.name assert c2.name == b.frequencies(split_every=2).name # test bag with empty partitions b2 = db.from_sequence(range(20), partition_size=2) b2 = b2.filter(lambda x: x < 10) d = b2.frequencies() assert dict(d) == dict(zip(range(10), [1] * 10)) bag = db.from_sequence([0, 0, 0, 0], npartitions=4) bag2 = bag.filter(None).frequencies(split_every=2) assert dict(bag2.compute(get=dask.get)) == {} def test_topk(): assert list(b.topk(4)) == [4, 4, 4, 3] c = b.topk(4, key=lambda x: -x) assert list(c) == [0, 0, 0, 1] c2 = b.topk(4, key=lambda x: -x, split_every=2) assert list(c2) == [0, 0, 0, 1] assert c.name != c2.name assert b.topk(4).name == b.topk(4).name @pytest.mark.parametrize('npartitions', [1, 2]) def test_topk_with_non_callable_key(npartitions): b = db.from_sequence([(1, 10), (2, 9), (3, 8)], npartitions=npartitions) assert list(b.topk(2, key=1)) == [(1, 10), (2, 9)] assert list(b.topk(2, key=0)) == [(3, 8), (2, 9)] assert b.topk(2, key=1).name == b.topk(2, key=1).name def test_topk_with_multiarg_lambda(): b = db.from_sequence([(1, 10), (2, 9), (3, 8)], npartitions=2) assert list(b.topk(2, key=lambda a, b: b)) == [(1, 10), (2, 9)] def test_lambdas(): assert list(b.map(lambda x: x + 1)) == list(b.map(inc)) def test_reductions(): assert int(b.count()) == 15 assert int(b.sum()) == 30 assert int(b.max()) == 4 assert int(b.min()) == 0 assert b.any().compute() is True assert b.all().compute() is False assert b.all().key == b.all().key assert b.all().key != b.any().key def test_reduction_names(): assert b.sum().name.startswith('sum') assert b.reduction(sum, sum).name.startswith('sum') assert any(isinstance(k, str) and k.startswith('max') for k in b.reduction(sum, max).dask) assert b.reduction(sum, sum, name='foo').name.startswith('foo') def test_tree_reductions(): b = db.from_sequence(range(12)) c = b.reduction(sum, sum, split_every=2) d = b.reduction(sum, sum, split_every=6) e = b.reduction(sum, sum, split_every=5) assert c.compute() == d.compute() == e.compute() assert len(c.dask) > len(d.dask) c = b.sum(split_every=2) d = b.sum(split_every=5) assert c.compute() == d.compute() assert len(c.dask) > len(d.dask) assert c.key != d.key assert c.key == b.sum(split_every=2).key assert c.key != b.sum().key @pytest.mark.parametrize('npartitions', [1, 3, 4]) def test_aggregation(npartitions): L = list(range(15)) b = db.range(15, npartitions=npartitions) assert b.mean().compute(get=dask.get) == sum(L) / len(L) assert b.sum().compute(get=dask.get) == sum(L) assert b.count().compute(get=dask.get) == len(L) @pytest.mark.parametrize('npartitions', [1, 10]) def test_non_splittable_reductions(npartitions): np = pytest.importorskip('numpy') data = list(range(100)) c = db.from_sequence(data, npartitions=npartitions) assert c.mean().compute() == np.mean(data) assert c.std().compute(get=dask.get) == np.std(data) def test_std(): assert b.std().compute(get=dask.get) == math.sqrt(2.0) assert float(b.std()) == math.sqrt(2.0) def test_var(): 
assert b.var().compute(get=dask.get) == 2.0 assert float(b.var()) == 2.0 def test_join(): c = b.join([1, 2, 3], on_self=isodd, on_other=iseven) assert list(c) == list(join(iseven, [1, 2, 3], isodd, list(b))) assert (list(b.join([1, 2, 3], isodd)) == list(join(isodd, [1, 2, 3], isodd, list(b)))) assert c.name == b.join([1, 2, 3], on_self=isodd, on_other=iseven).name def test_foldby(): c = b.foldby(iseven, add, 0, add, 0) assert (reduceby, iseven, add, (b.name, 0), 0) in list(c.dask.values()) assert set(c) == set(reduceby(iseven, lambda acc, x: acc + x, L, 0).items()) assert c.name == b.foldby(iseven, add, 0, add, 0).name c = b.foldby(iseven, lambda acc, x: acc + x) assert set(c) == set(reduceby(iseven, lambda acc, x: acc + x, L, 0).items()) def test_foldby_tree_reduction(): dsk = list() for n in [1, 7, 32]: b = db.from_sequence(range(100), npartitions=n) c = b.foldby(iseven, add) dsk += [c] for m in [False, None, 2, 3]: d = b.foldby(iseven, add, split_every=m) e = b.foldby(iseven, add, 0, split_every=m) f = b.foldby(iseven, add, 0, add, split_every=m) g = b.foldby(iseven, add, 0, add, 0, split_every=m) dsk += [d,e,f,g] results = dask.compute(dsk) first = results[0] assert all([r == first for r in results]) def test_map_partitions(): assert list(b.map_partitions(len)) == [5, 5, 5] assert b.map_partitions(len).name == b.map_partitions(len).name assert b.map_partitions(lambda a: len(a) + 1).name != b.map_partitions(len).name def test_map_partitions_args_kwargs(): x = [random.randint(-100, 100) for i in range(100)] y = [random.randint(-100, 100) for i in range(100)] dx = db.from_sequence(x, npartitions=10) dy = db.from_sequence(y, npartitions=10) def maximum(x, y=0): y = repeat(y) if isinstance(y, int) else y return [max(a, b) for (a, b) in zip(x, y)] sol = maximum(x, y=10) assert db.map_partitions(maximum, dx, y=10).compute() == sol assert dx.map_partitions(maximum, y=10).compute() == sol assert dx.map_partitions(maximum, 10).compute() == sol sol = maximum(x, y) assert db.map_partitions(maximum, dx, dy).compute() == sol assert dx.map_partitions(maximum, y=dy).compute() == sol assert dx.map_partitions(maximum, dy).compute() == sol dy_mean = dy.mean().apply(int) sol = maximum(x, int(sum(y) / len(y))) assert dx.map_partitions(maximum, y=dy_mean).compute() == sol assert dx.map_partitions(maximum, dy_mean).compute() == sol dy_mean = dask.delayed(dy_mean) assert dx.map_partitions(maximum, y=dy_mean).compute() == sol assert dx.map_partitions(maximum, dy_mean).compute() == sol def test_random_sample_size(): """ Number of randomly sampled elements are in the expected range. """ a = db.from_sequence(range(1000), npartitions=5) # we expect a size of approx. 100, but leave large margins to avoid # random failures assert 10 < len(list(a.random_sample(0.1, 42))) < 300 def test_random_sample_prob_range(): """ Specifying probabilities outside the range [0, 1] raises ValueError. """ a = db.from_sequence(range(50), npartitions=5) with pytest.raises(ValueError): a.random_sample(-1) with pytest.raises(ValueError): a.random_sample(1.1) def test_random_sample_repeated_computation(): """ Repeated computation of a defined random sampling operation generates identical results. """ a = db.from_sequence(range(50), npartitions=5) b = a.random_sample(0.2) assert list(b) == list(b) # computation happens here def test_random_sample_different_definitions(): """ Repeatedly defining a random sampling operation yields different results upon computation if no random seed is specified. 
""" a = db.from_sequence(range(50), npartitions=5) assert list(a.random_sample(0.5)) != list(a.random_sample(0.5)) assert a.random_sample(0.5).name != a.random_sample(0.5).name def test_random_sample_random_state(): """ Sampling with fixed random seed generates identical results. """ a = db.from_sequence(range(50), npartitions=5) b = a.random_sample(0.5, 1234) c = a.random_sample(0.5, 1234) assert list(b) == list(c) def test_lazify_task(): task = (sum, (reify, (map, inc, [1, 2, 3]))) assert lazify_task(task) == (sum, (map, inc, [1, 2, 3])) task = (reify, (map, inc, [1, 2, 3])) assert lazify_task(task) == task a = (reify, (map, inc, (reify, (filter, iseven, 'y')))) b = (reify, (map, inc, (filter, iseven, 'y'))) assert lazify_task(a) == b f = lambda x: x def test_lazify(): a = {'x': (reify, (map, inc, (reify, (filter, iseven, 'y')))), 'a': (f, 'x'), 'b': (f, 'x')} b = {'x': (reify, (map, inc, (filter, iseven, 'y'))), 'a': (f, 'x'), 'b': (f, 'x')} assert lazify(a) == b def test_inline_singleton_lists(): inp = {'b': (list, 'a'), 'c': (f, 'b', 1)} out = {'c': (f, (list, 'a'), 1)} assert inline_singleton_lists(inp) == out out = {'c': (f, 'a', 1)} assert optimize(inp, ['c'], rename_fused_keys=False) == out inp = {'b': (list, 'a'), 'c': (f, 'b', 1), 'd': (f, 'b', 2)} assert inline_singleton_lists(inp) == inp inp = {'b': (4, 5)} # doesn't inline constants assert inline_singleton_lists(inp) == inp def test_take(): assert list(b.take(2)) == [0, 1] assert b.take(2) == (0, 1) assert isinstance(b.take(2, compute=False), Bag) def test_take_npartitions(): assert list(b.take(6, npartitions=2)) == [0, 1, 2, 3, 4, 0] assert b.take(6, npartitions=-1) == (0, 1, 2, 3, 4, 0) assert b.take(3, npartitions=-1) == (0, 1, 2) with pytest.raises(ValueError): b.take(1, npartitions=5) @pytest.mark.skipif(sys.version_info[:2] == (3,3), reason="Python3.3 uses pytest2.7.2, w/o warns method") def test_take_npartitions_warn(): with pytest.warns(None): b.take(100) with pytest.warns(None): b.take(7) with pytest.warns(None): b.take(7, npartitions=2) def test_map_is_lazy(): from dask.bag.core import map assert isinstance(map(lambda x: x, [1, 2, 3]), Iterator) def test_can_use_dict_to_make_concrete(): assert isinstance(dict(b.frequencies()), dict) @pytest.mark.slow @pytest.mark.network @pytest.mark.skip(reason="Hangs") def test_from_url(): a = db.from_url(['http://google.com', 'http://github.com']) assert a.npartitions == 2 b = db.from_url('http://raw.githubusercontent.com/dask/dask/master/README.rst') assert b.npartitions == 1 assert b'Dask\n' in b.take(10) def test_read_text(): with filetexts({'a1.log': 'A\nB', 'a2.log': 'C\nD'}) as fns: assert (set(line.strip() for line in db.read_text(fns)) == set('ABCD')) assert (set(line.strip() for line in db.read_text('a*.log')) == set('ABCD')) pytest.raises(ValueError, lambda: db.read_text('non-existent-*-path')) def test_read_text_large(): with tmpfile() as fn: with open(fn, 'wb') as f: f.write(('Hello, world!' + os.linesep).encode() * 100) b = db.read_text(fn, blocksize=100) c = db.read_text(fn) assert len(b.dask) > 5 assert list(map(str, b.str.strip())) == list(map(str, c.str.strip())) d = db.read_text([fn], blocksize=100) assert list(b) == list(d) def test_read_text_encoding(): with tmpfile() as fn: with open(fn, 'wb') as f: f.write((u'你好!' 
+ os.linesep).encode('gb18030') * 100) b = db.read_text(fn, blocksize=100, encoding='gb18030') c = db.read_text(fn, encoding='gb18030') assert len(b.dask) > 5 assert (list(b.str.strip().map(lambda x: x.encode('utf-8'))) == list(c.str.strip().map(lambda x: x.encode('utf-8')))) d = db.read_text([fn], blocksize=100, encoding='gb18030') assert list(b) == list(d) def test_read_text_large_gzip(): with tmpfile('gz') as fn: f = GzipFile(fn, 'wb') f.write(b'Hello, world!\n' * 100) f.close() with pytest.raises(ValueError): db.read_text(fn, blocksize=50, linedelimiter='\n') c = db.read_text(fn) assert c.npartitions == 1 @pytest.mark.slow @pytest.mark.network def test_from_s3(): # note we don't test connection modes with aws_access_key and # aws_secret_key because these are not on travis-ci pytest.importorskip('s3fs') five_tips = (u'total_bill,tip,sex,smoker,day,time,size\n', u'16.99,1.01,Female,No,Sun,Dinner,2\n', u'10.34,1.66,Male,No,Sun,Dinner,3\n', u'21.01,3.5,Male,No,Sun,Dinner,3\n', u'23.68,3.31,Male,No,Sun,Dinner,2\n') # test compressed data e = db.read_text('s3://tip-data/t*.gz', storage_options=dict(anon=True)) assert e.take(5) == five_tips # test all keys in bucket c = db.read_text('s3://tip-data/*', storage_options=dict(anon=True)) assert c.npartitions == 4 def test_from_sequence(): b = db.from_sequence([1, 2, 3, 4, 5], npartitions=3) assert len(b.dask) == 3 assert set(b) == set([1, 2, 3, 4, 5]) def test_from_long_sequence(): L = list(range(1001)) b = db.from_sequence(L) assert set(b) == set(L) def test_product(): b2 = b.product(b) assert b2.npartitions == b.npartitions**2 assert set(b2) == set([(i, j) for i in L for j in L]) x = db.from_sequence([1, 2, 3, 4]) y = db.from_sequence([10, 20, 30]) z = x.product(y) assert set(z) == set([(i, j) for i in [1, 2, 3, 4] for j in [10, 20, 30]]) assert z.name != b2.name assert z.name == x.product(y).name def test_partition_collect(): with partd.Pickle() as p: partition(identity, range(6), 3, p) assert set(p.get(0)) == set([0, 3]) assert set(p.get(1)) == set([1, 4]) assert set(p.get(2)) == set([2, 5]) assert sorted(collect(identity, 0, p, '')) == [(0, [0]), (3, [3])] def test_groupby(): c = b.groupby(identity) result = dict(c) assert result == {0: [0, 0 ,0], 1: [1, 1, 1], 2: [2, 2, 2], 3: [3, 3, 3], 4: [4, 4, 4]} assert c.npartitions == b.npartitions assert c.name == b.groupby(identity).name assert c.name != b.groupby(lambda x: x + 1).name def test_groupby_with_indexer(): b = db.from_sequence([[1, 2, 3], [1, 4, 9], [2, 3, 4]]) result = dict(b.groupby(0)) assert valmap(sorted, result) == {1: [[1, 2, 3], [1, 4, 9]], 2: [[2, 3, 4]]} def test_groupby_with_npartitions_changed(): result = b.groupby(lambda x: x, npartitions=1) result2 = dict(result) assert result2 == {0: [0, 0 ,0], 1: [1, 1, 1], 2: [2, 2, 2], 3: [3, 3, 3], 4: [4, 4, 4]} assert result.npartitions == 1 def test_concat(): a = db.from_sequence([1, 2, 3]) b = db.from_sequence([4, 5, 6]) c = db.concat([a, b]) assert list(c) == [1, 2, 3, 4, 5, 6] assert c.name == db.concat([a, b]).name def test_flatten(): b = db.from_sequence([[1], [2, 3]]) assert list(b.flatten()) == [1, 2, 3] assert b.flatten().name == b.flatten().name def test_concat_after_map(): a = db.from_sequence([1, 2]) b = db.from_sequence([4, 5]) result = db.concat([a.map(inc), b]) assert list(result) == [2, 3, 4, 5] def test_args(): c = b.map(lambda x: x + 1) d = Bag(*c._args) assert list(c) == list(d) assert c.npartitions == d.npartitions def test_to_dataframe(): dd = pytest.importorskip('dask.dataframe') pd = 
pytest.importorskip('pandas') def check_parts(df, sol): assert all((p.dtypes == sol.dtypes).all() for p in dask.compute(*df.to_delayed())) dsk = {('test', 0): [(1, 2)], ('test', 1): [], ('test', 2): [(10, 20), (100, 200)]} b = Bag(dsk, 'test', 3) sol = pd.DataFrame(b.compute(), columns=['a', 'b']) # Elements are tuples df = b.to_dataframe() dd.utils.assert_eq(df, sol.rename(columns={'a': 0, 'b': 1}), check_index=False) df = b.to_dataframe(columns=['a', 'b']) dd.utils.assert_eq(df, sol, check_index=False) check_parts(df, sol) df = b.to_dataframe(meta=[('a', 'i8'), ('b', 'i8')]) dd.utils.assert_eq(df, sol, check_index=False) check_parts(df, sol) # Elements are dictionaries b = b.map(lambda x: dict(zip(['a', 'b'], x))) df = b.to_dataframe() dd.utils.assert_eq(df, sol, check_index=False) check_parts(df, sol) assert df._name == b.to_dataframe()._name # With metadata specified for meta in [sol, [('a', 'i8'), ('b', 'i8')]]: df = b.to_dataframe(meta=meta) dd.utils.assert_eq(df, sol, check_index=False) check_parts(df, sol) # Error to specify both columns and meta with pytest.raises(ValueError): b.to_dataframe(columns=['a', 'b'], meta=sol) # Single column b = b.pluck('a') sol = sol[['a']] df = b.to_dataframe(meta=sol) dd.utils.assert_eq(df, sol, check_index=False) check_parts(df, sol) # Works with iterators and tuples sol = pd.DataFrame({'a': range(100)}) b = db.from_sequence(range(100), npartitions=5) for f in [iter, tuple]: df = b.map_partitions(f).to_dataframe(meta=sol) dd.utils.assert_eq(df, sol, check_index=False) check_parts(df, sol) ext_open = [('gz', GzipFile), ('', open)] if not PY2: ext_open.append(('bz2', BZ2File)) @pytest.mark.parametrize('ext,myopen', ext_open) def test_to_textfiles(ext, myopen): b = db.from_sequence(['abc', '123', 'xyz'], npartitions=2) with tmpdir() as dir: c = b.to_textfiles(os.path.join(dir, '*.' + ext), compute=False) dask.compute(*c, get=dask.get) assert os.path.exists(os.path.join(dir, '1.' + ext)) f = myopen(os.path.join(dir, '1.' + ext), 'rb') text = f.read() if hasattr(text, 'decode'): text = text.decode() assert 'xyz' in text f.close() def test_to_textfiles_name_function_preserves_order(): seq = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p'] b = db.from_sequence(seq, npartitions=16) with tmpdir() as dn: b.to_textfiles(dn) out = db.read_text(os.path.join(dn, "*"), encoding='ascii').map(str).map(str.strip).compute() assert seq == out @pytest.mark.skipif(sys.version_info[:2] == (3,3), reason="Python3.3 uses pytest2.7.2, w/o warns method") def test_to_textfiles_name_function_warn(): seq = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p'] a = db.from_sequence(seq, npartitions=16) with tmpdir() as dn: with pytest.warns(None): a.to_textfiles(dn, name_function=str) def test_to_textfiles_encoding(): b = db.from_sequence([u'汽车', u'苹果', u'天气'], npartitions=2) for ext, myopen in [('gz', GzipFile), ('bz2', BZ2File), ('', open)]: if ext == 'bz2' and PY2: continue with tmpdir() as dir: c = b.to_textfiles(os.path.join(dir, '*.' + ext), encoding='gb18030', compute=False) dask.compute(*c) assert os.path.exists(os.path.join(dir, '1.' + ext)) f = myopen(os.path.join(dir, '1.' 
+ ext), 'rb') text = f.read() if hasattr(text, 'decode'): text = text.decode('gb18030') assert u'天气' in text f.close() def test_to_textfiles_inputs(): B = db.from_sequence(['abc', '123', 'xyz'], npartitions=2) with tmpfile() as a: with tmpfile() as b: B.to_textfiles([a, b]) assert os.path.exists(a) assert os.path.exists(b) with tmpdir() as dirname: B.to_textfiles(dirname) assert os.path.exists(dirname) assert os.path.exists(os.path.join(dirname, '0.part')) pytest.raises(ValueError, lambda: B.to_textfiles(5)) def test_to_textfiles_endlines(): b = db.from_sequence(['a', 'b', 'c'], npartitions=1) with tmpfile() as fn: b.to_textfiles([fn]) with open(fn, 'r') as f: result = f.readlines() assert result == ['a\n', 'b\n', 'c'] def test_string_namespace(): b = db.from_sequence(['Alice Smith', 'Bob Jones', 'Charlie Smith'], npartitions=2) assert 'split' in dir(b.str) assert 'match' in dir(b.str) assert list(b.str.lower()) == ['alice smith', 'bob jones', 'charlie smith'] assert list(b.str.split(' ')) == [['Alice', 'Smith'], ['Bob', 'Jones'], ['Charlie', 'Smith']] assert list(b.str.match('*Smith')) == ['Alice Smith', 'Charlie Smith'] pytest.raises(AttributeError, lambda: b.str.sfohsofhf) assert b.str.match('*Smith').name == b.str.match('*Smith').name assert b.str.match('*Smith').name != b.str.match('*John').name def test_string_namespace_with_unicode(): b = db.from_sequence([u'Alice Smith', u'Bob Jones', 'Charlie Smith'], npartitions=2) assert list(b.str.lower()) == ['alice smith', 'bob jones', 'charlie smith'] def test_str_empty_split(): b = db.from_sequence([u'Alice Smith', u'Bob Jones', 'Charlie Smith'], npartitions=2) assert list(b.str.split()) == [['Alice', 'Smith'], ['Bob', 'Jones'], ['Charlie', 'Smith']] def test_map_with_iterator_function(): b = db.from_sequence([[1, 2, 3], [4, 5, 6]], npartitions=2) def f(L): for x in L: yield x + 1 c = b.map(f) assert list(c) == [[2, 3, 4], [5, 6, 7]] def test_ensure_compute_output_is_concrete(): b = db.from_sequence([1, 2, 3]) result = b.map(lambda x: x + 1).compute() assert not isinstance(result, Iterator) class BagOfDicts(db.Bag): def get(self, key, default=None): return self.map(lambda d: d.get(key, default)) def set(self, key, value): def setter(d): d[key] = value return d return self.map(setter) def test_bag_class_extend(): dictbag = BagOfDicts(*db.from_sequence([{'a': {'b': 'c'}}])._args) assert dictbag.get('a').get('b').compute()[0] == 'c' assert dictbag.get('a').set('d', 'EXTENSIBILITY!!!').compute()[0] == \ {'b': 'c', 'd': 'EXTENSIBILITY!!!'} assert isinstance(dictbag.get('a').get('b'), BagOfDicts) def test_gh715(): bin_data = u'\u20ac'.encode('utf-8') with tmpfile() as fn: with open(fn, 'wb') as f: f.write(bin_data) a = db.read_text(fn) assert a.compute()[0] == bin_data.decode('utf-8') def test_bag_compute_forward_kwargs(): x = db.from_sequence([1, 2, 3]).map(lambda a: a + 1) x.compute(bogus_keyword=10) def test_to_delayed(): from dask.delayed import Delayed b = db.from_sequence([1, 2, 3, 4, 5, 6], npartitions=3) a, b, c = b.map(inc).to_delayed() assert all(isinstance(x, Delayed) for x in [a, b, c]) assert b.compute() == [4, 5] b = db.from_sequence([1, 2, 3, 4, 5, 6], npartitions=3) t = b.sum().to_delayed() assert isinstance(t, Delayed) assert t.compute() == 21 def test_to_delayed_optimizes(): b = db.from_sequence([1, 2, 3, 4, 5, 6], npartitions=1) b2 = b.map(inc).map(inc).map(inc) [d] = b2.to_delayed() text = str(dict(d.dask)) assert text.count('reify') == 1 d = b2.sum().to_delayed() text = str(dict(d.dask)) assert text.count('reify') == 0 [d] = 
b2.to_textfiles('foo.txt', compute=False) text = str(dict(d.dask)) assert text.count('reify') <= 0 def test_from_delayed(): from dask.delayed import delayed a, b, c = delayed([1, 2, 3]), delayed([4, 5, 6]), delayed([7, 8, 9]) bb = from_delayed([a, b, c]) assert bb.name == from_delayed([a, b, c]).name assert isinstance(bb, Bag) assert list(bb) == [1, 2, 3, 4, 5, 6, 7, 8, 9] asum_value = delayed(lambda X: sum(X))(a) asum_item = db.Item.from_delayed(asum_value) assert asum_value.compute() == asum_item.compute() == 6 def test_from_delayed_iterator(): from dask.delayed import delayed def lazy_records(n): return ({'operations': [1, 2]} for _ in range(n)) delayed_records = delayed(lazy_records, pure=False) bag = db.from_delayed([delayed_records(5) for _ in range(5)]) assert db.compute( bag.count(), bag.pluck('operations').count(), bag.pluck('operations').flatten().count(), get=dask.get, ) == (25, 25, 50) def test_range(): for npartitions in [1, 7, 10, 28]: b = db.range(100, npartitions=npartitions) assert len(b.dask) == npartitions assert b.npartitions == npartitions assert list(b) == list(range(100)) @pytest.mark.parametrize("npartitions", [1, 7, 10, 28]) def test_zip(npartitions, hi=1000): evens = db.from_sequence(range(0, hi, 2), npartitions=npartitions) odds = db.from_sequence(range(1, hi, 2), npartitions=npartitions) pairs = db.zip(evens, odds) assert pairs.npartitions == npartitions assert list(pairs) == list(zip(range(0, hi, 2), range(1, hi, 2))) @pytest.mark.parametrize('nin', [1, 2, 7, 11, 23]) @pytest.mark.parametrize('nout', [1, 2, 5, 12, 23]) def test_repartition(nin, nout): b = db.from_sequence(range(100), npartitions=nin) c = b.repartition(npartitions=nout) assert c.npartitions == nout assert b.compute(get=dask.get) == c.compute(get=dask.get) results = dask.get(c.dask, c.__dask_keys__()) assert all(results) def test_repartition_names(): b = db.from_sequence(range(100), npartitions=5) c = b.repartition(2) assert b.name != c.name d = b.repartition(20) assert b.name != c.name assert c.name != d.name c = b.repartition(5) assert b is c @pytest.mark.skipif('not db.core._implement_accumulate') def test_accumulate(): parts = [[1, 2, 3], [4, 5], [], [6, 7]] dsk = dict((('test', i), p) for (i, p) in enumerate(parts)) b = db.Bag(dsk, 'test', len(parts)) r = b.accumulate(add) assert r.name == b.accumulate(add).name assert r.name != b.accumulate(add, -1).name assert r.compute() == [1, 3, 6, 10, 15, 21, 28] assert b.accumulate(add, -1).compute() == [-1, 0, 2, 5, 9, 14, 20, 27] assert b.accumulate(add).map(inc).compute() == [2, 4, 7, 11, 16, 22, 29] b = db.from_sequence([1, 2, 3], npartitions=1) assert b.accumulate(add).compute() == [1, 3, 6] def test_groupby_tasks(): b = db.from_sequence(range(160), npartitions=4) out = b.groupby(lambda x: x % 10, max_branch=4, method='tasks') partitions = dask.get(out.dask, out.__dask_keys__()) for a in partitions: for b in partitions: if a is not b: assert not set(pluck(0, a)) & set(pluck(0, b)) b = db.from_sequence(range(1000), npartitions=100) out = b.groupby(lambda x: x % 123, method='tasks') assert len(out.dask) < 100**2 partitions = dask.get(out.dask, out.__dask_keys__()) for a in partitions: for b in partitions: if a is not b: assert not set(pluck(0, a)) & set(pluck(0, b)) b = db.from_sequence(range(10000), npartitions=345) out = b.groupby(lambda x: x % 2834, max_branch=24, method='tasks') partitions = dask.get(out.dask, out.__dask_keys__()) for a in partitions: for b in partitions: if a is not b: assert not set(pluck(0, a)) & set(pluck(0, b)) def 
test_groupby_tasks_names(): b = db.from_sequence(range(160), npartitions=4) func = lambda x: x % 10 func2 = lambda x: x % 20 assert (set(b.groupby(func, max_branch=4, method='tasks').dask) == set(b.groupby(func, max_branch=4, method='tasks').dask)) assert (set(b.groupby(func, max_branch=4, method='tasks').dask) != set(b.groupby(func, max_branch=2, method='tasks').dask)) assert (set(b.groupby(func, max_branch=4, method='tasks').dask) != set(b.groupby(func2, max_branch=4, method='tasks').dask)) @pytest.mark.parametrize('size,npartitions,groups', [(1000, 20, 100), (12345, 234, 1042)]) def test_groupby_tasks_2(size, npartitions, groups): func = lambda x: x % groups b = db.range(size, npartitions=npartitions).groupby(func, method='tasks') result = b.compute(get=dask.get) assert dict(result) == groupby(func, range(size)) def test_groupby_tasks_3(): func = lambda x: x % 10 b = db.range(20, npartitions=5).groupby(func, method='tasks', max_branch=2) result = b.compute(get=dask.get) assert dict(result) == groupby(func, range(20)) # assert b.npartitions == 5 def test_to_textfiles_empty_partitions(): with tmpdir() as d: b = db.range(5, npartitions=5).filter(lambda x: x == 1).map(str) b.to_textfiles(os.path.join(d, '*.txt')) assert len(os.listdir(d)) == 5 def test_reduction_empty(): b = db.from_sequence(range(10), npartitions=100) assert b.filter(lambda x: x % 2 == 0).max().compute(get=dask.get) == 8 assert b.filter(lambda x: x % 2 == 0).min().compute(get=dask.get) == 0 @pytest.mark.parametrize('npartitions', [1, 2, 4]) def test_reduction_empty_aggregate(npartitions): b = db.from_sequence([0, 0, 0, 1], npartitions=npartitions).filter(None) assert b.min(split_every=2).compute(get=dask.get) == 1 vals = db.compute(b.min(split_every=2), b.max(split_every=2), get=dask.get) assert vals == (1, 1) with pytest.raises(ValueError): b = db.from_sequence([0, 0, 0, 0], npartitions=npartitions) b.filter(None).min(split_every=2).compute(get=dask.get) class StrictReal(int): def __eq__(self, other): assert isinstance(other, StrictReal) return self.real == other.real def __ne__(self, other): assert isinstance(other, StrictReal) return self.real != other.real def test_reduction_with_non_comparable_objects(): b = db.from_sequence([StrictReal(x) for x in range(10)], partition_size=2) assert b.fold(max, max).compute(get=dask.get) == StrictReal(9) def test_reduction_with_sparse_matrices(): sp = pytest.importorskip('scipy.sparse') b = db.from_sequence([sp.csr_matrix([0]) for x in range(4)], partition_size=2) def sp_reduce(a, b): return sp.vstack([a, b]) assert b.fold(sp_reduce, sp_reduce).compute(get=dask.get).shape == (4, 1) def test_empty(): list(db.from_sequence([])) == [] def test_bag_picklable(): from pickle import loads, dumps b = db.from_sequence(range(100)) b2 = loads(dumps(b)) assert b.compute() == b2.compute() s = b.sum() s2 = loads(dumps(s)) assert s.compute() == s2.compute() def test_msgpack_unicode(): b = db.from_sequence([{"a": 1}]).groupby("a") result = b.compute(get=dask.get) assert dict(result) == {1: [{'a': 1}]} def test_bag_with_single_callable(): f = lambda: None b = db.from_sequence([f]) assert list(b.compute(get=dask.get)) == [f] def test_optimize_fuse_keys(): x = db.range(10, npartitions=2) y = x.map(inc) z = y.map(inc) dsk = z.__dask_optimize__(z.dask, z.__dask_keys__()) assert not set(y.dask) & set(dsk) dsk = z.__dask_optimize__(z.dask, z.__dask_keys__(), fuse_keys=y.__dask_keys__()) assert all(k in dsk for k in y.__dask_keys__()) def test_reductions_are_lazy(): current = [None] def part(): for i in 
range(10): current[0] = i yield i def func(part): assert current[0] == 0 return sum(part) b = Bag({('foo', 0): part()}, 'foo', 1) res = b.reduction(func, sum) assert res.compute(get=dask.get) == sum(range(10)) def test_repeated_groupby(): b = db.range(10, npartitions=4) c = b.groupby(lambda x: x % 3) assert valmap(len, dict(c)) == valmap(len, dict(c)) def test_temporary_directory(tmpdir): b = db.range(10, npartitions=4) with dask.set_options(temporary_directory=str(tmpdir)): b2 = b.groupby(lambda x: x % 2) b2.compute() assert any(fn.endswith('.partd') for fn in os.listdir(str(tmpdir))) def test_empty_bag(): b = db.from_sequence([]) assert b.map(inc).all().compute(get=dask.get) assert not b.map(inc).any().compute(get=dask.get) assert not b.map(inc).sum().compute(get=dask.get) assert not b.map(inc).count().compute(get=dask.get) def test_bag_paths(): b = db.from_sequence(['abc', '123', 'xyz'], npartitions=2) assert b.to_textfiles('foo*') == ['foo0', 'foo1'] os.remove('foo0') os.remove('foo1') dask-0.16.0/dask/bag/tests/test_text.py000066400000000000000000000044201320364734500177410ustar00rootroot00000000000000from __future__ import print_function, division, absolute_import import pytest from toolz import partial from dask import compute, get from dask.utils import filetexts from dask.bytes import compression from dask.bag.text import read_text compute = partial(compute, get=get) files = {'.test.accounts.1.json': ('{"amount": 100, "name": "Alice"}\n' '{"amount": 200, "name": "Bob"}\n' '{"amount": 300, "name": "Charlie"}\n' '{"amount": 400, "name": "Dennis"}\n'), '.test.accounts.2.json': ('{"amount": 500, "name": "Alice"}\n' '{"amount": 600, "name": "Bob"}\n' '{"amount": 700, "name": "Charlie"}\n' '{"amount": 800, "name": "Dennis"}\n')} expected = ''.join([files[v] for v in sorted(files)]) fmt_bs = ([(fmt, None) for fmt in compression.files] + [(fmt, 10) for fmt in compression.seekable_files] + [(fmt, None) for fmt in compression.seekable_files]) encodings = ['ascii', 'utf-8'] # + ['utf-16', 'utf-16-le', 'utf-16-be'] fmt_bs_enc = [(fmt, bs, encoding) for fmt, bs in fmt_bs for encoding in encodings] @pytest.mark.parametrize('fmt,bs,encoding', fmt_bs_enc) def test_read_text(fmt, bs, encoding): compress = compression.compress[fmt] files2 = dict((k, compress(v.encode(encoding))) for k, v in files.items()) with filetexts(files2, mode='b'): b = read_text('.test.accounts.*.json', compression=fmt, blocksize=bs, encoding=encoding) L, = compute(b) assert ''.join(L) == expected blocks = read_text('.test.accounts.*.json', compression=fmt, blocksize=bs, encoding=encoding, collection=False) L = compute(*blocks) assert ''.join(line for block in L for line in block) == expected def test_errors(): with filetexts({'.test.foo': b'Jos\xe9\nAlice'}, mode='b'): with pytest.raises(UnicodeDecodeError): read_text('.test.foo', encoding='ascii').compute() result = read_text('.test.foo', encoding='ascii', errors='ignore') result = result.compute(get=get) assert result == ['Jos\n', 'Alice'] dask-0.16.0/dask/bag/text.py000066400000000000000000000070721320364734500155460ustar00rootroot00000000000000from __future__ import print_function, division, absolute_import import io import os from toolz import concat from ..utils import system_encoding from ..delayed import delayed from ..bytes import open_text_files, read_bytes from .core import from_delayed delayed = delayed(pure=True) def read_text(urlpath, blocksize=None, compression='infer', encoding=system_encoding, errors='strict', linedelimiter=os.linesep, collection=True, 
storage_options=None): """ Read lines from text files Parameters ---------- urlpath: string or list Absolute or relative filepath, URL (may include protocols like ``s3://``), globstring, or a list of beforementioned strings. blocksize: None or int Size (in bytes) to cut up larger files. Streams by default. compression: string Compression format like 'gzip' or 'xz'. Defaults to 'infer' encoding: string errors: string linedelimiter: string collection: bool, optional Return dask.bag if True, or list of delayed values if false storage_options: dict Extra options that make sense to a particular storage connection, e.g. host, port, username, password, etc. Examples -------- >>> b = read_text('myfiles.1.txt') # doctest: +SKIP >>> b = read_text('myfiles.*.txt') # doctest: +SKIP >>> b = read_text('myfiles.*.txt.gz') # doctest: +SKIP >>> b = read_text('s3://bucket/myfiles.*.txt') # doctest: +SKIP >>> b = read_text('s3://key:secret@bucket/myfiles.*.txt') # doctest: +SKIP >>> b = read_text('hdfs://namenode.example.com/myfiles.*.txt') # doctest: +SKIP Parallelize a large file by providing the number of uncompressed bytes to load into each partition. >>> b = read_text('largefile.txt', blocksize=1e7) # doctest: +SKIP Returns ------- dask.bag.Bag if collection is True or list of Delayed lists otherwise See Also -------- from_sequence: Build bag from Python sequence """ if isinstance(urlpath, (tuple, list, set)): blocks = sum([read_text(fn, blocksize=blocksize, compression=compression, encoding=encoding, errors=errors, linedelimiter=linedelimiter, collection=False, storage_options=storage_options) for fn in urlpath], []) else: if blocksize is None: files = open_text_files(urlpath, encoding=encoding, errors=errors, compression=compression, **(storage_options or {})) blocks = [delayed(list, pure=True)(delayed(file_to_blocks)(file)) for file in files] else: _, blocks = read_bytes(urlpath, delimiter=linedelimiter.encode(), blocksize=blocksize, sample=False, compression=compression, **(storage_options or {})) if isinstance(blocks[0], (tuple, list)): blocks = list(concat(blocks)) blocks = [delayed(decode)(b, encoding, errors) for b in blocks] if not blocks: raise ValueError("No files found", urlpath) if not collection: return blocks else: return from_delayed(blocks) def file_to_blocks(lazy_file): with lazy_file as f: for line in f: yield line def decode(block, encoding, errors): text = block.decode(encoding, errors) lines = io.StringIO(text) return list(lines) dask-0.16.0/dask/base.py000066400000000000000000000561411320364734500147440ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from abc import ABCMeta from collections import OrderedDict, Iterator from functools import partial from hashlib import md5 import inspect import pickle import os import threading import uuid import warnings from toolz import merge, groupby, curry, identity from toolz.functoolz import Compose from .compatibility import long, unicode from .context import _globals, thread_state from .core import flatten from .hashing import hash_buffer_hex from .utils import Dispatch, ensure_dict __all__ = ("DaskMethodsMixin", "is_dask_collection", "compute", "persist", "visualize", "tokenize", "normalize_token") def is_dask_collection(x): """Returns ``True`` if ``x`` is a dask collection""" try: return x.__dask_graph__() is not None except (AttributeError, TypeError): return False class DaskMethodsMixin(object): """A mixin adding standard dask collection methods""" __slots__ = () def visualize(self, 
filename='mydask', format=None, optimize_graph=False, **kwargs): """Render the computation of this object's task graph using graphviz. Requires ``graphviz`` to be installed. Parameters ---------- filename : str or None, optional The name (without an extension) of the file to write to disk. If `filename` is None, no file will be written, and we communicate with dot using only pipes. format : {'png', 'pdf', 'dot', 'svg', 'jpeg', 'jpg'}, optional Format in which to write output file. Default is 'png'. optimize_graph : bool, optional If True, the graph is optimized before rendering. Otherwise, the graph is displayed as is. Default is False. **kwargs Additional keyword arguments to forward to ``to_graphviz``. Returns ------- result : IPython.display.Image, IPython.display.SVG, or None See dask.dot.dot_graph for more information. See Also -------- dask.base.visualize dask.dot.dot_graph Notes ----- For more information on optimization see here: http://dask.pydata.org/en/latest/optimize.html """ return visualize(self, filename=filename, format=format, optimize_graph=optimize_graph, **kwargs) def persist(self, **kwargs): """Persist this dask collection into memory This turns a lazy Dask collection into a Dask collection with the same metadata, but now with the results fully computed or actively computing in the background. Parameters ---------- get : callable, optional A scheduler ``get`` function to use. If not provided, the default is to check the global settings first, and then fall back to the collection defaults. optimize_graph : bool, optional If True [default], the graph is optimized before computation. Otherwise the graph is run as is. This can be useful for debugging. **kwargs Extra keywords to forward to the scheduler ``get`` function. Returns ------- New dask collections backed by in-memory data See Also -------- dask.base.persist """ (result,) = persist(self, **kwargs) return result def compute(self, **kwargs): """Compute this dask collection This turns a lazy Dask collection into its in-memory equivalent. For example a Dask.array turns into a NumPy array and a Dask.dataframe turns into a Pandas dataframe. The entire dataset must fit into memory before calling this operation. Parameters ---------- get : callable, optional A scheduler ``get`` function to use. If not provided, the default is to check the global settings first, and then fall back to the collection defaults. optimize_graph : bool, optional If True [default], the graph is optimized before computation. Otherwise the graph is run as is. This can be useful for debugging. kwargs Extra keywords to forward to the scheduler ``get`` function. See Also -------- dask.base.compute """ (result,) = compute(self, traverse=False, **kwargs) return result def call_finalize(finalize, args, results): return finalize(results, *args) def add_ABCMeta(cls): """Use the metaclass ABCMeta for this class""" return ABCMeta(cls.__name__, cls.__bases__, cls.__dict__.copy()) # TODO: this class is deprecated and should be removed in a future release. @add_ABCMeta class Base(DaskMethodsMixin): """DEPRECATED. The recommended way to create a custom dask object now is to implement the dask collection interface (see the docs), and optionally subclass from ``DaskMethodsMixin`` if desired. See http://dask.pydata.org/en/latest/custom-collections.html for more information""" __slots__ = () @classmethod def __subclasshook__(cls, other): if cls is Base: warnings.warn("DeprecationWarning: `dask.base.Base` is deprecated. 
" "To check if an object is a dask collection use " "dask.base.is_dask_collection.\n\nSee http://dask." "pydata.org/en/latest/custom-collections.html " " for more information") return NotImplemented def __dask_graph__(self): # We issue a deprecation warning for the whole class here, as any # non-instance check usage will end up calling `__dask_graph__`. warnings.warn("DeprecationWarning: `dask.base.Base` is deprecated. " "To create a custom dask object implement the dask " "collection interface, and optionally subclass from " "``DaskMethodsMixin`` if desired.\n\nSee http://dask." "pydata.org/en/latest/custom-collections.html " " for more information") return self.dask def _keys(self): warnings.warn("DeprecationWarning: the `_keys` method is deprecated, " "use `__dask_keys__` instead") return self.__dask_keys__() @property def _finalize(self): warnings.warn("DeprecationWarning: the `_finalize` method is " "deprecated, use `__dask_postcompute__` instead") f, args = self.__dask_postcompute__() return partial(call_finalize, f, args) if args else f @classmethod def _optimize(cls, *args, **kwargs): warnings.warn("DeprecationWarning: the `_optimize` method is " "deprecated, use `__dask_optimize__` instead") return cls.__dask_optimize__(*args, **kwargs) @classmethod def _get(cls, dsk, keys, **kwargs): warnings.warn("DeprecationWarning: the `_get` method is " "deprecated, use ``dask.base.compute_as_if_collection`` " "instead") return compute_as_if_collection(cls, dsk, keys, **kwargs) def compute_as_if_collection(cls, dsk, keys, get=None, **kwargs): """Compute a graph as if it were of type cls. Allows for applying the same optimizations and default scheduler.""" get = get or _globals['get'] or cls.__dask_scheduler__ dsk2 = optimization_function(cls)(ensure_dict(dsk), keys, **kwargs) return get(dsk2, keys, **kwargs) def dont_optimize(dsk, keys, **kwargs): return dsk def optimization_function(x): return getattr(x, '__dask_optimize__', dont_optimize) def collections_to_dsk(collections, optimize_graph=True, **kwargs): """ Convert many collections into a single dask graph, after optimization """ optimizations = (kwargs.pop('optimizations', None) or _globals.get('optimizations', [])) if optimize_graph: groups = groupby(optimization_function, collections) groups = {opt: _extract_graph_and_keys(val) for opt, val in groups.items()} for opt in optimizations: groups = {k: (opt(dsk, keys), keys) for k, (dsk, keys) in groups.items()} dsk = merge(*(opt(dsk, keys, **kwargs) for opt, (dsk, keys) in groups.items())) else: dsk, _ = _extract_graph_and_keys(collections) return dsk def _extract_graph_and_keys(vals): """Given a list of dask vals, return a single graph and a list of keys such that ``get(dsk, keys)`` is equivalent to ``[v.compute() v in vals]``.""" dsk = {} keys = [] for v in vals: d = v.__dask_graph__() if hasattr(d, 'dicts'): for dd in d.dicts.values(): dsk.update(dd) else: dsk.update(d) keys.append(v.__dask_keys__()) return dsk, keys def compute(*args, **kwargs): """Compute several dask collections at once. Parameters ---------- args : object Any number of objects. If it is a dask object, it's computed and the result is returned. By default, python builtin collections are also traversed to look for dask objects (for more information see the ``traverse`` keyword). Non-dask arguments are passed through unchanged. traverse : bool, optional By default dask traverses builtin python collections looking for dask objects passed to ``compute``. For large collections this can be expensive. 
If none of the arguments contain any dask objects, set ``traverse=False`` to avoid doing this traversal. get : callable, optional A scheduler ``get`` function to use. If not provided, the default is to check the global settings first, and then fall back to defaults for the collections. optimize_graph : bool, optional If True [default], the optimizations for each collection are applied before computation. Otherwise the graph is run as is. This can be useful for debugging. kwargs Extra keywords to forward to the scheduler ``get`` function. Examples -------- >>> import dask.array as da >>> a = da.arange(10, chunks=2).sum() >>> b = da.arange(10, chunks=2).mean() >>> compute(a, b) (45, 4.5) By default, dask objects inside python collections will also be computed: >>> compute({'a': a, 'b': b, 'c': 1}) # doctest: +SKIP ({'a': 45, 'b': 4.5, 'c': 1},) """ from dask.delayed import delayed traverse = kwargs.pop('traverse', True) if traverse: args = tuple(delayed(a) if isinstance(a, (list, set, tuple, dict, Iterator)) else a for a in args) optimize_graph = kwargs.pop('optimize_graph', True) variables = [a for a in args if is_dask_collection(a)] if not variables: return args get = kwargs.pop('get', None) or _globals['get'] if get is None and getattr(thread_state, 'key', False): from distributed.worker import get_worker get = get_worker().client.get if not get: get = variables[0].__dask_scheduler__ if not all(a.__dask_scheduler__ == get for a in variables): raise ValueError("Compute called on multiple collections with " "differing default schedulers. Please specify a " "scheduler `get` function using either " "the `get` kwarg or globally with `set_options`.") dsk = collections_to_dsk(variables, optimize_graph, **kwargs) keys = [var.__dask_keys__() for var in variables] postcomputes = [a.__dask_postcompute__() if is_dask_collection(a) else (None, a) for a in args] results = get(dsk, keys, **kwargs) results_iter = iter(results) return tuple(a if f is None else f(next(results_iter), *a) for f, a in postcomputes) def visualize(*args, **kwargs): """ Visualize several dask graphs at once. Requires ``graphviz`` to be installed. All options that are not the dask graph(s) should be passed as keyword arguments. Parameters ---------- dsk : dict(s) or collection(s) The dask graph(s) to visualize. filename : str or None, optional The name (without an extension) of the file to write to disk. If `filename` is None, no file will be written, and we communicate with dot using only pipes. format : {'png', 'pdf', 'dot', 'svg', 'jpeg', 'jpg'}, optional Format in which to write output file. Default is 'png'. optimize_graph : bool, optional If True, the graph is optimized before rendering. Otherwise, the graph is displayed as is. Default is False. **kwargs Additional keyword arguments to forward to ``to_graphviz``. Returns ------- result : IPython.display.Image, IPython.display.SVG, or None See dask.dot.dot_graph for more information. 
See Also -------- dask.dot.dot_graph Notes ----- For more information on optimization see here: http://dask.pydata.org/en/latest/optimize.html """ from dask.dot import dot_graph filename = kwargs.pop('filename', 'mydask') optimize_graph = kwargs.pop('optimize_graph', False) dsks = [arg for arg in args if isinstance(arg, dict)] args = [arg for arg in args if is_dask_collection(arg)] dsk = collections_to_dsk(args, optimize_graph=optimize_graph) for d in dsks: dsk.update(d) return dot_graph(dsk, filename=filename, **kwargs) def persist(*args, **kwargs): """ Persist multiple Dask collections into memory This turns lazy Dask collections into Dask collections with the same metadata, but now with their results fully computed or actively computing in the background. For example a lazy dask.array built up from many lazy calls will now be a dask.array of the same shape, dtype, chunks, etc., but now with all of those previously lazy tasks either computed in memory as many small NumPy arrays (in the single-machine case) or asynchronously running in the background on a cluster (in the distributed case). This function operates differently if a ``dask.distributed.Client`` exists and is connected to a distributed scheduler. In this case this function will return as soon as the task graph has been submitted to the cluster, but before the computations have completed. Computations will continue asynchronously in the background. When using this function with the single machine scheduler it blocks until the computations have finished. When using Dask on a single machine you should ensure that the dataset fits entirely within memory. Examples -------- >>> df = dd.read_csv('/path/to/*.csv') # doctest: +SKIP >>> df = df[df.name == 'Alice'] # doctest: +SKIP >>> df['in-debt'] = df.balance < 0 # doctest: +SKIP >>> df = df.persist() # triggers computation # doctest: +SKIP >>> df.value().min() # future computations are now fast # doctest: +SKIP -10 >>> df.value().max() # doctest: +SKIP 100 >>> from dask import persist # use persist function on multiple collections >>> a, b = persist(a, b) # doctest: +SKIP Parameters ---------- *args: Dask collections get : callable, optional A scheduler ``get`` function to use. If not provided, the default is to check the global settings first, and then fall back to the collection defaults. optimize_graph : bool, optional If True [default], the graph is optimized before computation. Otherwise the graph is run as is. This can be useful for debugging. **kwargs Extra keywords to forward to the scheduler ``get`` function. 
Returns ------- New dask collections backed by in-memory data """ collections = [a for a in args if is_dask_collection(a)] if not collections: return args get = kwargs.pop('get', None) or _globals['get'] if get is None and getattr(thread_state, 'key', False): from distributed.worker import get_worker get = get_worker().client.get if inspect.ismethod(get): try: from distributed.client import default_client except ImportError: pass else: try: client = default_client() except ValueError: pass else: if client.get == _globals['get']: collections = client.persist(collections, **kwargs) if isinstance(collections, list): # distributed is inconsistent here collections = tuple(collections) else: collections = (collections,) results_iter = iter(collections) return tuple(a if not is_dask_collection(a) else next(results_iter) for a in args) optimize_graph = kwargs.pop('optimize_graph', True) if not get: get = collections[0].__dask_scheduler__ if not all(a.__dask_scheduler__ == get for a in collections): raise ValueError("Compute called on multiple collections with " "differing default schedulers. Please specify a " "scheduler `get` function using either " "the `get` kwarg or globally with `set_options`.") dsk = collections_to_dsk(collections, optimize_graph, **kwargs) keys, postpersists = [], [] for a in args: if is_dask_collection(a): a_keys = list(flatten(a.__dask_keys__())) rebuild, state = a.__dask_postpersist__() keys.extend(a_keys) postpersists.append((rebuild, a_keys, state)) else: postpersists.append((None, None, a)) results = get(dsk, keys, **kwargs) d = dict(zip(keys, results)) return tuple(s if r is None else r({k: d[k] for k in ks}, *s) for r, ks, s in postpersists) ############ # Tokenize # ############ def tokenize(*args, **kwargs): """ Deterministic token >>> tokenize([1, 2, '3']) '7d6a880cd9ec03506eee6973ff551339' >>> tokenize('Hello') == tokenize('Hello') True """ if kwargs: args = args + (kwargs,) return md5(str(tuple(map(normalize_token, args))).encode()).hexdigest() normalize_token = Dispatch() normalize_token.register((int, long, float, str, unicode, bytes, type(None), type, slice, complex, type(Ellipsis)), identity) @normalize_token.register(dict) def normalize_dict(d): return normalize_token(sorted(d.items(), key=str)) @normalize_token.register(OrderedDict) def normalize_ordered_dict(d): return type(d).__name__, normalize_token(list(d.items())) @normalize_token.register(set) def normalize_set(s): return normalize_token(sorted(s, key=str)) @normalize_token.register((tuple, list)) def normalize_seq(seq): return type(seq).__name__, list(map(normalize_token, seq)) @normalize_token.register(object) def normalize_object(o): method = getattr(o, '__dask_tokenize__', None) if method is not None: return method() return normalize_function(o) if callable(o) else uuid.uuid4().hex function_cache = {} function_cache_lock = threading.Lock() def normalize_function(func): try: return function_cache[func] except KeyError: result = _normalize_function(func) if len(function_cache) >= 500: # clear half of cache if full with function_cache_lock: if len(function_cache) >= 500: for k in list(function_cache)[::2]: del function_cache[k] function_cache[func] = result return result except TypeError: # not hashable return _normalize_function(func) def _normalize_function(func): if isinstance(func, curry): func = func._partial if isinstance(func, Compose): first = getattr(func, 'first', None) funcs = reversed((first,) + func.funcs) if first else func.funcs return tuple(normalize_function(f) for f in funcs) 
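    # functools.partial: the token combines the wrapped callable, the frozen
    # positional arguments and the keyword arguments (sorted for determinism).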
elif isinstance(func, partial): kws = tuple(sorted(func.keywords.items())) if func.keywords else () return (normalize_function(func.func), func.args, kws) else: try: result = pickle.dumps(func, protocol=0) if b'__main__' not in result: # abort on dynamic functions return result except Exception: pass try: import cloudpickle return cloudpickle.dumps(func, protocol=0) except Exception: return str(func) @normalize_token.register_lazy("pandas") def register_pandas(): import pandas as pd @normalize_token.register(pd.Index) def normalize_index(ind): return [ind.name, normalize_token(ind.values)] @normalize_token.register(pd.Categorical) def normalize_categorical(cat): return [normalize_token(cat.codes), normalize_token(cat.categories), cat.ordered] @normalize_token.register(pd.Series) def normalize_series(s): return [s.name, s.dtype, normalize_token(s._data.blocks[0].values), normalize_token(s.index)] @normalize_token.register(pd.DataFrame) def normalize_dataframe(df): data = [block.values for block in df._data.blocks] data += [df.columns, df.index] return list(map(normalize_token, data)) @normalize_token.register_lazy("numpy") def register_numpy(): import numpy as np @normalize_token.register(np.ndarray) def normalize_array(x): if not x.shape: return (str(x), x.dtype) if hasattr(x, 'mode') and getattr(x, 'filename', None): if hasattr(x.base, 'ctypes'): offset = (x.ctypes.get_as_parameter().value - x.base.ctypes.get_as_parameter().value) else: offset = 0 # root memmap's have mmap object as base return (x.filename, os.path.getmtime(x.filename), x.dtype, x.shape, x.strides, offset) if x.dtype.hasobject: try: data = hash_buffer_hex('-'.join(x.flat).encode('utf-8')) except TypeError: data = hash_buffer_hex(b'-'.join([unicode(item).encode('utf-8') for item in x.flat])) else: try: data = hash_buffer_hex(x.ravel(order='K').view('i1')) except (BufferError, AttributeError, ValueError): data = hash_buffer_hex(x.copy().ravel(order='K').view('i1')) return (data, x.dtype, x.shape, x.strides) normalize_token.register(np.dtype, repr) normalize_token.register(np.generic, repr) @normalize_token.register(np.ufunc) def normalize_ufunc(x): try: name = x.__name__ if getattr(np, name) is x: return 'np.' + name except AttributeError: return normalize_function(x) dask-0.16.0/dask/bytes/000077500000000000000000000000001320364734500145775ustar00rootroot00000000000000dask-0.16.0/dask/bytes/__init__.py000066400000000000000000000003551320364734500167130ustar00rootroot00000000000000from __future__ import print_function, division, absolute_import from ..utils import ignoring from .core import read_bytes, open_files, open_text_files from . import local with ignoring(ImportError, SyntaxError): from . 
import s3 dask-0.16.0/dask/bytes/compression.py000066400000000000000000000053131320364734500175140ustar00rootroot00000000000000from __future__ import print_function, division, absolute_import import bz2 import sys import zlib from toolz import identity from ..compatibility import gzip_compress, gzip_decompress, GzipFile from ..utils import ignoring def noop_file(file, **kwargs): return file compress = {'gzip': gzip_compress, 'zlib': zlib.compress, 'bz2': bz2.compress, None: identity} decompress = {'gzip': gzip_decompress, 'zlib': zlib.decompress, 'bz2': bz2.decompress, None: identity} files = {'gzip': lambda f, **kwargs: GzipFile(fileobj=f, **kwargs), None: noop_file} seekable_files = {None: noop_file} with ignoring(ImportError): import snappy compress['snappy'] = snappy.compress decompress['snappy'] = snappy.decompress with ignoring(ImportError): import lz4 compress['lz4'] = lz4.LZ4_compress decompress['lz4'] = lz4.LZ4_uncompress with ignoring(ImportError): from ..compatibility import LZMAFile, lzma_compress, lzma_decompress compress['xz'] = lzma_compress decompress['xz'] = lzma_decompress files['xz'] = LZMAFile # Seekable xz files actually tend to scan whole file - see `get_xz_blocks` # with ignoring(ImportError): # import lzma # seekable_files['xz'] = lzma.LZMAFile # # with ignoring(ImportError): # import lzmaffi # seekable_files['xz'] = lzmaffi.LZMAFile if sys.version_info[0] >= 3: import bz2 files['bz2'] = bz2.BZ2File def get_xz_blocks(fp): from lzmaffi import (STREAM_HEADER_SIZE, decode_stream_footer, decode_index, LZMAError) fp.seek(0, 2) def _peek(f, size): data = f.read(size) f.seek(-size, 1) return data if fp.tell() < 2 * STREAM_HEADER_SIZE: raise LZMAError("file too small") # read stream paddings (4 bytes each) fp.seek(-4, 1) padding = 0 while _peek(fp, 4) == b'\x00\x00\x00\x00': fp.seek(-4, 1) padding += 4 fp.seek(-STREAM_HEADER_SIZE + 4, 1) stream_flags = decode_stream_footer(_peek(fp, STREAM_HEADER_SIZE)) fp.seek(-stream_flags.backward_size, 1) index = decode_index(_peek(fp, stream_flags.backward_size), padding) return {'offsets': [b.compressed_file_offset for i, b in index], 'lengths': [b.unpadded_size for i, b in index], 'check': stream_flags.check} def xz_decompress(data, check): from lzmaffi import decode_block_header_size, LZMADecompressor, FORMAT_BLOCK hsize = decode_block_header_size(data[:1]) header = data[:hsize] dc = LZMADecompressor(format=FORMAT_BLOCK, header=header, unpadded_size=len(data), check=check) return dc.decompress(data[len(header):]) dask-0.16.0/dask/bytes/core.py000066400000000000000000000464711320364734500161150ustar00rootroot00000000000000from __future__ import print_function, division, absolute_import import io import os from distutils.version import LooseVersion from warnings import warn from toolz import merge from .compression import seekable_files, files as compress_files from .utils import (SeekableFile, read_block, infer_compression, infer_storage_options, build_name_function) from ..compatibility import PY2, unicode from ..base import tokenize from ..delayed import delayed from ..utils import import_required, ensure_bytes, ensure_unicode, is_integer def write_block_to_file(data, lazy_file): """ Parameters ---------- data : data to write Either str/bytes, or iterable producing those, or something file-like which can be read. 
lazy_file : file-like or file context gives writable backend-dependent file-like object when used with `with` """ binary = 'b' in str(getattr(lazy_file, 'mode', 'b')) with lazy_file as f: if isinstance(f, io.TextIOWrapper): binary = False if binary: ensure = ensure_bytes else: ensure = ensure_unicode if isinstance(data, (str, bytes, unicode)): f.write(ensure(data)) elif isinstance(data, io.IOBase): # file-like out = True while out: out = data.read(64 * 2 ** 10) f.write(ensure(out)) else: # iterable, e.g., bag contents start = False for d in data: if start: if binary: try: f.write(b'\n') except TypeError: binary = False f.write('\n') else: f.write(u'\n') else: start = True f.write(ensure(d)) def write_bytes(data, urlpath, name_function=None, compression=None, encoding=None, **kwargs): """Write dask data to a set of files Parameters ---------- data: list of delayed objects Producing data to write urlpath: list or template Location(s) to write to, including backend specifier. name_function: function or None If urlpath is a template, use this function to create a string out of the sequence number. compression: str or None Compression algorithm to apply (e.g., gzip), if any encoding: str or None If None, data must produce bytes, else will be encoded. kwargs: passed to filesystem constructor """ mode = 'wb' if encoding is None else 'wt' fs, names, myopen = get_fs_paths_myopen(urlpath, compression, mode, name_function=name_function, num=len(data), encoding=encoding, **kwargs) values = [delayed(write_block_to_file, pure=False)(d, myopen(f, mode='wb')) for d, f in zip(data, names)] return values, names def read_bytes(urlpath, delimiter=None, not_zero=False, blocksize=2**27, sample=True, compression=None, **kwargs): """ Convert path to a list of delayed values The path may be a filename like ``'2015-01-01.csv'`` or a globstring like ``'2015-*-*.csv'``. The path may be preceded by a protocol, like ``s3://`` or ``hdfs://`` if those libraries are installed. This cleanly breaks data by a delimiter if given, so that block boundaries start directly after a delimiter and end on the delimiter. Parameters ---------- urlpath: string Absolute or relative filepath, URL (may include protocols like ``s3://``), or globstring pointing to data. delimiter: bytes An optional delimiter, like ``b'\\n'`` on which to split blocks of bytes. not_zero: bool Force seek of start-of-file delimiter, discarding header. blocksize: int (=128MB) Chunk size in bytes compression: string or None String like 'gzip' or 'xz'. Must support efficient random access. sample: bool or int Whether or not to return a header sample. If an integer is given it is used as sample size, otherwise the default sample size is 10kB. **kwargs: dict Extra options that make sense to a particular storage connection, e.g. host, port, username, password, etc. Examples -------- >>> sample, blocks = read_bytes('2015-*-*.csv', delimiter=b'\\n') # doctest: +SKIP >>> sample, blocks = read_bytes('s3://bucket/2015-*-*.csv', delimiter=b'\\n') # doctest: +SKIP Returns ------- A sample header and list of ``dask.Delayed`` objects or list of lists of delayed objects if ``fn`` is a globstring. 
""" fs, paths, myopen = get_fs_paths_myopen(urlpath, compression, 'rb', None, **kwargs) client = None if len(paths) == 0: raise IOError("%s resolved to no files" % urlpath) if blocksize is not None: if not is_integer(blocksize): raise TypeError("blocksize must be an integer") blocksize = int(blocksize) blocks, lengths, machines = fs.get_block_locations(paths) if blocks: offsets = blocks elif blocksize is None: offsets = [[0]] * len(paths) lengths = [[None]] * len(offsets) machines = [[None]] * len(offsets) else: offsets = [] lengths = [] for path in paths: try: size = fs.logical_size(path, compression) except KeyError: raise ValueError('Cannot read compressed files (%s) in byte chunks,' 'use blocksize=None' % infer_compression(urlpath)) off = list(range(0, size, blocksize)) length = [blocksize] * len(off) if not_zero: off[0] = 1 length[0] -= 1 offsets.append(off) lengths.append(length) machines = [[None]] * len(offsets) out = [] for path, offset, length, machine in zip(paths, offsets, lengths, machines): ukey = fs.ukey(path) keys = ['read-block-%s-%s' % (o, tokenize(path, compression, offset, ukey, kwargs, delimiter)) for o in offset] L = [delayed(read_block_from_file)(myopen(path, mode='rb'), o, l, delimiter, dask_key_name=key) for (o, key, l) in zip(offset, keys, length)] out.append(L) if machine is not None: # blocks are in preferred locations if client is None: try: from distributed.client import default_client client = default_client() except (ImportError, ValueError): # no distributed client client = False if client: restrictions = {key: w for key, w in zip(keys, machine)} client._send_to_scheduler({'op': 'update-graph', 'tasks': {}, 'dependencies': [], 'keys': [], 'restrictions': restrictions, 'loose_restrictions': list(restrictions), 'client': client.id}) if sample is not True: nbytes = sample else: nbytes = 10000 if sample: # myopen = OpenFileCreator(urlpath, compression) with myopen(paths[0], 'rb') as f: sample = read_block(f, 0, nbytes, delimiter) return sample, out def read_block_from_file(lazy_file, off, bs, delimiter): with lazy_file as f: return read_block(f, off, bs, delimiter) class OpenFileCreator(object): """ Produces a function-like instance, which generates open file contexts Analyses the passed URL to determine the appropriate backend (local file, s3, etc.), and then acts something like the builtin `open` in with a context, where the further options such as compression are applied to the file to be opened. Parameters ---------- urlpath: str Template URL, like the files we wish to access, with optional backend-specific parts compression: str or None One of the keys of `compress_files` or None; all files opened will use this compression. If `'infer'`, will choose based on the urlpath text: bool Whether files should be binary or text encoding: str If files are text, the encoding to use errors: str ['strict'] How to handle encoding errors for text files kwargs: passed to filesystem instance constructor Examples -------- >>> ofc = OpenFileCreator('2015-*-*.csv') # doctest: +SKIP >>> with ofc('2015-12-10.csv', 'rb') as f: # doctest: +SKIP ... 
f.read(10) # doctest: +SKIP """ def __init__(self, urlpath, compression=None, text=False, encoding='utf8', errors=None, **kwargs): if compression == 'infer': compression = infer_compression(urlpath) if compression is not None and compression not in compress_files: raise ValueError("Compression type %s not supported" % compression) self.compression = compression self.text = text self.encoding = encoding self.errors = errors self.storage_options = infer_storage_options( urlpath, inherit_storage_options=kwargs) self.protocol = self.storage_options.pop('protocol') ensure_protocol(self.protocol) try: self.fs = _filesystems[self.protocol](**self.storage_options) except KeyError: raise NotImplementedError("Unknown protocol %s (%s)" % (self.protocol, urlpath)) def __call__(self, path, mode='rb'): """Produces `OpenFile` instance""" return OpenFile(self.fs.open, path, self.compression, mode, self.text, self.encoding, self.errors) def __dask_tokenize__(self): return (self.compression, self.text, self.encoding, self.protocol, self.storage_options) class OpenFile(object): """ File-like object to be used in a context These instances are safe to serialize, as the low-level file object is not created until invoked using `with`. Parameters ---------- myopen: function Opens the backend file. Should accept path and mode, as the builtin open path: str Location to open compression: str or None Compression to apply mode: str like 'rb' Mode of the opened file text: bool Whether to wrap the file to be text-like encoding: if using text errors: if using text """ def __init__(self, myopen, path, compression, mode, text, encoding, errors=None): self.myopen = myopen self.path = path self.compression = compression self.mode = mode self.text = text self.encoding = encoding self.closers = None self.fobjects = None self.errors = errors self.f = None def __enter__(self): mode = self.mode.replace('t', '').replace('b', '') + 'b' f = f2 = self.myopen(self.path, mode=mode) CompressFile = merge(seekable_files, compress_files)[self.compression] if PY2: f2 = SeekableFile(f) f3 = CompressFile(f2, mode=mode) if self.text: f4 = io.TextIOWrapper(f3, encoding=self.encoding, errors=self.errors) else: f4 = f3 self.closers = [f4.close, f3.close, f2.close, f.close] self.fobjects = [f4, f3, f2, f] self.f = f4 f4.close = self.close return f4 def __exit__(self, *args): self.close() def close(self): """ Close all encapsulated file objects """ [_() for _ in self.closers] del self.closers[:] del self.fobjects[:] self.f = None def open_files(urlpath, compression=None, mode='rb', encoding='utf8', errors=None, name_function=None, num=1, **kwargs): """ Given path return dask.delayed file-like objects Parameters ---------- urlpath: string Absolute or relative filepath, URL (may include protocols like ``s3://``), or globstring pointing to data. compression: string Compression to use. See ``dask.bytes.compression.files`` for options. mode: 'rb', 'wt', etc. encoding: str For text mode only errors: None or str Passed to TextIOWrapper in text mode name_function: function or None if opening a set of files for writing, those files do not yet exist, so we need to generate their names by formatting the urlpath for each sequence number num: int [1] if writing mode, number of files we expect to create (passed to name+function) **kwargs: dict Extra options that make sense to a particular storage connection, e.g. host, port, username, password, etc. 
Examples -------- >>> files = open_files('2015-*-*.csv') # doctest: +SKIP >>> files = open_files('s3://bucket/2015-*-*.csv.gz', compression='gzip') # doctest: +SKIP Returns ------- List of ``dask.delayed`` objects that compute to file-like objects """ fs, paths, myopen = get_fs_paths_myopen(urlpath, compression, mode, encoding=encoding, num=num, name_function=name_function, errors=errors, **kwargs) return [myopen(path, mode) for path in paths] def get_fs_paths_myopen(urlpath, compression, mode, encoding='utf8', errors='strict', num=1, name_function=None, **kwargs): if hasattr(urlpath, 'name'): # deal with pathlib.Path objects - must be local urlpath = str(urlpath) ispath = True else: ispath = False if isinstance(urlpath, (str, unicode)): myopen = OpenFileCreator(urlpath, compression, text='b' not in mode, encoding=encoding, errors=errors, **kwargs) if ispath and myopen.protocol != 'file': raise ValueError("Only use pathlib.Path with local files.") if 'w' in mode: paths = _expand_paths(urlpath, name_function, num) elif "*" in urlpath: paths = myopen.fs.glob(urlpath) else: paths = [urlpath] elif isinstance(urlpath, (list, set, tuple, dict)): if hasattr(urlpath[0], 'name'): # deal with pathlib.Path objects - must be local urlpath = [str(u) for u in urlpath] ispath = True else: ispath = False myopen = OpenFileCreator(urlpath[0], compression, text='b' not in mode, encoding=encoding, **kwargs) if ispath and myopen.protocol != 'file': raise ValueError("Only use pathlib.Path with local files.") paths = urlpath else: raise ValueError('url type not understood: %s' % urlpath) return myopen.fs, paths, myopen def open_text_files(urlpath, compression=None, mode='rt', encoding='utf8', errors='strict', **kwargs): """ Given path return dask.delayed file-like objects in text mode Parameters ---------- urlpath: string Absolute or relative filepath, URL (may include protocols like ``s3://``), or globstring pointing to data. encoding: string errors: string compression: string Compression to use. See ``dask.bytes.compression.files`` for options. **kwargs: dict Extra options that make sense to a particular storage connection, e.g. host, port, username, password, etc. Examples -------- >>> files = open_text_files('2015-*-*.csv', encoding='utf-8') # doctest: +SKIP >>> files = open_text_files('s3://bucket/2015-*-*.csv') # doctest: +SKIP Returns ------- List of ``dask.delayed`` objects that compute to text file-like objects """ return open_files(urlpath, compression, mode.replace('b', 't'), encoding, errors=errors, **kwargs) def _expand_paths(path, name_function, num): if isinstance(path, (str, unicode)): if path.count('*') > 1: raise ValueError("Output path spec must contain at most one '*'.") if name_function is None: name_function = build_name_function(num - 1) if '*' not in path: path = os.path.join(path, '*.part') formatted_names = [name_function(i) for i in range(num)] if formatted_names != sorted(formatted_names): warn("In order to preserve order between partitions " "name_function must preserve the order of its input") paths = [path.replace('*', name_function(i)) for i in range(num)] elif isinstance(path, (tuple, list, set)): assert len(path) == num paths = path else: raise ValueError("""Path should be either" 1. A list of paths -- ['foo.json', 'bar.json', ...] 2. A directory -- 'foo/ 3. 
A path with a * in it -- 'foo.*.json'""") return paths def ensure_protocol(protocol): if protocol == 's3': import_required('s3fs', "Need to install `s3fs` library for s3 support\n" " conda install s3fs -c conda-forge\n" " or\n" " pip install s3fs") elif protocol in ('gs', 'gcs'): import_required('gcsfs', "Need to install `gcsfs` library for Google Cloud Storage support\n" " conda install gcsfs -c conda-forge\n" " or\n" " pip install gcsfs") elif protocol == 'hdfs': msg = ("Need to install `hdfs3 > 0.2.0` for HDFS support\n" " conda install hdfs3 -c conda-forge") hdfs3 = import_required('hdfs3', msg) if not LooseVersion(hdfs3.__version__) > '0.2.0': raise RuntimeError(msg) import hdfs3.dask # register dask filesystem elif protocol in _filesystems: return else: raise ValueError("Unknown protocol %s" % protocol) _filesystems = dict() # see .local.LocalFileSystem for reference implementation class FileSystem(object): def logical_size(self, path, compression): if compression == 'infer': compression = infer_compression(path) if compression is None: return self.size(path) else: with self.open(path, 'rb') as f: f = SeekableFile(f) g = seekable_files[compression](f) g.seek(0, 2) result = g.tell() g.close() return result def get_block_locations(self, path): return None, None, None def get_pyarrow_filesystem(fs): """Get an equivalent pyarrow filesystem. Not for public use, will be removed once a consistent filesystem api is defined.""" try: return fs._get_pyarrow_filesystem() except AttributeError: raise NotImplementedError("Using pyarrow with a %r " "filesystem object" % type(fs).__name__) dask-0.16.0/dask/bytes/local.py000066400000000000000000000044661320364734500162550ustar00rootroot00000000000000from __future__ import print_function, division, absolute_import from glob import glob import os from . import core from .utils import infer_storage_options from ..base import tokenize class LocalFileSystem(core.FileSystem): """API spec for the methods a filesystem A filesystem must provide these methods, if it is to be registered as a backend for dask. Implementation for local disc""" sep = os.sep def __init__(self, **storage_options): """ Parameters ---------- storage_options: key-value May be credentials, or other configuration specific to the backend. """ self.cwd = os.getcwd() def _trim_filename(self, fn): path = infer_storage_options(fn)['path'] if not os.path.isabs(path): path = os.path.normpath(os.path.join(self.cwd, path)) return path def glob(self, path): """For a template path, return matching files""" path = self._trim_filename(path) return sorted(glob(path)) def mkdirs(self, path): """Make any intermediate directories to make path writable""" path = self._trim_filename(path) try: os.makedirs(path) except OSError: assert os.path.isdir(path) def open(self, path, mode='rb', **kwargs): """Make a file-like object Parameters ---------- mode: string normally "rb", "wb" or "ab" or other. kwargs: key-value Any other parameters, such as buffer size. May be better to set these on the filesystem instance, to apply to all files created by it. Not used for local. 
""" path = self._trim_filename(path) return open(path, mode=mode) def ukey(self, path): """Unique identifier, so we can tell if a file changed""" path = self._trim_filename(path) return tokenize(path, os.stat(path).st_mtime) def size(self, path): """Size in bytes of the file at path""" path = self._trim_filename(path) return os.stat(path).st_size def _get_pyarrow_filesystem(self): """Get an equivalent pyarrow filesystem""" import pyarrow as pa return pa.filesystem.LocalFileSystem.get_instance() core._filesystems['file'] = LocalFileSystem dask-0.16.0/dask/bytes/s3.py000066400000000000000000000044051320364734500155010ustar00rootroot00000000000000from __future__ import print_function, division, absolute_import from s3fs import S3FileSystem from . import core from .utils import infer_storage_options class DaskS3FileSystem(S3FileSystem, core.FileSystem): sep = '/' def __init__(self, key=None, username=None, secret=None, password=None, path=None, host=None, s3=None, **kwargs): if username is not None: if key is not None: raise KeyError("S3 storage options got secrets argument " "collision. Please, use either `key` " "storage option or password field in URLpath, " "not both options together.") key = username if key is not None: kwargs['key'] = key if password is not None: if secret is not None: raise KeyError("S3 storage options got secrets argument " "collision. Please, use either `secret` " "storage option or password field in URLpath, " "not both options together.") secret = password if secret is not None: kwargs['secret'] = secret # S3FileSystem.__init__(self, kwargs) # not sure what do do here S3FileSystem.__init__(self, **kwargs) def _trim_filename(self, fn): so = infer_storage_options(fn) return so.get('host', '') + so['path'] def open(self, path, mode='rb'): s3_path = self._trim_filename(path) f = S3FileSystem.open(self, s3_path, mode=mode) return f def glob(self, path): s3_path = self._trim_filename(path) return ['s3://%s' % s for s in S3FileSystem.glob(self, s3_path)] def mkdirs(self, path): pass # no need to pre-make paths on S3 def ukey(self, path): s3_path = self._trim_filename(path) return self.info(s3_path)['ETag'] def size(self, path): s3_path = self._trim_filename(path) return self.info(s3_path)['Size'] def _get_pyarrow_filesystem(self): """Get an equivalent pyarrow fileystem""" import pyarrow as pa return pa.filesystem.S3FSWrapper(self) core._filesystems['s3'] = DaskS3FileSystem dask-0.16.0/dask/bytes/tests/000077500000000000000000000000001320364734500157415ustar00rootroot00000000000000dask-0.16.0/dask/bytes/tests/__init__.py000066400000000000000000000000001320364734500200400ustar00rootroot00000000000000dask-0.16.0/dask/bytes/tests/test_bytes_utils.py000066400000000000000000000104601320364734500217210ustar00rootroot00000000000000import io import pytest from dask.bytes.utils import read_block, seek_delimiter, infer_storage_options def test_read_block(): delimiter = b'\n' data = delimiter.join([b'123', b'456', b'789']) f = io.BytesIO(data) assert read_block(f, 1, 2) == b'23' assert read_block(f, 0, 1, delimiter=b'\n') == b'123\n' assert read_block(f, 0, 2, delimiter=b'\n') == b'123\n' assert read_block(f, 0, 3, delimiter=b'\n') == b'123\n' assert read_block(f, 0, 5, delimiter=b'\n') == b'123\n456\n' assert read_block(f, 0, 8, delimiter=b'\n') == b'123\n456\n789' assert read_block(f, 0, 100, delimiter=b'\n') == b'123\n456\n789' assert read_block(f, 1, 1, delimiter=b'\n') == b'' assert read_block(f, 1, 5, delimiter=b'\n') == b'456\n' assert read_block(f, 1, 8, delimiter=b'\n') == 
b'456\n789' for ols in [[(0, 3), (3, 3), (6, 3), (9, 2)], [(0, 4), (4, 4), (8, 4)]]: out = [read_block(f, o, l, b'\n') for o, l in ols] assert b"".join(filter(None, out)) == data def test_seek_delimiter_endline(): f = io.BytesIO(b'123\n456\n789') # if at zero, stay at zero seek_delimiter(f, b'\n', 5) assert f.tell() == 0 # choose the first block for bs in [1, 5, 100]: f.seek(1) seek_delimiter(f, b'\n', blocksize=bs) assert f.tell() == 4 # handle long delimiters well, even with short blocksizes f = io.BytesIO(b'123abc456abc789') for bs in [1, 2, 3, 4, 5, 6, 10]: f.seek(1) seek_delimiter(f, b'abc', blocksize=bs) assert f.tell() == 6 # End at the end f = io.BytesIO(b'123\n456') f.seek(5) seek_delimiter(f, b'\n', 5) assert f.tell() == 7 def test_ensure_protocol(): try: import hdfs3 # noqa: F401 pytest.skip() except ImportError: pass dd = pytest.importorskip('dask.dataframe') try: dd.read_csv('hdfs://data/*.csv') except RuntimeError as e: assert "hdfs3" in str(e) def test_infer_storage_options(): so = infer_storage_options('/mnt/datasets/test.csv') assert so.pop('protocol') == 'file' assert so.pop('path') == '/mnt/datasets/test.csv' assert not so assert infer_storage_options('./test.csv')['path'] == './test.csv' assert infer_storage_options('../test.csv')['path'] == '../test.csv' so = infer_storage_options('C:\\test.csv') assert so.pop('protocol') == 'file' assert so.pop('path') == 'C:\\test.csv' assert not so assert infer_storage_options('d:\\test.csv')['path'] == 'd:\\test.csv' assert infer_storage_options('\\test.csv')['path'] == '\\test.csv' assert infer_storage_options('.\\test.csv')['path'] == '.\\test.csv' assert infer_storage_options('test.csv')['path'] == 'test.csv' so = infer_storage_options( 'hdfs://username:pwd@Node:123/mnt/datasets/test.csv?q=1#fragm', inherit_storage_options={'extra': 'value'}) assert so.pop('protocol') == 'hdfs' assert so.pop('username') == 'username' assert so.pop('password') == 'pwd' assert so.pop('host') == 'Node' assert so.pop('port') == 123 assert so.pop('path') == '/mnt/datasets/test.csv' assert so.pop('url_query') == 'q=1' assert so.pop('url_fragment') == 'fragm' assert so.pop('extra') == 'value' assert not so so = infer_storage_options('hdfs://User-name@Node-name.com/mnt/datasets/test.csv') assert so.pop('username') == 'User-name' assert so.pop('host') == 'Node-name.com' assert infer_storage_options('s3://Bucket-name.com/test.csv')['host'] == 'Bucket-name.com' assert infer_storage_options('http://127.0.0.1:8080/test.csv')['host'] == '127.0.0.1' with pytest.raises(KeyError): infer_storage_options('file:///bucket/file.csv', {'path': 'collide'}) with pytest.raises(KeyError): infer_storage_options('hdfs:///bucket/file.csv', {'protocol': 'collide'}) @pytest.mark.parametrize('urlpath, expected_path', ( (r'c:\foo\bar', r'c:\foo\bar'), (r'C:\\foo\bar', r'C:\\foo\bar'), (r'c:/foo/bar', r'c:/foo/bar'), (r'file:///c|\foo\bar', r'c:\foo\bar'), (r'file:///C|/foo/bar', r'C:/foo/bar'), (r'file:///C:/foo/bar', r'C:/foo/bar'), )) def test_infer_storage_options_c(urlpath, expected_path): so = infer_storage_options(urlpath) assert so['protocol'] == 'file' assert so['path'] == expected_path dask-0.16.0/dask/bytes/tests/test_compression.py000066400000000000000000000014611320364734500217150ustar00rootroot00000000000000from io import BytesIO import pytest from dask.bytes.compression import compress, decompress, files def test_compression(): assert set(compress) == set(decompress) a = b'Hello, world!' 
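    # Round-trip every registered codec: decompressing the compressed payload
    # must give back the original bytes, and any real codec (key is not None)
    # must actually change the payload.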
for k in compress: comp = compress[k] decomp = decompress[k] b = comp(a) c = decomp(b) assert a == c if k is not None: assert a != b @pytest.mark.parametrize('fmt,File', files.items()) def test_files(fmt,File): if fmt is None: return data = b'1234' * 1000 out = BytesIO() f = File(out, mode='wb') f.write(data) f.close() out.seek(0) compressed = out.read() assert len(data) > len(compressed) b = BytesIO(compressed) g = File(b, mode='rb') data2 = g.read() g.close() assert data == data2 dask-0.16.0/dask/bytes/tests/test_local.py000066400000000000000000000316521320364734500204530ustar00rootroot00000000000000from __future__ import print_function, division, absolute_import import gzip import os from time import sleep import sys import pytest from toolz import concat, valmap, partial from dask import compute, get, delayed from dask.compatibility import FileNotFoundError, unicode from dask.utils import filetexts from dask.bytes import compression from dask.bytes.local import LocalFileSystem from dask.bytes.core import (open_text_files, write_bytes, read_bytes, open_files, OpenFileCreator, FileSystem, get_pyarrow_filesystem) compute = partial(compute, get=get) files = {'.test.accounts.1.json': (b'{"amount": 100, "name": "Alice"}\n' b'{"amount": 200, "name": "Bob"}\n' b'{"amount": 300, "name": "Charlie"}\n' b'{"amount": 400, "name": "Dennis"}\n'), '.test.accounts.2.json': (b'{"amount": 500, "name": "Alice"}\n' b'{"amount": 600, "name": "Bob"}\n' b'{"amount": 700, "name": "Charlie"}\n' b'{"amount": 800, "name": "Dennis"}\n')} try: # used only in test_with_urls - may be more generally useful import pathlib def to_uri(path): return pathlib.Path(os.path.abspath(path)).as_uri() except (ImportError, NameError): import urlparse, urllib def to_uri(path): return urlparse.urljoin( 'file:', urllib.pathname2url(os.path.abspath(path))) def test_read_bytes(): with filetexts(files, mode='b'): sample, values = read_bytes('.test.accounts.*') assert isinstance(sample, bytes) assert sample[:5] == files[sorted(files)[0]][:5] assert sample.endswith(b'\n') assert isinstance(values, (list, tuple)) assert isinstance(values[0], (list, tuple)) assert hasattr(values[0][0], 'dask') assert sum(map(len, values)) >= len(files) results = compute(*concat(values)) assert set(results) == set(files.values()) def test_read_bytes_sample_delimiter(): with filetexts(files, mode='b'): sample, values = read_bytes('.test.accounts.*', sample=80, delimiter=b'\n') assert sample.endswith(b'\n') sample, values = read_bytes('.test.accounts.1.json', sample=80, delimiter=b'\n') assert sample.endswith(b'\n') sample, values = read_bytes('.test.accounts.1.json', sample=2, delimiter=b'\n') assert sample.endswith(b'\n') def test_read_bytes_blocksize_none(): with filetexts(files, mode='b'): sample, values = read_bytes('.test.accounts.*', blocksize=None) assert sum(map(len, values)) == len(files) def test_read_bytes_blocksize_float(): with filetexts(files, mode='b'): sample, vals = read_bytes('.test.account*', blocksize=5.0) results = compute(*concat(vals)) ourlines = b"".join(results).split(b'\n') testlines = b"".join(files.values()).split(b'\n') assert set(ourlines) == set(testlines) with pytest.raises(TypeError): read_bytes('.test.account*', blocksize=5.5) def test_with_urls(): with filetexts(files, mode='b'): # OS-independent file:// URI with glob * url = to_uri('.test.accounts.') + '*' sample, values = read_bytes(url, blocksize=None) assert sum(map(len, values)) == len(files) @pytest.mark.skipif(sys.platform == 'win32', reason="pathlib and moto clash on 
windows") def test_with_paths(): pathlib = pytest.importorskip('pathlib') with filetexts(files, mode='b'): url = pathlib.Path('./.test.accounts.*') sample, values = read_bytes(url, blocksize=None) assert sum(map(len, values)) == len(files) with pytest.raises(OSError): # relative path doesn't work url = pathlib.Path('file://.test.accounts.*') read_bytes(url, blocksize=None) def test_read_bytes_block(): with filetexts(files, mode='b'): for bs in [5, 15, 45, 1500]: sample, vals = read_bytes('.test.account*', blocksize=bs) assert (list(map(len, vals)) == [(len(v) // bs + 1) for v in files.values()]) results = compute(*concat(vals)) assert (sum(len(r) for r in results) == sum(len(v) for v in files.values())) ourlines = b"".join(results).split(b'\n') testlines = b"".join(files.values()).split(b'\n') assert set(ourlines) == set(testlines) def test_read_bytes_delimited(): with filetexts(files, mode='b'): for bs in [5, 15, 45, 1500]: _, values = read_bytes('.test.accounts*', blocksize=bs, delimiter=b'\n') _, values2 = read_bytes('.test.accounts*', blocksize=bs, delimiter=b'foo') assert ([a.key for a in concat(values)] != [b.key for b in concat(values2)]) results = compute(*concat(values)) res = [r for r in results if r] assert all(r.endswith(b'\n') for r in res) ourlines = b''.join(res).split(b'\n') testlines = b"".join(files[k] for k in sorted(files)).split(b'\n') assert ourlines == testlines # delimiter not at the end d = b'}' _, values = read_bytes('.test.accounts*', blocksize=bs, delimiter=d) results = compute(*concat(values)) res = [r for r in results if r] # All should end in } except EOF assert sum(r.endswith(b'}') for r in res) == len(res) - 2 ours = b"".join(res) test = b"".join(files[v] for v in sorted(files)) assert ours == test fmt_bs = ([(fmt, None) for fmt in compression.files] + [(fmt, 10) for fmt in compression.seekable_files]) @pytest.mark.parametrize('fmt,blocksize', fmt_bs) def test_compression(fmt, blocksize): compress = compression.compress[fmt] files2 = valmap(compress, files) with filetexts(files2, mode='b'): sample, values = read_bytes('.test.accounts.*.json', blocksize=blocksize, delimiter=b'\n', compression=fmt) assert sample[:5] == files[sorted(files)[0]][:5] assert sample.endswith(b'\n') results = compute(*concat(values)) assert (b''.join(results) == b''.join([files[k] for k in sorted(files)])) def test_registered_read_bytes(): from dask.bytes.core import read_bytes with filetexts(files, mode='b'): sample, values = read_bytes('.test.accounts.*') results = compute(*concat(values)) assert set(results) == set(files.values()) def test_registered_open_files(): with filetexts(files, mode='b'): myfiles = open_files('.test.accounts.*') assert len(myfiles) == len(files) data = [] for file in myfiles: with file as f: data.append(f.read()) assert list(data) == [files[k] for k in sorted(files)] @pytest.mark.parametrize('encoding', ['utf-8', 'ascii']) def test_registered_open_text_files(encoding): from dask.bytes.core import open_text_files with filetexts(files, mode='b'): myfiles = open_text_files('.test.accounts.*', encoding=encoding) assert len(myfiles) == len(files) data = [] for file in myfiles: with file as f: data.append(f.read()) assert list(data) == [files[k].decode(encoding) for k in sorted(files)] def test_open_files(): with filetexts(files, mode='b'): myfiles = open_files('.test.accounts.*') assert len(myfiles) == len(files) for lazy_file, data_file in zip(myfiles, sorted(files)): with lazy_file as f: x = f.read() assert x == files[data_file] 
@pytest.mark.parametrize('fmt', list(compression.files)) def test_compression_binary(fmt): files2 = valmap(compression.compress[fmt], files) with filetexts(files2, mode='b'): myfiles = open_files('.test.accounts.*', compression=fmt) data = [] for file in myfiles: with file as f: data.append(f.read()) assert list(data) == [files[k] for k in sorted(files)] @pytest.mark.parametrize('fmt', list(compression.files)) def test_compression_text(fmt): files2 = valmap(compression.compress[fmt], files) with filetexts(files2, mode='b'): myfiles = open_text_files('.test.accounts.*', compression=fmt) data = [] for file in myfiles: with file as f: data.append(f.read()) assert list(data) == [files[k].decode() for k in sorted(files)] @pytest.mark.parametrize('fmt', list(compression.seekable_files)) def test_getsize(fmt): compress = compression.compress[fmt] with filetexts({'.tmp.getsize': compress(b'1234567890')}, mode='b'): fs = LocalFileSystem() assert fs.logical_size('.tmp.getsize', fmt) == 10 def test_bad_compression(): with filetexts(files, mode='b'): for func in [read_bytes, open_files, open_text_files]: with pytest.raises(ValueError): sample, values = func('.test.accounts.*', compression='not-found') def test_not_found(): fn = 'not-a-file' with pytest.raises((FileNotFoundError, OSError)) as e: read_bytes(fn) assert fn in str(e) @pytest.mark.slow def test_names(): with filetexts(files, mode='b'): _, a = read_bytes('.test.accounts.*') _, b = read_bytes('.test.accounts.*') a = list(concat(a)) b = list(concat(b)) assert [aa._key for aa in a] == [bb._key for bb in b] sleep(1) for fn in files: with open(fn, 'ab') as f: f.write(b'x') _, c = read_bytes('.test.accounts.*') c = list(concat(c)) assert [aa._key for aa in a] != [cc._key for cc in c] def test_simple_write(tmpdir): tmpdir = str(tmpdir) make_bytes = lambda: b'000' some_bytes = delayed(make_bytes)() data = [some_bytes, some_bytes] out = write_bytes(data, tmpdir) assert len(out) == 2 compute(*out) files = os.listdir(tmpdir) assert len(files) == 2 assert '0.part' in files d = open(os.path.join(tmpdir, files[0]), 'rb').read() assert d == b'000' def test_compressed_write(tmpdir): tmpdir = str(tmpdir) make_bytes = lambda: b'000' some_bytes = delayed(make_bytes)() data = [some_bytes, some_bytes] out = write_bytes(data, os.path.join(tmpdir, 'bytes-*.gz'), compression='gzip') compute(*out) files = os.listdir(tmpdir) assert len(files) == 2 assert 'bytes-0.gz' in files import gzip d = gzip.GzipFile(os.path.join(tmpdir, files[0])).read() assert d == b'000' def test_open_files_write(tmpdir): tmpdir = str(tmpdir) files = open_files([os.path.join(tmpdir, 'test1'), os.path.join(tmpdir, 'test2')], mode='wb') assert len(files) == 2 assert files[0].mode == 'wb' def test_pickability_of_lazy_files(tmpdir): cloudpickle = pytest.importorskip('cloudpickle') fn = os.path.join(str(tmpdir), 'foo') with open(fn, 'wb') as f: f.write(b'1') opener = OpenFileCreator('file://foo.py', open=open) opener2 = cloudpickle.loads(cloudpickle.dumps(opener)) assert type(opener2.fs) == type(opener.fs) lazy_file = opener(fn, mode='rt') lazy_file2 = cloudpickle.loads(cloudpickle.dumps(lazy_file)) assert lazy_file.path == lazy_file2.path with lazy_file as f: pass lazy_file3 = cloudpickle.loads(cloudpickle.dumps(lazy_file)) assert lazy_file.path == lazy_file3.path def test_py2_local_bytes(tmpdir): fn = str(tmpdir / 'myfile.txt.gz') with gzip.open(fn, mode='wb') as f: f.write(b'hello\nworld') ofc = OpenFileCreator(fn, text=True, open=open, mode='rt', compression='gzip', encoding='utf-8') lazy_file 
= ofc(fn) with lazy_file as f: assert all(isinstance(line, unicode) for line in f) def test_abs_paths(tmpdir): tmpdir = str(tmpdir) here = os.getcwd() os.chdir(tmpdir) with open('tmp', 'w') as f: f.write('hi') out = LocalFileSystem().glob('*') assert len(out) == 1 assert os.sep in out[0] assert tmpdir in out[0] and 'tmp' in out[0] fs = LocalFileSystem() os.chdir(here) assert fs.open('tmp', 'r').read() == 'hi' class UnknownFileSystem(FileSystem): pass def test_get_pyarrow_filesystem(): pa = pytest.importorskip('pyarrow') fs = LocalFileSystem() assert isinstance(get_pyarrow_filesystem(fs), pa.filesystem.LocalFileSystem) with pytest.raises(NotImplementedError): get_pyarrow_filesystem(UnknownFileSystem()) dask-0.16.0/dask/bytes/tests/test_s3.py000066400000000000000000000300671320364734500177050ustar00rootroot00000000000000from __future__ import print_function, division, absolute_import import sys from contextlib import contextmanager import pytest s3fs = pytest.importorskip('s3fs') boto3 = pytest.importorskip('boto3') moto = pytest.importorskip('moto') from toolz import concat, valmap, partial from dask import compute, get, delayed from dask.bytes.s3 import DaskS3FileSystem from dask.bytes.core import (read_bytes, open_files, open_text_files, get_pyarrow_filesystem) from dask.bytes import core compute = partial(compute, get=get) test_bucket_name = 'test' files = {'test/accounts.1.json': (b'{"amount": 100, "name": "Alice"}\n' b'{"amount": 200, "name": "Bob"}\n' b'{"amount": 300, "name": "Charlie"}\n' b'{"amount": 400, "name": "Dennis"}\n'), 'test/accounts.2.json': (b'{"amount": 500, "name": "Alice"}\n' b'{"amount": 600, "name": "Bob"}\n' b'{"amount": 700, "name": "Charlie"}\n' b'{"amount": 800, "name": "Dennis"}\n')} @pytest.yield_fixture def s3(): # writable local S3 system with moto.mock_s3(): client = boto3.client('s3') client.create_bucket(Bucket=test_bucket_name, ACL='public-read-write') for f, data in files.items(): client.put_object(Bucket=test_bucket_name, Key=f, Body=data) yield s3fs.S3FileSystem(anon=True) @contextmanager def s3_context(bucket, files): m = moto.mock_s3() m.start() client = boto3.client('s3') client.create_bucket(Bucket=bucket, ACL='public-read-write') for f, data in files.items(): client.put_object(Bucket=bucket, Key=f, Body=data) yield DaskS3FileSystem(anon=True) for f, data in files.items(): try: client.delete_object(Bucket=bucket, Key=f, Body=data) except Exception: pass m.stop() def test_get_s3(): s3 = DaskS3FileSystem(key='key', secret='secret') assert s3.key == 'key' assert s3.secret == 'secret' s3 = DaskS3FileSystem(username='key', password='secret') assert s3.key == 'key' assert s3.secret == 'secret' with pytest.raises(KeyError): DaskS3FileSystem(key='key', username='key') with pytest.raises(KeyError): DaskS3FileSystem(secret='key', password='key') def test_write_bytes(s3): paths = ['s3://' + test_bucket_name + '/more/' + f for f in files] values = [delayed(v) for v in files.values()] out = core.write_bytes(values, paths) compute(*out) sample, values = read_bytes('s3://' + test_bucket_name + '/more/test/accounts.*') results = compute(*concat(values)) assert set(list(files.values())) == set(results) def test_read_bytes(s3): sample, values = read_bytes('s3://' + test_bucket_name + '/test/accounts.*') assert isinstance(sample, bytes) assert sample[:5] == files[sorted(files)[0]][:5] assert sample.endswith(b'\n') assert isinstance(values, (list, tuple)) assert isinstance(values[0], (list, tuple)) assert hasattr(values[0][0], 'dask') assert sum(map(len, values)) >= 
len(files) results = compute(*concat(values)) assert set(results) == set(files.values()) def test_read_bytes_sample_delimiter(s3): sample, values = read_bytes('s3://' + test_bucket_name + '/test/accounts.*', sample=80, delimiter=b'\n') assert sample.endswith(b'\n') sample, values = read_bytes('s3://' + test_bucket_name + '/test/accounts.1.json', sample=80, delimiter=b'\n') assert sample.endswith(b'\n') sample, values = read_bytes('s3://' + test_bucket_name + '/test/accounts.1.json', sample=2, delimiter=b'\n') assert sample.endswith(b'\n') def test_read_bytes_non_existing_glob(s3): with pytest.raises(IOError): read_bytes('s3://' + test_bucket_name + '/non-existing/*') def test_read_bytes_blocksize_none(s3): _, values = read_bytes('s3://' + test_bucket_name + '/test/accounts.*', blocksize=None) assert sum(map(len, values)) == len(files) @pytest.mark.slow @pytest.mark.network def test_read_bytes_blocksize_on_large_data(): _, L = read_bytes('s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv', blocksize=None, anon=True) assert len(L) == 1 _, L = read_bytes('s3://dask-data/nyc-taxi/2014/*.csv', blocksize=None, anon=True) assert len(L) == 12 @pytest.mark.parametrize('blocksize', [5, 15, 45, 1500]) def test_read_bytes_block(s3, blocksize): _, vals = read_bytes('s3://' + test_bucket_name + '/test/account*', blocksize=blocksize) assert (list(map(len, vals)) == [(len(v) // blocksize + 1) for v in files.values()]) results = compute(*concat(vals)) assert (sum(len(r) for r in results) == sum(len(v) for v in files.values())) ourlines = b"".join(results).split(b'\n') testlines = b"".join(files.values()).split(b'\n') assert set(ourlines) == set(testlines) @pytest.mark.parametrize('blocksize', [5, 15, 45, 1500]) def test_read_bytes_delimited(s3, blocksize): _, values = read_bytes('s3://' + test_bucket_name + '/test/accounts*', blocksize=blocksize, delimiter=b'\n') _, values2 = read_bytes('s3://' + test_bucket_name + '/test/accounts*', blocksize=blocksize, delimiter=b'foo') assert ([a.key for a in concat(values)] != [b.key for b in concat(values2)]) results = compute(*concat(values)) res = [r for r in results if r] assert all(r.endswith(b'\n') for r in res) ourlines = b''.join(res).split(b'\n') testlines = b"".join(files[k] for k in sorted(files)).split(b'\n') assert ourlines == testlines # delimiter not at the end d = b'}' _, values = read_bytes('s3://' + test_bucket_name + '/test/accounts*', blocksize=blocksize, delimiter=d) results = compute(*concat(values)) res = [r for r in results if r] # All should end in } except EOF assert sum(r.endswith(b'}') for r in res) == len(res) - 2 ours = b"".join(res) test = b"".join(files[v] for v in sorted(files)) assert ours == test def test_registered(s3): sample, values = read_bytes('s3://%s/test/accounts.*.json' % test_bucket_name) results = compute(*concat(values)) assert set(results) == set(files.values()) def test_registered_open_files(s3): myfiles = open_files('s3://%s/test/accounts.*.json' % test_bucket_name) assert len(myfiles) == len(files) data = [] for file in myfiles: with file as f: data.append(f.read()) assert list(data) == [files[k] for k in sorted(files)] def test_registered_open_text_files(s3): myfiles = open_text_files('s3://%s/test/accounts.*.json' % test_bucket_name) assert len(myfiles) == len(files) data = [] for file in myfiles: with file as f: data.append(f.read()) assert list(data) == [files[k].decode() for k in sorted(files)] from dask.bytes.compression import compress, files as cfiles, seekable_files fmt_bs = [(fmt, None) for fmt in 
cfiles] + [(fmt, 10) for fmt in seekable_files] @pytest.mark.parametrize('fmt,blocksize', fmt_bs) def test_compression(s3, fmt, blocksize): with s3_context('compress', valmap(compress[fmt], files)): sample, values = read_bytes('s3://compress/test/accounts.*', compression=fmt, blocksize=blocksize) assert sample.startswith(files[sorted(files)[0]][:10]) assert sample.endswith(b'\n') results = compute(*concat(values)) assert b''.join(results) == b''.join([files[k] for k in sorted(files)]) def test_files(s3): myfiles = open_files('s3://' + test_bucket_name + '/test/accounts.*') assert len(myfiles) == len(files) for lazy_file, path in zip(myfiles, sorted(files)): with lazy_file as f: data = f.read() assert data == files[path] @pytest.mark.parametrize('fmt', list(seekable_files)) def test_getsize(fmt): with s3_context('compress', {'x': compress[fmt](b'1234567890')}) as s3: assert s3.logical_size('compress/x', fmt) == 10 double = lambda x: x * 2 def test_modification_time_read_bytes(): with s3_context('compress', files): _, a = read_bytes('s3://compress/test/accounts.*') _, b = read_bytes('s3://compress/test/accounts.*') assert [aa._key for aa in concat(a)] == [bb._key for bb in concat(b)] with s3_context('compress', valmap(double, files)): _, c = read_bytes('s3://compress/test/accounts.*') assert [aa._key for aa in concat(a)] != [cc._key for cc in concat(c)] @pytest.mark.skip() def test_modification_time_open_files(): with s3_context('compress', files): a = open_files('s3://compress/test/accounts.*') b = open_files('s3://compress/test/accounts.*') assert [aa._key for aa in a] == [bb._key for bb in b] with s3_context('compress', valmap(double, files)): c = open_files('s3://compress/test/accounts.*') assert [aa._key for aa in a] != [cc._key for cc in c] def test_read_csv_passes_through_options(): dd = pytest.importorskip('dask.dataframe') with s3_context('csv', {'a.csv': b'a,b\n1,2\n3,4'}) as s3: df = dd.read_csv('s3://csv/*.csv', storage_options={'s3': s3}) assert df.a.sum().compute() == 1 + 3 def test_read_text_passes_through_options(): db = pytest.importorskip('dask.bag') with s3_context('csv', {'a.csv': b'a,b\n1,2\n3,4'}) as s3: df = db.read_text('s3://csv/*.csv', storage_options={'s3': s3}) assert df.count().compute(get=get) == 3 @pytest.mark.parametrize("engine", ['pyarrow', 'fastparquet']) def test_parquet(s3, engine): dd = pytest.importorskip('dask.dataframe') pytest.importorskip(engine) import pandas as pd import numpy as np url = 's3://%s/test.parquet' % test_bucket_name data = pd.DataFrame({'i32': np.arange(1000, dtype=np.int32), 'i64': np.arange(1000, dtype=np.int64), 'f': np.arange(1000, dtype=np.float64), 'bhello': np.random.choice( [u'hello', u'you', u'people'], size=1000).astype("O")}, index=pd.Index(np.arange(1000), name='foo')) df = dd.from_pandas(data, chunksize=500) df.to_parquet(url, engine=engine) files = [f.split('/')[-1] for f in s3.ls(url)] assert '_metadata' in files assert 'part.0.parquet' in files df2 = dd.read_parquet(url, index='foo', engine=engine) assert len(df2.divisions) > 1 pd.util.testing.assert_frame_equal(data, df2.compute()) def test_parquet_wstoragepars(s3): dd = pytest.importorskip('dask.dataframe') pytest.importorskip('fastparquet') import pandas as pd import numpy as np url = 's3://%s/test.parquet' % test_bucket_name data = pd.DataFrame({'i32': np.array([0, 5, 2, 5])}) df = dd.from_pandas(data, chunksize=500) df.to_parquet(url, write_index=False) dd.read_parquet(url, storage_options={'default_fill_cache': False}) assert s3.current().default_fill_cache is 
False dd.read_parquet(url, storage_options={'default_fill_cache': True}) assert s3.current().default_fill_cache is True dd.read_parquet(url, storage_options={'default_block_size': 2**20}) assert s3.current().default_block_size == 2**20 with s3.current().open(url + '/_metadata') as f: assert f.blocksize == 2**20 @pytest.mark.skipif(sys.platform == 'win32', reason="pathlib and moto clash on windows") def test_pathlib_s3(s3): pathlib = pytest.importorskip("pathlib") with pytest.raises(ValueError): url = pathlib.Path('s3://bucket/test.accounts.*') sample, values = read_bytes(url, blocksize=None) def test_get_pyarrow_fs_s3(s3): pa = pytest.importorskip('pyarrow') fs = DaskS3FileSystem(anon=True) assert isinstance(get_pyarrow_filesystem(fs), pa.filesystem.S3FSWrapper) dask-0.16.0/dask/bytes/utils.py000066400000000000000000000163541320364734500163220ustar00rootroot00000000000000from __future__ import print_function, division, absolute_import import math import os import re from toolz import identity from ..compatibility import PY2, urlsplit def infer_storage_options(urlpath, inherit_storage_options=None): """ Infer storage options from URL path and merge it with existing storage options. Parameters ---------- urlpath: str or unicode Either local absolute file path or URL (hdfs://namenode:8020/file.csv) storage_options: dict (optional) Its contents will get merged with the inferred information from the given path Returns ------- Storage options dict. Examples -------- >>> infer_storage_options('/mnt/datasets/test.csv') # doctest: +SKIP {"protocol": "file", "path", "/mnt/datasets/test.csv"} >>> infer_storage_options( ... 'hdfs://username:pwd@node:123/mnt/datasets/test.csv?q=1', ... inherit_storage_options={'extra': 'value'}) # doctest: +SKIP {"protocol": "hdfs", "username": "username", "password": "pwd", "host": "node", "port": 123, "path": "/mnt/datasets/test.csv", "url_query": "q=1", "extra": "value"} """ # Handle Windows paths including disk name in this special case if re.match(r'^[a-zA-Z]:[\\/]', urlpath): return {'protocol': 'file', 'path': urlpath} parsed_path = urlsplit(urlpath) protocol = parsed_path.scheme or 'file' path = parsed_path.path if protocol == 'file': # Special case parsing file protocol URL on Windows according to: # https://msdn.microsoft.com/en-us/library/jj710207.aspx windows_path = re.match(r'^/([a-zA-Z])[:|]([\\/].*)$', path) if windows_path: path = '%s:%s' % windows_path.groups() inferred_storage_options = { 'protocol': protocol, 'path': path, } if parsed_path.netloc: # Parse `hostname` from netloc manually because `parsed_path.hostname` # lowercases the hostname which is not always desirable (e.g. 
in S3): # https://github.com/dask/dask/issues/1417 inferred_storage_options['host'] = parsed_path.netloc.rsplit('@', 1)[-1].rsplit(':', 1)[0] if parsed_path.port: inferred_storage_options['port'] = parsed_path.port if parsed_path.username: inferred_storage_options['username'] = parsed_path.username if parsed_path.password: inferred_storage_options['password'] = parsed_path.password if parsed_path.query: inferred_storage_options['url_query'] = parsed_path.query if parsed_path.fragment: inferred_storage_options['url_fragment'] = parsed_path.fragment if inherit_storage_options: if set(inherit_storage_options) & set(inferred_storage_options): raise KeyError("storage options (%r) and path url options (%r) " "collision is detected" % (inherit_storage_options, inferred_storage_options)) inferred_storage_options.update(inherit_storage_options) return inferred_storage_options if PY2: class SeekableFile(object): def __init__(self, file): if isinstance(file, SeekableFile): # idempotent file = file.file self.file = file def seekable(self): return True def readable(self): try: return self.file.readable() except AttributeError: return 'r' in self.file.mode def writable(self): try: return self.file.writable() except AttributeError: return 'w' in self.file.mode def read1(self, *args, **kwargs): # https://bugs.python.org/issue12591 return self.file.read(*args, **kwargs) def __iter__(self): return self.file.__iter__() def __getattr__(self, key): return getattr(self.file, key) else: SeekableFile = identity compressions = {'gz': 'gzip', 'bz2': 'bz2', 'xz': 'xz'} def infer_compression(filename): extension = os.path.splitext(filename)[-1].strip('.') return compressions.get(extension, None) def seek_delimiter(file, delimiter, blocksize): """ Seek current file to next byte after a delimiter bytestring This seeks the file to the next byte following the delimiter. It does not return anything. Use ``file.tell()`` to see location afterwards. Parameters ---------- file: a file delimiter: bytes a delimiter like ``b'\n'`` or message sentinel blocksize: int Number of bytes to read from the file at once. """ if file.tell() == 0: return last = b'' while True: current = file.read(blocksize) if not current: return full = last + current try: i = full.index(delimiter) file.seek(file.tell() - (len(full) - i) + len(delimiter)) return except ValueError: pass last = full[-len(delimiter):] def read_block(f, offset, length, delimiter=None): """ Read a block of bytes from a file Parameters ---------- f: File offset: int Byte offset to start read length: int Number of bytes to read delimiter: bytes (optional) Ensure reading starts and stops at delimiter bytestring If using the ``delimiter=`` keyword argument we ensure that the read starts and stops at delimiter boundaries that follow the locations ``offset`` and ``offset + length``. If ``offset`` is zero then we start at zero. The bytestring returned WILL include the terminating delimiter string. 
Examples -------- >>> from io import BytesIO # doctest: +SKIP >>> f = BytesIO(b'Alice, 100\\nBob, 200\\nCharlie, 300') # doctest: +SKIP >>> read_block(f, 0, 13) # doctest: +SKIP b'Alice, 100\\nBo' >>> read_block(f, 0, 13, delimiter=b'\\n') # doctest: +SKIP b'Alice, 100\\nBob, 200\\n' >>> read_block(f, 10, 10, delimiter=b'\\n') # doctest: +SKIP b'Bob, 200\\nCharlie, 300' """ if offset != f.tell(): # commonly both zero f.seek(offset) if not offset and length is None and f.tell() == 0: return f.read() if delimiter: seek_delimiter(f, delimiter, 2**16) start = f.tell() length -= start - offset try: f.seek(start + length) seek_delimiter(f, delimiter, 2**16) except ValueError: f.seek(0, 2) end = f.tell() offset = start length = end - start f.seek(offset) return f.read(length) def build_name_function(max_int): """ Returns a function that receives a single integer and returns it as a string padded by enough zero characters to align with maximum possible integer >>> name_f = build_name_function(57) >>> name_f(7) '07' >>> name_f(31) '31' >>> build_name_function(1000)(42) '0042' >>> build_name_function(999)(42) '042' >>> build_name_function(0)(0) '0' """ # handle corner cases max_int is 0 or exact power of 10 max_int += 1e-8 pad_length = int(math.ceil(math.log10(max_int))) def name_function(i): return str(i).zfill(pad_length) return name_function dask-0.16.0/dask/cache.py000066400000000000000000000040121320364734500150630ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from .callbacks import Callback from timeit import default_timer from numbers import Number import sys overhead = sys.getsizeof(1.23) * 4 + sys.getsizeof(()) * 4 class Cache(Callback): """ Use cache for computation Examples -------- >>> cache = Cache(1e9) # doctest: +SKIP The cache can be used locally as a context manager around ``compute`` or ``get`` calls: >>> with cache: # doctest: +SKIP ... 
result = x.compute() You can also register a cache globally, so that it works for all computations: >>> cache.register() # doctest: +SKIP >>> cache.unregister() # doctest: +SKIP """ def __init__(self, cache, *args, **kwargs): try: import cachey except ImportError as ex: raise ImportError('Cache requires cachey, "{ex}" problem ' 'importing'.format(ex=str(ex))) self._nbytes = cachey.nbytes if isinstance(cache, Number): cache = cachey.Cache(cache, *args, **kwargs) else: assert not args and not kwargs self.cache = cache self.starttimes = dict() def _start(self, dsk): self.durations = dict() overlap = set(dsk) & set(self.cache.data) for key in overlap: dsk[key] = self.cache.data[key] def _pretask(self, key, dsk, state): self.starttimes[key] = default_timer() def _posttask(self, key, value, dsk, state, id): duration = default_timer() - self.starttimes[key] deps = state['dependencies'][key] if deps: duration += max(self.durations.get(k, 0) for k in deps) self.durations[key] = duration nb = self._nbytes(value) + overhead + sys.getsizeof(key) * 4 self.cache.put(key, value, cost=duration / nb / 1e9, nbytes=nb) def _finish(self, dsk, state, errored): self.starttimes.clear() self.durations.clear() dask-0.16.0/dask/callbacks.py000066400000000000000000000077041320364734500157520ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from contextlib import contextmanager from .context import _globals __all__ = ['Callback', 'add_callbacks'] class Callback(object): """ Base class for using the callback mechanism Create a callback with functions of the following signatures: >>> def start(dsk): ... pass >>> def start_state(dsk, state): ... pass >>> def pretask(key, dsk, state): ... pass >>> def posttask(key, result, dsk, state, worker_id): ... pass >>> def finish(dsk, state, failed): ... pass You may then construct a callback object with any number of them >>> cb = Callback(pretask=pretask, finish=finish) # doctest: +SKIP And use it either as a context manager over a compute/get call >>> with cb: # doctest: +SKIP ... x.compute() # doctest: +SKIP Or globally with the ``register`` method >>> cb.register() # doctest: +SKIP >>> cb.unregister() # doctest: +SKIP Alternatively subclass the ``Callback`` class with your own methods. >>> class PrintKeys(Callback): ... def _pretask(self, key, dask, state): ... print("Computing: {0}!".format(repr(key))) >>> with PrintKeys(): # doctest: +SKIP ... x.compute() # doctest: +SKIP """ def __init__(self, start=None, start_state=None, pretask=None, posttask=None, finish=None): if start: self._start = start if start_state: self._start_state = start_state if pretask: self._pretask = pretask if posttask: self._posttask = posttask if finish: self._finish = finish @property def _callback(self): fields = ['_start', '_start_state', '_pretask', '_posttask', '_finish'] return tuple(getattr(self, i, None) for i in fields) def __enter__(self): self._cm = add_callbacks(self) self._cm.__enter__() return self def __exit__(self, *args): self._cm.__exit__(*args) def register(self): _globals['callbacks'].add(self._callback) def unregister(self): _globals['callbacks'].remove(self._callback) def unpack_callbacks(cbs): """Take an iterable of callbacks, return a list of each callback.""" if cbs: return [[i for i in f if i] for f in zip(*cbs)] else: return [(), (), (), (), ()] @contextmanager def local_callbacks(callbacks=None): """Allows callbacks to work with nested schedulers. Callbacks will only be used by the first started scheduler they encounter. 
This means that only the outermost scheduler will use global callbacks.""" global_callbacks = callbacks is None if global_callbacks: callbacks, _globals['callbacks'] = _globals['callbacks'], set() try: yield callbacks or () finally: if global_callbacks: _globals['callbacks'] = callbacks def normalize_callback(cb): """Normalizes a callback to a tuple""" if isinstance(cb, Callback): return cb._callback elif isinstance(cb, tuple): return cb else: raise TypeError("Callbacks must be either `Callback` or `tuple`") class add_callbacks(object): """Context manager for callbacks. Takes several callbacks and applies them only in the enclosed context. Callbacks can either be represented as a ``Callback`` object, or as a tuple of length 4. Examples -------- >>> def pretask(key, dsk, state): ... print("Now running {0}").format(key) >>> callbacks = (None, pretask, None, None) >>> with add_callbacks(callbacks): # doctest: +SKIP ... res.compute() """ def __init__(self, *args): self.old = _globals['callbacks'].copy() _globals['callbacks'].update([normalize_callback(a) for a in args]) def __enter__(self): return def __exit__(self, type, value, traceback): _globals['callbacks'] = self.old dask-0.16.0/dask/compatibility.py000066400000000000000000000166361320364734500167100ustar00rootroot00000000000000# flake8: noqa from __future__ import absolute_import, division, print_function import functools import inspect import operator import sys import types PY3 = sys.version_info[0] == 3 PY2 = sys.version_info[0] == 2 class LZMAFile: def __init__(self, *args, **kwargs): raise ValueError("xz files requires the lzma module. " "To use, install lzmaffi or backports.lzma.") LZMA_AVAILABLE = False if PY3: import builtins from queue import Queue, Empty from itertools import zip_longest from io import StringIO, BytesIO from bz2 import BZ2File from gzip import (GzipFile, compress as gzip_compress, decompress as gzip_decompress) try: try: from lzmaffi import (LZMAFile, compress as lzma_compress, decompress as lzma_decompress) except ImportError: from lzma import (LZMAFile, compress as lzma_compress, decompress as lzma_decompress) LZMA_AVAILABLE = True except ImportError: # Fallback to top-level definition pass from urllib.request import urlopen from urllib.parse import urlparse, urlsplit, quote, unquote FileNotFoundError = FileNotFoundError unicode = str long = int zip = zip def apply(func, args, kwargs=None): if kwargs: return func(*args, **kwargs) else: return func(*args) range = range reduce = functools.reduce operator_div = operator.truediv def _getargspec(func): return inspect.getfullargspec(func) def reraise(exc, tb=None): if exc.__traceback__ is not tb: raise exc.with_traceback(tb) raise exc else: import __builtin__ as builtins from Queue import Queue, Empty from itertools import izip_longest as zip_longest, izip as zip from StringIO import StringIO from io import BytesIO, BufferedIOBase import bz2 import gzip from urllib2 import urlopen from urlparse import urlparse, urlsplit from urllib import quote, unquote unicode = unicode long = long apply = apply range = xrange reduce = reduce operator_div = operator.div FileNotFoundError = IOError def _make_reraise(): _code = ("def reraise(exc, tb=None):" " raise type(exc), exc, tb") namespace = {} exec("exec _code in namespace") return namespace['reraise'] reraise = _make_reraise() del _make_reraise def _getargspec(func): return inspect.getargspec(func) def gzip_decompress(b): f = gzip.GzipFile(fileobj=BytesIO(b)) result = f.read() f.close() return result def gzip_compress(b): 
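# Compress in memory for Python 2: write the bytes through GzipFile into a
# BytesIO buffer and return the buffer contents (mirrors Python 3's
# gzip.compress).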
bio = BytesIO() f = gzip.GzipFile(fileobj=bio, mode='w') f.write(b) f.close() bio.seek(0) result = bio.read() return result if sys.version_info[1] <= 7: class BZ2File(BufferedIOBase): def __init__(self, *args, **kwargs): self.__obj = bz2.BZ2File(*args, **kwargs) def close(self): return self.__obj.close() @property def closed(self): return self.__obj.closed def flush(self): pass def isatty(self): return self.__obj.isatty() def read(self, *args, **kwargs): return self.__obj.read(*args, **kwargs) def read1(self, *args, **kwargs): return self.__obj.read(*args, **kwargs) def readable(self): return 'r' in self.__obj.mode def readline(self, *args, **kwargs): return self.__obj.readline(*args, **kwargs) def readlines(self, *args, **kwargs): return self.__obj.readlines(*args, **kwargs) def seek(self, *args, **kwargs): self.__obj.seek(*args, **kwargs) return self.tell() def seekable(self): return self.readable() def tell(self): return self.__obj.tell() def truncate(self, *args, **kwargs): return self.__obj.truncate(*args, **kwargs) def writable(self): return 'w' in self.__obj.mode def write(self, *args, **kwargs): return self.__obj.write(*args, **kwargs) def writelines(self, *args, **kwargs): return self.__obj.writelines(*args, **kwargs) else: BZ2File = bz2.BZ2File class GzipFile(BufferedIOBase): def __init__(self, *args, **kwargs): self.__obj = gzip.GzipFile(*args, **kwargs) def close(self): return self.__obj.close() @property def closed(self): return self.__obj.fileobj is None def flush(self, *args, **kwargs): return self.__obj.flush(*args, **kwargs) def isatty(self): return self.__obj.isatty() def read(self, *args, **kwargs): return self.__obj.read(*args, **kwargs) def read1(self, *args, **kwargs): return self.__obj.read(*args, **kwargs) def readable(self): return self.__obj.mode == gzip.READ def readline(self, *args, **kwargs): return self.__obj.readline(*args, **kwargs) def readlines(self, *args, **kwargs): return self.__obj.readlines(*args, **kwargs) def seek(self, *args, **kwargs): self.__obj.seek(*args, **kwargs) return self.tell() def seekable(self): # See https://hg.python.org/cpython/file/2.7/Lib/gzip.py#l421 return True def tell(self): return self.__obj.tell() def truncate(self, *args, **kwargs): return self.__obj.truncate(*args, **kwargs) def writable(self): return self.__obj.mode == gzip.WRITE def write(self, *args, **kwargs): return self.__obj.write(*args, **kwargs) def writelines(self, *args, **kwargs): return self.__obj.writelines(*args, **kwargs) try: try: from lzmaffi import (LZMAFile, compress as lzma_compress, decompress as lzma_decompress) except ImportError: from backports.lzma import LZMAFile from backports.lzma import (LZMAFile, compress as lzma_compress, decompress as lzma_decompress) LZMA_AVAILABLE = True except ImportError: # Fallback to top-level definition pass def getargspec(func): """Version of inspect.getargspec that works for functools.partial objects""" if isinstance(func, functools.partial): return _getargspec(func.func) else: if isinstance(func, type): return _getargspec(func.__init__) else: return _getargspec(func) def bind_method(cls, name, func): """Bind a method to class Parameters ---------- cls : type class to receive bound method name : basestring name of method on class instance func : function function to be bound as method Returns ------- None """ # only python 2 has bound/unbound method issue if not PY3: setattr(cls, name, types.MethodType(func, None, cls)) else: setattr(cls, name, func) 
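# Minimal usage sketch (illustration only): the gzip_compress/gzip_decompress
# helpers above are expected to round-trip bytes unchanged on both Python 2
# and Python 3. The payload value below is arbitrary.
def _gzip_roundtrip_example(payload=b'dask compatibility shim'):
    # compressing and then decompressing should reproduce the input exactly
    return gzip_decompress(gzip_compress(payload)) == payload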
dask-0.16.0/dask/context.py000066400000000000000000000054671320364734500155230ustar00rootroot00000000000000""" Control global computation context """ from __future__ import absolute_import, division, print_function import threading from functools import partial from collections import defaultdict _globals = defaultdict(lambda: None) _globals['callbacks'] = set() thread_state = threading.local() class set_options(object): """ Set global state within controlled context This lets you specify various global settings in a tightly controlled ``with`` block. Valid keyword arguments currently include the following:: get - the scheduler to use pool - a thread or process pool cache - Cache to use for intermediate results func_loads/func_dumps - loads/dumps functions for serialization of data likely to contain functions. Defaults to cloudpickle.loads/cloudpickle.dumps optimizations - List of additional optimizations to run Examples -------- >>> with set_options(get=dask.get): # doctest: +SKIP ... x = np.array(x) # uses dask.get internally """ def __init__(self, **kwargs): self.old = _globals.copy() _globals.update(kwargs) def __enter__(self): return def __exit__(self, type, value, traceback): _globals.clear() _globals.update(self.old) def globalmethod(default=None, key=None, falsey=None): """ Allow function to be taken over by globals This modifies a method so that occurrences of it may be taken over by functions registered in the global options. Can be used as a decorator or a function. Parameters ---------- default : callable The default callable to use. key : str Key under which we register this function in the global parameters falsey : callable, None, optional A function to use if the option is falsey. If not provided, the default is used instead. Examples -------- >>> import dask >>> class Foo(object): ... @globalmethod(key='bar', falsey=lambda: 3) ... def bar(): ... return 1 >>> f = Foo() >>> f.bar() 1 >>> with dask.set_options(bar=lambda: 2): ... print(f.bar()) 2 >>> with dask.set_options(bar=False): ... print(f.bar()) 3 """ if default is None: return partial(globalmethod, key=key, falsey=falsey) return GlobalMethod(default=default, key=key, falsey=falsey) class GlobalMethod(object): def __init__(self, default, key, falsey=None): self._default = default self._key = key self._falsey = falsey def __get__(self, instance, owner=None): if self._key in _globals: if _globals[self._key]: return _globals[self._key] elif self._falsey is not None: return self._falsey return self._default dask-0.16.0/dask/core.py000066400000000000000000000303611320364734500147560ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from itertools import chain from .utils_test import add, inc # noqa: F401 def ishashable(x): """ Is x hashable? Examples -------- >>> ishashable(1) True >>> ishashable([1]) False """ try: hash(x) return True except TypeError: return False def istask(x): """ Is x a runnable task? A task is a tuple with a callable first argument Examples -------- >>> inc = lambda x: x + 1 >>> istask((inc, 1)) True >>> istask(1) False """ return type(x) is tuple and x and callable(x[0]) def has_tasks(dsk, x): """Whether ``x`` has anything to compute. 
Returns True if: - ``x`` is a task - ``x`` is a key in ``dsk`` - ``x`` is a list that contains any tasks or keys """ if istask(x): return True try: if x in dsk: return True except Exception: pass if isinstance(x, list): for i in x: if has_tasks(dsk, i): return True return False def preorder_traversal(task): """A generator to preorder-traverse a task.""" for item in task: if istask(item): for i in preorder_traversal(item): yield i elif isinstance(item, list): yield list for i in preorder_traversal(item): yield i else: yield item def _get_nonrecursive(d, x, maxdepth=1000): # Non-recursive. DAG property is checked upon reaching maxdepth. _list = lambda *args: list(args) # We construct a nested hierarchy of tuples to mimic the execution stack # of frames that Python would maintain for a recursive implementation. # A frame is associated with a single task from a Dask. # A frame tuple has three elements: # 1) The function for the task. # 2) The arguments for the task (typically keys in the Dask). # Arguments are stored in reverse order, and elements are popped # as they are evaluated. # 3) The calculated results of the arguments from (2). stack = [(lambda x: x, [x], [])] while True: func, args, results = stack[-1] if not args: val = func(*results) if len(stack) == 1: return val stack.pop() stack[-1][2].append(val) continue elif maxdepth and len(stack) > maxdepth: cycle = getcycle(d, x) if cycle: cycle = '->'.join(cycle) raise RuntimeError('Cycle detected in Dask: %s' % cycle) maxdepth = None key = args.pop() if isinstance(key, list): stack.append((_list, list(key[::-1]), [])) continue elif ishashable(key) and key in d: args.append(d[key]) continue elif istask(key): stack.append((key[0], list(key[:0:-1]), [])) else: results.append(key) def _get_recursive(d, x): # recursive, no cycle detection if isinstance(x, list): return [_get_recursive(d, k) for k in x] elif ishashable(x) and x in d: return _get_recursive(d, d[x]) elif istask(x): func, args = x[0], x[1:] args2 = [_get_recursive(d, k) for k in args] return func(*args2) else: return x def get(d, x, recursive=False): """ Get value from Dask Examples -------- >>> inc = lambda x: x + 1 >>> d = {'x': 1, 'y': (inc, 'x')} >>> get(d, 'x') 1 >>> get(d, 'y') 2 """ _get = _get_recursive if recursive else _get_nonrecursive if isinstance(x, list): return tuple(get(d, k) for k in x) elif x in d: return _get(d, x) raise KeyError("{0} is not a key in the graph".format(x)) def get_dependencies(dsk, key=None, task=None, as_list=False): """ Get the immediate tasks on which this task depends Examples -------- >>> dsk = {'x': 1, ... 'y': (inc, 'x'), ... 'z': (add, 'x', 'y'), ... 'w': (inc, 'z'), ... 
'a': (add, (inc, 'x'), 1)} >>> get_dependencies(dsk, 'x') set([]) >>> get_dependencies(dsk, 'y') set(['x']) >>> get_dependencies(dsk, 'z') # doctest: +SKIP set(['x', 'y']) >>> get_dependencies(dsk, 'w') # Only direct dependencies set(['z']) >>> get_dependencies(dsk, 'a') # Ignore non-keys set(['x']) >>> get_dependencies(dsk, task=(inc, 'x')) # provide tasks directly set(['x']) """ if key is not None: arg = dsk[key] elif task is not None: arg = task else: raise ValueError("Provide either key or task") result = [] work = [arg] while work: new_work = [] for w in work: typ = type(w) if typ is tuple and w and callable(w[0]): # istask(w) new_work += w[1:] elif typ is list: new_work += w elif typ is dict: new_work += w.values() else: try: if w in dsk: result.append(w) except TypeError: # not hashable pass work = new_work return result if as_list else set(result) def get_deps(dsk): """ Get dependencies and dependents from dask dask graph >>> dsk = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b')} >>> dependencies, dependents = get_deps(dsk) >>> dependencies {'a': set([]), 'c': set(['b']), 'b': set(['a'])} >>> dependents {'a': set(['b']), 'c': set([]), 'b': set(['c'])} """ dependencies = {k: get_dependencies(dsk, task=v) for k, v in dsk.items()} dependents = reverse_dict(dependencies) return dependencies, dependents def flatten(seq, container=list): """ >>> list(flatten([1])) [1] >>> list(flatten([[1, 2], [1, 2]])) [1, 2, 1, 2] >>> list(flatten([[[1], [2]], [[1], [2]]])) [1, 2, 1, 2] >>> list(flatten(((1, 2), (1, 2)))) # Don't flatten tuples [(1, 2), (1, 2)] >>> list(flatten((1, 2, [3, 4]))) # support heterogeneous [1, 2, 3, 4] """ if isinstance(seq, str): yield seq else: for item in seq: if isinstance(item, container): for item2 in flatten(item, container=container): yield item2 else: yield item def reverse_dict(d): """ >>> a, b, c = 'abc' >>> d = {a: [b, c], b: [c]} >>> reverse_dict(d) # doctest: +SKIP {'a': set([]), 'b': set(['a']}, 'c': set(['a', 'b'])} """ terms = list(d.keys()) + list(chain.from_iterable(d.values())) result = {t: set() for t in terms} for k, vals in d.items(): for val in vals: result[val].add(k) return result def subs(task, key, val): """ Perform a substitution on a task Examples -------- >>> subs((inc, 'x'), 'x', 1) # doctest: +SKIP (inc, 1) """ type_task = type(task) if not (type_task is tuple and task and callable(task[0])): # istask(task): try: if type_task is type(key) and task == key: return val except Exception: pass if type_task is list: return [subs(x, key, val) for x in task] return task newargs = [] for arg in task[1:]: type_arg = type(arg) if type_arg is tuple and arg and callable(arg[0]): # istask(task): arg = subs(arg, key, val) elif type_arg is list: arg = [subs(x, key, val) for x in arg] elif type_arg is type(key): # Can't do a simple equality check, since this may trigger # a FutureWarning from NumPy about array equality # https://github.com/dask/dask/pull/2457 if len(arg) == len(key) and all(type(aa) == type(bb) and aa == bb for aa, bb in zip(arg, key)): arg = val newargs.append(arg) return task[:1] + tuple(newargs) def _toposort(dsk, keys=None, returncycle=False, dependencies=None): # Stack-based depth-first search traversal. This is based on Tarjan's # method for topological sorting (see wikipedia for pseudocode) if keys is None: keys = dsk elif not isinstance(keys, list): keys = [keys] if not returncycle: ordered = [] # Nodes whose descendents have been completely explored. # These nodes are guaranteed to not be part of a cycle. 
completed = set() # All nodes that have been visited in the current traversal. Because # we are doing depth-first search, going "deeper" should never result # in visiting a node that has already been seen. The `seen` and # `completed` sets are mutually exclusive; it is okay to visit a node # that has already been added to `completed`. seen = set() if dependencies is None: dependencies = dict((k, get_dependencies(dsk, k)) for k in dsk) for key in keys: if key in completed: continue nodes = [key] while nodes: # Keep current node on the stack until all descendants are visited cur = nodes[-1] if cur in completed: # Already fully traversed descendants of cur nodes.pop() continue seen.add(cur) # Add direct descendants of cur to nodes stack next_nodes = [] for nxt in dependencies[cur]: if nxt not in completed: if nxt in seen: # Cycle detected! cycle = [nxt] while nodes[-1] != nxt: cycle.append(nodes.pop()) cycle.append(nodes.pop()) cycle.reverse() if returncycle: return cycle else: cycle = '->'.join(cycle) raise RuntimeError('Cycle detected in Dask: %s' % cycle) next_nodes.append(nxt) if next_nodes: nodes.extend(next_nodes) else: # cur has no more descendants to explore, so we're done with it if not returncycle: ordered.append(cur) completed.add(cur) seen.remove(cur) nodes.pop() if returncycle: return [] return ordered def toposort(dsk, dependencies=None): """ Return a list of keys of dask sorted in topological order.""" return _toposort(dsk, dependencies=dependencies) def getcycle(d, keys): """ Return a list of nodes that form a cycle if Dask is not a DAG. Returns an empty list if no cycle is found. ``keys`` may be a single key or list of keys. Examples -------- >>> d = {'x': (inc, 'z'), 'y': (inc, 'x'), 'z': (inc, 'y')} >>> getcycle(d, 'x') ['x', 'z', 'y', 'x'] See Also -------- isdag """ return _toposort(d, keys=keys, returncycle=True) def isdag(d, keys): """ Does Dask form a directed acyclic graph when calculating keys? ``keys`` may be a single key or list of keys. Examples -------- >>> inc = lambda x: x + 1 >>> isdag({'x': 0, 'y': (inc, 'x')}, 'y') True >>> isdag({'x': (inc, 'y'), 'y': (inc, 'x')}, 'y') False See Also -------- getcycle """ return not getcycle(d, keys) class literal(object): """A small serializable object to wrap literal values without copying""" __slots__ = ('data',) def __init__(self, data): self.data = data def __repr__(self): return 'literal' % type(self.data).__name__ def __reduce__(self): return (literal, (self.data,)) def __call__(self): return self.data def quote(x): """ Ensure that this value remains this value in a dask graph Some values in dask graph take on special meaning. Sometimes we want to ensure that our data is not interpreted but remains literal. 
>>> quote((add, 1, 2)) # doctest: +SKIP (literal,) """ if istask(x) or type(x) is list: return (literal(x),) return x dask-0.16.0/dask/dataframe/000077500000000000000000000000001320364734500153755ustar00rootroot00000000000000dask-0.16.0/dask/dataframe/__init__.py000066400000000000000000000017021320364734500175060ustar00rootroot00000000000000from __future__ import print_function, division, absolute_import from .core import (DataFrame, Series, Index, _Frame, map_partitions, repartition, to_delayed, to_datetime, to_timedelta) from .groupby import Aggregation from .io import (from_array, from_pandas, from_bcolz, from_dask_array, read_hdf, read_sql_table, from_delayed, read_csv, to_csv, read_table, demo, to_hdf, to_records, to_bag) from .optimize import optimize from .multi import merge, concat from .rolling import (rolling_count, rolling_sum, rolling_mean, rolling_median, rolling_min, rolling_max, rolling_std, rolling_var, rolling_skew, rolling_kurt, rolling_quantile, rolling_apply, rolling_window) from ..base import compute from .reshape import get_dummies, pivot_table, melt try: from .io import read_parquet, to_parquet except ImportError: pass dask-0.16.0/dask/dataframe/accessor.py000066400000000000000000000075601320364734500175610ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import numpy as np import pandas as pd from toolz import partial from ..utils import derived_from from .utils import is_categorical_dtype def maybe_wrap_pandas(obj, x): if isinstance(x, np.ndarray): if isinstance(obj, pd.Series): return pd.Series(x, index=obj.index, dtype=x.dtype) return pd.Index(x) return x class Accessor(object): """ Base class for pandas Accessor objects cat, dt, and str. Notes ----- Subclasses should define the following attributes: * _accessor * _accessor_name """ _not_implemented = set() def __init__(self, series): from .core import Series if not isinstance(series, Series): raise ValueError('Accessor cannot be initialized') self._validate(series) self._series = series def _validate(self, series): pass @staticmethod def _delegate_property(obj, accessor, attr): out = getattr(getattr(obj, accessor, obj), attr) return maybe_wrap_pandas(obj, out) @staticmethod def _delegate_method(obj, accessor, attr, args, kwargs): out = getattr(getattr(obj, accessor, obj), attr)(*args, **kwargs) return maybe_wrap_pandas(obj, out) def _property_map(self, attr): meta = self._delegate_property(self._series._meta, self._accessor_name, attr) token = '%s-%s' % (self._accessor_name, attr) return self._series.map_partitions(self._delegate_property, self._accessor_name, attr, token=token, meta=meta) def _function_map(self, attr, *args, **kwargs): meta = self._delegate_method(self._series._meta_nonempty, self._accessor_name, attr, args, kwargs) token = '%s-%s' % (self._accessor_name, attr) return self._series.map_partitions(self._delegate_method, self._accessor_name, attr, args, kwargs, meta=meta, token=token) @property def _delegates(self): return set(dir(self._accessor)).difference(self._not_implemented) def __dir__(self): o = self._delegates o.update(self.__dict__) o.update(dir(type(self))) return list(o) def __getattr__(self, key): if key in self._delegates: if isinstance(getattr(self._accessor, key), property): return self._property_map(key) else: return partial(self._function_map, key) else: raise AttributeError(key) class DatetimeAccessor(Accessor): """ Accessor object for datetimelike properties of the Series values. 
Examples -------- >>> s.dt.microsecond # doctest: +SKIP """ _accessor = pd.Series.dt _accessor_name = 'dt' class StringAccessor(Accessor): """ Accessor object for string properties of the Series values. Examples -------- >>> s.str.lower() # doctest: +SKIP """ _accessor = pd.Series.str _accessor_name = 'str' _not_implemented = {'get_dummies'} def _validate(self, series): if not (series.dtype == 'object' or ( is_categorical_dtype(series) and series.cat.categories.dtype == 'object')): raise AttributeError("Can only use .str accessor with object dtype") @derived_from(pd.core.strings.StringMethods) def split(self, pat=None, n=-1): return self._function_map('split', pat=pat, n=n) def __getitem__(self, index): return self._series.map_partitions(str_get, index, meta=self._series._meta) def str_get(series, index): """ Implements series.str[index] """ return series.str[index] dask-0.16.0/dask/dataframe/categorical.py000066400000000000000000000207341320364734500202320ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from collections import defaultdict import pandas as pd from toolz import partition_all from ..base import tokenize, compute_as_if_collection from .accessor import Accessor from .utils import (has_known_categories, clear_known_categories, is_scalar, is_categorical_dtype) def _categorize_block(df, categories, index): """ Categorize a dataframe with given categories df: DataFrame categories: dict mapping column name to iterable of categories """ df = df.copy() for col, vals in categories.items(): if is_categorical_dtype(df[col]): df[col] = df[col].cat.set_categories(vals) else: df[col] = pd.Categorical(df[col], categories=vals, ordered=False) if index is not None: if is_categorical_dtype(df.index): ind = df.index.set_categories(index) else: ind = pd.Categorical(df.index, categories=index, ordered=False) ind.name = df.index.name df.index = ind return df def _get_categories(df, columns, index): res = {} for col in columns: x = df[col] if is_categorical_dtype(x): res[col] = pd.Series(x.cat.categories) else: res[col] = x.dropna().drop_duplicates() if index: if is_categorical_dtype(df.index): return res, df.index.categories return res, df.index.dropna().drop_duplicates() return res, None def _get_categories_agg(parts): res = defaultdict(list) res_ind = [] for p in parts: for k, v in p[0].items(): res[k].append(v) res_ind.append(p[1]) res = {k: pd.concat(v, ignore_index=True).drop_duplicates() for k, v in res.items()} if res_ind[0] is None: return res, None return res, res_ind[0].append(res_ind[1:]).drop_duplicates() def categorize(df, columns=None, index=None, split_every=None, **kwargs): """Convert columns of the DataFrame to category dtype. Parameters ---------- columns : list, optional A list of column names to convert to categoricals. By default any column with an object dtype is converted to a categorical, and any unknown categoricals are made known. index : bool, optional Whether to categorize the index. By default, object indices are converted to categorical, and unknown categorical indices are made known. Set True to always categorize the index, False to never. split_every : int, optional Group partitions into groups of this size while performing a tree-reduction. If set to False, no tree-reduction will be used. Default is 16. kwargs Keyword arguments are passed on to compute. 
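Examples
--------
A minimal sketch; the frame below is made up purely for illustration, and any
object-dtype column behaves the same way:

>>> import pandas as pd  # doctest: +SKIP
>>> import dask.dataframe as dd  # doctest: +SKIP
>>> pdf = pd.DataFrame({'a': ['x', 'y', 'x', 'z']})  # doctest: +SKIP
>>> ddf = dd.from_pandas(pdf, npartitions=2)  # doctest: +SKIP
>>> ddf = ddf.categorize(columns=['a'])  # doctest: +SKIP
>>> ddf.a.cat.known  # doctest: +SKIP
True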
""" meta = df._meta if columns is None: columns = list(meta.select_dtypes(['object', 'category']).columns) elif is_scalar(columns): columns = [columns] # Filter out known categorical columns columns = [c for c in columns if not (is_categorical_dtype(meta[c]) and has_known_categories(meta[c]))] if index is not False: if is_categorical_dtype(meta.index): index = not has_known_categories(meta.index) elif index is None: index = meta.index.dtype == object # Nothing to do if not len(columns) and index is False: return df if split_every is None: split_every = 16 elif split_every is False: split_every = df.npartitions elif not isinstance(split_every, int) or split_every < 2: raise ValueError("split_every must be an integer >= 2") token = tokenize(df, columns, index, split_every) a = 'get-categories-chunk-' + token dsk = {(a, i): (_get_categories, key, columns, index) for (i, key) in enumerate(df.__dask_keys__())} prefix = 'get-categories-agg-' + token k = df.npartitions depth = 0 while k > split_every: b = prefix + str(depth) for part_i, inds in enumerate(partition_all(split_every, range(k))): dsk[(b, part_i)] = (_get_categories_agg, [(a, i) for i in inds]) k = part_i + 1 a = b depth += 1 dsk[(prefix, 0)] = (_get_categories_agg, [(a, i) for i in range(k)]) dsk.update(df.dask) # Compute the categories categories, index = compute_as_if_collection(type(df), dsk, (prefix, 0), **kwargs) # Categorize each partition return df.map_partitions(_categorize_block, categories, index) class CategoricalAccessor(Accessor): """ Accessor object for categorical properties of the Series values. Examples -------- >>> s.cat.categories # doctest: +SKIP Notes ----- Attributes that depend only on metadata are eager * categories * ordered Attributes depending on the entire dataset are lazy * codes * ... So `df.a.cat.categories` <=> `df.a._meta.cat.categories` So `df.a.cat.codes` <=> `df.a.map_partitions(lambda x: x.cat.codes)` """ _accessor = pd.Series.cat _accessor_name = 'cat' def _validate(self, series): if not is_categorical_dtype(series.dtype): raise AttributeError("Can only use .cat accessor with a " "'category' dtype") @property def known(self): """Whether the categories are fully known""" return has_known_categories(self._series) def as_known(self, **kwargs): """Ensure the categories in this series are known. If the categories are known, this is a no-op. If unknown, the categories are computed, and a new series with known categories is returned. Parameters ---------- kwargs Keywords to pass on to the call to `compute`. """ if self.known: return self categories = self._property_map('categories').unique().compute(**kwargs) return self.set_categories(categories.values) def as_unknown(self): """Ensure the categories in this series are unknown""" if not self.known: return self._series out = self._series.copy() out._meta = clear_known_categories(out._meta) return out @property def ordered(self): return self._delegate_property(self._series._meta, 'cat', 'ordered') @property def categories(self): """The categories of this categorical. If categories are unknown, an error is raised""" if not self.known: msg = ("`df.column.cat.categories` with unknown categories is not " "supported. Please use `column.cat.as_known()` or " "`df.categorize()` beforehand to ensure known categories") raise NotImplementedError(msg) return self._delegate_property(self._series._meta, 'cat', 'categories') @property def codes(self): """The codes of this categorical. 
If categories are unknown, an error is raised""" if not self.known: msg = ("`df.column.cat.codes` with unknown categories is not " "supported. Please use `column.cat.as_known()` or " "`df.categorize()` beforehand to ensure known categories") raise NotImplementedError(msg) return self._property_map('codes') def remove_unused_categories(self): """ Removes categories which are not used Notes ----- This method requires a full scan of the data to compute the unique values, which can be expensive. """ # get the set of used categories present = self._series.dropna().unique() present = pd.Index(present.compute()) if isinstance(self._series._meta, pd.CategoricalIndex): meta_cat = self._series._meta else: meta_cat = self._series._meta.cat # Reorder to keep cat:code relationship, filtering unused (-1) ordered, mask = present.reindex(meta_cat.categories) new_categories = ordered[mask != -1] meta = meta_cat.set_categories(new_categories, ordered=meta_cat.ordered) return self._series.map_partitions(self._delegate_method, 'cat', 'set_categories', (), {'new_categories': new_categories}, meta=meta, token='cat-set_categories') dask-0.16.0/dask/dataframe/core.py000066400000000000000000004453161320364734500167140ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from collections import Iterator from functools import wraps, partial import operator from operator import getitem from pprint import pformat import warnings from toolz import merge, first, unique, partition_all, remove import pandas as pd import numpy as np try: from chest import Chest as Cache except ImportError: Cache = dict from .. import array as da from .. import core from ..utils import partial_by_order from .. import threaded from ..compatibility import apply, operator_div, bind_method, PY3 from ..context import globalmethod from ..utils import (random_state_data, pseudorandom, derived_from, funcname, memory_repr, put_lines, M, key_split, OperatorMethodMixin) from ..base import Base, tokenize, dont_optimize, is_dask_collection from . import methods from .accessor import DatetimeAccessor, StringAccessor from .categorical import CategoricalAccessor, categorize from .hashing import hash_pandas_object from .optimize import optimize from .utils import (meta_nonempty, make_meta, insert_meta_param_description, raise_on_meta_error, clear_known_categories, is_categorical_dtype, has_known_categories, PANDAS_VERSION) no_default = '__no_default__' if PANDAS_VERSION >= '0.20.0': from pandas.util import cache_readonly pd.set_option('compute.use_numexpr', False) else: from pandas.util.decorators import cache_readonly pd.computation.expressions.set_use_numexpr(False) def _concat(args): if not args: return args if isinstance(first(core.flatten(args)), np.ndarray): return da.core.concatenate3(args) if not isinstance(args[0], (pd.DataFrame, pd.Series, pd.Index)): try: return pd.Series(args) except Exception: return args # We filter out empty partitions here because pandas frequently has # inconsistent dtypes in results between empty and non-empty frames. # Ideally this would be handled locally for each operation, but in practice # this seems easier. TODO: don't do this. 
args2 = [i for i in args if len(i)] return args[0] if not args2 else methods.concat(args2, uniform=True) def _get_return_type(meta): if isinstance(meta, _Frame): meta = meta._meta if isinstance(meta, pd.Series): return Series elif isinstance(meta, pd.DataFrame): return DataFrame elif isinstance(meta, pd.Index): return Index return Scalar def new_dd_object(dsk, _name, meta, divisions): """Generic constructor for dask.dataframe objects. Decides the appropriate output class based on the type of `meta` provided. """ return _get_return_type(meta)(dsk, _name, meta, divisions) def finalize(results): return _concat(results) class Scalar(Base, OperatorMethodMixin): """ A Dask object to represent a pandas scalar""" def __init__(self, dsk, name, meta, divisions=None): # divisions is ignored, only present to be compatible with other # objects. self.dask = dsk self._name = name meta = make_meta(meta) if isinstance(meta, (pd.DataFrame, pd.Series, pd.Index)): raise TypeError("Expected meta to specify scalar, got " "{0}".format(type(meta).__name__)) self._meta = meta def __dask_graph__(self): return self.dask def __dask_keys__(self): return [self.key] def __dask_tokenize__(self): return self._name __dask_optimize__ = globalmethod(optimize, key='dataframe_optimize', falsey=dont_optimize) __dask_scheduler__ = staticmethod(threaded.get) def __dask_postcompute__(self): return first, () def __dask_postpersist__(self): return Scalar, (self._name, self._meta, self.divisions) @property def _meta_nonempty(self): return self._meta @property def dtype(self): return self._meta.dtype def __dir__(self): o = set(dir(type(self))) o.update(self.__dict__) if not hasattr(self._meta, 'dtype'): o.remove('dtype') # dtype only in `dir` if available return list(o) @property def divisions(self): """Dummy divisions to be compat with Series and DataFrame""" return [None, None] def __repr__(self): name = self._name if len(self._name) < 10 else self._name[:7] + '...' 
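# Report the dtype when the wrapped scalar metadata defines one; otherwise
# fall back to showing its Python type name.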
if hasattr(self._meta, 'dtype'): extra = ', dtype=%s' % self._meta.dtype else: extra = ', type=%s' % type(self._meta).__name__ return "dd.Scalar<%s%s>" % (name, extra) def __array__(self): # array interface is required to support pandas instance + Scalar # Otherwise, above op results in pd.Series of Scalar (object dtype) return np.asarray(self.compute()) @property def _args(self): return (self.dask, self._name, self._meta) def __getstate__(self): return self._args def __setstate__(self, state): self.dask, self._name, self._meta = state @property def key(self): return (self._name, 0) @classmethod def _get_unary_operator(cls, op): def f(self): name = funcname(op) + '-' + tokenize(self) dsk = {(name, 0): (op, (self._name, 0))} meta = op(self._meta_nonempty) return Scalar(merge(dsk, self.dask), name, meta) return f @classmethod def _get_binary_operator(cls, op, inv=False): return lambda self, other: _scalar_binary(op, self, other, inv=inv) def _scalar_binary(op, self, other, inv=False): name = '{0}-{1}'.format(funcname(op), tokenize(self, other)) dsk = self.dask return_type = _get_return_type(other) if isinstance(other, Scalar): dsk = merge(dsk, other.dask) other_key = (other._name, 0) elif is_dask_collection(other): return NotImplemented else: other_key = other if inv: dsk.update({(name, 0): (op, other_key, (self._name, 0))}) else: dsk.update({(name, 0): (op, (self._name, 0), other_key)}) other_meta = make_meta(other) other_meta_nonempty = meta_nonempty(other_meta) if inv: meta = op(other_meta_nonempty, self._meta_nonempty) else: meta = op(self._meta_nonempty, other_meta_nonempty) if return_type is not Scalar: return return_type(dsk, name, meta, [other.index.min(), other.index.max()]) else: return Scalar(dsk, name, meta) class _Frame(Base, OperatorMethodMixin): """ Superclass for DataFrame and Series Parameters ---------- dsk: dict The dask graph to compute this DataFrame name: str The key prefix that specifies which keys in the dask comprise this particular DataFrame / Series meta: pandas.DataFrame, pandas.Series, or pandas.Index An empty pandas object with names, dtypes, and indices matching the expected output. 
divisions: tuple of index values Values along which we partition our blocks on the index """ def __init__(self, dsk, name, meta, divisions): self.dask = dsk self._name = name meta = make_meta(meta) if not isinstance(meta, self._partition_type): raise TypeError("Expected meta to specify type {0}, got type " "{1}".format(self._partition_type.__name__, type(meta).__name__)) self._meta = meta self.divisions = tuple(divisions) def __dask_graph__(self): return self.dask def __dask_keys__(self): return [(self._name, i) for i in range(self.npartitions)] def __dask_tokenize__(self): return self._name __dask_optimize__ = globalmethod(optimize, key='dataframe_optimize', falsey=dont_optimize) __dask_scheduler__ = staticmethod(threaded.get) def __dask_postcompute__(self): return finalize, () def __dask_postpersist__(self): return type(self), (self._name, self._meta, self.divisions) @property def _constructor(self): return new_dd_object @property def npartitions(self): """Return number of partitions""" return len(self.divisions) - 1 @property def size(self): """ Size of the series """ return self.reduction(methods.size, np.sum, token='size', meta=int, split_every=False) @property def _meta_nonempty(self): """ A non-empty version of `_meta` with fake data.""" return meta_nonempty(self._meta) @property def _args(self): return (self.dask, self._name, self._meta, self.divisions) def __getstate__(self): return self._args def __setstate__(self, state): self.dask, self._name, self._meta, self.divisions = state def copy(self): """ Make a copy of the dataframe This is strictly a shallow copy of the underlying computational graph. It does not affect the underlying data """ return new_dd_object(self.dask, self._name, self._meta, self.divisions) def __array__(self, dtype=None, **kwargs): self._computed = self.compute() x = np.array(self._computed) return x def __array_wrap__(self, array, context=None): raise NotImplementedError @property def _elemwise(self): return elemwise @property def _repr_data(self): raise NotImplementedError @property def _repr_divisions(self): name = "npartitions={0}".format(self.npartitions) if self.known_divisions: divisions = pd.Index(self.divisions, name=name) else: # avoid to be converted to NaN divisions = pd.Index([''] * (self.npartitions + 1), name=name) return divisions def __repr__(self): data = self._repr_data.to_string(max_rows=5, show_dimensions=False) return """Dask {klass} Structure: {data} Dask Name: {name}, {task} tasks""".format(klass=self.__class__.__name__, data=data, name=key_split(self._name), task=len(self.dask)) @property def index(self): """Return dask Index instance""" name = self._name + '-index' dsk = {(name, i): (getattr, key, 'index') for i, key in enumerate(self.__dask_keys__())} return Index(merge(dsk, self.dask), name, self._meta.index, self.divisions) def reset_index(self, drop=False): """Reset the index to the default index. Note that unlike in ``pandas``, the reset ``dask.dataframe`` index will not be monotonically increasing from 0. Instead, it will restart at 0 for each partition (e.g. ``index1 = [0, ..., 10], index2 = [0, ...]``). This is due to the inability to statically know the full length of the index. For DataFrame with multi-level index, returns a new DataFrame with labeling information in the columns under the index names, defaulting to 'level_0', 'level_1', etc. if any are None. For a standard index, the index name will be used (if set), otherwise a default 'index' or 'level_0' (if 'index' is already taken) will be used. 
Parameters ---------- drop : boolean, default False Do not try to insert index into dataframe columns. """ return self.map_partitions(M.reset_index, drop=drop).clear_divisions() @property def known_divisions(self): """Whether divisions are already known""" return len(self.divisions) > 0 and self.divisions[0] is not None def clear_divisions(self): """ Forget division information """ divisions = (None,) * (self.npartitions + 1) return type(self)(self.dask, self._name, self._meta, divisions) def get_partition(self, n): """Get a dask DataFrame/Series representing the `nth` partition.""" if 0 <= n < self.npartitions: name = 'get-partition-%s-%s' % (str(n), self._name) dsk = {(name, 0): (self._name, n)} divisions = self.divisions[n:n + 2] return new_dd_object(merge(self.dask, dsk), name, self._meta, divisions) else: msg = "n must be 0 <= n < {0}".format(self.npartitions) raise ValueError(msg) @derived_from(pd.DataFrame) def drop_duplicates(self, split_every=None, split_out=1, **kwargs): # Let pandas error on bad inputs self._meta_nonempty.drop_duplicates(**kwargs) if 'subset' in kwargs and kwargs['subset'] is not None: split_out_setup = split_out_on_cols split_out_setup_kwargs = {'cols': kwargs['subset']} else: split_out_setup = split_out_setup_kwargs = None if kwargs.get('keep', True) is False: raise NotImplementedError("drop_duplicates with keep=False") chunk = M.drop_duplicates return aca(self, chunk=chunk, aggregate=chunk, meta=self._meta, token='drop-duplicates', split_every=split_every, split_out=split_out, split_out_setup=split_out_setup, split_out_setup_kwargs=split_out_setup_kwargs, **kwargs) def __len__(self): return self.reduction(len, np.sum, token='len', meta=int, split_every=False).compute() @insert_meta_param_description(pad=12) def map_partitions(self, func, *args, **kwargs): """ Apply Python function on each DataFrame partition. Note that the index and divisions are assumed to remain unchanged. Parameters ---------- func : function Function applied to each partition. args, kwargs : Arguments and keywords to pass to the function. The partition will be the first argument, and these will be passed *after*. $META Examples -------- Given a DataFrame, Series, or Index, such as: >>> import dask.dataframe as dd >>> df = pd.DataFrame({'x': [1, 2, 3, 4, 5], ... 'y': [1., 2., 3., 4., 5.]}) >>> ddf = dd.from_pandas(df, npartitions=2) One can use ``map_partitions`` to apply a function on each partition. Extra arguments and keywords can optionally be provided, and will be passed to the function after the partition. Here we apply a function with arguments and keywords to a DataFrame, resulting in a Series: >>> def myadd(df, a, b=1): ... return df.x + df.y + a + b >>> res = ddf.map_partitions(myadd, 1, b=2) >>> res.dtype dtype('float64') By default, dask tries to infer the output metadata by running your provided function on some fake data. This works well in many cases, but can sometimes be expensive, or even fail. To avoid this, you can manually specify the output metadata with the ``meta`` keyword. This can be specified in many forms, for more information see ``dask.dataframe.utils.make_meta``. 
Here we specify the output is a Series with no name, and dtype ``float64``: >>> res = ddf.map_partitions(myadd, 1, b=2, meta=(None, 'f8')) Here we map a function that takes in a DataFrame, and returns a DataFrame with a new column: >>> res = ddf.map_partitions(lambda df: df.assign(z=df.x * df.y)) >>> res.dtypes x int64 y float64 z float64 dtype: object As before, the output metadata can also be specified manually. This time we pass in a ``dict``, as the output is a DataFrame: >>> res = ddf.map_partitions(lambda df: df.assign(z=df.x * df.y), ... meta={'x': 'i8', 'y': 'f8', 'z': 'f8'}) In the case where the metadata doesn't change, you can also pass in the object itself directly: >>> res = ddf.map_partitions(lambda df: df.head(), meta=df) Also note that the index and divisions are assumed to remain unchanged. If the function you're mapping changes the index/divisions, you'll need to clear them afterwards: >>> ddf.map_partitions(func).clear_divisions() # doctest: +SKIP """ return map_partitions(func, self, *args, **kwargs) @insert_meta_param_description(pad=12) def map_overlap(self, func, before, after, *args, **kwargs): """Apply a function to each partition, sharing rows with adjacent partitions. This can be useful for implementing windowing functions such as ``df.rolling(...).mean()`` or ``df.diff()``. Parameters ---------- func : function Function applied to each partition. before : int The number of rows to prepend to partition ``i`` from the end of partition ``i - 1``. after : int The number of rows to append to partition ``i`` from the beginning of partition ``i + 1``. args, kwargs : Arguments and keywords to pass to the function. The partition will be the first argument, and these will be passed *after*. $META Notes ----- Given positive integers ``before`` and ``after``, and a function ``func``, ``map_overlap`` does the following: 1. Prepend ``before`` rows to each partition ``i`` from the end of partition ``i - 1``. The first partition has no rows prepended. 2. Append ``after`` rows to each partition ``i`` from the beginning of partition ``i + 1``. The last partition has no rows appended. 3. Apply ``func`` to each partition, passing in any extra ``args`` and ``kwargs`` if provided. 4. Trim ``before`` rows from the beginning of all but the first partition. 5. Trim ``after`` rows from the end of all but the last partition. Note that the index and divisions are assumed to remain unchanged. Examples -------- Given a DataFrame, Series, or Index, such as: >>> import dask.dataframe as dd >>> df = pd.DataFrame({'x': [1, 2, 4, 7, 11], ... 'y': [1., 2., 3., 4., 5.]}) >>> ddf = dd.from_pandas(df, npartitions=2) A rolling sum with a trailing moving window of size 2 can be computed by overlapping 2 rows before each partition, and then mapping calls to ``df.rolling(2).sum()``: >>> ddf.compute() x y 0 1 1.0 1 2 2.0 2 4 3.0 3 7 4.0 4 11 5.0 >>> ddf.map_overlap(lambda df: df.rolling(2).sum(), 2, 0).compute() x y 0 NaN NaN 1 3.0 3.0 2 6.0 5.0 3 11.0 7.0 4 18.0 9.0 The pandas ``diff`` method computes a discrete difference shifted by a number of periods (can be positive or negative). This can be implemented by mapping calls to ``df.diff`` to each partition after prepending/appending that many rows, depending on sign: >>> def diff(df, periods=1): ... before, after = (periods, 0) if periods > 0 else (0, -periods) ... return df.map_overlap(lambda df, periods=1: df.diff(periods), ... 
periods, 0, periods=periods) >>> diff(ddf, 1).compute() x y 0 NaN NaN 1 1.0 1.0 2 2.0 1.0 3 3.0 1.0 4 4.0 1.0 If you have a ``DatetimeIndex``, you can use a `timedelta` for time- based windows. >>> ts = pd.Series(range(10), index=pd.date_range('2017', periods=10)) >>> dts = dd.from_pandas(ts, npartitions=2) >>> dts.map_overlap(lambda df: df.rolling('2D').sum(), ... pd.Timedelta('2D'), 0).compute() 2017-01-01 0.0 2017-01-02 1.0 2017-01-03 3.0 2017-01-04 5.0 2017-01-05 7.0 2017-01-06 9.0 2017-01-07 11.0 2017-01-08 13.0 2017-01-09 15.0 2017-01-10 17.0 dtype: float64 """ from .rolling import map_overlap return map_overlap(func, self, before, after, *args, **kwargs) @insert_meta_param_description(pad=12) def reduction(self, chunk, aggregate=None, combine=None, meta=no_default, token=None, split_every=None, chunk_kwargs=None, aggregate_kwargs=None, combine_kwargs=None, **kwargs): """Generic row-wise reductions. Parameters ---------- chunk : callable Function to operate on each partition. Should return a ``pandas.DataFrame``, ``pandas.Series``, or a scalar. aggregate : callable, optional Function to operate on the concatenated result of ``chunk``. If not specified, defaults to ``chunk``. Used to do the final aggregation in a tree reduction. The input to ``aggregate`` depends on the output of ``chunk``. If the output of ``chunk`` is a: - scalar: Input is a Series, with one row per partition. - Series: Input is a DataFrame, with one row per partition. Columns are the rows in the output series. - DataFrame: Input is a DataFrame, with one row per partition. Columns are the columns in the output dataframes. Should return a ``pandas.DataFrame``, ``pandas.Series``, or a scalar. combine : callable, optional Function to operate on intermediate concatenated results of ``chunk`` in a tree-reduction. If not provided, defaults to ``aggregate``. The input/output requirements should match that of ``aggregate`` described above. $META token : str, optional The name to use for the output keys. split_every : int, optional Group partitions into groups of this size while performing a tree-reduction. If set to False, no tree-reduction will be used, and all intermediates will be concatenated and passed to ``aggregate``. Default is 8. chunk_kwargs : dict, optional Keyword arguments to pass on to ``chunk`` only. aggregate_kwargs : dict, optional Keyword arguments to pass on to ``aggregate`` only. combine_kwargs : dict, optional Keyword arguments to pass on to ``combine`` only. kwargs : All remaining keywords will be passed to ``chunk``, ``combine``, and ``aggregate``. Examples -------- >>> import pandas as pd >>> import dask.dataframe as dd >>> df = pd.DataFrame({'x': range(50), 'y': range(50, 100)}) >>> ddf = dd.from_pandas(df, npartitions=4) Count the number of rows in a DataFrame. To do this, count the number of rows in each partition, then sum the results: >>> res = ddf.reduction(lambda x: x.count(), ... aggregate=lambda x: x.sum()) >>> res.compute() x 50 y 50 dtype: int64 Count the number of rows in a Series with elements greater than or equal to a value (provided via a keyword). >>> def count_greater(x, value=0): ... return (x >= value).sum() >>> res = ddf.x.reduction(count_greater, aggregate=lambda x: x.sum(), ... chunk_kwargs={'value': 25}) >>> res.compute() 25 Aggregate both the sum and count of a Series at the same time: >>> def sum_and_count(x): ... 
return pd.Series({'sum': x.sum(), 'count': x.count()}) >>> res = ddf.x.reduction(sum_and_count, aggregate=lambda x: x.sum()) >>> res.compute() count 50 sum 1225 dtype: int64 Doing the same, but for a DataFrame. Here ``chunk`` returns a DataFrame, meaning the input to ``aggregate`` is a DataFrame with an index with non-unique entries for both 'x' and 'y'. We groupby the index, and sum each group to get the final result. >>> def sum_and_count(x): ... return pd.DataFrame({'sum': x.sum(), 'count': x.count()}) >>> res = ddf.reduction(sum_and_count, ... aggregate=lambda x: x.groupby(level=0).sum()) >>> res.compute() count sum x 50 1225 y 50 3725 """ if aggregate is None: aggregate = chunk if combine is None: if combine_kwargs: raise ValueError("`combine_kwargs` provided with no `combine`") combine = aggregate combine_kwargs = aggregate_kwargs chunk_kwargs = chunk_kwargs.copy() if chunk_kwargs else {} chunk_kwargs['aca_chunk'] = chunk combine_kwargs = combine_kwargs.copy() if combine_kwargs else {} combine_kwargs['aca_combine'] = combine aggregate_kwargs = aggregate_kwargs.copy() if aggregate_kwargs else {} aggregate_kwargs['aca_aggregate'] = aggregate return aca(self, chunk=_reduction_chunk, aggregate=_reduction_aggregate, combine=_reduction_combine, meta=meta, token=token, split_every=split_every, chunk_kwargs=chunk_kwargs, aggregate_kwargs=aggregate_kwargs, combine_kwargs=combine_kwargs, **kwargs) @derived_from(pd.DataFrame) def pipe(self, func, *args, **kwargs): # Taken from pandas: # https://github.com/pydata/pandas/blob/master/pandas/core/generic.py#L2698-L2707 if isinstance(func, tuple): func, target = func if target in kwargs: raise ValueError('%s is both the pipe target and a keyword ' 'argument' % target) kwargs[target] = self return func(*args, **kwargs) else: return func(self, *args, **kwargs) def random_split(self, frac, random_state=None): """ Pseudorandomly split dataframe into different pieces row-wise Parameters ---------- frac : list List of floats that should sum to one. random_state: int or np.random.RandomState If int create a new RandomState with this as the seed Otherwise draw from the passed RandomState Examples -------- 50/50 split >>> a, b = df.random_split([0.5, 0.5]) # doctest: +SKIP 80/10/10 split, consistent random_state >>> a, b, c = df.random_split([0.8, 0.1, 0.1], random_state=123) # doctest: +SKIP See Also -------- dask.DataFrame.sample """ if not np.allclose(sum(frac), 1): raise ValueError("frac should sum to 1") state_data = random_state_data(self.npartitions, random_state) token = tokenize(self, frac, random_state) name = 'split-' + token dsk = {(name, i): (pd_split, (self._name, i), frac, state) for i, state in enumerate(state_data)} out = [] for i in range(len(frac)): name2 = 'split-%d-%s' % (i, token) dsk2 = {(name2, j): (getitem, (name, j), i) for j in range(self.npartitions)} out.append(type(self)(merge(self.dask, dsk, dsk2), name2, self._meta, self.divisions)) return out def head(self, n=5, npartitions=1, compute=True): """ First n rows of the dataset Parameters ---------- n : int, optional The number of rows to return. Default is 5. npartitions : int, optional Elements are only taken from the first ``npartitions``, with a default of 1. If there are fewer than ``n`` rows in the first ``npartitions`` a warning will be raised and any found rows returned. Pass -1 to use all partitions. compute : bool, optional Whether to compute the result, default is True. 
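        Examples
        --------
        Illustrative only; assumes ``ddf`` is a Dask DataFrame whose first
        partition holds at least three rows. Passing ``compute=False`` returns
        a lazy Dask object rather than a concrete pandas one.

        >>> ddf.head(3)  # doctest: +SKIP
        >>> ddf.head(10, npartitions=2)  # doctest: +SKIP
        >>> ddf.head(compute=False)  # doctest: +SKIP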
""" if npartitions <= -1: npartitions = self.npartitions if npartitions > self.npartitions: msg = "only {} partitions, head received {}" raise ValueError(msg.format(self.npartitions, npartitions)) name = 'head-%d-%d-%s' % (npartitions, n, self._name) if npartitions > 1: name_p = 'head-partial-%d-%s' % (n, self._name) dsk = {} for i in range(npartitions): dsk[(name_p, i)] = (M.head, (self._name, i), n) concat = (_concat, [(name_p, i) for i in range(npartitions)]) dsk[(name, 0)] = (safe_head, concat, n) else: dsk = {(name, 0): (safe_head, (self._name, 0), n)} result = new_dd_object(merge(self.dask, dsk), name, self._meta, [self.divisions[0], self.divisions[npartitions]]) if compute: result = result.compute() return result def tail(self, n=5, compute=True): """ Last n rows of the dataset Caveat, the only checks the last n rows of the last partition. """ name = 'tail-%d-%s' % (n, self._name) dsk = {(name, 0): (M.tail, (self._name, self.npartitions - 1), n)} result = new_dd_object(merge(self.dask, dsk), name, self._meta, self.divisions[-2:]) if compute: result = result.compute() return result @property def loc(self): """ Purely label-location based indexer for selection by label. >>> df.loc["b"] # doctest: +SKIP >>> df.loc["b":"d"] # doctest: +SKIP""" from .indexing import _LocIndexer return _LocIndexer(self) # NOTE: `iloc` is not implemented because of performance concerns. # see https://github.com/dask/dask/pull/507 def repartition(self, divisions=None, npartitions=None, freq=None, force=False): """ Repartition dataframe along new divisions Parameters ---------- divisions : list, optional List of partitions to be used. If specified npartitions will be ignored. npartitions : int, optional Number of partitions of output, must be less than npartitions of input. Only used if divisions isn't specified. freq : str, pd.Timedelta A period on which to partition timeseries data like ``'7D'`` or ``'12h'`` or ``pd.Timedelta(hours=12)``. Assumes a datetime index. force : bool, default False Allows the expansion of the existing divisions. If False then the new divisions lower and upper bounds must be the same as the old divisions. Examples -------- >>> df = df.repartition(npartitions=10) # doctest: +SKIP >>> df = df.repartition(divisions=[0, 5, 10, 20]) # doctest: +SKIP >>> df = df.repartition(freq='7d') # doctest: +SKIP """ if npartitions is not None and divisions is not None: warnings.warn("When providing both npartitions and divisions to " "repartition only npartitions is used.") if npartitions is not None: return repartition_npartitions(self, npartitions) elif divisions is not None: return repartition(self, divisions, force=force) elif freq is not None: return repartition_freq(self, freq=freq) else: raise ValueError( "Provide either divisions= or npartitions= to repartition") @derived_from(pd.DataFrame) def fillna(self, value=None, method=None, limit=None, axis=None): axis = self._validate_axis(axis) if method is None and limit is not None: raise NotImplementedError("fillna with set limit and method=None") if isinstance(value, _Frame): test_value = value._meta_nonempty.values[0] else: test_value = value meta = self._meta_nonempty.fillna(value=test_value, method=method, limit=limit, axis=axis) if axis == 1 or method is None: # Control whether or not dask's partition alignment happens. # We don't want for a pandas Series. 
# We do want it for a dask Series if isinstance(value, pd.Series): args = () kwargs = {'value': value} else: args = (value,) kwargs = {} return self.map_partitions(M.fillna, *args, method=method, limit=limit, axis=axis, meta=meta, **kwargs) if method in ('pad', 'ffill'): method = 'ffill' skip_check = 0 before, after = 1 if limit is None else limit, 0 else: method = 'bfill' skip_check = self.npartitions - 1 before, after = 0, 1 if limit is None else limit if limit is None: name = 'fillna-chunk-' + tokenize(self, method) dsk = {(name, i): (methods.fillna_check, (self._name, i), method, i != skip_check) for i in range(self.npartitions)} parts = new_dd_object(merge(dsk, self.dask), name, meta, self.divisions) else: parts = self return parts.map_overlap(M.fillna, before, after, method=method, limit=limit, meta=meta) @derived_from(pd.DataFrame) def ffill(self, axis=None, limit=None): return self.fillna(method='ffill', limit=limit, axis=axis) @derived_from(pd.DataFrame) def bfill(self, axis=None, limit=None): return self.fillna(method='bfill', limit=limit, axis=axis) def sample(self, frac, replace=False, random_state=None): """ Random sample of items Parameters ---------- frac : float, optional Fraction of axis items to return. replace: boolean, optional Sample with or without replacement. Default = False. random_state: int or ``np.random.RandomState`` If int we create a new RandomState with this as the seed Otherwise we draw from the passed RandomState See Also -------- DataFrame.random_split pandas.DataFrame.sample """ if random_state is None: random_state = np.random.RandomState() name = 'sample-' + tokenize(self, frac, replace, random_state) state_data = random_state_data(self.npartitions, random_state) dsk = {(name, i): (methods.sample, (self._name, i), state, frac, replace) for i, state in enumerate(state_data)} return new_dd_object(merge(self.dask, dsk), name, self._meta, self.divisions) def to_hdf(self, path_or_buf, key, mode='a', append=False, get=None, **kwargs): """ See dd.to_hdf docstring for more information """ from .io import to_hdf return to_hdf(self, path_or_buf, key, mode, append, get=get, **kwargs) def to_parquet(self, path, *args, **kwargs): """ See dd.to_parquet docstring for more information """ from .io import to_parquet return to_parquet(self, path, *args, **kwargs) def to_csv(self, filename, **kwargs): """ See dd.to_csv docstring for more information """ from .io import to_csv return to_csv(self, filename, **kwargs) def to_delayed(self): """ See dd.to_delayed docstring for more information """ return to_delayed(self) @classmethod def _get_unary_operator(cls, op): return lambda self: elemwise(op, self) @classmethod def _get_binary_operator(cls, op, inv=False): if inv: return lambda self, other: elemwise(op, other, self) else: return lambda self, other: elemwise(op, self, other) def rolling(self, window, min_periods=None, freq=None, center=False, win_type=None, axis=0): """Provides rolling transformations. Parameters ---------- window : int, str, offset Size of the moving window. This is the number of observations used for calculating the statistic. The window size must not be so large as to span more than one adjacent partition. If using an offset or offset alias like '5D', the data must have a ``DatetimeIndex`` .. versionchanged:: 0.15.0 Now accepts offsets and string offset aliases min_periods : int, default None Minimum number of observations in window required to have a value (otherwise result is NA). 
center : boolean, default False Set the labels at the center of the window. win_type : string, default None Provide a window type. The recognized window types are identical to pandas. axis : int, default 0 Returns ------- a Rolling object on which to call a method to compute a statistic Notes ----- The `freq` argument is not supported. """ from dask.dataframe.rolling import Rolling if isinstance(window, int): if window < 0: raise ValueError('window must be >= 0') if min_periods is not None: if not isinstance(min_periods, int): raise ValueError('min_periods must be an integer') if min_periods < 0: raise ValueError('min_periods must be >= 0') return Rolling(self, window=window, min_periods=min_periods, freq=freq, center=center, win_type=win_type, axis=axis) @derived_from(pd.DataFrame) def diff(self, periods=1, axis=0): axis = self._validate_axis(axis) if not isinstance(periods, int): raise TypeError("periods must be an integer") if axis == 1: return self.map_partitions(M.diff, token='diff', periods=periods, axis=1) before, after = (periods, 0) if periods > 0 else (0, -periods) return self.map_overlap(M.diff, before, after, token='diff', periods=periods) @derived_from(pd.DataFrame) def shift(self, periods=1, freq=None, axis=0): axis = self._validate_axis(axis) if not isinstance(periods, int): raise TypeError("periods must be an integer") if axis == 1: return self.map_partitions(M.shift, token='shift', periods=periods, freq=freq, axis=1) if freq is None: before, after = (periods, 0) if periods > 0 else (0, -periods) return self.map_overlap(M.shift, before, after, token='shift', periods=periods) # Let pandas error on invalid arguments meta = self._meta_nonempty.shift(periods, freq=freq) out = self.map_partitions(M.shift, token='shift', periods=periods, freq=freq, meta=meta) return maybe_shift_divisions(out, periods, freq=freq) def _reduction_agg(self, name, axis=None, skipna=True, split_every=False): axis = self._validate_axis(axis) meta = getattr(self._meta_nonempty, name)(axis=axis, skipna=skipna) token = self._token_prefix + name method = getattr(M, name) if axis == 1: return self.map_partitions(method, meta=meta, token=token, skipna=skipna, axis=axis) else: result = self.reduction(method, meta=meta, token=token, skipna=skipna, axis=axis, split_every=split_every) if isinstance(self, DataFrame): result.divisions = (min(self.columns), max(self.columns)) return result @derived_from(pd.DataFrame) def abs(self): _raise_if_object_series(self, "abs") meta = self._meta_nonempty.abs() return self.map_partitions(M.abs, meta=meta) @derived_from(pd.DataFrame) def all(self, axis=None, skipna=True, split_every=False): return self._reduction_agg('all', axis=axis, skipna=skipna, split_every=split_every) @derived_from(pd.DataFrame) def any(self, axis=None, skipna=True, split_every=False): return self._reduction_agg('any', axis=axis, skipna=skipna, split_every=split_every) @derived_from(pd.DataFrame) def sum(self, axis=None, skipna=True, split_every=False): return self._reduction_agg('sum', axis=axis, skipna=skipna, split_every=split_every) @derived_from(pd.DataFrame) def prod(self, axis=None, skipna=True, split_every=False): return self._reduction_agg('prod', axis=axis, skipna=skipna, split_every=split_every) @derived_from(pd.DataFrame) def max(self, axis=None, skipna=True, split_every=False): return self._reduction_agg('max', axis=axis, skipna=skipna, split_every=split_every) @derived_from(pd.DataFrame) def min(self, axis=None, skipna=True, split_every=False): return self._reduction_agg('min', axis=axis, 
skipna=skipna, split_every=split_every) @derived_from(pd.DataFrame) def idxmax(self, axis=None, skipna=True, split_every=False): fn = 'idxmax' axis = self._validate_axis(axis) meta = self._meta_nonempty.idxmax(axis=axis, skipna=skipna) if axis == 1: return map_partitions(M.idxmax, self, meta=meta, token=self._token_prefix + fn, skipna=skipna, axis=axis) else: scalar = not isinstance(meta, pd.Series) result = aca([self], chunk=idxmaxmin_chunk, aggregate=idxmaxmin_agg, combine=idxmaxmin_combine, meta=meta, aggregate_kwargs={'scalar': scalar}, token=self._token_prefix + fn, split_every=split_every, skipna=skipna, fn=fn) if isinstance(self, DataFrame): result.divisions = (min(self.columns), max(self.columns)) return result @derived_from(pd.DataFrame) def idxmin(self, axis=None, skipna=True, split_every=False): fn = 'idxmin' axis = self._validate_axis(axis) meta = self._meta_nonempty.idxmax(axis=axis) if axis == 1: return map_partitions(M.idxmin, self, meta=meta, token=self._token_prefix + fn, skipna=skipna, axis=axis) else: scalar = not isinstance(meta, pd.Series) result = aca([self], chunk=idxmaxmin_chunk, aggregate=idxmaxmin_agg, combine=idxmaxmin_combine, meta=meta, aggregate_kwargs={'scalar': scalar}, token=self._token_prefix + fn, split_every=split_every, skipna=skipna, fn=fn) if isinstance(self, DataFrame): result.divisions = (min(self.columns), max(self.columns)) return result @derived_from(pd.DataFrame) def count(self, axis=None, split_every=False): axis = self._validate_axis(axis) token = self._token_prefix + 'count' if axis == 1: meta = self._meta_nonempty.count(axis=axis) return self.map_partitions(M.count, meta=meta, token=token, axis=axis) else: meta = self._meta_nonempty.count() result = self.reduction(M.count, aggregate=M.sum, meta=meta, token=token, split_every=split_every) if isinstance(self, DataFrame): result.divisions = (min(self.columns), max(self.columns)) return result @derived_from(pd.DataFrame) def mean(self, axis=None, skipna=True, split_every=False): axis = self._validate_axis(axis) _raise_if_object_series(self, "mean") meta = self._meta_nonempty.mean(axis=axis, skipna=skipna) if axis == 1: return map_partitions(M.mean, self, meta=meta, token=self._token_prefix + 'mean', axis=axis, skipna=skipna) else: num = self._get_numeric_data() s = num.sum(skipna=skipna, split_every=split_every) n = num.count(split_every=split_every) name = self._token_prefix + 'mean-%s' % tokenize(self, axis, skipna) result = map_partitions(methods.mean_aggregate, s, n, token=name, meta=meta) if isinstance(self, DataFrame): result.divisions = (min(self.columns), max(self.columns)) return result @derived_from(pd.DataFrame) def var(self, axis=None, skipna=True, ddof=1, split_every=False): axis = self._validate_axis(axis) _raise_if_object_series(self, "var") meta = self._meta_nonempty.var(axis=axis, skipna=skipna) if axis == 1: return map_partitions(M.var, self, meta=meta, token=self._token_prefix + 'var', axis=axis, skipna=skipna, ddof=ddof) else: num = self._get_numeric_data() x = 1.0 * num.sum(skipna=skipna, split_every=split_every) x2 = 1.0 * (num ** 2).sum(skipna=skipna, split_every=split_every) n = num.count(split_every=split_every) name = self._token_prefix + 'var' result = map_partitions(methods.var_aggregate, x2, x, n, token=name, meta=meta, ddof=ddof) if isinstance(self, DataFrame): result.divisions = (min(self.columns), max(self.columns)) return result @derived_from(pd.DataFrame) def std(self, axis=None, skipna=True, ddof=1, split_every=False): axis = self._validate_axis(axis) 
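        # Note: for the default axis=0 case, ``std`` is computed below as the
        # element-wise square root of the ``var`` tree reduction, rather than
        # as a separate per-partition reduction of its own.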
_raise_if_object_series(self, "std") meta = self._meta_nonempty.std(axis=axis, skipna=skipna) if axis == 1: return map_partitions(M.std, self, meta=meta, token=self._token_prefix + 'std', axis=axis, skipna=skipna, ddof=ddof) else: v = self.var(skipna=skipna, ddof=ddof, split_every=split_every) name = self._token_prefix + 'std' return map_partitions(np.sqrt, v, meta=meta, token=name) @derived_from(pd.DataFrame) def sem(self, axis=None, skipna=None, ddof=1, split_every=False): axis = self._validate_axis(axis) _raise_if_object_series(self, "sem") meta = self._meta_nonempty.sem(axis=axis, skipna=skipna, ddof=ddof) if axis == 1: return map_partitions(M.sem, self, meta=meta, token=self._token_prefix + 'sem', axis=axis, skipna=skipna, ddof=ddof) else: num = self._get_numeric_data() v = num.var(skipna=skipna, ddof=ddof, split_every=split_every) n = num.count(split_every=split_every) name = self._token_prefix + 'sem' result = map_partitions(np.sqrt, v / n, meta=meta, token=name) if isinstance(self, DataFrame): result.divisions = (min(self.columns), max(self.columns)) return result def quantile(self, q=0.5, axis=0): """ Approximate row-wise and precise column-wise quantiles of DataFrame Parameters ---------- q : list/array of floats, default 0.5 (50%) Iterable of numbers ranging from 0 to 1 for the desired quantiles axis : {0, 1, 'index', 'columns'} (default 0) 0 or 'index' for row-wise, 1 or 'columns' for column-wise """ axis = self._validate_axis(axis) keyname = 'quantiles-concat--' + tokenize(self, q, axis) if axis == 1: if isinstance(q, list): # Not supported, the result will have current index as columns raise ValueError("'q' must be scalar when axis=1 is specified") return map_partitions(M.quantile, self, q, axis, token=keyname, meta=(q, 'f8')) else: _raise_if_object_series(self, "quantile") meta = self._meta.quantile(q, axis=axis) num = self._get_numeric_data() quantiles = tuple(quantile(self[c], q) for c in num.columns) dask = {} dask = merge(dask, *[_q.dask for _q in quantiles]) qnames = [(_q._name, 0) for _q in quantiles] if isinstance(quantiles[0], Scalar): dask[(keyname, 0)] = (pd.Series, qnames, num.columns, None, meta.name) divisions = (min(num.columns), max(num.columns)) return Series(dask, keyname, meta, divisions) else: dask[(keyname, 0)] = (methods.concat, qnames, 1) return DataFrame(dask, keyname, meta, quantiles[0].divisions) @derived_from(pd.DataFrame) def describe(self, split_every=False): # currently, only numeric describe is supported num = self._get_numeric_data() if self.ndim == 2 and len(num.columns) == 0: raise ValueError("DataFrame contains only non-numeric data.") elif self.ndim == 1 and self.dtype == 'object': raise ValueError("Cannot compute ``describe`` on object dtype.") stats = [num.count(split_every=split_every), num.mean(split_every=split_every), num.std(split_every=split_every), num.min(split_every=split_every), num.quantile([0.25, 0.5, 0.75]), num.max(split_every=split_every)] stats_names = [(s._name, 0) for s in stats] name = 'describe--' + tokenize(self, split_every) dsk = merge(num.dask, *(s.dask for s in stats)) dsk[(name, 0)] = (methods.describe_aggregate, stats_names) return new_dd_object(dsk, name, num._meta, divisions=[None, None]) def _cum_agg(self, token, chunk, aggregate, axis, skipna=True, chunk_kwargs=None): """ Wrapper for cumulative operation """ axis = self._validate_axis(axis) if axis == 1: name = '{0}{1}(axis=1)'.format(self._token_prefix, token) return self.map_partitions(chunk, token=name, **chunk_kwargs) else: # cumulate each partitions 
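            # The cumulative result below is assembled in three steps:
            #   1. apply ``chunk`` (e.g. ``M.cumsum``) within each partition
            #      independently ("cumpart"),
            #   2. take the last element of each cumulated partition
            #      ("cumlast"),
            #   3. fold those last elements left to right with ``aggregate``
            #      and combine each running total with the next partition's
            #      local cumulation.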
name1 = '{0}{1}-map'.format(self._token_prefix, token) cumpart = map_partitions(chunk, self, token=name1, meta=self, **chunk_kwargs) name2 = '{0}{1}-take-last'.format(self._token_prefix, token) cumlast = map_partitions(_take_last, cumpart, skipna, meta=pd.Series([]), token=name2) name = '{0}{1}'.format(self._token_prefix, token) cname = '{0}{1}-cum-last'.format(self._token_prefix, token) # aggregate cumulated partisions and its previous last element dask = {} dask[(name, 0)] = (cumpart._name, 0) for i in range(1, self.npartitions): # store each cumulative step to graph to reduce computation if i == 1: dask[(cname, i)] = (cumlast._name, i - 1) else: # aggregate with previous cumulation results dask[(cname, i)] = (aggregate, (cname, i - 1), (cumlast._name, i - 1)) dask[(name, i)] = (aggregate, (cumpart._name, i), (cname, i)) return new_dd_object(merge(dask, cumpart.dask, cumlast.dask), name, chunk(self._meta), self.divisions) @derived_from(pd.DataFrame) def cumsum(self, axis=None, skipna=True): return self._cum_agg('cumsum', chunk=M.cumsum, aggregate=operator.add, axis=axis, skipna=skipna, chunk_kwargs=dict(axis=axis, skipna=skipna)) @derived_from(pd.DataFrame) def cumprod(self, axis=None, skipna=True): return self._cum_agg('cumprod', chunk=M.cumprod, aggregate=operator.mul, axis=axis, skipna=skipna, chunk_kwargs=dict(axis=axis, skipna=skipna)) @derived_from(pd.DataFrame) def cummax(self, axis=None, skipna=True): return self._cum_agg('cummax', chunk=M.cummax, aggregate=methods.cummax_aggregate, axis=axis, skipna=skipna, chunk_kwargs=dict(axis=axis, skipna=skipna)) @derived_from(pd.DataFrame) def cummin(self, axis=None, skipna=True): return self._cum_agg('cummin', chunk=M.cummin, aggregate=methods.cummin_aggregate, axis=axis, skipna=skipna, chunk_kwargs=dict(axis=axis, skipna=skipna)) @derived_from(pd.DataFrame) def where(self, cond, other=np.nan): # cond and other may be dask instance, # passing map_partitions via keyword will not be aligned return map_partitions(M.where, self, cond, other) @derived_from(pd.DataFrame) def mask(self, cond, other=np.nan): return map_partitions(M.mask, self, cond, other) @derived_from(pd.DataFrame) def notnull(self): return self.map_partitions(M.notnull) @derived_from(pd.DataFrame) def isnull(self): return self.map_partitions(M.isnull) @derived_from(pd.DataFrame) def isin(self, values): return elemwise(M.isin, self, list(values)) @derived_from(pd.DataFrame) def astype(self, dtype): # XXX: Pandas will segfault for empty dataframes when setting # categorical dtypes. This operation isn't allowed currently anyway. We # get the metadata with a non-empty frame to throw the error instead of # segfaulting. 
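        # In the branches below, a categorical target dtype that carries no
        # explicit ``categories`` listing leaves the corresponding column(s)
        # with *unknown* categories in the metadata, since the real categories
        # can only be determined by inspecting the data.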
if isinstance(self._meta, pd.DataFrame) and is_categorical_dtype(dtype): meta = self._meta_nonempty.astype(dtype) else: meta = self._meta.astype(dtype) if hasattr(dtype, 'items'): # Pandas < 0.21.0, no `categories` attribute, so unknown # Pandas >= 0.21.0, known if `categories` attribute is not None set_unknown = [k for k, v in dtype.items() if (is_categorical_dtype(v) and getattr(v, 'categories', None) is None)] meta = clear_known_categories(meta, cols=set_unknown) elif (is_categorical_dtype(dtype) and getattr(dtype, 'categories', None) is None): meta = clear_known_categories(meta) return self.map_partitions(M.astype, dtype=dtype, meta=meta) @derived_from(pd.Series) def append(self, other): # because DataFrame.append will override the method, # wrap by pd.Series.append docstring from .multi import concat if isinstance(other, (list, dict)): msg = "append doesn't support list or dict input" raise NotImplementedError(msg) return concat([self, other], join='outer', interleave_partitions=False) @derived_from(pd.DataFrame) def align(self, other, join='outer', axis=None, fill_value=None): meta1, meta2 = _emulate(M.align, self, other, join, axis=axis, fill_value=fill_value) aligned = self.map_partitions(M.align, other, join=join, axis=axis, fill_value=fill_value) token = tokenize(self, other, join, axis, fill_value) name1 = 'align1-' + token dsk1 = {(name1, i): (getitem, key, 0) for i, key in enumerate(aligned.__dask_keys__())} dsk1.update(aligned.dask) result1 = new_dd_object(dsk1, name1, meta1, aligned.divisions) name2 = 'align2-' + token dsk2 = {(name2, i): (getitem, key, 1) for i, key in enumerate(aligned.__dask_keys__())} dsk2.update(aligned.dask) result2 = new_dd_object(dsk2, name2, meta2, aligned.divisions) return result1, result2 @derived_from(pd.DataFrame) def combine(self, other, func, fill_value=None, overwrite=True): return self.map_partitions(M.combine, other, func, fill_value=fill_value, overwrite=overwrite) @derived_from(pd.DataFrame) def combine_first(self, other): return self.map_partitions(M.combine_first, other) @classmethod def _bind_operator_method(cls, name, op): """ bind operator method like DataFrame.add to this class """ raise NotImplementedError @derived_from(pd.DataFrame) def resample(self, rule, how=None, closed=None, label=None): from .tseries.resample import _resample return _resample(self, rule, how=how, closed=closed, label=label) @derived_from(pd.DataFrame) def first(self, offset): # Let pandas error on bad args self._meta_nonempty.first(offset) if not self.known_divisions: raise ValueError("`first` is not implemented for unknown divisions") offset = pd.tseries.frequencies.to_offset(offset) date = self.divisions[0] + offset end = self.loc._get_partitions(date) include_right = offset.isAnchored() or not hasattr(offset, '_inc') if end == self.npartitions - 1: divs = self.divisions else: divs = self.divisions[:end + 1] + (date,) name = 'first-' + tokenize(self, offset) dsk = {(name, i): (self._name, i) for i in range(end)} dsk[(name, end)] = (methods.boundary_slice, (self._name, end), None, date, include_right, True, 'ix') return new_dd_object(merge(self.dask, dsk), name, self, divs) @derived_from(pd.DataFrame) def last(self, offset): # Let pandas error on bad args self._meta_nonempty.first(offset) if not self.known_divisions: raise ValueError("`last` is not implemented for unknown divisions") offset = pd.tseries.frequencies.to_offset(offset) date = self.divisions[-1] - offset start = self.loc._get_partitions(date) if start == 0: divs = self.divisions else: divs = 
(date,) + self.divisions[start + 1:] name = 'last-' + tokenize(self, offset) dsk = {(name, i + 1): (self._name, j + 1) for i, j in enumerate(range(start, self.npartitions))} dsk[(name, 0)] = (methods.boundary_slice, (self._name, start), date, None, True, False, 'ix') return new_dd_object(merge(self.dask, dsk), name, self, divs) def nunique_approx(self, split_every=None): """Approximate number of unique rows. This method uses the HyperLogLog algorithm for cardinality estimation to compute the approximate number of unique rows. The approximate error is 0.406%. Parameters ---------- split_every : int, optional Group partitions into groups of this size while performing a tree-reduction. If set to False, no tree-reduction will be used. Default is 8. Returns ------- a float representing the approximate number of elements """ from . import hyperloglog # here to avoid circular import issues return aca([self], chunk=hyperloglog.compute_hll_array, combine=hyperloglog.reduce_state, aggregate=hyperloglog.estimate_count, split_every=split_every, b=16, meta=float) @property def values(self): """ Return a dask.array of the values of this dataframe Warning: This creates a dask.array without precise shape information. Operations that depend on shape information, like slicing or reshaping, will not work. """ from ..array.core import Array name = 'values-' + tokenize(self) chunks = ((np.nan,) * self.npartitions,) x = self._meta.values if isinstance(self, DataFrame): chunks = chunks + ((x.shape[1],),) suffix = (0,) else: suffix = () dsk = {(name, i) + suffix: (getattr, key, 'values') for (i, key) in enumerate(self.__dask_keys__())} return Array(merge(self.dask, dsk), name, chunks, x.dtype) def _raise_if_object_series(x, funcname): """ Utility function to raise an error if an object column does not support a certain operation like `mean`. """ if isinstance(x, Series) and hasattr(x, "dtype") and x.dtype == object: raise ValueError("`%s` not supported with object series" % funcname) class Series(_Frame): """ Parallel Pandas Series Do not use this class directly. Instead use functions like ``dd.read_csv``, ``dd.read_parquet``, or ``dd.from_pandas``. Parameters ---------- dsk: dict The dask graph to compute this Series _name: str The key prefix that specifies which keys in the dask comprise this particular Series meta: pandas.Series An empty ``pandas.Series`` with names, dtypes, and index matching the expected output. divisions: tuple of index values Values along which we partition our blocks on the index See Also -------- dask.dataframe.DataFrame """ _partition_type = pd.Series _token_prefix = 'series-' def __array_wrap__(self, array, context=None): if isinstance(context, tuple) and len(context) > 0: index = context[1][0].index return pd.Series(array, index=index, name=self.name) @property def name(self): return self._meta.name @name.setter def name(self, name): self._meta.name = name renamed = _rename_dask(self, name) # update myself self.dask.update(renamed.dask) self._name = renamed._name @property def ndim(self): """ Return dimensionality """ return 1 @property def dtype(self): """ Return data type """ return self._meta.dtype @cache_readonly def dt(self): """ Namespace of datetime methods """ return DatetimeAccessor(self) @cache_readonly def cat(self): return CategoricalAccessor(self) @cache_readonly def str(self): """ Namespace for string methods """ return StringAccessor(self) def __dir__(self): o = set(dir(type(self))) o.update(self.__dict__) # Remove the `cat` and `str` accessors if not available. 
We can't # decide this statically for the `dt` accessor, as it works on # datetime-like things as well. for accessor in ['cat', 'str']: if not hasattr(self._meta, accessor): o.remove(accessor) return list(o) @property def nbytes(self): """ Number of bytes """ return self.reduction(methods.nbytes, np.sum, token='nbytes', meta=int, split_every=False) @property def _repr_data(self): return _repr_data_series(self._meta, self._repr_divisions) def __repr__(self): """ have to overwrite footer """ if self.name is not None: footer = "Name: {name}, dtype: {dtype}".format(name=self.name, dtype=self.dtype) else: footer = "dtype: {dtype}".format(dtype=self.dtype) return """Dask {klass} Structure: {data} {footer} Dask Name: {name}, {task} tasks""".format(klass=self.__class__.__name__, data=self.to_string(), footer=footer, name=key_split(self._name), task=len(self.dask)) @derived_from(pd.Series) def round(self, decimals=0): return elemwise(M.round, self, decimals) @derived_from(pd.DataFrame) def to_timestamp(self, freq=None, how='start', axis=0): df = elemwise(M.to_timestamp, self, freq, how, axis) df.divisions = tuple(pd.Index(self.divisions).to_timestamp()) return df def quantile(self, q=0.5): """ Approximate quantiles of Series q : list/array of floats, default 0.5 (50%) Iterable of numbers ranging from 0 to 1 for the desired quantiles """ return quantile(self, q) def _repartition_quantiles(self, npartitions, upsample=1.0): """ Approximate quantiles of Series used for repartitioning """ from .partitionquantiles import partition_quantiles return partition_quantiles(self, npartitions, upsample=upsample) def __getitem__(self, key): if isinstance(key, Series) and self.divisions == key.divisions: name = 'index-%s' % tokenize(self, key) dsk = dict(((name, i), (operator.getitem, (self._name, i), (key._name, i))) for i in range(self.npartitions)) return Series(merge(self.dask, key.dask, dsk), name, self._meta, self.divisions) raise NotImplementedError() @derived_from(pd.DataFrame) def _get_numeric_data(self, how='any', subset=None): return self @derived_from(pd.Series) def iteritems(self): for i in range(self.npartitions): s = self.get_partition(i).compute() for item in s.iteritems(): yield item @classmethod def _validate_axis(cls, axis=0): if axis not in (0, 'index', None): raise ValueError('No axis named {0}'.format(axis)) # convert to numeric axis return {None: 0, 'index': 0}.get(axis, axis) @derived_from(pd.Series) def groupby(self, by=None, **kwargs): from dask.dataframe.groupby import SeriesGroupBy return SeriesGroupBy(self, by=by, **kwargs) @derived_from(pd.Series) def count(self, split_every=False): return super(Series, self).count(split_every=split_every) def unique(self, split_every=None, split_out=1): """ Return Series of unique values in the object. Includes NA values. 
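        The ``split_every`` argument sets the fan-in of the underlying tree
        reduction and ``split_out`` sets the number of partitions in the
        output.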
Returns ------- uniques : Series """ return aca(self, chunk=methods.unique, aggregate=methods.unique, meta=self._meta, token='unique', split_every=split_every, series_name=self.name, split_out=split_out) @derived_from(pd.Series) def nunique(self, split_every=None): return self.drop_duplicates(split_every=split_every).count() @derived_from(pd.Series) def value_counts(self, split_every=None, split_out=1): return aca(self, chunk=M.value_counts, aggregate=methods.value_counts_aggregate, combine=methods.value_counts_combine, meta=self._meta.value_counts(), token='value-counts', split_every=split_every, split_out=split_out, split_out_setup=split_out_on_index) @derived_from(pd.Series) def nlargest(self, n=5, split_every=None): return aca(self, chunk=M.nlargest, aggregate=M.nlargest, meta=self._meta, token='series-nlargest', split_every=split_every, n=n) @derived_from(pd.Series) def nsmallest(self, n=5, split_every=None): return aca(self, chunk=M.nsmallest, aggregate=M.nsmallest, meta=self._meta, token='series-nsmallest', split_every=split_every, n=n) @derived_from(pd.Series) def isin(self, values): return elemwise(M.isin, self, list(values)) @insert_meta_param_description(pad=12) @derived_from(pd.Series) def map(self, arg, na_action=None, meta=no_default): if not (isinstance(arg, (pd.Series, dict)) or callable(arg)): raise TypeError("arg must be pandas.Series, dict or callable." " Got {0}".format(type(arg))) name = 'map-' + tokenize(self, arg, na_action) dsk = {(name, i): (M.map, k, arg, na_action) for i, k in enumerate(self.__dask_keys__())} dsk.update(self.dask) if meta is no_default: meta = _emulate(M.map, self, arg, na_action=na_action) else: meta = make_meta(meta) return Series(dsk, name, meta, self.divisions) @derived_from(pd.Series) def dropna(self): return self.map_partitions(M.dropna) @derived_from(pd.Series) def between(self, left, right, inclusive=True): return self.map_partitions(M.between, left=left, right=right, inclusive=inclusive) @derived_from(pd.Series) def clip(self, lower=None, upper=None, out=None): if out is not None: raise ValueError("'out' must be None") # np.clip may pass out return self.map_partitions(M.clip, lower=lower, upper=upper) @derived_from(pd.Series) def clip_lower(self, threshold): return self.map_partitions(M.clip_lower, threshold=threshold) @derived_from(pd.Series) def clip_upper(self, threshold): return self.map_partitions(M.clip_upper, threshold=threshold) @derived_from(pd.Series) def align(self, other, join='outer', axis=None, fill_value=None): return super(Series, self).align(other, join=join, axis=axis, fill_value=fill_value) @derived_from(pd.Series) def combine(self, other, func, fill_value=None): return self.map_partitions(M.combine, other, func, fill_value=fill_value) @derived_from(pd.Series) def combine_first(self, other): return self.map_partitions(M.combine_first, other) def to_bag(self, index=False): """ Craeate a Dask Bag from a Series """ from .io import to_bag return to_bag(self, index) @derived_from(pd.Series) def to_frame(self, name=None): return self.map_partitions(M.to_frame, name, meta=self._meta.to_frame(name)) @derived_from(pd.Series) def to_string(self, max_rows=5): # option_context doesn't affect return self._repr_data.to_string(max_rows=max_rows) @classmethod def _bind_operator_method(cls, name, op): """ bind operator method like DataFrame.add to this class """ def meth(self, other, level=None, fill_value=None, axis=0): if level is not None: raise NotImplementedError('level must be None') axis = self._validate_axis(axis) meta = 
_emulate(op, self, other, axis=axis, fill_value=fill_value) return map_partitions(op, self, other, meta=meta, axis=axis, fill_value=fill_value) meth.__doc__ = op.__doc__ bind_method(cls, name, meth) @classmethod def _bind_comparison_method(cls, name, comparison): """ bind comparison method like DataFrame.add to this class """ def meth(self, other, level=None, axis=0): if level is not None: raise NotImplementedError('level must be None') axis = self._validate_axis(axis) return elemwise(comparison, self, other, axis=axis) meth.__doc__ = comparison.__doc__ bind_method(cls, name, meth) @insert_meta_param_description(pad=12) def apply(self, func, convert_dtype=True, meta=no_default, args=(), **kwds): """ Parallel version of pandas.Series.apply Parameters ---------- func : function Function to apply convert_dtype : boolean, default True Try to find better dtype for elementwise function results. If False, leave as dtype=object. $META args : tuple Positional arguments to pass to function in addition to the value. Additional keyword arguments will be passed as keywords to the function. Returns ------- applied : Series or DataFrame if func returns a Series. Examples -------- >>> import dask.dataframe as dd >>> s = pd.Series(range(5), name='x') >>> ds = dd.from_pandas(s, npartitions=2) Apply a function elementwise across the Series, passing in extra arguments in ``args`` and ``kwargs``: >>> def myadd(x, a, b=1): ... return x + a + b >>> res = ds.apply(myadd, args=(2,), b=1.5) By default, dask tries to infer the output metadata by running your provided function on some fake data. This works well in many cases, but can sometimes be expensive, or even fail. To avoid this, you can manually specify the output metadata with the ``meta`` keyword. This can be specified in many forms, for more information see ``dask.dataframe.utils.make_meta``. Here we specify the output is a Series with name ``'x'``, and dtype ``float64``: >>> res = ds.apply(myadd, args=(2,), b=1.5, meta=('x', 'f8')) In the case where the metadata doesn't change, you can also pass in the object itself directly: >>> res = ds.apply(lambda x: x + 1, meta=ds) See Also -------- dask.Series.map_partitions """ if meta is no_default: msg = ("`meta` is not specified, inferred from partial data. 
" "Please provide `meta` if the result is unexpected.\n" " Before: .apply(func)\n" " After: .apply(func, meta={'x': 'f8', 'y': 'f8'}) for dataframe result\n" " or: .apply(func, meta=('x', 'f8')) for series result") warnings.warn(msg) meta = _emulate(M.apply, self._meta_nonempty, func, convert_dtype=convert_dtype, args=args, **kwds) return map_partitions(M.apply, self, func, convert_dtype, args, meta=meta, **kwds) @derived_from(pd.Series) def cov(self, other, min_periods=None, split_every=False): from .multi import concat if not isinstance(other, Series): raise TypeError("other must be a dask.dataframe.Series") df = concat([self, other], axis=1) return cov_corr(df, min_periods, scalar=True, split_every=split_every) @derived_from(pd.Series) def corr(self, other, method='pearson', min_periods=None, split_every=False): from .multi import concat if not isinstance(other, Series): raise TypeError("other must be a dask.dataframe.Series") if method != 'pearson': raise NotImplementedError("Only Pearson correlation has been " "implemented") df = concat([self, other], axis=1) return cov_corr(df, min_periods, corr=True, scalar=True, split_every=split_every) @derived_from(pd.Series) def autocorr(self, lag=1, split_every=False): if not isinstance(lag, int): raise TypeError("lag must be an integer") return self.corr(self if lag == 0 else self.shift(lag), split_every=split_every) @derived_from(pd.Series) def memory_usage(self, index=True, deep=False): from ..delayed import delayed result = self.map_partitions(M.memory_usage, index=index, deep=deep) return delayed(sum)(result.to_delayed()) class Index(Series): _partition_type = pd.Index _token_prefix = 'index-' _dt_attributes = {'nanosecond', 'microsecond', 'millisecond', 'dayofyear', 'minute', 'hour', 'day', 'dayofweek', 'second', 'week', 'weekday', 'weekofyear', 'month', 'quarter', 'year'} _cat_attributes = {'known', 'as_known', 'as_unknown', 'add_categories', 'categories', 'remove_categories', 'reorder_categories', 'as_ordered', 'codes', 'remove_unused_categories', 'set_categories', 'as_unordered', 'ordered', 'rename_categories'} def __getattr__(self, key): if is_categorical_dtype(self.dtype) and key in self._cat_attributes: return getattr(self.cat, key) elif key in self._dt_attributes: return getattr(self.dt, key) raise AttributeError("'Index' object has no attribute %r" % key) def __dir__(self): out = super(Index, self).__dir__() out.extend(self._dt_attributes) if is_categorical_dtype(self.dtype): out.extend(self._cat_attributes) return out @property def index(self): msg = "'{0}' object has no attribute 'index'" raise AttributeError(msg.format(self.__class__.__name__)) def __array_wrap__(self, array, context=None): return pd.Index(array, name=self.name) def head(self, n=5, compute=True): """ First n items of the Index. Caveat, this only checks the first partition. 
""" name = 'head-%d-%s' % (n, self._name) dsk = {(name, 0): (operator.getitem, (self._name, 0), slice(0, n))} result = new_dd_object(merge(self.dask, dsk), name, self._meta, self.divisions[:2]) if compute: result = result.compute() return result @derived_from(pd.Index) def max(self, split_every=False): return self.reduction(M.max, meta=self._meta_nonempty.max(), token=self._token_prefix + 'max', split_every=split_every) @derived_from(pd.Index) def min(self, split_every=False): return self.reduction(M.min, meta=self._meta_nonempty.min(), token=self._token_prefix + 'min', split_every=split_every) def count(self, split_every=False): return self.reduction(methods.index_count, np.sum, token='index-count', meta=int, split_every=split_every) @derived_from(pd.Index) def shift(self, periods=1, freq=None): if isinstance(self._meta, pd.PeriodIndex): if freq is not None: raise ValueError("PeriodIndex doesn't accept `freq` argument") meta = self._meta_nonempty.shift(periods) out = self.map_partitions(M.shift, periods, meta=meta, token='shift') else: # Pandas will raise for other index types that don't implement shift meta = self._meta_nonempty.shift(periods, freq=freq) out = self.map_partitions(M.shift, periods, token='shift', meta=meta, freq=freq) if freq is None: freq = meta.freq return maybe_shift_divisions(out, periods, freq=freq) class DataFrame(_Frame): """ Parallel Pandas DataFrame Do not use this class directly. Instead use functions like ``dd.read_csv``, ``dd.read_parquet``, or ``dd.from_pandas``. Parameters ---------- dask: dict The dask graph to compute this DataFrame name: str The key prefix that specifies which keys in the dask comprise this particular DataFrame meta: pandas.DataFrame An empty ``pandas.DataFrame`` with names, dtypes, and index matching the expected output. divisions: tuple of index values Values along which we partition our blocks on the index """ _partition_type = pd.DataFrame _token_prefix = 'dataframe-' def __array_wrap__(self, array, context=None): if isinstance(context, tuple) and len(context) > 0: index = context[1][0].index return pd.DataFrame(array, index=index, columns=self.columns) @property def columns(self): return self._meta.columns @columns.setter def columns(self, columns): renamed = _rename_dask(self, columns) self._meta = renamed._meta self._name = renamed._name self.dask.update(renamed.dask) def __getitem__(self, key): name = 'getitem-%s' % tokenize(self, key) if np.isscalar(key) or isinstance(key, tuple): if isinstance(self._meta.index, (pd.DatetimeIndex, pd.PeriodIndex)): if key not in self._meta.columns: return self.loc[key] # error is raised from pandas meta = self._meta[_extract_meta(key)] dsk = dict(((name, i), (operator.getitem, (self._name, i), key)) for i in range(self.npartitions)) return new_dd_object(merge(self.dask, dsk), name, meta, self.divisions) elif isinstance(key, slice): return self.loc[key] if isinstance(key, list): # error is raised from pandas meta = self._meta[_extract_meta(key)] dsk = dict(((name, i), (operator.getitem, (self._name, i), key)) for i in range(self.npartitions)) return new_dd_object(merge(self.dask, dsk), name, meta, self.divisions) if isinstance(key, Series): # do not perform dummy calculation, as columns will not be changed. 
# if self.divisions != key.divisions: from .multi import _maybe_align_partitions self, key = _maybe_align_partitions([self, key]) dsk = {(name, i): (M._getitem_array, (self._name, i), (key._name, i)) for i in range(self.npartitions)} return new_dd_object(merge(self.dask, key.dask, dsk), name, self, self.divisions) raise NotImplementedError(key) def __setitem__(self, key, value): if isinstance(key, (tuple, list)): df = self.assign(**{k: value[c] for k, c in zip(key, value.columns)}) else: df = self.assign(**{key: value}) self.dask = df.dask self._name = df._name self._meta = df._meta self.divisions = df.divisions def __delitem__(self, key): result = self.drop([key], axis=1) self.dask = result.dask self._name = result._name self._meta = result._meta def __setattr__(self, key, value): try: columns = object.__getattribute__(self, '_meta').columns except AttributeError: columns = () if key in columns: self[key] = value else: object.__setattr__(self, key, value) def __getattr__(self, key): if key in self.columns: meta = self._meta[key] name = 'getitem-%s' % tokenize(self, key) dsk = dict(((name, i), (operator.getitem, (self._name, i), key)) for i in range(self.npartitions)) return new_dd_object(merge(self.dask, dsk), name, meta, self.divisions) raise AttributeError("'DataFrame' object has no attribute %r" % key) def __dir__(self): o = set(dir(type(self))) o.update(self.__dict__) o.update(c for c in self.columns if (isinstance(c, pd.compat.string_types) and pd.compat.isidentifier(c))) return list(o) @property def ndim(self): """ Return dimensionality """ return 2 @property def dtypes(self): """ Return data types """ return self._meta.dtypes @derived_from(pd.DataFrame) def get_dtype_counts(self): return self._meta.get_dtype_counts() @derived_from(pd.DataFrame) def get_ftype_counts(self): return self._meta.get_ftype_counts() @derived_from(pd.DataFrame) def select_dtypes(self, include=None, exclude=None): cs = self._meta.select_dtypes(include=include, exclude=exclude).columns return self[list(cs)] def set_index(self, other, drop=True, sorted=False, npartitions=None, divisions=None, **kwargs): """Set the DataFrame index (row labels) using an existing column This realigns the dataset to be sorted by a new column. This can have a significant impact on performance, because joins, groupbys, lookups, etc. are all much faster on that column. However, this performance increase comes with a cost, sorting a parallel dataset requires expensive shuffles. Often we ``set_index`` once directly after data ingest and filtering and then perform many cheap computations off of the sorted dataset. This function operates exactly like ``pandas.set_index`` except with different performance costs (it is much more expensive). Under normal operation this function does an initial pass over the index column to compute approximate qunatiles to serve as future divisions. It then passes over the data a second time, splitting up each input partition into several pieces and sharing those pieces to all of the output partitions now in sorted order. In some cases we can alleviate those costs, for example if your dataset is sorted already then we can avoid making many small pieces or if you know good values to split the new index column then we can avoid the initial pass over the data. For example if your new index is a datetime index and your data is already sorted by day then this entire operation can be done for free. You can control these options with the following parameters. 
Parameters ---------- df: Dask DataFrame index: string or Dask Series npartitions: int, None, or 'auto' The ideal number of output partitions. If None use the same as the input. If 'auto' then decide by memory use. shuffle: string, optional Either ``'disk'`` for single-node operation or ``'tasks'`` for distributed operation. Will be inferred by your current scheduler. sorted: bool, optional If the index column is already sorted in increasing order. Defaults to False divisions: list, optional Known values on which to separate index values of the partitions. See http://dask.pydata.org/en/latest/dataframe-design.html#partitions Defaults to computing this with a single pass over the data. Note that if ``sorted=True``, specified divisions are assumed to match the existing partitions in the data. If this is untrue, you should leave divisions empty and call ``repartition`` after ``set_index``. compute: bool Whether or not to trigger an immediate computation. Defaults to False. Examples -------- >>> df2 = df.set_index('x') # doctest: +SKIP >>> df2 = df.set_index(d.x) # doctest: +SKIP >>> df2 = df.set_index(d.timestamp, sorted=True) # doctest: +SKIP A common case is when we have a datetime column that we know to be sorted and is cleanly divided by day. We can set this index for free by specifying both that the column is pre-sorted and the particular divisions along which is is separated >>> import pandas as pd >>> divisions = pd.date_range('2000', '2010', freq='1D') >>> df2 = df.set_index('timestamp', sorted=True, divisions=divisions) # doctest: +SKIP """ pre_sorted = sorted del sorted if divisions is not None: check_divisions(divisions) if pre_sorted: from .shuffle import set_sorted_index return set_sorted_index(self, other, drop=drop, divisions=divisions, **kwargs) else: from .shuffle import set_index return set_index(self, other, drop=drop, npartitions=npartitions, divisions=divisions, **kwargs) @derived_from(pd.DataFrame) def nlargest(self, n=5, columns=None, split_every=None): token = 'dataframe-nlargest' return aca(self, chunk=M.nlargest, aggregate=M.nlargest, meta=self._meta, token=token, split_every=split_every, n=n, columns=columns) @derived_from(pd.DataFrame) def nsmallest(self, n=5, columns=None, split_every=None): token = 'dataframe-nsmallest' return aca(self, chunk=M.nsmallest, aggregate=M.nsmallest, meta=self._meta, token=token, split_every=split_every, n=n, columns=columns) @derived_from(pd.DataFrame) def groupby(self, by=None, **kwargs): from dask.dataframe.groupby import DataFrameGroupBy return DataFrameGroupBy(self, by=by, **kwargs) @wraps(categorize) def categorize(self, columns=None, index=None, split_every=None, **kwargs): return categorize(self, columns=columns, index=index, split_every=split_every, **kwargs) @derived_from(pd.DataFrame) def assign(self, **kwargs): for k, v in kwargs.items(): if not (isinstance(v, (Series, Scalar, pd.Series)) or callable(v) or np.isscalar(v)): raise TypeError("Column assignment doesn't support type " "{0}".format(type(v).__name__)) pairs = list(sum(kwargs.items(), ())) # Figure out columns of the output df2 = self._meta.assign(**_extract_meta(kwargs)) return elemwise(methods.assign, self, *pairs, meta=df2) @derived_from(pd.DataFrame) def rename(self, index=None, columns=None): if index is not None: raise ValueError("Cannot rename index.") # *args here is index, columns but columns arg is already used return self.map_partitions(M.rename, None, columns=columns) def query(self, expr, **kwargs): """ Filter dataframe with complex expression Blocked 
version of pd.DataFrame.query This is like the sequential version except that this will also happen in many threads. This may conflict with ``numexpr`` which will use multiple threads itself. We recommend that you set numexpr to use a single thread import numexpr numexpr.set_nthreads(1) See also -------- pandas.DataFrame.query """ name = 'query-%s' % tokenize(self, expr) if kwargs: name = name + '--' + tokenize(kwargs) dsk = dict(((name, i), (apply, M.query, ((self._name, i), (expr,), kwargs))) for i in range(self.npartitions)) else: dsk = dict(((name, i), (M.query, (self._name, i), expr)) for i in range(self.npartitions)) meta = self._meta.query(expr, **kwargs) return new_dd_object(merge(dsk, self.dask), name, meta, self.divisions) @derived_from(pd.DataFrame) def eval(self, expr, inplace=None, **kwargs): if inplace is None: if PANDAS_VERSION >= '0.21.0': inplace = False if '=' in expr and inplace in (True, None): raise NotImplementedError("Inplace eval not supported." " Please use inplace=False") meta = self._meta.eval(expr, inplace=inplace, **kwargs) return self.map_partitions(M.eval, expr, meta=meta, inplace=inplace, **kwargs) @derived_from(pd.DataFrame) def dropna(self, how='any', subset=None): return self.map_partitions(M.dropna, how=how, subset=subset) @derived_from(pd.DataFrame) def clip(self, lower=None, upper=None, out=None): if out is not None: raise ValueError("'out' must be None") return self.map_partitions(M.clip, lower=lower, upper=upper) @derived_from(pd.DataFrame) def clip_lower(self, threshold): return self.map_partitions(M.clip_lower, threshold=threshold) @derived_from(pd.DataFrame) def clip_upper(self, threshold): return self.map_partitions(M.clip_upper, threshold=threshold) @derived_from(pd.DataFrame) def to_timestamp(self, freq=None, how='start', axis=0): df = elemwise(M.to_timestamp, self, freq, how, axis) df.divisions = tuple(pd.Index(self.divisions).to_timestamp()) return df def to_bag(self, index=False): """Convert to a dask Bag of tuples of each row. Parameters ---------- index : bool, optional If True, the index is included as the first element of each tuple. Default is False. 
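        For example (illustrative; each row becomes a tuple in the resulting
        bag):

        >>> ddf.to_bag().take(3)  # doctest: +SKIP
        >>> ddf.to_bag(index=True)  # doctest: +SKIP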
""" from .io import to_bag return to_bag(self, index) @derived_from(pd.DataFrame) def to_string(self, max_rows=5): # option_context doesn't affect return self._repr_data.to_string(max_rows=max_rows, show_dimensions=False) def _get_numeric_data(self, how='any', subset=None): # calculate columns to avoid unnecessary calculation numerics = self._meta._get_numeric_data() if len(numerics.columns) < len(self.columns): name = self._token_prefix + '-get_numeric_data' return self.map_partitions(M._get_numeric_data, meta=numerics, token=name) else: # use myself if all numerics return self @classmethod def _validate_axis(cls, axis=0): if axis not in (0, 1, 'index', 'columns', None): raise ValueError('No axis named {0}'.format(axis)) # convert to numeric axis return {None: 0, 'index': 0, 'columns': 1}.get(axis, axis) @derived_from(pd.DataFrame) def drop(self, labels, axis=0, errors='raise'): axis = self._validate_axis(axis) if axis == 1: return self.map_partitions(M.drop, labels, axis=axis, errors=errors) raise NotImplementedError("Drop currently only works for axis=1") @derived_from(pd.DataFrame) def merge(self, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, suffixes=('_x', '_y'), indicator=False, npartitions=None, shuffle=None): if not isinstance(right, (DataFrame, pd.DataFrame)): raise ValueError('right must be DataFrame') from .multi import merge return merge(self, right, how=how, on=on, left_on=left_on, right_on=right_on, left_index=left_index, right_index=right_index, suffixes=suffixes, npartitions=npartitions, indicator=indicator, shuffle=shuffle) @derived_from(pd.DataFrame) def join(self, other, on=None, how='left', lsuffix='', rsuffix='', npartitions=None, shuffle=None): if not isinstance(other, (DataFrame, pd.DataFrame)): raise ValueError('other must be DataFrame') from .multi import merge return merge(self, other, how=how, left_index=on is None, right_index=True, left_on=on, suffixes=[lsuffix, rsuffix], npartitions=npartitions, shuffle=shuffle) @derived_from(pd.DataFrame) def append(self, other): if isinstance(other, Series): msg = ('Unable to appending dd.Series to dd.DataFrame.' 'Use pd.Series to append as row.') raise ValueError(msg) elif isinstance(other, pd.Series): other = other.to_frame().T return super(DataFrame, self).append(other) @derived_from(pd.DataFrame) def iterrows(self): for i in range(self.npartitions): df = self.get_partition(i).compute() for row in df.iterrows(): yield row @derived_from(pd.DataFrame) def itertuples(self): for i in range(self.npartitions): df = self.get_partition(i).compute() for row in df.itertuples(): yield row @classmethod def _bind_operator_method(cls, name, op): """ bind operator method like DataFrame.add to this class """ # name must be explicitly passed for div method whose name is truediv def meth(self, other, axis='columns', level=None, fill_value=None): if level is not None: raise NotImplementedError('level must be None') axis = self._validate_axis(axis) if axis in (1, 'columns'): # When axis=1 and other is a series, `other` is transposed # and the operator is applied broadcast across rows. This # isn't supported with dd.Series. if isinstance(other, Series): msg = 'Unable to {0} dd.Series with axis=1'.format(name) raise ValueError(msg) elif isinstance(other, pd.Series): # Special case for pd.Series to avoid unwanted partitioning # of other. We pass it in as a kwarg to prevent this. 
meta = _emulate(op, self, other=other, axis=axis, fill_value=fill_value) return map_partitions(op, self, other=other, meta=meta, axis=axis, fill_value=fill_value) meta = _emulate(op, self, other, axis=axis, fill_value=fill_value) return map_partitions(op, self, other, meta=meta, axis=axis, fill_value=fill_value) meth.__doc__ = op.__doc__ bind_method(cls, name, meth) @classmethod def _bind_comparison_method(cls, name, comparison): """ bind comparison method like DataFrame.add to this class """ def meth(self, other, axis='columns', level=None): if level is not None: raise NotImplementedError('level must be None') axis = self._validate_axis(axis) return elemwise(comparison, self, other, axis=axis) meth.__doc__ = comparison.__doc__ bind_method(cls, name, meth) @insert_meta_param_description(pad=12) def apply(self, func, axis=0, args=(), meta=no_default, **kwds): """ Parallel version of pandas.DataFrame.apply This mimics the pandas version except for the following: 1. Only ``axis=1`` is supported (and must be specified explicitly). 2. The user should provide output metadata via the `meta` keyword. Parameters ---------- func : function Function to apply to each column/row axis : {0 or 'index', 1 or 'columns'}, default 0 - 0 or 'index': apply function to each column (NOT SUPPORTED) - 1 or 'columns': apply function to each row $META args : tuple Positional arguments to pass to function in addition to the array/series Additional keyword arguments will be passed as keywords to the function Returns ------- applied : Series or DataFrame Examples -------- >>> import dask.dataframe as dd >>> df = pd.DataFrame({'x': [1, 2, 3, 4, 5], ... 'y': [1., 2., 3., 4., 5.]}) >>> ddf = dd.from_pandas(df, npartitions=2) Apply a function to row-wise passing in extra arguments in ``args`` and ``kwargs``: >>> def myadd(row, a, b=1): ... return row.sum() + a + b >>> res = ddf.apply(myadd, axis=1, args=(2,), b=1.5) By default, dask tries to infer the output metadata by running your provided function on some fake data. This works well in many cases, but can sometimes be expensive, or even fail. To avoid this, you can manually specify the output metadata with the ``meta`` keyword. This can be specified in many forms, for more information see ``dask.dataframe.utils.make_meta``. Here we specify the output is a Series with name ``'x'``, and dtype ``float64``: >>> res = ddf.apply(myadd, axis=1, args=(2,), b=1.5, meta=('x', 'f8')) In the case where the metadata doesn't change, you can also pass in the object itself directly: >>> res = ddf.apply(lambda row: row + 1, axis=1, meta=ddf) See Also -------- dask.DataFrame.map_partitions """ axis = self._validate_axis(axis) if axis == 0: msg = ("dd.DataFrame.apply only supports axis=1\n" " Try: df.apply(func, axis=1)") raise NotImplementedError(msg) if meta is no_default: msg = ("`meta` is not specified, inferred from partial data. 
" "Please provide `meta` if the result is unexpected.\n" " Before: .apply(func)\n" " After: .apply(func, meta={'x': 'f8', 'y': 'f8'}) for dataframe result\n" " or: .apply(func, meta=('x', 'f8')) for series result") warnings.warn(msg) meta = _emulate(M.apply, self._meta_nonempty, func, axis=axis, args=args, **kwds) return map_partitions(M.apply, self, func, axis, False, False, None, args, meta=meta, **kwds) @derived_from(pd.DataFrame) def applymap(self, func, meta='__no_default__'): return elemwise(M.applymap, self, func, meta=meta) @derived_from(pd.DataFrame) def round(self, decimals=0): return elemwise(M.round, self, decimals) @derived_from(pd.DataFrame) def cov(self, min_periods=None, split_every=False): return cov_corr(self, min_periods, split_every=split_every) @derived_from(pd.DataFrame) def corr(self, method='pearson', min_periods=None, split_every=False): if method != 'pearson': raise NotImplementedError("Only Pearson correlation has been " "implemented") return cov_corr(self, min_periods, True, split_every=split_every) def info(self, buf=None, verbose=False, memory_usage=False): """ Concise summary of a Dask DataFrame. """ if buf is None: import sys buf = sys.stdout lines = [str(type(self))] if len(self.columns) == 0: lines.append('Index: 0 entries') lines.append('Empty %s' % type(self).__name__) put_lines(buf, lines) return # Group and execute the required computations computations = {} if verbose: computations.update({'index': self.index, 'count': self.count()}) if memory_usage: computations.update({'memory_usage': self.map_partitions(M.memory_usage, index=True)}) computations = dict(zip(computations.keys(), da.compute(*computations.values()))) if verbose: index = computations['index'] counts = computations['count'] lines.append(index.summary()) lines.append('Data columns (total {} columns):'.format(len(self.columns))) if PANDAS_VERSION >= '0.20.0': from pandas.io.formats.printing import pprint_thing else: from pandas.formats.printing import pprint_thing space = max([len(pprint_thing(k)) for k in self.columns]) + 3 column_template = '{!s:<%d} {} non-null {}' % space column_info = [column_template.format(pprint_thing(x[0]), x[1], x[2]) for x in zip(self.columns, counts, self.dtypes)] else: column_info = [self.columns.summary(name='Columns')] lines.extend(column_info) dtype_counts = ['%s(%d)' % k for k in sorted(self.dtypes.value_counts().iteritems(), key=str)] lines.append('dtypes: {}'.format(', '.join(dtype_counts))) if memory_usage: memory_int = computations['memory_usage'].sum() lines.append('memory usage: {}\n'.format(memory_repr(memory_int))) put_lines(buf, lines) @derived_from(pd.DataFrame) def memory_usage(self, index=True, deep=False): result = self.map_partitions(M.memory_usage, index=index, deep=deep) result = result.groupby(result.index).sum() return result def pivot_table(self, index=None, columns=None, values=None, aggfunc='mean'): """ Create a spreadsheet-style pivot table as a DataFrame. Target ``columns`` must have category dtype to infer result's ``columns``. ``index``, ``columns``, ``values`` and ``aggfunc`` must be all scalar. 
Parameters ---------- values : scalar column to aggregate index : scalar column to be index columns : scalar column to be columns aggfunc : {'mean', 'sum', 'count'}, default 'mean' Returns ------- table : DataFrame """ from .reshape import pivot_table return pivot_table(self, index=index, columns=columns, values=values, aggfunc=aggfunc) def to_records(self, index=False): from .io import to_records return to_records(self) @derived_from(pd.DataFrame) def to_html(self, max_rows=5): # pd.Series doesn't have html repr data = self._repr_data.to_html(max_rows=max_rows, show_dimensions=False) return self._HTML_FMT.format(data=data, name=key_split(self._name), task=len(self.dask)) @property def _repr_data(self): meta = self._meta index = self._repr_divisions values = {c: _repr_data_series(meta[c], index) for c in meta.columns} return pd.DataFrame(values, columns=meta.columns) _HTML_FMT = """
<div><strong>Dask DataFrame Structure:</strong></div>
{data}
<div>Dask Name: {name}, {task} tasks</div>
""" def _repr_html_(self): data = self._repr_data.to_html(max_rows=5, show_dimensions=False, notebook=True) return self._HTML_FMT.format(data=data, name=key_split(self._name), task=len(self.dask)) def _select_columns_or_index(self, columns_or_index): """ Parameters ---------- columns_or_index Column or index name, or a list of these Returns ------- dd.DataFrame Dask DataFrame with columns corresponding to each column or index level in columns_or_index. If included, the column corresponding to the index level is named _index """ # Ensure columns_or_index is a list columns_or_index = (columns_or_index if isinstance(columns_or_index, list) else [columns_or_index]) column_names = [n for n in columns_or_index if self._is_column_label(n)] selected_df = self[column_names] if self._contains_index_name(columns_or_index): # Index name was included selected_df = selected_df.assign(_index=self.index) return selected_df def _is_column_label(self, c): """ Test whether a value matches the label of a column in the DataFrame """ return (not is_dask_collection(c) and (np.isscalar(c) or isinstance(c, tuple)) and c in self.columns) def _is_index_label(self, i): """ Test whether a value matches the label of the index of the DataFrame """ return (self.index.name is not None and not is_dask_collection(i) and (np.isscalar(i) or isinstance(i, tuple)) and i == self.index.name) def _contains_index_name(self, columns_or_index): """ Test whether the input contains the label of the index of the DataFrame """ if isinstance(columns_or_index, list): return (any(self._is_index_label(n) and not self._is_column_label(n) for n in columns_or_index)) else: return (self._is_index_label(columns_or_index) and not self._is_column_label(columns_or_index)) # bind operators for op in [operator.abs, operator.add, operator.and_, operator_div, operator.eq, operator.gt, operator.ge, operator.inv, operator.lt, operator.le, operator.mod, operator.mul, operator.ne, operator.neg, operator.or_, operator.pow, operator.sub, operator.truediv, operator.floordiv, operator.xor]: _Frame._bind_operator(op) Scalar._bind_operator(op) for name in ['add', 'sub', 'mul', 'div', 'truediv', 'floordiv', 'mod', 'pow', 'radd', 'rsub', 'rmul', 'rdiv', 'rtruediv', 'rfloordiv', 'rmod', 'rpow']: meth = getattr(pd.DataFrame, name) DataFrame._bind_operator_method(name, meth) meth = getattr(pd.Series, name) Series._bind_operator_method(name, meth) for name in ['lt', 'gt', 'le', 'ge', 'ne', 'eq']: meth = getattr(pd.DataFrame, name) DataFrame._bind_comparison_method(name, meth) meth = getattr(pd.Series, name) Series._bind_comparison_method(name, meth) def is_broadcastable(dfs, s): """ This Series is broadcastable against another dataframe in the sequence """ return (isinstance(s, Series) and s.npartitions == 1 and s.known_divisions and any(s.divisions == (min(df.columns), max(df.columns)) for df in dfs if isinstance(df, DataFrame))) def elemwise(op, *args, **kwargs): """ Elementwise operation for dask.Dataframes """ meta = kwargs.pop('meta', no_default) _name = funcname(op) + '-' + tokenize(op, kwargs, *args) args = _maybe_from_pandas(args) from .multi import _maybe_align_partitions args = _maybe_align_partitions(args) dasks = [arg for arg in args if isinstance(arg, (_Frame, Scalar))] dfs = [df for df in dasks if isinstance(df, _Frame)] divisions = dfs[0].divisions _is_broadcastable = partial(is_broadcastable, dfs) dfs = list(remove(_is_broadcastable, dfs)) n = len(divisions) - 1 other = [(i, arg) for i, arg in enumerate(args) if not isinstance(arg, (_Frame, Scalar))] # 
adjust the key length of Scalar keys = [d.__dask_keys__() * n if isinstance(d, Scalar) or _is_broadcastable(d) else d.__dask_keys__() for d in dasks] if other: dsk = {(_name, i): (apply, partial_by_order, list(frs), {'function': op, 'other': other}) for i, frs in enumerate(zip(*keys))} else: dsk = {(_name, i): (op,) + frs for i, frs in enumerate(zip(*keys))} dsk = merge(dsk, *[d.dask for d in dasks]) if meta is no_default: if len(dfs) >= 2 and len(dasks) != len(dfs): # should not occur in current funcs msg = 'elemwise with 2 or more DataFrames and Scalar is not supported' raise NotImplementedError(msg) meta = _emulate(op, *args, **kwargs) return new_dd_object(dsk, _name, meta, divisions) def _maybe_from_pandas(dfs): from .io import from_pandas dfs = [from_pandas(df, 1) if isinstance(df, (pd.Series, pd.DataFrame)) else df for df in dfs] return dfs def hash_shard(df, nparts, split_out_setup=None, split_out_setup_kwargs=None): if split_out_setup: h = split_out_setup(df, **(split_out_setup_kwargs or {})) else: h = df h = hash_pandas_object(h, index=False) if isinstance(h, pd.Series): h = h._values h %= nparts return {i: df.iloc[h == i] for i in range(nparts)} def split_evenly(df, k): """ Split dataframe into k roughly equal parts """ divisions = np.linspace(0, len(df), k + 1).astype(int) return {i: df.iloc[divisions[i]: divisions[i + 1]] for i in range(k)} def split_out_on_index(df): h = df.index if isinstance(h, pd.MultiIndex): h = pd.DataFrame([], index=h).reset_index() return h def split_out_on_cols(df, cols=None): return df[cols] @insert_meta_param_description def apply_concat_apply(args, chunk=None, aggregate=None, combine=None, meta=no_default, token=None, chunk_kwargs=None, aggregate_kwargs=None, combine_kwargs=None, split_every=None, split_out=None, split_out_setup=None, split_out_setup_kwargs=None, **kwargs): """Apply a function to blocks, then concat, then apply again Parameters ---------- args : Positional arguments for the `chunk` function. All `dask.dataframe` objects should be partitioned and indexed equivalently. chunk : function [block-per-arg] -> block Function to operate on each block of data aggregate : function concatenated-block -> block Function to operate on the concatenated result of chunk combine : function concatenated-block -> block, optional Function to operate on intermediate concatenated results of chunk in a tree-reduction. If not provided, defaults to aggregate. $META token : str, optional The name to use for the output keys. chunk_kwargs : dict, optional Keywords for the chunk function only. aggregate_kwargs : dict, optional Keywords for the aggregate function only. combine_kwargs : dict, optional Keywords for the combine function only. split_every : int, optional Group partitions into groups of this size while performing a tree-reduction. If set to False, no tree-reduction will be used, and all intermediates will be concatenated and passed to ``aggregate``. Default is 8. split_out : int, optional Number of output partitions. Split occurs after first chunk reduction. split_out_setup : callable, optional If provided, this function is called on each chunk before performing the hash-split. It should return a pandas object, where each row (excluding the index) is hashed. If not provided, the chunk is hashed as is. split_out_setup_kwargs : dict, optional Keywords for the `split_out_setup` function only. kwargs : All remaining keywords will be passed to ``chunk``, ``aggregate``, and ``combine``. Examples -------- >>> def chunk(a_block, b_block): ... 
pass >>> def agg(df): ... pass >>> apply_concat_apply([a, b], chunk=chunk, aggregate=agg) # doctest: +SKIP """ if chunk_kwargs is None: chunk_kwargs = dict() if aggregate_kwargs is None: aggregate_kwargs = dict() chunk_kwargs.update(kwargs) aggregate_kwargs.update(kwargs) if combine is None: if combine_kwargs: raise ValueError("`combine_kwargs` provided with no `combine`") combine = aggregate combine_kwargs = aggregate_kwargs else: if combine_kwargs is None: combine_kwargs = dict() combine_kwargs.update(kwargs) if not isinstance(args, (tuple, list)): args = [args] npartitions = set(arg.npartitions for arg in args if isinstance(arg, _Frame)) if len(npartitions) > 1: raise ValueError("All arguments must have same number of partitions") npartitions = npartitions.pop() if split_every is None: split_every = 8 elif split_every is False: split_every = npartitions elif split_every < 2 or not isinstance(split_every, int): raise ValueError("split_every must be an integer >= 2") token_key = tokenize(token or (chunk, aggregate), meta, args, chunk_kwargs, aggregate_kwargs, combine_kwargs, split_every, split_out, split_out_setup, split_out_setup_kwargs) # Chunk a = '{0}-chunk-{1}'.format(token or funcname(chunk), token_key) if len(args) == 1 and isinstance(args[0], _Frame) and not chunk_kwargs: dsk = {(a, 0, i, 0): (chunk, key) for i, key in enumerate(args[0].__dask_keys__())} else: dsk = {(a, 0, i, 0): (apply, chunk, [(x._name, i) if isinstance(x, _Frame) else x for x in args], chunk_kwargs) for i in range(args[0].npartitions)} # Split if split_out and split_out > 1: split_prefix = 'split-%s' % token_key shard_prefix = 'shard-%s' % token_key for i in range(args[0].npartitions): dsk[(split_prefix, i)] = (hash_shard, (a, 0, i, 0), split_out, split_out_setup, split_out_setup_kwargs) for j in range(split_out): dsk[(shard_prefix, 0, i, j)] = (getitem, (split_prefix, i), j) a = shard_prefix else: split_out = 1 # Combine b = '{0}-combine-{1}'.format(token or funcname(combine), token_key) k = npartitions depth = 0 while k > split_every: for part_i, inds in enumerate(partition_all(split_every, range(k))): for j in range(split_out): conc = (_concat, [(a, depth, i, j) for i in inds]) if combine_kwargs: dsk[(b, depth + 1, part_i, j)] = (apply, combine, [conc], combine_kwargs) else: dsk[(b, depth + 1, part_i, j)] = (combine, conc) k = part_i + 1 a = b depth += 1 # Aggregate for j in range(split_out): b = '{0}-agg-{1}'.format(token or funcname(aggregate), token_key) conc = (_concat, [(a, depth, i, j) for i in range(k)]) if aggregate_kwargs: dsk[(b, j)] = (apply, aggregate, [conc], aggregate_kwargs) else: dsk[(b, j)] = (aggregate, conc) if meta is no_default: meta_chunk = _emulate(apply, chunk, args, chunk_kwargs) meta = _emulate(apply, aggregate, [_concat([meta_chunk])], aggregate_kwargs) meta = make_meta(meta) for arg in args: if isinstance(arg, _Frame): dsk.update(arg.dask) divisions = [None] * (split_out + 1) return new_dd_object(dsk, b, meta, divisions) aca = apply_concat_apply def _extract_meta(x, nonempty=False): """ Extract internal cache data (``_meta``) from dd.DataFrame / dd.Series """ if isinstance(x, Scalar): return x._meta_nonempty if nonempty else x._meta elif isinstance(x, _Frame): if (isinstance(x, Series) and x.npartitions == 1 and x.known_divisions): # may be broadcastable return x._meta else: return x._meta_nonempty if nonempty else x._meta elif isinstance(x, list): return [_extract_meta(_x, nonempty) for _x in x] elif isinstance(x, tuple): return tuple([_extract_meta(_x, nonempty) for _x in x]) 
elif isinstance(x, dict): res = {} for k in x: res[k] = _extract_meta(x[k], nonempty) return res else: return x def _emulate(func, *args, **kwargs): """ Apply a function using args / kwargs. If arguments contain dd.DataFrame / dd.Series, using internal cache (``_meta``) for calculation """ with raise_on_meta_error(funcname(func)): return func(*_extract_meta(args, True), **_extract_meta(kwargs, True)) @insert_meta_param_description def map_partitions(func, *args, **kwargs): """ Apply Python function on each DataFrame partition. Parameters ---------- func : function Function applied to each partition. args, kwargs : Arguments and keywords to pass to the function. At least one of the args should be a Dask.dataframe. $META """ meta = kwargs.pop('meta', no_default) if meta is not no_default: meta = make_meta(meta) assert callable(func) if 'token' in kwargs: name = kwargs.pop('token') token = tokenize(meta, *args, **kwargs) else: name = funcname(func) token = tokenize(func, meta, *args, **kwargs) name = '{0}-{1}'.format(name, token) from .multi import _maybe_align_partitions args = _maybe_from_pandas(args) args = _maybe_align_partitions(args) if meta is no_default: meta = _emulate(func, *args, **kwargs) if all(isinstance(arg, Scalar) for arg in args): dask = {(name, 0): (apply, func, (tuple, [(arg._name, 0) for arg in args]), kwargs)} return Scalar(merge(dask, *[arg.dask for arg in args]), name, meta) elif not isinstance(meta, (pd.Series, pd.DataFrame, pd.Index)): # If `meta` is not a pandas object, the concatenated results will be a # different type meta = _concat([meta]) meta = make_meta(meta) dfs = [df for df in args if isinstance(df, _Frame)] dsk = {} for i in range(dfs[0].npartitions): values = [(arg._name, i if isinstance(arg, _Frame) else 0) if isinstance(arg, (_Frame, Scalar)) else arg for arg in args] dsk[(name, i)] = (apply_and_enforce, func, values, kwargs, meta) dasks = [arg.dask for arg in args if isinstance(arg, (_Frame, Scalar))] return new_dd_object(merge(dsk, *dasks), name, meta, args[0].divisions) def apply_and_enforce(func, args, kwargs, meta): """Apply a function, and enforce the output to match meta Ensures the output has the same columns, even if empty.""" df = func(*args, **kwargs) if isinstance(df, (pd.DataFrame, pd.Series, pd.Index)): if len(df) == 0: return meta c = meta.columns if isinstance(df, pd.DataFrame) else meta.name return _rename(c, df) return df def _rename(columns, df): """ Rename columns of pd.DataFrame or name of pd.Series. Not for dd.DataFrame or dd.Series. Parameters ---------- columns : tuple, string, pd.DataFrame or pd.Series Column names, Series name or pandas instance which has the target column names / name. 
df : pd.DataFrame or pd.Series target DataFrame / Series to be renamed """ assert not isinstance(df, _Frame) if columns is no_default: return df if isinstance(columns, Iterator): columns = list(columns) if isinstance(df, pd.DataFrame): if isinstance(columns, pd.DataFrame): columns = columns.columns if not isinstance(columns, pd.Index): columns = pd.Index(columns) if (len(columns) == len(df.columns) and type(columns) is type(df.columns) and columns.equals(df.columns)): # if target is identical, rename is not necessary return df # deep=False doesn't doesn't copy any data/indices, so this is cheap df = df.copy(deep=False) df.columns = columns return df elif isinstance(df, (pd.Series, pd.Index)): if isinstance(columns, (pd.Series, pd.Index)): columns = columns.name if df.name == columns: return df return df.rename(columns) # map_partition may pass other types return df def _rename_dask(df, names): """ Destructively rename columns of dd.DataFrame or name of dd.Series. Not for pd.DataFrame or pd.Series. Internaly used to overwrite dd.DataFrame.columns and dd.Series.name We can't use map_partition because it applies function then rename Parameters ---------- df : dd.DataFrame or dd.Series target DataFrame / Series to be renamed names : tuple, string Column names/Series name """ assert isinstance(df, _Frame) metadata = _rename(names, df._meta) name = 'rename-{0}'.format(tokenize(df, metadata)) dsk = {} for i in range(df.npartitions): dsk[name, i] = (_rename, metadata, (df._name, i)) return new_dd_object(merge(dsk, df.dask), name, metadata, df.divisions) def quantile(df, q): """Approximate quantiles of Series. Parameters ---------- q : list/array of floats Iterable of numbers ranging from 0 to 100 for the desired quantiles """ assert isinstance(df, Series) from dask.array.percentile import _percentile, merge_percentiles # currently, only Series has quantile method if isinstance(df, Index): meta = pd.Series(df._meta_nonempty).quantile(q) else: meta = df._meta_nonempty.quantile(q) if isinstance(meta, pd.Series): # Index.quantile(list-like) must be pd.Series, not pd.Index df_name = df.name finalize_tsk = lambda tsk: (pd.Series, tsk, q, None, df_name) return_type = Series else: finalize_tsk = lambda tsk: (getitem, tsk, 0) return_type = Scalar q = [q] # pandas uses quantile in [0, 1] # numpy / everyone else uses [0, 100] qs = np.asarray(q) * 100 token = tokenize(df, qs) if len(qs) == 0: name = 'quantiles-' + token empty_index = pd.Index([], dtype=float) return Series({(name, 0): pd.Series([], name=df.name, index=empty_index)}, name, df._meta, [None, None]) else: new_divisions = [np.min(q), np.max(q)] df = df.dropna() name = 'quantiles-1-' + token val_dsk = {(name, i): (_percentile, (getattr, key, 'values'), qs) for i, key in enumerate(df.__dask_keys__())} name2 = 'quantiles-2-' + token len_dsk = {(name2, i): (len, key) for i, key in enumerate(df.__dask_keys__())} name3 = 'quantiles-3-' + token merge_dsk = {(name3, 0): finalize_tsk((merge_percentiles, qs, [qs] * df.npartitions, sorted(val_dsk), sorted(len_dsk)))} dsk = merge(df.dask, val_dsk, len_dsk, merge_dsk) return return_type(dsk, name3, meta, new_divisions) def cov_corr(df, min_periods=None, corr=False, scalar=False, split_every=False): """DataFrame covariance and pearson correlation. Computes pairwise covariance or correlation of columns, excluding NA/null values. Parameters ---------- df : DataFrame min_periods : int, optional Minimum number of observations required per pair of columns to have a valid result. 
corr : bool, optional If True, compute the Pearson correlation. If False [default], compute the covariance. scalar : bool, optional If True, compute covariance between two variables as a scalar. Only valid if `df` has 2 columns. If False [default], compute the entire covariance/correlation matrix. split_every : int, optional Group partitions into groups of this size while performing a tree-reduction. If set to False, no tree-reduction will be used. Default is False. """ if min_periods is None: min_periods = 2 elif min_periods < 2: raise ValueError("min_periods must be >= 2") if split_every is False: split_every = df.npartitions elif split_every < 2 or not isinstance(split_every, int): raise ValueError("split_every must be an integer >= 2") df = df._get_numeric_data() if scalar and len(df.columns) != 2: raise ValueError("scalar only valid for 2 column dataframe") token = tokenize(df, min_periods, scalar, split_every) funcname = 'corr' if corr else 'cov' a = '{0}-chunk-{1}'.format(funcname, df._name) dsk = {(a, i): (cov_corr_chunk, f, corr) for (i, f) in enumerate(df.__dask_keys__())} prefix = '{0}-combine-{1}-'.format(funcname, df._name) k = df.npartitions b = a depth = 0 while k > split_every: b = prefix + str(depth) for part_i, inds in enumerate(partition_all(split_every, range(k))): dsk[(b, part_i)] = (cov_corr_combine, [(a, i) for i in inds], corr) k = part_i + 1 a = b depth += 1 name = '{0}-{1}'.format(funcname, token) dsk[(name, 0)] = (cov_corr_agg, [(a, i) for i in range(k)], df.columns, min_periods, corr, scalar) dsk.update(df.dask) if scalar: return Scalar(dsk, name, 'f8') meta = make_meta([(c, 'f8') for c in df.columns], index=df.columns) return DataFrame(dsk, name, meta, (df.columns[0], df.columns[-1])) def cov_corr_chunk(df, corr=False): """Chunk part of a covariance or correlation computation""" mat = df.values mask = np.isfinite(mat) keep = np.bitwise_and(mask[:, None, :], mask[:, :, None]) x = np.where(keep, mat[:, None, :], np.nan) sums = np.nansum(x, 0) counts = keep.astype('int').sum(0) cov = df.cov().values dtype = [('sum', sums.dtype), ('count', counts.dtype), ('cov', cov.dtype)] if corr: m = np.nansum((x - sums / np.where(counts, counts, np.nan)) ** 2, 0) dtype.append(('m', m.dtype)) out = np.empty(counts.shape, dtype=dtype) out['sum'] = sums out['count'] = counts out['cov'] = cov * (counts - 1) if corr: out['m'] = m return out def cov_corr_combine(data, corr=False): data = np.concatenate(data).reshape((len(data),) + data[0].shape) sums = np.nan_to_num(data['sum']) counts = data['count'] cum_sums = np.cumsum(sums, 0) cum_counts = np.cumsum(counts, 0) s1 = cum_sums[:-1] s2 = sums[1:] n1 = cum_counts[:-1] n2 = counts[1:] d = (s2 / n2) - (s1 / n1) C = (np.nansum((n1 * n2) / (n1 + n2) * (d * d.transpose((0, 2, 1))), 0) + np.nansum(data['cov'], 0)) out = np.empty(C.shape, dtype=data.dtype) out['sum'] = cum_sums[-1] out['count'] = cum_counts[-1] out['cov'] = C if corr: nobs = np.where(cum_counts[-1], cum_counts[-1], np.nan) mu = cum_sums[-1] / nobs counts_na = np.where(counts, counts, np.nan) m = np.nansum(data['m'] + counts * (sums / counts_na - mu) ** 2, axis=0) out['m'] = m return out def cov_corr_agg(data, cols, min_periods=2, corr=False, scalar=False): out = cov_corr_combine(data, corr) counts = out['count'] C = out['cov'] C[counts < min_periods] = np.nan if corr: m2 = out['m'] den = np.sqrt(m2 * m2.T) else: den = np.where(counts, counts, np.nan) - 1 mat = C / den if scalar: return mat[0, 1] return pd.DataFrame(mat, columns=cols, index=cols) def pd_split(df, p, 
random_state=None): """ Split DataFrame into multiple pieces pseudorandomly >>> df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6], ... 'b': [2, 3, 4, 5, 6, 7]}) >>> a, b = pd_split(df, [0.5, 0.5], random_state=123) # roughly 50/50 split >>> a a b 1 2 3 2 3 4 5 6 7 >>> b a b 0 1 2 3 4 5 4 5 6 """ p = list(p) index = pseudorandom(len(df), p, random_state) return [df.iloc[index == i] for i in range(len(p))] def _take_last(a, skipna=True): """ take last row (Series) of DataFrame / last value of Series considering NaN. Parameters ---------- a : pd.DataFrame or pd.Series skipna : bool, default True Whether to exclude NaN """ if skipna is False: return a.iloc[-1] else: # take last valid value excluding NaN, NaN location may be different # in each columns group_dummy = np.ones(len(a.index)) last_row = a.groupby(group_dummy).last() if isinstance(a, pd.DataFrame): return pd.Series(last_row.values[0], index=a.columns) else: return last_row.values[0] def check_divisions(divisions): if not isinstance(divisions, (list, tuple)): raise ValueError('New division must be list or tuple') divisions = list(divisions) if divisions != sorted(divisions): raise ValueError('New division must be sorted') if len(divisions[:-1]) != len(list(unique(divisions[:-1]))): msg = 'New division must be unique, except for the last element' raise ValueError(msg) def repartition_divisions(a, b, name, out1, out2, force=False): """ dask graph to repartition dataframe by new divisions Parameters ---------- a : tuple old divisions b : tuple, list new divisions name : str name of old dataframe out1 : str name of temporary splits out2 : str name of new dataframe force : bool, default False Allows the expansion of the existing divisions. If False then the new divisions lower and upper bounds must be the same as the old divisions. 
Examples -------- >>> repartition_divisions([1, 3, 7], [1, 4, 6, 7], 'a', 'b', 'c') # doctest: +SKIP {('b', 0): (, ('a', 0), 1, 3, False), ('b', 1): (, ('a', 1), 3, 4, False), ('b', 2): (, ('a', 1), 4, 6, False), ('b', 3): (, ('a', 1), 6, 7, False) ('c', 0): (, (, [('b', 0), ('b', 1)])), ('c', 1): ('b', 2), ('c', 2): ('b', 3)} """ check_divisions(b) if len(b) < 2: # minimum division is 2 elements, like [0, 0] raise ValueError('New division must be longer than 2 elements') if force: if a[0] < b[0]: msg = ('left side of the new division must be equal or smaller ' 'than old division') raise ValueError(msg) if a[-1] > b[-1]: msg = ('right side of the new division must be equal or larger ' 'than old division') raise ValueError(msg) else: if a[0] != b[0]: msg = 'left side of old and new divisions are different' raise ValueError(msg) if a[-1] != b[-1]: msg = 'right side of old and new divisions are different' raise ValueError(msg) def _is_single_last_div(x): """Whether last division only contains single label""" return len(x) >= 2 and x[-1] == x[-2] c = [a[0]] d = dict() low = a[0] i, j = 1, 1 # indices for old/new divisions k = 0 # index for temp divisions last_elem = _is_single_last_div(a) # process through old division # left part of new division can be processed in this loop while (i < len(a) and j < len(b)): if a[i] < b[j]: # tuple is something like: # (methods.boundary_slice, ('from_pandas-#', 0), 3, 4, False)) d[(out1, k)] = (methods.boundary_slice, (name, i - 1), low, a[i], False) low = a[i] i += 1 elif a[i] > b[j]: d[(out1, k)] = (methods.boundary_slice, (name, i - 1), low, b[j], False) low = b[j] j += 1 else: d[(out1, k)] = (methods.boundary_slice, (name, i - 1), low, b[j], False) low = b[j] i += 1 j += 1 c.append(low) k += 1 # right part of new division can remain if a[-1] < b[-1] or b[-1] == b[-2]: for _j in range(j, len(b)): # always use right-most of old division # because it may contain last element m = len(a) - 2 d[(out1, k)] = (methods.boundary_slice, (name, m), low, b[_j], False) low = b[_j] c.append(low) k += 1 else: # even if new division is processed through, # right-most element of old division can remain if last_elem and i < len(a): d[(out1, k)] = (methods.boundary_slice, (name, i - 1), a[i], a[i], False) k += 1 c.append(a[-1]) # replace last element of tuple with True d[(out1, k - 1)] = d[(out1, k - 1)][:-1] + (True,) i, j = 0, 1 last_elem = _is_single_last_div(c) while j < len(b): tmp = [] while c[i] < b[j]: tmp.append((out1, i)) i += 1 if last_elem and c[i] == b[-1] and (b[-1] != b[-2] or j == len(b) - 1) and i < k: # append if last split is not included tmp.append((out1, i)) i += 1 if len(tmp) == 0: # dummy slice to return empty DataFrame or Series, # which retain original data attributes (columns / name) d[(out2, j - 1)] = (methods.boundary_slice, (name, 0), a[0], a[0], False) elif len(tmp) == 1: d[(out2, j - 1)] = tmp[0] else: if not tmp: raise ValueError('check for duplicate partitions\nold:\n%s\n\n' 'new:\n%s\n\ncombined:\n%s' % (pformat(a), pformat(b), pformat(c))) d[(out2, j - 1)] = (methods.concat, tmp) j += 1 return d def repartition_freq(df, freq=None): """ Repartition a timeseries dataframe by a new frequency """ freq = pd.Timedelta(freq) if not isinstance(df.divisions[0], pd.Timestamp): raise TypeError("Can only repartition on frequency for timeseries") divisions = pd.DatetimeIndex(start=df.divisions[0].ceil(freq), end=df.divisions[-1], freq=freq).tolist() if not len(divisions): divisions = [df.divisions[0], df.divisions[-1]] else: if divisions[-1] != 
df.divisions[-1]: divisions.append(df.divisions[-1]) if divisions[0] != df.divisions[0]: divisions = [df.divisions[0]] + divisions return df.repartition(divisions=divisions) def repartition_npartitions(df, npartitions): """ Repartition dataframe to a smaller number of partitions """ new_name = 'repartition-%d-%s' % (npartitions, tokenize(df)) if df.npartitions == npartitions: return df elif df.npartitions > npartitions: npartitions_ratio = df.npartitions / npartitions new_partitions_boundaries = [int(new_partition_index * npartitions_ratio) for new_partition_index in range(npartitions + 1)] dsk = {} for new_partition_index in range(npartitions): value = (methods.concat, [(df._name, old_partition_index) for old_partition_index in range(new_partitions_boundaries[new_partition_index], new_partitions_boundaries[new_partition_index + 1])]) dsk[new_name, new_partition_index] = value divisions = [df.divisions[new_partition_index] for new_partition_index in new_partitions_boundaries] return new_dd_object(merge(df.dask, dsk), new_name, df._meta, divisions) else: original_divisions = divisions = pd.Series(df.divisions) if (df.known_divisions and (np.issubdtype(divisions.dtype, np.datetime64) or np.issubdtype(divisions.dtype, np.number))): if np.issubdtype(divisions.dtype, np.datetime64): divisions = divisions.values.astype('float64') if isinstance(divisions, pd.Series): divisions = divisions.values n = len(divisions) divisions = np.interp(x=np.linspace(0, n, npartitions + 1), xp=np.linspace(0, n, n), fp=divisions) if np.issubdtype(original_divisions.dtype, np.datetime64): divisions = pd.Series(divisions).astype(original_divisions.dtype).tolist() elif np.issubdtype(original_divisions.dtype, np.integer): divisions = divisions.astype(original_divisions.dtype) if isinstance(divisions, np.ndarray): divisions = divisions.tolist() divisions = list(divisions) divisions[0] = df.divisions[0] divisions[-1] = df.divisions[-1] return df.repartition(divisions=divisions) else: ratio = npartitions / df.npartitions split_name = 'split-%s' % tokenize(df, npartitions) dsk = {} last = 0 j = 0 for i in range(df.npartitions): new = last + ratio if i == df.npartitions - 1: k = npartitions - j else: k = int(new - last) dsk[(split_name, i)] = (split_evenly, (df._name, i), k) for jj in range(k): dsk[(new_name, j)] = (getitem, (split_name, i), jj) j += 1 last = new divisions = [None] * (npartitions + 1) return new_dd_object(merge(df.dask, dsk), new_name, df._meta, divisions) def repartition(df, divisions=None, force=False): """ Repartition dataframe along new divisions Dask.DataFrame objects are partitioned along their index. Often when multiple dataframes interact we need to align these partitionings. The ``repartition`` function constructs a new DataFrame object holding the same data but partitioned on different values. It does this by performing a sequence of ``loc`` and ``concat`` calls to split and merge the previous generation of partitions. Parameters ---------- divisions : list List of partitions to be used force : bool, default False Allows the expansion of the existing divisions. If False then the new divisions lower and upper bounds must be the same as the old divisions. 
Examples -------- >>> df = df.repartition([0, 5, 10, 20]) # doctest: +SKIP Also works on Pandas objects >>> ddf = dd.repartition(df, [0, 5, 10, 20]) # doctest: +SKIP """ token = tokenize(df, divisions) if isinstance(df, _Frame): tmp = 'repartition-split-' + token out = 'repartition-merge-' + token dsk = repartition_divisions(df.divisions, divisions, df._name, tmp, out, force=force) return new_dd_object(merge(df.dask, dsk), out, df._meta, divisions) elif isinstance(df, (pd.Series, pd.DataFrame)): name = 'repartition-dataframe-' + token from .utils import shard_df_on_index dfs = shard_df_on_index(df, divisions[1:-1]) dsk = dict(((name, i), df) for i, df in enumerate(dfs)) return new_dd_object(dsk, name, df, divisions) raise ValueError('Data must be DataFrame or Series') def _reduction_chunk(x, aca_chunk=None, **kwargs): o = aca_chunk(x, **kwargs) # Return a dataframe so that the concatenated version is also a dataframe return o.to_frame().T if isinstance(o, pd.Series) else o def _reduction_combine(x, aca_combine=None, **kwargs): if isinstance(x, list): x = pd.Series(x) o = aca_combine(x, **kwargs) # Return a dataframe so that the concatenated version is also a dataframe return o.to_frame().T if isinstance(o, pd.Series) else o def _reduction_aggregate(x, aca_aggregate=None, **kwargs): if isinstance(x, list): x = pd.Series(x) return aca_aggregate(x, **kwargs) def idxmaxmin_chunk(x, fn=None, skipna=True): minmax = 'max' if fn == 'idxmax' else 'min' if len(x) > 0: idx = getattr(x, fn)(skipna=skipna) value = getattr(x, minmax)(skipna=skipna) else: idx = value = pd.Series([], dtype='i8') if isinstance(idx, pd.Series): return pd.DataFrame({'idx': idx, 'value': value}) return pd.DataFrame({'idx': [idx], 'value': [value]}) def idxmaxmin_row(x, fn=None, skipna=True): minmax = 'max' if fn == 'idxmax' else 'min' if len(x) > 0: x = x.set_index('idx') idx = [getattr(x.value, fn)(skipna=skipna)] value = [getattr(x.value, minmax)(skipna=skipna)] else: idx = value = pd.Series([], dtype='i8') return pd.DataFrame({'idx': idx, 'value': value}) def idxmaxmin_combine(x, fn=None, skipna=True): if len(x) == 0: return x return (x.groupby(level=0) .apply(idxmaxmin_row, fn=fn, skipna=skipna) .reset_index(level=1, drop=True)) def idxmaxmin_agg(x, fn=None, skipna=True, scalar=False): res = idxmaxmin_combine(x, fn, skipna=skipna)['idx'] if len(res) == 0: raise ValueError("attempt to get argmax of an empty sequence") if scalar: return res[0] res.name = None return res def safe_head(df, n): r = df.head(n=n) if len(r) != n: msg = ("Insufficient elements for `head`. {0} elements " "requested, only {1} elements available. Try passing larger " "`npartitions` to `head`.") warnings.warn(msg.format(n, len(r))) return r def maybe_shift_divisions(df, periods, freq): """Maybe shift divisions by periods of size freq Used to shift the divisions for the `shift` method. If freq isn't a fixed size (not anchored or relative), then the divisions are shifted appropriately. Otherwise the divisions are cleared. Parameters ---------- df : dd.DataFrame, dd.Series, or dd.Index periods : int The number of periods to shift. freq : DateOffset, timedelta, or time rule string The frequency to shift by. """ if isinstance(freq, str): freq = pd.tseries.frequencies.to_offset(freq) if (isinstance(freq, pd.DateOffset) and (freq.isAnchored() or not hasattr(freq, 'delta'))): # Can't infer divisions on relative or anchored offsets, as # divisions may now split identical index value. # (e.g. 
index_partitions = [[1, 2, 3], [3, 4, 5]]) return df.clear_divisions() if df.known_divisions: divs = pd.Series(range(len(df.divisions)), index=df.divisions) divisions = divs.shift(periods, freq=freq).index return type(df)(df.dask, df._name, df._meta, divisions) return df def to_delayed(df): """ Create Dask Delayed objects from a Dask Dataframe Returns a list of delayed values, one value per partition. Examples -------- >>> partitions = df.to_delayed() # doctest: +SKIP """ from dask.delayed import Delayed keys = df.__dask_keys__() dsk = df.__dask_optimize__(df.__dask_graph__(), keys) return [Delayed(k, dsk) for k in keys] @wraps(pd.to_datetime) def to_datetime(arg, **kwargs): meta = pd.Series([pd.Timestamp('2000')]) return map_partitions(pd.to_datetime, arg, meta=meta, **kwargs) @wraps(pd.to_timedelta) def to_timedelta(arg, unit='ns', errors='raise'): meta = pd.Series([pd.Timedelta(1, unit=unit)]) return map_partitions(pd.to_timedelta, arg, unit=unit, errors=errors, meta=meta) def _repr_data_series(s, index): """A helper for creating the ``_repr_data`` property""" npartitions = len(index) - 1 if is_categorical_dtype(s): if has_known_categories(s): dtype = 'category[known]' else: dtype = 'category[unknown]' else: dtype = str(s.dtype) return pd.Series([dtype] + ['...'] * npartitions, index=index, name=s.name) if PY3: _Frame.to_delayed.__doc__ = to_delayed.__doc__ dask-0.16.0/dask/dataframe/groupby.py000066400000000000000000001250051320364734500174410ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import collections import itertools as it import operator import warnings import numpy as np import pandas as pd from .core import (DataFrame, Series, aca, map_partitions, merge, new_dd_object, no_default, split_out_on_index) from .methods import drop_columns from .shuffle import shuffle from .utils import make_meta, insert_meta_param_description, raise_on_meta_error from ..base import tokenize from ..utils import derived_from, M, funcname, itemgetter # ############################################# # # GroupBy implementation notes # # Dask groupby supports reductions, i.e., mean, sum and alike, and apply. The # former do not shuffle the data and are efficiently implemented as tree # reductions. The latter is implemented by shuffling the underlying partiitons # such that all items of a group can be found in the same parititon. # # The argument to ``.groupby``, the index, can be a ``str``, ``dd.DataFrame``, # ``dd.Series``, or a list thereof. In operations on the grouped object, the # divisions of the the grouped object and the items of index have to align. # Currently, there is no support to shuffle the index values as part of the # groupby operation. Therefore, the alignment has to be guaranteed by the # caller. # # To operate on matchings paritions, most groupby operations exploit the # corresponding support in ``apply_concat_apply``. Specifically, this function # operates on matching paritiotns of frame-like objects passed as varargs. # # After the inital chunk step, the passed index is implicitly passed along to # subsequent operations as the index of the parittions. Groupby operations on # the individual parttions can then access the index via the ``levels`` # parameter of the ``groupby`` function. The correct arguments is determined by # the ``_determine_levels`` function. # # To minimize overhead, series in an index that were obtained by getitem on the # object to group are not passed as series to the various operations, but as # columnn keys. 
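# For example (an illustrative sketch with assumed names), grouping via
# ``ddf.groupby(ddf['a'])`` is treated the same as ``ddf.groupby('a')``, so only
# the column name needs to travel through the graph.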
This transformation is implemented as ``_normalize_index``. # # ############################################# def _determine_levels(index): """Determine the correct levels argument to groupby. """ if isinstance(index, (tuple, list)) and len(index) > 1: return list(range(len(index))) else: return 0 def _normalize_index(df, index): """Replace series with column names in an index wherever possible. """ if not isinstance(df, DataFrame): return index elif isinstance(index, list): return [_normalize_index(df, col) for col in index] elif (isinstance(index, Series) and index.name in df.columns and index._name == df[index.name]._name): return index.name elif (isinstance(index, DataFrame) and set(index.columns).issubset(df.columns) and index._name == df[index.columns]._name): return list(index.columns) else: return index def _maybe_slice(grouped, columns): """ Slice columns if grouped is pd.DataFrameGroupBy """ if isinstance(grouped, pd.core.groupby.DataFrameGroupBy): if columns is not None: if isinstance(columns, (tuple, list, set, pd.Index)): columns = list(columns) return grouped[columns] return grouped def _is_aligned(df, by): """Check if `df` and `by` have aligned indices""" if isinstance(by, (pd.Series, pd.DataFrame)): return df.index.equals(by.index) elif isinstance(by, (list, tuple)): return all(_is_aligned(df, i) for i in by) else: return True def _groupby_raise_unaligned(df, **kwargs): """Groupby, but raise if df and `by` key are unaligned. Pandas supports grouping by a column that doesn't align with the input frame/series/index. However, the reindexing this causes doesn't seem to be threadsafe, and can result in incorrect results. Since grouping by an unaligned key is generally a bad idea, we just error loudly in dask. For more information see pandas GH issue #15244 and Dask GH issue #1876.""" by = kwargs.get('by', None) if by is not None and not _is_aligned(df, by): msg = ("Grouping by an unaligned index is unsafe and unsupported.\n" "This can be caused by filtering only one of the object or\n" "grouping key. For example, the following works in pandas,\n" "but not in dask:\n" "\n" "df[df.foo < 0].groupby(df.bar)\n" "\n" "This can be avoided by either filtering beforehand, or\n" "passing in the name of the column instead:\n" "\n" "df2 = df[df.foo < 0]\n" "df2.groupby(df2.bar)\n" "# or\n" "df[df.foo < 0].groupby('bar')\n" "\n" "For more information see dask GH issue #1876.") raise ValueError(msg) elif by is not None and len(by): # since we're coming through apply, `by` will be a tuple. 
# Pandas treats tuples as a single key, and lists as multiple keys # We want multiple keys kwargs.update(by=list(by)) return df.groupby(**kwargs) def _groupby_slice_apply(df, grouper, key, func): # No need to use raise if unaligned here - this is only called after # shuffling, which makes everything aligned already g = df.groupby(grouper) if key: g = g[key] return g.apply(func) def _groupby_get_group(df, by_key, get_key, columns): # SeriesGroupBy may pass df which includes group key grouped = _groupby_raise_unaligned(df, by=by_key) if get_key in grouped.groups: if isinstance(df, pd.DataFrame): grouped = grouped[columns] return grouped.get_group(get_key) else: # to create empty DataFrame/Series, which has the same # dtype as the original if isinstance(df, pd.DataFrame): # may be SeriesGroupBy df = df[columns] return df.iloc[0:0] ############################################################### # Aggregation ############################################################### # Implementation detail: use class to make it easier to pass inside spec class Aggregation(object): """A user defined aggregation. Parameters ---------- name : str the name of the aggregation. It should be unique, since intermediate result will be identified by this name. chunk : callable a function that will be called with the grouped column of each partition. It can either return a single series or a tuple of series. The index has to be equal to the groups. agg : callable a function that will be called to aggregate the results of each chunk. Again the argument(s) will be grouped series. If ``chunk`` returned a tuple, ``agg`` will be called with all of them as individual positional arguments. finalize : callable an optional finalizer that will be called with the results from the aggregation. Examples -------- ``sum`` can be implemented as:: custom_sum = dd.Aggregation('custom_sum', lambda s: s.sum(), lambda s0: s0.sum()) df.groupby('g').agg(custom_sum) and ``mean`` can be implemented as:: custom_mean = dd.Aggregation( 'custom_mean', lambda s: (s.count(), s.sum()), lambda count, sum: (count.sum(), sum.sum()), lambda count, sum: sum / count, ) df.groupby('g').agg(custom_mean) """ def __init__(self, name, chunk, agg, finalize=None): self.chunk = chunk self.agg = agg self.finalize = finalize self.__name__ = name def _groupby_aggregate(df, aggfunc=None, levels=None): return aggfunc(df.groupby(level=levels, sort=False)) def _apply_chunk(df, *index, **kwargs): func = kwargs.pop('chunk') columns = kwargs.pop('columns') g = _groupby_raise_unaligned(df, by=index) if isinstance(df, pd.Series) or columns is None: return func(g) else: if isinstance(columns, (tuple, list, set, pd.Index)): columns = list(columns) return func(g[columns]) def _var_chunk(df, *index): if isinstance(df, pd.Series): df = df.to_frame() g = _groupby_raise_unaligned(df, by=index) x = g.sum() x2 = g.agg(lambda x: (x**2).sum()).rename(columns=lambda c: c + '-x2') n = g.count().rename(columns=lambda c: c + '-count') return pd.concat([x, x2, n], axis=1) def _var_combine(g, levels): return g.groupby(level=levels, sort=False).sum() def _var_agg(g, levels, ddof): g = g.groupby(level=levels, sort=False).sum() nc = len(g.columns) x = g[g.columns[:nc // 3]] x2 = g[g.columns[nc // 3:2 * nc // 3]].rename(columns=lambda c: c[:-3]) n = g[g.columns[-nc // 3:]].rename(columns=lambda c: c[:-6]) # TODO: replace with _finalize_var? 
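# x holds the per-group sums, x2 the per-group sums of squares, and n the
# per-group counts gathered by _var_chunk, so the expression below computes
# (x2 - x**2 / n) / (n - ddof); negative divisors are clamped to zero and
# groups with n == ddof are set to NaN.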
result = x2 - x ** 2 / n div = (n - ddof) div[div < 0] = 0 result /= div result[(n - ddof) == 0] = np.nan assert isinstance(result, pd.DataFrame) return result ############################################################### # nunique ############################################################### def _nunique_df_chunk(df, *index, **kwargs): levels = kwargs.pop('levels') name = kwargs.pop('name') g = _groupby_raise_unaligned(df, by=index) grouped = g[[name]].apply(pd.DataFrame.drop_duplicates) # we set the index here to force a possibly duplicate index # for our reduce step if isinstance(levels, list): grouped.index = pd.MultiIndex.from_arrays([ grouped.index.get_level_values(level=level) for level in levels ]) else: grouped.index = grouped.index.get_level_values(level=levels) return grouped def _nunique_df_combine(df, levels): result = df.groupby(level=levels, sort=False).apply(pd.DataFrame.drop_duplicates) if isinstance(levels, list): result.index = pd.MultiIndex.from_arrays([ result.index.get_level_values(level=level) for level in levels ]) else: result.index = result.index.get_level_values(level=levels) return result def _nunique_df_aggregate(df, levels, name): return df.groupby(level=levels, sort=False)[name].nunique() def _nunique_series_chunk(df, *index, **_ignored_): # convert series to data frame, then hand over to dataframe code path assert isinstance(df, pd.Series) df = df.to_frame() kwargs = dict(name=df.columns[0], levels=_determine_levels(index)) return _nunique_df_chunk(df, *index, **kwargs) ############################################################### # Aggregate support # # Aggregate is implemented as: # # 1. group-by-aggregate all partitions into intermediate values # 2. collect all partitions into a single partition # 3. group-by-aggregate the result into intermediate values # 4. transform all intermediate values into the result # # In Step 1 and 3 the dataframe is grouped on the same columns. # ############################################################### def _make_agg_id(func, column): return '{!s}-{!s}-{}'.format(func, column, tokenize(func, column)) def _normalize_spec(spec, non_group_columns): """ Return a list of ``(result_column, func, input_column)`` tuples. Spec can be - a function - a list of functions - a dictionary that maps input-columns to functions - a dictionary that maps input-columns to a lists of functions - a dictionary that maps input-columns to a dictionaries that map output-columns to functions. The non-group columns are a list of all column names that are not used in the groupby operation. Usually, the result columns are mutli-level names, returned as tuples. If only a single function is supplied or dictionary mapping columns to single functions, simple names are returned as strings (see the first two examples below). Examples -------- >>> _normalize_spec('mean', ['a', 'b', 'c']) [('a', 'mean', 'a'), ('b', 'mean', 'b'), ('c', 'mean', 'c')] >>> spec = collections.OrderedDict([('a', 'mean'), ('b', 'count')]) >>> _normalize_spec(spec, ['a', 'b', 'c']) [('a', 'mean', 'a'), ('b', 'count', 'b')] >>> _normalize_spec(['var', 'mean'], ['a', 'b', 'c']) ... # doctest: +NORMALIZE_WHITESPACE [(('a', 'var'), 'var', 'a'), (('a', 'mean'), 'mean', 'a'), \ (('b', 'var'), 'var', 'b'), (('b', 'mean'), 'mean', 'b'), \ (('c', 'var'), 'var', 'c'), (('c', 'mean'), 'mean', 'c')] >>> spec = collections.OrderedDict([('a', 'mean'), ('b', ['sum', 'count'])]) >>> _normalize_spec(spec, ['a', 'b', 'c']) ... 
# doctest: +NORMALIZE_WHITESPACE [(('a', 'mean'), 'mean', 'a'), (('b', 'sum'), 'sum', 'b'), \ (('b', 'count'), 'count', 'b')] >>> spec = collections.OrderedDict() >>> spec['a'] = ['mean', 'size'] >>> spec['b'] = collections.OrderedDict([('e', 'count'), ('f', 'var')]) >>> _normalize_spec(spec, ['a', 'b', 'c']) ... # doctest: +NORMALIZE_WHITESPACE [(('a', 'mean'), 'mean', 'a'), (('a', 'size'), 'size', 'a'), \ (('b', 'e'), 'count', 'b'), (('b', 'f'), 'var', 'b')] """ if not isinstance(spec, dict): spec = collections.OrderedDict(zip(non_group_columns, it.repeat(spec))) res = [] if isinstance(spec, dict): for input_column, subspec in spec.items(): if isinstance(subspec, dict): res.extend(((input_column, result_column), func, input_column) for result_column, func in subspec.items()) else: if not isinstance(subspec, list): subspec = [subspec] res.extend(((input_column, funcname(func)), func, input_column) for func in subspec) else: raise ValueError("unsupported agg spec of type {}".format(type(spec))) compounds = (list, tuple, dict) use_flat_columns = not any(isinstance(subspec, compounds) for subspec in spec.values()) if use_flat_columns: res = [(input_col, func, input_col) for (_, func, input_col) in res] return res def _build_agg_args(spec): """ Create transformation functions for a normalized aggregate spec. Parameters ---------- spec: a list of (result-column, aggregation-function, input-column) triples. To work with all arugment forms understood by pandas use ``_normalize_spec`` to normalize the argment before passing it on to ``_build_agg_args``. Returns ------- chunk_funcs: a list of (intermediate-column, function, keyword) triples that are applied on grouped chunks of the initial dataframe. agg_funcs: a list of (intermediate-column, functions, keword) triples that are applied on the grouped concatination of the preprocessed chunks. finalizers: a list of (result-column, function, keyword) triples that are applied after the ``agg_funcs``. They are used to create final results from intermediate representations. 
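For example (a sketch of how the helpers below decompose a spec), the triple
``('a', 'mean', 'a')`` yields chunk functions that compute the per-partition
sum and count of column ``'a'``, aggregate functions that sum those
intermediates, and a finalizer that divides the total sum by the total count:

>>> chunks, aggs, finalizers = _build_agg_args([('a', 'mean', 'a')])  # doctest: +SKIP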
""" known_np_funcs = {np.min: 'min', np.max: 'max'} # check that there are no name conflicts for a single input column by_name = {} for _, func, input_column in spec: key = funcname(known_np_funcs.get(func, func)), input_column by_name.setdefault(key, []).append((func, input_column)) for funcs in by_name.values(): if len(funcs) != 1: raise ValueError('conflicting aggregation functions: {}'.format(funcs)) chunks = {} aggs = {} finalizers = [] for (result_column, func, input_column) in spec: if not isinstance(func, Aggregation): func = funcname(known_np_funcs.get(func, func)) impls = _build_agg_args_single(result_column, func, input_column) # overwrite existing result-columns, generate intermediates only once chunks.update((spec[0], spec) for spec in impls['chunk_funcs']) aggs.update((spec[0], spec) for spec in impls['aggregate_funcs']) finalizers.append(impls['finalizer']) chunks = sorted(chunks.values()) aggs = sorted(aggs.values()) return chunks, aggs, finalizers def _build_agg_args_single(result_column, func, input_column): simple_impl = { 'sum': (M.sum, M.sum), 'min': (M.min, M.min), 'max': (M.max, M.max), 'count': (M.count, M.sum), 'size': (M.size, M.sum), } if func in simple_impl.keys(): return _build_agg_args_simple(result_column, func, input_column, simple_impl[func]) elif func == 'var': return _build_agg_args_var(result_column, func, input_column) elif func == 'std': return _build_agg_args_std(result_column, func, input_column) elif func == 'mean': return _build_agg_args_mean(result_column, func, input_column) elif isinstance(func, Aggregation): return _build_agg_args_custom(result_column, func, input_column) else: raise ValueError("unknown aggregate {}".format(func)) def _build_agg_args_simple(result_column, func, input_column, impl_pair): intermediate = _make_agg_id(func, input_column) chunk_impl, agg_impl = impl_pair return dict( chunk_funcs=[(intermediate, _apply_func_to_column, dict(column=input_column, func=chunk_impl))], aggregate_funcs=[(intermediate, _apply_func_to_column, dict(column=intermediate, func=agg_impl))], finalizer=(result_column, itemgetter(intermediate), dict()), ) def _build_agg_args_var(result_column, func, input_column): int_sum = _make_agg_id('sum', input_column) int_sum2 = _make_agg_id('sum2', input_column) int_count = _make_agg_id('count', input_column) return dict( chunk_funcs=[ (int_sum, _apply_func_to_column, dict(column=input_column, func=M.sum)), (int_count, _apply_func_to_column, dict(column=input_column, func=M.count)), (int_sum2, _compute_sum_of_squares, dict(column=input_column)), ], aggregate_funcs=[ (col, _apply_func_to_column, dict(column=col, func=M.sum)) for col in (int_sum, int_count, int_sum2) ], finalizer=(result_column, _finalize_var, dict(sum_column=int_sum, count_column=int_count, sum2_column=int_sum2)), ) def _build_agg_args_std(result_column, func, input_column): impls = _build_agg_args_var(result_column, func, input_column) result_column, _, kwargs = impls['finalizer'] impls['finalizer'] = (result_column, _finalize_std, kwargs) return impls def _build_agg_args_mean(result_column, func, input_column): int_sum = _make_agg_id('sum', input_column) int_count = _make_agg_id('count', input_column) return dict( chunk_funcs=[ (int_sum, _apply_func_to_column, dict(column=input_column, func=M.sum)), (int_count, _apply_func_to_column, dict(column=input_column, func=M.count)), ], aggregate_funcs=[ (col, _apply_func_to_column, dict(column=col, func=M.sum)) for col in (int_sum, int_count) ], finalizer=(result_column, _finalize_mean, 
dict(sum_column=int_sum, count_column=int_count)), ) def _build_agg_args_custom(result_column, func, input_column): col = _make_agg_id(funcname(func), input_column) if func.finalize is None: finalizer = (result_column, operator.itemgetter(col), dict()) else: finalizer = ( result_column, _apply_func_to_columns, dict(func=func.finalize, prefix=col) ) return dict( chunk_funcs=[ (col, _apply_func_to_column, dict(func=func.chunk, column=input_column)) ], aggregate_funcs=[ (col, _apply_func_to_columns, dict(func=func.agg, prefix=col)) ], finalizer=finalizer ) def _groupby_apply_funcs(df, *index, **kwargs): """ Group a dataframe and apply multiple aggregation functions. Parameters ---------- df: pandas.DataFrame The dataframe to work on. index: list of groupers If given, they are added to the keyword arguments as the ``by`` argument. funcs: list of result-colum, function, keywordargument triples The list of functions that are applied on the grouped data frame. Has to be passed as a keyword argument. kwargs: All keyword arguments, but ``funcs``, are passed verbatim to the groupby operation of the dataframe Returns ------- aggregated: the aggregated dataframe. """ if len(index): # since we're coming through apply, `by` will be a tuple. # Pandas treats tuples as a single key, and lists as multiple keys # We want multiple keys kwargs.update(by=list(index)) funcs = kwargs.pop('funcs') grouped = _groupby_raise_unaligned(df, **kwargs) result = collections.OrderedDict() for result_column, func, func_kwargs in funcs: r = func(grouped, **func_kwargs) if isinstance(r, tuple): for idx, s in enumerate(r): result['{}-{}'.format(result_column, idx)] = s else: result[result_column] = r return pd.DataFrame(result) def _compute_sum_of_squares(grouped, column): base = grouped[column] if column is not None else grouped return base.apply(lambda x: (x ** 2).sum()) def _agg_finalize(df, aggregate_funcs, finalize_funcs, level): # finish the final aggregation level df = _groupby_apply_funcs(df, funcs=aggregate_funcs, level=level) # and finalize the result result = collections.OrderedDict() for result_column, func, kwargs in finalize_funcs: result[result_column] = func(df, **kwargs) return pd.DataFrame(result) def _apply_func_to_column(df_like, column, func): if column is None: return func(df_like) return func(df_like[column]) def _apply_func_to_columns(df_like, prefix, func): if isinstance(df_like, pd.DataFrame): columns = df_like.columns else: # handle GroupBy objects columns = df_like._selected_obj.columns columns = sorted(col for col in columns if col.startswith(prefix)) columns = [df_like[col] for col in columns] return func(*columns) def _finalize_mean(df, sum_column, count_column): return df[sum_column] / df[count_column] def _finalize_var(df, count_column, sum_column, sum2_column, ddof=1): n = df[count_column] x = df[sum_column] x2 = df[sum2_column] result = x2 - x ** 2 / n div = (n - ddof) div[div < 0] = 0 result /= div result[(n - ddof) == 0] = np.nan return result def _finalize_std(df, count_column, sum_column, sum2_column, ddof=1): result = _finalize_var(df, count_column, sum_column, sum2_column, ddof) return np.sqrt(result) def _cum_agg_aligned(part, cum_last, index, columns, func, initial): align = cum_last.reindex(part.set_index(index).index, fill_value=initial) align.index = part.index return func(part[columns], align) def _cum_agg_filled(a, b, func, initial): union = a.index.union(b.index) return func(a.reindex(union, fill_value=initial), b.reindex(union, fill_value=initial), fill_value=initial) def 
_cumcount_aggregate(a, b, fill_value=None): return a.add(b, fill_value=fill_value) + 1 class _GroupBy(object): """ Superclass for DataFrameGroupBy and SeriesGroupBy Parameters ---------- obj: DataFrame or Series DataFrame or Series to be grouped by: str, list or Series The key for grouping slice: str, list The slice keys applied to GroupBy result """ def __init__(self, df, by=None, slice=None): assert isinstance(df, (DataFrame, Series)) self.obj = df # grouping key passed via groupby method self.index = _normalize_index(df, by) if isinstance(self.index, list): do_index_partition_align = all( item.divisions == df.divisions if isinstance(item, Series) else True for item in self.index ) elif isinstance(self.index, Series): do_index_partition_align = df.divisions == self.index.divisions else: do_index_partition_align = True if not do_index_partition_align: raise NotImplementedError("The grouped object and index of the " "groupby must have the same divisions.") # slicing key applied to _GroupBy instance self._slice = slice if isinstance(self.index, list): index_meta = [item._meta if isinstance(item, Series) else item for item in self.index] elif isinstance(self.index, Series): index_meta = self.index._meta else: index_meta = self.index self._meta = self.obj._meta.groupby(index_meta) @property def _meta_nonempty(self): """ Return a pd.DataFrameGroupBy / pd.SeriesGroupBy which contains sample data. """ sample = self.obj._meta_nonempty if isinstance(self.index, list): index_meta = [item._meta_nonempty if isinstance(item, Series) else item for item in self.index] elif isinstance(self.index, Series): index_meta = self.index._meta_nonempty else: index_meta = self.index grouped = sample.groupby(index_meta) return _maybe_slice(grouped, self._slice) def _aca_agg(self, token, func, aggfunc=None, split_every=None, split_out=1): if aggfunc is None: aggfunc = func meta = func(self._meta) columns = meta.name if isinstance(meta, pd.Series) else meta.columns token = self._token_prefix + token levels = _determine_levels(self.index) return aca([self.obj, self.index] if not isinstance(self.index, list) else [self.obj] + self.index, chunk=_apply_chunk, chunk_kwargs=dict(chunk=func, columns=columns), aggregate=_groupby_aggregate, meta=meta, token=token, split_every=split_every, aggregate_kwargs=dict(aggfunc=aggfunc, levels=levels), split_out=split_out, split_out_setup=split_out_on_index) def _cum_agg(self, token, chunk, aggregate, initial): """ Wrapper for cumulative groupby operation """ meta = chunk(self._meta) columns = meta.name if isinstance(meta, pd.Series) else meta.columns index = self.index if isinstance(self.index, list) else [self.index] name = self._token_prefix + token name_part = name + '-map' name_last = name + '-take-last' name_cum = name + '-cum-last' # cumulate each partitions cumpart_raw = map_partitions(_apply_chunk, self.obj, *index, chunk=chunk, columns=columns, token=name_part, meta=meta) cumpart_raw_frame = (cumpart_raw.to_frame() if isinstance(meta, pd.Series) else cumpart_raw) cumpart_ext = cumpart_raw_frame.assign( **{i: self.obj[i] if np.isscalar(i) and i in self.obj.columns else self.obj.index for i in index}) # Use pd.Grouper objects to specify that we are grouping by columns. # Otherwise, pandas will throw an ambiguity warning if the # DataFrame's index (self.obj.index) was included in the grouping # specification (self.index). 
See pandas #14432 index_groupers = [pd.Grouper(key=ind) for ind in index] cumlast = map_partitions(_apply_chunk, cumpart_ext, *index_groupers, columns=0 if columns is None else columns, chunk=M.last, meta=meta, token=name_last) # aggregate cumulated partisions and its previous last element dask = {} dask[(name, 0)] = (cumpart_raw._name, 0) for i in range(1, self.obj.npartitions): # store each cumulative step to graph to reduce computation if i == 1: dask[(name_cum, i)] = (cumlast._name, i - 1) else: # aggregate with previous cumulation results dask[(name_cum, i)] = (_cum_agg_filled, (name_cum, i - 1), (cumlast._name, i - 1), aggregate, initial) dask[(name, i)] = (_cum_agg_aligned, (cumpart_ext._name, i), (name_cum, i), index, 0 if columns is None else columns, aggregate, initial) return new_dd_object(merge(dask, cumpart_ext.dask, cumlast.dask), name, chunk(self._meta), self.obj.divisions) @derived_from(pd.core.groupby.GroupBy) def cumsum(self, axis=0): if axis: return self.obj.cumsum(axis=axis) else: return self._cum_agg('cumsum', chunk=M.cumsum, aggregate=M.add, initial=0) @derived_from(pd.core.groupby.GroupBy) def cumprod(self, axis=0): if axis: return self.obj.cumprod(axis=axis) else: return self._cum_agg('cumprod', chunk=M.cumprod, aggregate=M.mul, initial=1) @derived_from(pd.core.groupby.GroupBy) def cumcount(self, axis=None): return self._cum_agg('cumcount', chunk=M.cumcount, aggregate=_cumcount_aggregate, initial=-1) @derived_from(pd.core.groupby.GroupBy) def sum(self, split_every=None, split_out=1): return self._aca_agg(token='sum', func=M.sum, split_every=split_every, split_out=split_out) @derived_from(pd.core.groupby.GroupBy) def min(self, split_every=None, split_out=1): return self._aca_agg(token='min', func=M.min, split_every=split_every, split_out=split_out) @derived_from(pd.core.groupby.GroupBy) def max(self, split_every=None, split_out=1): return self._aca_agg(token='max', func=M.max, split_every=split_every, split_out=split_out) @derived_from(pd.core.groupby.GroupBy) def count(self, split_every=None, split_out=1): return self._aca_agg(token='count', func=M.count, aggfunc=M.sum, split_every=split_every, split_out=split_out) @derived_from(pd.core.groupby.GroupBy) def mean(self, split_every=None, split_out=1): return (self.sum(split_every=split_every, split_out=split_out) / self.count(split_every=split_every, split_out=split_out)) @derived_from(pd.core.groupby.GroupBy) def size(self, split_every=None, split_out=1): return self._aca_agg(token='size', func=M.size, aggfunc=M.sum, split_every=split_every, split_out=split_out) @derived_from(pd.core.groupby.GroupBy) def var(self, ddof=1, split_every=None, split_out=1): levels = _determine_levels(self.index) result = aca([self.obj, self.index] if not isinstance(self.index, list) else [self.obj] + self.index, chunk=_var_chunk, aggregate=_var_agg, combine=_var_combine, token=self._token_prefix + 'var', aggregate_kwargs={'ddof': ddof, 'levels': levels}, combine_kwargs={'levels': levels}, split_every=split_every, split_out=split_out, split_out_setup=split_out_on_index) if isinstance(self.obj, Series): result = result[result.columns[0]] if self._slice: result = result[self._slice] return result @derived_from(pd.core.groupby.GroupBy) def std(self, ddof=1, split_every=None, split_out=1): v = self.var(ddof, split_every=split_every, split_out=split_out) result = map_partitions(np.sqrt, v, meta=v) return result @derived_from(pd.core.groupby.GroupBy) def get_group(self, key): token = self._token_prefix + 'get_group' meta = self._meta.obj if 
isinstance(meta, pd.DataFrame) and self._slice is not None: meta = meta[self._slice] columns = meta.columns if isinstance(meta, pd.DataFrame) else meta.name return map_partitions(_groupby_get_group, self.obj, self.index, key, columns, meta=meta, token=token) def aggregate(self, arg, split_every, split_out=1): if isinstance(self.obj, DataFrame): if isinstance(self.index, tuple) or np.isscalar(self.index): group_columns = {self.index} elif isinstance(self.index, list): group_columns = {i for i in self.index if isinstance(i, tuple) or np.isscalar(i)} else: group_columns = set() if self._slice: # pandas doesn't exclude the grouping column in a SeriesGroupBy # like df.groupby('a')['a'].agg(...) non_group_columns = self._slice if not isinstance(non_group_columns, list): non_group_columns = [non_group_columns] else: # NOTE: this step relies on the index normalization to replace # series with their name in an index. non_group_columns = [col for col in self.obj.columns if col not in group_columns] spec = _normalize_spec(arg, non_group_columns) elif isinstance(self.obj, Series): if isinstance(arg, (list, tuple, dict)): # implementation detail: if self.obj is a series, a pseudo column # None is used to denote the series itself. This pseudo column is # removed from the result columns before passing the spec along. spec = _normalize_spec({None: arg}, []) spec = [(result_column, func, input_column) for ((_, result_column), func, input_column) in spec] else: spec = _normalize_spec({None: arg}, []) spec = [(self.obj.name, func, input_column) for (_, func, input_column) in spec] else: raise ValueError("aggregate on unknown object {}".format(self.obj)) chunk_funcs, aggregate_funcs, finalizers = _build_agg_args(spec) if isinstance(self.index, (tuple, list)) and len(self.index) > 1: levels = list(range(len(self.index))) else: levels = 0 if not isinstance(self.index, list): chunk_args = [self.obj, self.index] else: chunk_args = [self.obj] + self.index return aca(chunk_args, chunk=_groupby_apply_funcs, chunk_kwargs=dict(funcs=chunk_funcs), combine=_groupby_apply_funcs, combine_kwargs=dict(funcs=aggregate_funcs, level=levels), aggregate=_agg_finalize, aggregate_kwargs=dict( aggregate_funcs=aggregate_funcs, finalize_funcs=finalizers, level=levels, ), token='aggregate', split_every=split_every, split_out=split_out, split_out_setup=split_out_on_index) @insert_meta_param_description(pad=12) def apply(self, func, meta=no_default): """ Parallel version of pandas GroupBy.apply This mimics the pandas version except for the following: 1. The user should provide output metadata. 2. If the grouper does not align with the index then this causes a full shuffle. The order of rows within each group may not be preserved. Parameters ---------- func: function Function to apply $META Returns ------- applied : Series or DataFrame depending on columns keyword """ if meta is no_default: msg = ("`meta` is not specified, inferred from partial data. 
" "Please provide `meta` if the result is unexpected.\n" " Before: .apply(func)\n" " After: .apply(func, meta={'x': 'f8', 'y': 'f8'}) for dataframe result\n" " or: .apply(func, meta=('x', 'f8')) for series result") warnings.warn(msg, stacklevel=2) with raise_on_meta_error("groupby.apply({0})".format(funcname(func))): meta = self._meta_nonempty.apply(func) meta = make_meta(meta) # Validate self.index if (isinstance(self.index, list) and any(isinstance(item, Series) for item in self.index)): raise NotImplementedError("groupby-apply with a multiple Series " "is currently not supported") df = self.obj should_shuffle = not (df.known_divisions and df._contains_index_name(self.index)) if should_shuffle: if isinstance(self.index, DataFrame): # add index columns to dataframe df2 = df.assign(**{'_index_' + c: self.index[c] for c in self.index.columns}) index = self.index elif isinstance(self.index, Series): df2 = df.assign(_index=self.index) index = self.index else: df2 = df index = df._select_columns_or_index(self.index) df3 = shuffle(df2, index) # shuffle dataframe and index else: df3 = df if should_shuffle and isinstance(self.index, DataFrame): # extract index from dataframe cols = ['_index_' + c for c in self.index.columns] index2 = df3[cols] if isinstance(meta, pd.DataFrame): df4 = df3.map_partitions(drop_columns, cols, meta.columns.dtype) else: df4 = df3.drop(cols, axis=1) elif should_shuffle and isinstance(self.index, Series): index2 = df3['_index'] index2.name = self.index.name if isinstance(meta, pd.DataFrame): df4 = df3.map_partitions(drop_columns, '_index', meta.columns.dtype) else: df4 = df3.drop('_index', axis=1) else: df4 = df3 index2 = self.index # Perform embarrassingly parallel groupby-apply df5 = map_partitions(_groupby_slice_apply, df4, index2, self._slice, func, meta=meta) return df5 class DataFrameGroupBy(_GroupBy): _token_prefix = 'dataframe-groupby-' def __getitem__(self, key): if isinstance(key, list): g = DataFrameGroupBy(self.obj, by=self.index, slice=key) else: g = SeriesGroupBy(self.obj, by=self.index, slice=key) # error is raised from pandas g._meta = g._meta[key] return g def __dir__(self): return sorted(set(dir(type(self)) + list(self.__dict__) + list(filter(pd.compat.isidentifier, self.obj.columns)))) def __getattr__(self, key): try: return self[key] except KeyError as e: raise AttributeError(e) @derived_from(pd.core.groupby.DataFrameGroupBy) def aggregate(self, arg, split_every=None, split_out=1): if arg == 'size': return self.size() return super(DataFrameGroupBy, self).aggregate(arg, split_every=split_every, split_out=split_out) @derived_from(pd.core.groupby.DataFrameGroupBy) def agg(self, arg, split_every=None, split_out=1): return self.aggregate(arg, split_every=split_every, split_out=split_out) class SeriesGroupBy(_GroupBy): _token_prefix = 'series-groupby-' def __init__(self, df, by=None, slice=None): # for any non series object, raise pandas-compat error message if isinstance(df, Series): if isinstance(by, Series): pass elif isinstance(by, list): if len(by) == 0: raise ValueError("No group keys passed!") non_series_items = [item for item in by if not isinstance(item, Series)] # raise error from pandas, if applicable df._meta.groupby(non_series_items) else: # raise error from pandas, if applicable df._meta.groupby(by) super(SeriesGroupBy, self).__init__(df, by=by, slice=slice) def nunique(self, split_every=None, split_out=1): name = self._meta.obj.name levels = _determine_levels(self.index) if isinstance(self.obj, DataFrame): chunk = _nunique_df_chunk else: 
chunk = _nunique_series_chunk return aca([self.obj, self.index] if not isinstance(self.index, list) else [self.obj] + self.index, chunk=chunk, aggregate=_nunique_df_aggregate, combine=_nunique_df_combine, token='series-groupby-nunique', chunk_kwargs={'levels': levels, 'name': name}, aggregate_kwargs={'levels': levels, 'name': name}, combine_kwargs={'levels': levels}, split_every=split_every, split_out=split_out, split_out_setup=split_out_on_index) @derived_from(pd.core.groupby.SeriesGroupBy) def aggregate(self, arg, split_every=None, split_out=1): result = super(SeriesGroupBy, self).aggregate(arg, split_every=split_every, split_out=split_out) if self._slice: result = result[self._slice] if not isinstance(arg, (list, dict)): result = result[result.columns[0]] return result @derived_from(pd.core.groupby.SeriesGroupBy) def agg(self, arg, split_every=None, split_out=1): return self.aggregate(arg, split_every=split_every, split_out=split_out) dask-0.16.0/dask/dataframe/hashing.py000066400000000000000000000144351320364734500173770ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import numpy as np import pandas as pd from .utils import PANDAS_VERSION # In Pandas 0.19.2, a function to hash pandas objects was introduced. Object # arrays are assumed to be strings, and are hashed with a cython implementation # of siphash. However, the version in 0.19.2 hashes categoricals based on their # integer codes, instead of taking into account the values they represent. This # is fixed in pandas > 0.19.2. To support versions 0.19.0 and up, we do do the # following: # # - For versions > 0.19.2, we use the provided `hash_pandas_object` function. # - For 0.19.0 through 0.19.2, we copy the definition of `hash_pandas_object` # from pandas master (will be released as 0.20.0). # - For 0.19.0 and 0.19.1, we use python's `hash` builtin to hash strings. # - For 0.19.2, we use the `hash_object_array` method provided in pandas # (an implementation of siphash) # # When dask drops support for pandas <= 0.19.2, all this can be removed. # XXX: Pandas uses release branches > 0.19.0, which doesn't play well with # versioneer, since the release tags aren't ancestors of master. As such, we # need to use this hacky awfulness to check if we're > 0.19.2. 
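# A comment-only sketch (not executed; the output values are placeholders, not
# real hashes) of the behaviour every branch below is expected to provide:
#
#     >>> hash_pandas_object(pd.Series(['a', 'b', 'c']), index=False)  # doctest: +SKIP
#     0    ...
#     1    ...
#     2    ...
#     dtype: uint64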
if PANDAS_VERSION >= '0.20.0': from pandas.util import hash_pandas_object elif PANDAS_VERSION not in ('0.19.1', '0.19.2') and PANDAS_VERSION > '0.19.0+460': from pandas.tools.hashing import hash_pandas_object else: from pandas.types.common import (is_categorical_dtype, is_numeric_dtype, is_datetime64_dtype, is_timedelta64_dtype) from pandas.lib import is_bool_array if PANDAS_VERSION == '0.19.2': from pandas._hash import hash_object_array else: # 0.19.0 and 0.19.1 def hash_object_array(x, hash_key, encoding): return np.array([hash(i) for i in x], dtype=np.uint64) # 16 byte long hashing key _default_hash_key = '0123456789123456' def hash_pandas_object(obj, index=True, encoding='utf8', hash_key=None, categorize=True): if hash_key is None: hash_key = _default_hash_key def adder(h, hashed_to_add): h = np.multiply(h, np.uint(3), h) return np.add(h, hashed_to_add, h) if isinstance(obj, pd.Index): h = hash_array(obj.values, encoding, hash_key, categorize).astype('uint64') h = pd.Series(h, index=obj, dtype='uint64') elif isinstance(obj, pd.Series): h = hash_array(obj.values, encoding, hash_key, categorize).astype('uint64') if index: h = adder(h, hash_pandas_object(obj.index, index=False, encoding=encoding, hash_key=hash_key, categorize=categorize).values) h = pd.Series(h, index=obj.index, dtype='uint64') elif isinstance(obj, pd.DataFrame): cols = obj.iteritems() first_series = next(cols)[1] h = hash_array(first_series.values, encoding, hash_key, categorize).astype('uint64') for _, col in cols: h = adder(h, hash_array(col.values, encoding, hash_key, categorize)) if index: h = adder(h, hash_pandas_object(obj.index, index=False, encoding=encoding, hash_key=hash_key, categorize=categorize).values) h = pd.Series(h, index=obj.index, dtype='uint64') else: raise TypeError("Unexpected type for hashing %s" % type(obj)) return h def _hash_categorical(c, encoding, hash_key): hashed = hash_array(c.categories.values, encoding, hash_key, categorize=False) mask = c.isnull() if len(hashed): result = hashed.take(c.codes) else: result = np.zeros(len(mask), dtype='uint64') if mask.any(): result[mask] = np.iinfo(np.uint64).max return result def hash_array(vals, encoding='utf8', hash_key=None, categorize=True): if hash_key is None: hash_key = _default_hash_key # For categoricals, we hash the categories, then remap the codes to the # hash values. (This check is above the complex check so that we don't # ask numpy if categorical is a subdtype of complex, as it will choke. if is_categorical_dtype(vals.dtype): return _hash_categorical(vals, encoding, hash_key) # we'll be working with everything as 64-bit values, so handle this # 128-bit value early if np.issubdtype(vals.dtype, np.complex128): return hash_array(vals.real) + 23 * hash_array(vals.imag) # First, turn whatever array this is into unsigned 64-bit ints, if we # can manage it. if is_bool_array(vals): vals = vals.astype('u8') elif ((is_datetime64_dtype(vals) or is_timedelta64_dtype(vals) or is_numeric_dtype(vals)) and vals.dtype.itemsize <= 8): vals = vals.view('u{}'.format(vals.dtype.itemsize)).astype('u8') else: # With repeated values, its MUCH faster to categorize object # dtypes, then hash and rename categories. We allow skipping the # categorization when the values are known/likely to be unique. 
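        # A comment-only sketch of the shortcut taken below (representative
        # values, not executed): repeated object values are factorized once so
        # only the unique categories go through the expensive string hash, e.g.
        #
        #     codes, categories = pd.factorize(
        #         np.array(['x', 'y', 'x'], dtype=object), sort=False)
        #     # codes -> [0, 1, 0], categories -> ['x', 'y']; hash the two
        #     # categories once, then broadcast the hashes back via `codes`.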
if categorize: codes, categories = pd.factorize(vals, sort=False) cat = pd.Categorical(codes, pd.Index(categories), ordered=False, fastpath=True) return _hash_categorical(cat, encoding, hash_key) vals = hash_object_array(vals, hash_key, encoding) # Then, redistribute these 64-bit ints within the space of 64-bit ints vals ^= vals >> 30 vals *= np.uint64(0xbf58476d1ce4e5b9) vals ^= vals >> 27 vals *= np.uint64(0x94d049bb133111eb) vals ^= vals >> 31 return vals dask-0.16.0/dask/dataframe/hyperloglog.py000066400000000000000000000047231320364734500203100ustar00rootroot00000000000000# -*- coding: utf-8 -*- u"""Implementation of HyperLogLog This implements the HyperLogLog algorithm for cardinality estimation, found in Philippe Flajolet, Éric Fusy, Olivier Gandouet and Frédéric Meunier. "HyperLogLog: the analysis of a near-optimal cardinality estimation algorithm". 2007 Conference on Analysis of Algorithms. Nice, France (2007) """ from __future__ import absolute_import, division, print_function import numpy as np import pandas as pd from .hashing import hash_pandas_object def compute_first_bit(a): "Compute the position of the first nonzero bit for each int in an array." # TODO: consider making this less memory-hungry bits = np.bitwise_and.outer(a, 1 << np.arange(32)) bits = bits.cumsum(axis=1).astype(np.bool) return 33 - bits.sum(axis=1) def compute_hll_array(obj, b): # b is the number of bits if not 8 <= b <= 16: raise ValueError('b should be between 8 and 16') num_bits_discarded = 32 - b m = 1 << b # Get an array of the hashes hashes = hash_pandas_object(obj, index=False) if isinstance(hashes, pd.Series): hashes = hashes._values hashes = hashes.astype(np.uint32) # Of the first b bits, which is the first nonzero? j = hashes >> num_bits_discarded first_bit = compute_first_bit(hashes) # Pandas can do the max aggregation df = pd.DataFrame({'j': j, 'first_bit': first_bit}) series = df.groupby('j').max()['first_bit'] # Return a dense array so we can concat them and get a result # that is easy to deal with return series.reindex(np.arange(m), fill_value=0).values.astype(np.uint8) def reduce_state(Ms, b): m = 1 << b # We concatenated all of the states, now we need to get the max # value for each j in both Ms = Ms.reshape((len(Ms) // m), m) return Ms.max(axis=0) def estimate_count(Ms, b): m = 1 << b # Combine one last time M = reduce_state(Ms, b) # Estimate cardinality, no adjustments alpha = 0.7213 / (1 + 1.079 / m) E = alpha * m / (2.0 ** -M.astype('f8')).sum() * m # ^^^^ starts as unsigned, need a signed type for # negation operator to do something useful # Apply adjustments for small / big cardinalities, if applicable if E < 2.5 * m: V = (M == 0).sum() if V: return m * np.log(m / V) if E > 2**32 / 30.0: return -2**32 * np.log1p(-E / 2**32) return E dask-0.16.0/dask/dataframe/indexing.py000066400000000000000000000237001320364734500175560ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from datetime import datetime from collections import defaultdict from toolz import merge import bisect import numpy as np import pandas as pd from .core import new_dd_object, Series from . 
import methods from ..base import tokenize class _LocIndexer(object): """ Helper class for the .loc accessor """ def __init__(self, obj): self.obj = obj @property def _name(self): return self.obj._name def _make_meta(self, iindexer, cindexer): """ get metadata """ if cindexer is None: return self.obj else: return self.obj._meta.loc[:, cindexer] def __getitem__(self, key): if isinstance(key, tuple): # multi-dimensional selection if len(key) > self.obj.ndim: # raise from pandas msg = 'Too many indexers' raise pd.core.indexing.IndexingError(msg) iindexer = key[0] cindexer = key[1] else: # if self.obj is Series, cindexer is always None iindexer = key cindexer = None return self._loc(iindexer, cindexer) def _loc(self, iindexer, cindexer): """ Helper function for the .loc accessor """ if isinstance(iindexer, Series): return self._loc_series(iindexer, cindexer) if self.obj.known_divisions: iindexer = self._maybe_partial_time_string(iindexer) if isinstance(iindexer, slice): return self._loc_slice(iindexer, cindexer) elif isinstance(iindexer, (list, np.ndarray)): return self._loc_list(iindexer, cindexer) else: # element should raise KeyError return self._loc_element(iindexer, cindexer) else: if isinstance(iindexer, (list, np.ndarray)): # applying map_pattition to each partitions # results in duplicated NaN rows msg = 'Cannot index with list against unknown division' raise KeyError(msg) elif not isinstance(iindexer, slice): iindexer = slice(iindexer, iindexer) meta = self._make_meta(iindexer, cindexer) return self.obj.map_partitions(methods.try_loc, iindexer, cindexer, meta=meta) def _maybe_partial_time_string(self, iindexer): """ Convert index-indexer for partial time string slicing if obj.index is DatetimeIndex / PeriodIndex """ iindexer = _maybe_partial_time_string(self.obj._meta_nonempty.index, iindexer, kind='loc') return iindexer def _loc_series(self, iindexer, cindexer): meta = self._make_meta(iindexer, cindexer) return self.obj.map_partitions(methods.loc, iindexer, cindexer, token='loc-series', meta=meta) def _loc_list(self, iindexer, cindexer): name = 'loc-%s' % tokenize(iindexer, self.obj) parts = self._get_partitions(iindexer) meta = self._make_meta(iindexer, cindexer) if len(iindexer): dsk = {} divisions = [] items = sorted(parts.items()) for i, (div, indexer) in enumerate(items): dsk[name, i] = (methods.loc, (self._name, div), indexer, cindexer) # append minimum value as division divisions.append(sorted(indexer)[0]) # append maximum value of the last division divisions.append(sorted(items[-1][1])[-1]) else: divisions = [None, None] dsk = {(name, 0): meta.head(0)} return new_dd_object(merge(self.obj.dask, dsk), name, meta=meta, divisions=divisions) def _loc_element(self, iindexer, cindexer): name = 'loc-%s' % tokenize(iindexer, self.obj) part = self._get_partitions(iindexer) if iindexer < self.obj.divisions[0] or iindexer > self.obj.divisions[-1]: raise KeyError('the label [%s] is not in the index' % str(iindexer)) dsk = {(name, 0): (methods.loc, (self._name, part), slice(iindexer, iindexer), cindexer)} meta = self._make_meta(iindexer, cindexer) return new_dd_object(merge(self.obj.dask, dsk), name, meta=meta, divisions=[iindexer, iindexer]) def _get_partitions(self, keys): if isinstance(keys, (list, np.ndarray)): return _partitions_of_index_values(self.obj.divisions, keys) else: # element return _partition_of_index_value(self.obj.divisions, keys) def _coerce_loc_index(self, key): return _coerce_loc_index(self.obj.divisions, key) def _loc_slice(self, iindexer, cindexer): name = 'loc-%s' % 
tokenize(iindexer, cindexer, self) assert isinstance(iindexer, slice) assert iindexer.step in (None, 1) if iindexer.start is not None: start = self._get_partitions(iindexer.start) else: start = 0 if iindexer.stop is not None: stop = self._get_partitions(iindexer.stop) else: stop = self.obj.npartitions - 1 if iindexer.start is None and self.obj.known_divisions: istart = self.obj.divisions[0] else: istart = self._coerce_loc_index(iindexer.start) if iindexer.stop is None and self.obj.known_divisions: istop = self.obj.divisions[-1] else: istop = self._coerce_loc_index(iindexer.stop) if stop == start: dsk = {(name, 0): (methods.loc, (self._name, start), slice(iindexer.start, iindexer.stop), cindexer)} divisions = [istart, istop] else: dsk = {(name, 0): (methods.loc, (self._name, start), slice(iindexer.start, None), cindexer)} for i in range(1, stop - start): if cindexer is None: dsk[name, i] = (self._name, start + i) else: dsk[name, i] = (methods.loc, (self._name, start + i), slice(None, None), cindexer) dsk[name, stop - start] = (methods.loc, (self._name, stop), slice(None, iindexer.stop), cindexer) if iindexer.start is None: div_start = self.obj.divisions[0] else: div_start = max(istart, self.obj.divisions[start]) if iindexer.stop is None: div_stop = self.obj.divisions[-1] else: div_stop = min(istop, self.obj.divisions[stop + 1]) divisions = ((div_start, ) + self.obj.divisions[start + 1:stop + 1] + (div_stop, )) assert len(divisions) == len(dsk) + 1 meta = self._make_meta(iindexer, cindexer) return new_dd_object(merge(self.obj.dask, dsk), name, meta=meta, divisions=divisions) def _partition_of_index_value(divisions, val): """ In which partition does this value lie? >>> _partition_of_index_value([0, 5, 10], 3) 0 >>> _partition_of_index_value([0, 5, 10], 8) 1 >>> _partition_of_index_value([0, 5, 10], 100) 1 >>> _partition_of_index_value([0, 5, 10], 5) # left-inclusive divisions 1 """ if divisions[0] is None: msg = "Can not use loc on DataFrame without known divisions" raise ValueError(msg) val = _coerce_loc_index(divisions, val) i = bisect.bisect_right(divisions, val) return min(len(divisions) - 2, max(0, i - 1)) def _partitions_of_index_values(divisions, values): """ Return defaultdict of division and values pairs Each key corresponds to the division which values are index values belong to the division. 
>>> sorted(_partitions_of_index_values([0, 5, 10], [3]).items()) [(0, [3])] >>> sorted(_partitions_of_index_values([0, 5, 10], [3, 8, 5]).items()) [(0, [3]), (1, [8, 5])] """ if divisions[0] is None: msg = "Can not use loc on DataFrame without known divisions" raise ValueError(msg) results = defaultdict(list) values = pd.Index(values, dtype=object) for val in values: i = bisect.bisect_right(divisions, val) div = min(len(divisions) - 2, max(0, i - 1)) results[div].append(val) return results def _coerce_loc_index(divisions, o): """ Transform values to be comparable against divisions This is particularly valuable to use with pandas datetimes """ if divisions and isinstance(divisions[0], datetime): return pd.Timestamp(o) if divisions and isinstance(divisions[0], np.datetime64): return np.datetime64(o).astype(divisions[0].dtype) return o def _maybe_partial_time_string(index, indexer, kind): """ Convert indexer for partial string selection if data has DatetimeIndex/PeriodIndex """ # do not pass dd.Index assert isinstance(index, pd.Index) if not isinstance(index, (pd.DatetimeIndex, pd.PeriodIndex)): return indexer if isinstance(indexer, slice): if isinstance(indexer.start, pd.compat.string_types): start = index._maybe_cast_slice_bound(indexer.start, 'left', kind) else: start = indexer.start if isinstance(indexer.stop, pd.compat.string_types): stop = index._maybe_cast_slice_bound(indexer.stop, 'right', kind) else: stop = indexer.stop return slice(start, stop) elif isinstance(indexer, pd.compat.string_types): start = index._maybe_cast_slice_bound(indexer, 'left', 'loc') stop = index._maybe_cast_slice_bound(indexer, 'right', 'loc') return slice(min(start, stop), max(start, stop)) return indexer dask-0.16.0/dask/dataframe/io/000077500000000000000000000000001320364734500160045ustar00rootroot00000000000000dask-0.16.0/dask/dataframe/io/__init__.py000066400000000000000000000007231320364734500201170ustar00rootroot00000000000000from __future__ import print_function, division, absolute_import from .io import (from_array, from_bcolz, from_array, from_bcolz, from_pandas, from_dask_array, from_delayed, dataframe_from_ctable, to_bag, to_records) from .csv import read_csv, to_csv, read_table from .hdf import read_hdf, to_hdf from .sql import read_sql_table from . 
import demo try: from .parquet import read_parquet, to_parquet except ImportError: pass dask-0.16.0/dask/dataframe/io/csv.py000066400000000000000000000512741320364734500171620ustar00rootroot00000000000000from __future__ import print_function, division, absolute_import from io import BytesIO from warnings import warn, catch_warnings, simplefilter import sys try: import psutil except ImportError: psutil = None import pandas as pd from ...bytes import read_bytes from ...bytes.core import write_bytes from ...bytes.compression import seekable_files, files as cfiles from ...compatibility import PY2, PY3 from ...delayed import delayed from ...utils import asciitable from ..utils import clear_known_categories, PANDAS_VERSION from .io import from_delayed if PANDAS_VERSION >= '0.20.0': from pandas.api.types import (is_integer_dtype, is_float_dtype, is_object_dtype, is_datetime64_any_dtype) else: from pandas.types.common import (is_integer_dtype, is_float_dtype, is_object_dtype, is_datetime64_any_dtype) delayed = delayed(pure=True) def pandas_read_text(reader, b, header, kwargs, dtypes=None, columns=None, write_header=True, enforce=False): """ Convert a block of bytes to a Pandas DataFrame Parameters ---------- reader : callable ``pd.read_csv`` or ``pd.read_table``. b : bytestring The content to be parsed with ``reader`` header : bytestring An optional header to prepend to ``b`` kwargs : dict A dictionary of keyword arguments to be passed to ``reader`` dtypes : dict DTypes to assign to columns See Also -------- dask.dataframe.csv.read_pandas_from_bytes """ bio = BytesIO() if write_header and not b.startswith(header.rstrip()): bio.write(header) bio.write(b) bio.seek(0) df = reader(bio, **kwargs) if dtypes: coerce_dtypes(df, dtypes) if enforce and columns and (list(df.columns) != list(columns)): raise ValueError("Columns do not match", df.columns, columns) elif columns: df.columns = columns return df def coerce_dtypes(df, dtypes): """ Coerce dataframe to dtypes safely Operates in place Parameters ---------- df: Pandas DataFrame dtypes: dict like {'x': float} """ bad_dtypes = [] bad_dates = [] errors = [] for c in df.columns: if c in dtypes and df.dtypes[c] != dtypes[c]: actual = df.dtypes[c] desired = dtypes[c] if is_float_dtype(actual) and is_integer_dtype(desired): bad_dtypes.append((c, actual, desired)) elif is_object_dtype(actual) and is_datetime64_any_dtype(desired): # This can only occur when parse_dates is specified, but an # invalid date is encountered. Pandas then silently falls back # to object dtype. Since `object_array.astype(datetime)` will # silently overflow, error here and report. 
bad_dates.append(c) else: try: df[c] = df[c].astype(dtypes[c]) except Exception as e: bad_dtypes.append((c, actual, desired)) errors.append((c, e)) if bad_dtypes: if errors: ex = '\n'.join("- %s\n %r" % (c, e) for c, e in sorted(errors, key=lambda x: str(x[0]))) exceptions = ("The following columns also raised exceptions on " "conversion:\n\n%s\n\n") % ex extra = "" else: exceptions = "" # All mismatches are int->float, also suggest `assume_missing=True` extra = ("\n\nAlternatively, provide `assume_missing=True` " "to interpret\n" "all unspecified integer columns as floats.") bad_dtypes = sorted(bad_dtypes, key=lambda x: str(x[0])) table = asciitable(['Column', 'Found', 'Expected'], bad_dtypes) dtype_kw = ('dtype={%s}' % ',\n' ' '.join("%r: '%s'" % (k, v) for (k, v, _) in bad_dtypes)) dtype_msg = ( "{table}\n\n" "{exceptions}" "Usually this is due to dask's dtype inference failing, and\n" "*may* be fixed by specifying dtypes manually by adding:\n\n" "{dtype_kw}\n\n" "to the call to `read_csv`/`read_table`." "{extra}").format(table=table, exceptions=exceptions, dtype_kw=dtype_kw, extra=extra) else: dtype_msg = None if bad_dates: also = " also " if bad_dtypes else " " cols = '\n'.join("- %s" % c for c in bad_dates) date_msg = ( "The following columns{also}failed to properly parse as dates:\n\n" "{cols}\n\n" "This is usually due to an invalid value in that column. To\n" "diagnose and fix it's recommended to drop these columns from the\n" "`parse_dates` keyword, and manually convert them to dates later\n" "using `dd.to_datetime`.").format(also=also, cols=cols) else: date_msg = None if bad_dtypes or bad_dates: rule = "\n\n%s\n\n" % ('-' * 61) msg = ("Mismatched dtypes found in `pd.read_csv`/`pd.read_table`.\n\n" "%s" % (rule.join(filter(None, [dtype_msg, date_msg])))) raise ValueError(msg) def text_blocks_to_pandas(reader, block_lists, header, head, kwargs, collection=True, enforce=False): """ Convert blocks of bytes to a dask.dataframe or other high-level object This accepts a list of lists of values of bytes where each list corresponds to one file, and the value of bytes concatenate to comprise the entire file, in order. Parameters ---------- reader : callable ``pd.read_csv`` or ``pd.read_table``. block_lists : list of lists of delayed values of bytes The lists of bytestrings where each list corresponds to one logical file header : bytestring The header, found at the front of the first file, to be prepended to all blocks head : pd.DataFrame An example Pandas DataFrame to be used for metadata. 
Can be ``None`` if ``collection==False`` kwargs : dict Keyword arguments to pass down to ``reader`` collection: boolean, optional (defaults to True) Returns ------- A dask.dataframe or list of delayed values """ dtypes = head.dtypes.to_dict() columns = list(head.columns) delayed_pandas_read_text = delayed(pandas_read_text) dfs = [] for blocks in block_lists: if not blocks: continue df = delayed_pandas_read_text(reader, blocks[0], header, kwargs, dtypes, columns, write_header=False, enforce=enforce) dfs.append(df) rest_kwargs = kwargs.copy() rest_kwargs.pop('skiprows', None) for b in blocks[1:]: dfs.append(delayed_pandas_read_text(reader, b, header, rest_kwargs, dtypes, columns, enforce=enforce)) if collection: head = clear_known_categories(head) return from_delayed(dfs, head) else: return dfs def auto_blocksize(total_memory, cpu_count): memory_factor = 10 blocksize = int(total_memory // cpu_count / memory_factor) return min(blocksize, int(64e6)) # guess blocksize if psutil is installed or use acceptable default one if not if psutil is not None: with catch_warnings(): simplefilter("ignore", RuntimeWarning) TOTAL_MEM = psutil.virtual_memory().total CPU_COUNT = psutil.cpu_count() AUTO_BLOCKSIZE = auto_blocksize(TOTAL_MEM, CPU_COUNT) else: AUTO_BLOCKSIZE = 2**25 def read_pandas(reader, urlpath, blocksize=AUTO_BLOCKSIZE, collection=True, lineterminator=None, compression=None, sample=256000, enforce=False, assume_missing=False, storage_options=None, **kwargs): reader_name = reader.__name__ if lineterminator is not None and len(lineterminator) == 1: kwargs['lineterminator'] = lineterminator else: lineterminator = '\n' if 'index' in kwargs or 'index_col' in kwargs: raise ValueError("Keyword 'index' not supported " "dd.{0}(...).set_index('my-index') " "instead".format(reader_name)) for kw in ['iterator', 'chunksize']: if kw in kwargs: raise ValueError("{0} not supported for " "dd.{1}".format(kw, reader_name)) if kwargs.get('nrows', None): raise ValueError("The 'nrows' keyword is not supported by " "`dd.{0}`. To achieve the same behavior, it's " "recommended to use `dd.{0}(...)." "head(n=nrows)`".format(reader_name)) if isinstance(kwargs.get('skiprows'), list): raise TypeError("List of skiprows not supported for " "dd.{0}".format(reader_name)) if isinstance(kwargs.get('header'), list): raise TypeError("List of header rows not supported for " "dd.{0}".format(reader_name)) if blocksize and compression not in seekable_files: warn("Warning %s compression does not support breaking apart files\n" "Please ensure that each individual file can fit in memory and\n" "use the keyword ``blocksize=None to remove this message``\n" "Setting ``blocksize=None``" % compression) blocksize = None if compression not in seekable_files and compression not in cfiles: raise NotImplementedError("Compression format %s not installed" % compression) b_lineterminator = lineterminator.encode() b_sample, values = read_bytes(urlpath, delimiter=b_lineterminator, blocksize=blocksize, sample=sample, compression=compression, **(storage_options or {})) if not isinstance(values[0], (tuple, list)): values = [values] # Get header row, and check that sample is long enough. If the file # contains a header row, we need at least 2 nonempty rows + the number of # rows to skip. 
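    # Comment-only worked example (hypothetical inputs): with header='infer'
    # and skiprows=1 below, need = 2, so the sample must split into at least
    # skiprows + need = 3 newline-delimited parts before the header row
    # (parts[skiprows]) and at least one data row can be trusted.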
skiprows = kwargs.get('skiprows', 0) header = kwargs.get('header', 'infer') need = 1 if header is None else 2 parts = b_sample.split(b_lineterminator, skiprows + need) # If the last partition is empty, don't count it nparts = 0 if not parts else len(parts) - int(not parts[-1]) if nparts < skiprows + need and len(b_sample) >= sample: raise ValueError("Sample is not large enough to include at least one " "row of data. Please increase the number of bytes " "in `sample` in the call to `read_csv`/`read_table`") header = b'' if header is None else parts[skiprows] + b_lineterminator # Use sample to infer dtypes head = reader(BytesIO(b_sample), **kwargs) specified_dtypes = kwargs.get('dtype', {}) if specified_dtypes is None: specified_dtypes = {} # If specified_dtypes is a single type, then all columns were specified if assume_missing and isinstance(specified_dtypes, dict): # Convert all non-specified integer columns to floats for c in head.columns: if is_integer_dtype(head[c].dtype) and c not in specified_dtypes: head[c] = head[c].astype(float) return text_blocks_to_pandas(reader, values, header, head, kwargs, collection=collection, enforce=enforce) READ_DOC_TEMPLATE = """ Read {file_type} files into a Dask.DataFrame This parallelizes the ``pandas.{reader}`` function in the following ways: - It supports loading many files at once using globstrings: >>> df = dd.{reader}('myfiles.*.csv') # doctest: +SKIP - In some cases it can break up large files: >>> df = dd.{reader}('largefile.csv', blocksize=25e6) # 25MB chunks # doctest: +SKIP - It can read CSV files from external resources (e.g. S3, HDFS) by providing a URL: >>> df = dd.{reader}('s3://bucket/myfiles.*.csv') # doctest: +SKIP >>> df = dd.{reader}('hdfs:///myfiles.*.csv') # doctest: +SKIP >>> df = dd.{reader}('hdfs://namenode.example.com/myfiles.*.csv') # doctest: +SKIP Internally ``dd.{reader}`` uses ``pandas.{reader}`` and supports many of the same keyword arguments with the same performance guarantees. See the docstring for ``pandas.{reader}`` for more information on available keyword arguments. Parameters ---------- urlpath : string Absolute or relative filepath, URL (may include protocols like ``s3://``), or globstring for {file_type} files. blocksize : int or None, optional Number of bytes by which to cut up larger files. Default value is computed based on available physical memory and the number of cores. If ``None``, use a single block for each file. collection : boolean, optional Return a dask.dataframe if True or list of dask.delayed objects if False sample : int, optional Number of bytes to use when determining dtypes assume_missing : bool, optional If True, all integer columns that aren't specified in ``dtype`` are assumed to contain missing values, and are converted to floats. Default is False. storage_options : dict, optional Extra options that make sense for a particular storage connection, e.g. host, port, username, password, etc. **kwargs Extra keyword arguments to forward to ``pandas.{reader}``. Notes ----- Dask dataframe tries to infer the ``dtype`` of each column by reading a sample from the start of the file (or of the first file if it's a glob). Usually this works fine, but if the ``dtype`` is different later in the file (or in other files) this can cause issues. For example, if all the rows in the sample had integer dtypes, but later on there was a ``NaN``, then this would error at compute time. To fix this, you have a few options: - Provide explicit dtypes for the offending columns using the ``dtype`` keyword. 
This is the recommended solution. - Use the ``assume_missing`` keyword to assume that all columns inferred as integers contain missing values, and convert them to floats. - Increase the size of the sample using the ``sample`` keyword. It should also be noted that this function may fail if a {file_type} file includes quoted strings that contain the line terminator. To get around this you can specify ``blocksize=None`` to not split files into multiple partitions, at the cost of reduced parallelism. """ def make_reader(reader, reader_name, file_type): def read(urlpath, blocksize=AUTO_BLOCKSIZE, collection=True, lineterminator=None, compression=None, sample=256000, enforce=False, assume_missing=False, storage_options=None, **kwargs): return read_pandas(reader, urlpath, blocksize=blocksize, collection=collection, lineterminator=lineterminator, compression=compression, sample=sample, enforce=enforce, assume_missing=assume_missing, storage_options=storage_options, **kwargs) read.__doc__ = READ_DOC_TEMPLATE.format(reader=reader_name, file_type=file_type) read.__name__ = reader_name return read read_csv = make_reader(pd.read_csv, 'read_csv', 'CSV') read_table = make_reader(pd.read_table, 'read_table', 'delimited') @delayed def _to_csv_chunk(df, **kwargs): import io if PY2: out = io.BytesIO() else: out = io.StringIO() df.to_csv(out, **kwargs) out.seek(0) if PY2: return out.getvalue() encoding = kwargs.get('encoding', sys.getdefaultencoding()) return out.getvalue().encode(encoding) def to_csv(df, filename, name_function=None, compression=None, compute=True, get=None, storage_options=None, **kwargs): """ Store Dask DataFrame to CSV files One filename per partition will be created. You can specify the filenames in a variety of ways. Use a globstring:: >>> df.to_csv('/path/to/data/export-*.csv') # doctest: +SKIP The * will be replaced by the increasing sequence 0, 1, 2, ... :: /path/to/data/export-0.csv /path/to/data/export-1.csv Use a globstring and a ``name_function=`` keyword argument. The name_function function should expect an integer and produce a string. Strings produced by name_function must preserve the order of their respective partition indices. >>> from datetime import date, timedelta >>> def name(i): ... return str(date(2015, 1, 1) + i * timedelta(days=1)) >>> name(0) '2015-01-01' >>> name(15) '2015-01-16' >>> df.to_csv('/path/to/data/export-*.csv', name_function=name) # doctest: +SKIP :: /path/to/data/export-2015-01-01.csv /path/to/data/export-2015-01-02.csv ... You can also provide an explicit list of paths:: >>> paths = ['/path/to/data/alice.csv', '/path/to/data/bob.csv', ...] # doctest: +SKIP >>> df.to_csv(paths) # doctest: +SKIP Parameters ---------- filename : string Path glob indicating the naming scheme for the output files name_function : callable, default None Function accepting an integer (partition index) and producing a string to replace the asterisk in the given filename globstring. Should preserve the lexicographic order of partitions compression : string or None String like 'gzip' or 'xz'. Must support efficient random access. Filenames with extensions corresponding to known compression algorithms (gz, bz2) will be compressed accordingly automatically sep : character, default ',' Field delimiter for the output file na_rep : string, default '' Missing data representation float_format : string, default None Format string for floating point numbers columns : sequence, optional Columns to write header : boolean or list of string, default True Write out column names. 
If a list of string is given it is assumed to be aliases for the column names index : boolean, default True Write row names (index) index_label : string or sequence, or False, default None Column label for index column(s) if desired. If None is given, and `header` and `index` are True, then the index names are used. A sequence should be given if the DataFrame uses MultiIndex. If False do not print fields for index names. Use index_label=False for easier importing in R nanRep : None deprecated, use na_rep mode : str Python write mode, default 'w' encoding : string, optional A string representing the encoding to use in the output file, defaults to 'ascii' on Python 2 and 'utf-8' on Python 3. compression : string, optional a string representing the compression to use in the output file, allowed values are 'gzip', 'bz2', 'xz', only used when the first argument is a filename line_terminator : string, default '\\n' The newline character or character sequence to use in the output file quoting : optional constant from csv module defaults to csv.QUOTE_MINIMAL quotechar : string (length 1), default '\"' character used to quote fields doublequote : boolean, default True Control quoting of `quotechar` inside a field escapechar : string (length 1), default None character used to escape `sep` and `quotechar` when appropriate chunksize : int or None rows to write at a time tupleize_cols : boolean, default False write multi_index columns as a list of tuples (if True) or new (expanded format) if False) date_format : string, default None Format string for datetime objects decimal: string, default '.' Character recognized as decimal separator. E.g. use ',' for European data storage_options: dict Parameters passed on to the backend filesystem class. Returns ------- The names of the file written if they were computed right away If not, the delayed tasks associated to the writing of the files """ values = [_to_csv_chunk(d, **kwargs) for d in df.to_delayed()] (values, names) = write_bytes(values, filename, name_function, compression, encoding=None, **(storage_options or {})) if compute: delayed(values).compute(get=get) return names else: return values if PY3: from ..core import _Frame _Frame.to_csv.__doc__ = to_csv.__doc__ dask-0.16.0/dask/dataframe/io/demo.py000066400000000000000000000175271320364734500173160ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import pandas as pd import numpy as np from ..core import tokenize, DataFrame from .io import from_delayed from ...delayed import delayed from ...utils import random_state_data __all__ = ['make_timeseries'] def make_float(n, rstate): return rstate.rand(n) * 2 - 1 def make_int(n, rstate): return rstate.poisson(1000, size=n) names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank', 'George', 'Hannah', 'Ingrid', 'Jerry', 'Kevin', 'Laura', 'Michael', 'Norbert', 'Oliver', 'Patricia', 'Quinn', 'Ray', 'Sarah', 'Tim', 'Ursula', 'Victor', 'Wendy', 'Xavier', 'Yvonne', 'Zelda'] def make_string(n, rstate): return rstate.choice(names, size=n) def make_categorical(n, rstate): return pd.Categorical.from_codes(rstate.randint(0, len(names), size=n), names) make = {float: make_float, int: make_int, str: make_string, object: make_string, 'category': make_categorical} def make_timeseries_part(start, end, dtypes, freq, state_data): index = pd.DatetimeIndex(start=start, end=end, freq=freq) state = np.random.RandomState(state_data) columns = dict((k, make[dt](len(index), state)) for k, dt in dtypes.items()) df = pd.DataFrame(columns, 
index=index, columns=sorted(columns)) if df.index[-1] == end: df = df.iloc[:-1] return df def make_timeseries(start, end, dtypes, freq, partition_freq, seed=None): """ Create timeseries dataframe with random data Parameters ---------- start: datetime (or datetime-like string) Start of time series end: datetime (or datetime-like string) End of time series dtypes: dict Mapping of column names to types. Valid types include {float, int, str, 'category'} freq: string String like '2s' or '1H' or '12W' for the time series frequency partition_freq: string String like '1M' or '2Y' to divide the dataframe into partitions seed: int (optional) Randomstate seed >>> import dask.dataframe as dd >>> df = dd.demo.make_timeseries('2000', '2010', ... {'value': float, 'name': str, 'id': int}, ... freq='2H', partition_freq='1D', seed=1) >>> df.head() # doctest: +SKIP id name value 2000-01-01 00:00:00 969 Jerry -0.309014 2000-01-01 02:00:00 1010 Ray -0.760675 2000-01-01 04:00:00 1016 Patricia -0.063261 2000-01-01 06:00:00 960 Charlie 0.788245 2000-01-01 08:00:00 1031 Kevin 0.466002 """ divisions = list(pd.DatetimeIndex(start=start, end=end, freq=partition_freq)) state_data = random_state_data(len(divisions) - 1, seed) name = 'make-timeseries-' + tokenize(start, end, dtypes, freq, partition_freq, state_data) dsk = {(name, i): (make_timeseries_part, divisions[i], divisions[i + 1], dtypes, freq, state_data[i]) for i in range(len(divisions) - 1)} head = make_timeseries_part('2000', '2000', dtypes, '1H', state_data[0]) return DataFrame(dsk, name, head, divisions) def generate_day(date, open, high, low, close, volume, freq=pd.Timedelta(seconds=60), random_state=None): """ Generate a day of financial data from open/close high/low values """ if not isinstance(random_state, np.random.RandomState): random_state = np.random.RandomState(random_state) if not isinstance(date, pd.Timestamp): date = pd.Timestamp(date) if not isinstance(freq, pd.Timedelta): freq = pd.Timedelta(freq) time = pd.date_range(date + pd.Timedelta(hours=9), date + pd.Timedelta(hours=12 + 4), freq=freq / 5, name='timestamp') n = len(time) while True: values = (random_state.random_sample(n) - 0.5).cumsum() values *= (high - low) / (values.max() - values.min()) # scale values += np.linspace(open - values[0], close - values[-1], len(values)) # endpoints assert np.allclose(open, values[0]) assert np.allclose(close, values[-1]) mx = max(close, open) mn = min(close, open) ind = values > mx values[ind] = (values[ind] - mx) * (high - mx) / (values.max() - mx) + mx ind = values < mn values[ind] = (values[ind] - mn) * (low - mn) / (values.min() - mn) + mn # The process fails if min/max are the same as open close. This is rare if (np.allclose(values.max(), high) and np.allclose(values.min(), low)): break s = pd.Series(values.round(3), index=time) rs = s.resample(freq) # TODO: add in volume return pd.DataFrame({'open': rs.first(), 'close': rs.last(), 'high': rs.max(), 'low': rs.min()}) def daily_stock(symbol, start, stop, freq=pd.Timedelta(seconds=1), data_source='yahoo', random_state=None): """ Create artificial stock data This data matches daily open/high/low/close values from Yahoo! Finance, but interpolates values within each day with random values. This makes the results look natural without requiring the downloading of large volumes of data. This is useful for education and benchmarking. 
Parameters ---------- symbol: string A stock symbol like "GOOG" or "F" start: date, str, or pd.Timestamp The start date, input will be fed into pd.Timestamp for normalization stop: date, str, or pd.Timestamp The start date, input will be fed into pd.Timestamp for normalization freq: timedelta, str, or pd.Timedelta The frequency of sampling data_source: str, optional defaults to 'yahoo'. See pandas_datareader.data.DataReader for options random_state: int, np.random.RandomState object random seed, defaults to randomly chosen Examples -------- >>> import dask.dataframe as dd # doctest: +SKIP >>> df = dd.demo.daily_stock('GOOG', '2010', '2011', freq='1s') # doctest: +SKIP >>> df # doctest: +SKIP Dask DataFrame Structure: close high low open npartitions=252 2010-01-04 09:00:00 float64 float64 float64 float64 2010-01-05 09:00:00 ... ... ... ... ... ... ... ... ... 2010-12-31 09:00:00 ... ... ... ... 2010-12-31 16:00:00 ... ... ... ... Dask Name: from-delayed, 504 tasks >>> df.head() # doctest: +SKIP close high low open timestamp 2010-01-04 09:00:00 626.944 626.964 626.944 626.951 2010-01-04 09:00:01 626.906 626.931 626.906 626.931 2010-01-04 09:00:02 626.901 626.911 626.901 626.905 2010-01-04 09:00:03 626.920 626.920 626.905 626.905 2010-01-04 09:00:04 626.894 626.917 626.894 626.906 """ from pandas_datareader import data df = data.DataReader(symbol, data_source, start, stop) seeds = random_state_data(len(df), random_state=random_state) parts = [] divisions = [] for i, seed in zip(range(len(df)), seeds): s = df.iloc[i] if s.isnull().any(): continue part = delayed(generate_day)(s.name, s.loc['Open'], s.loc['High'], s.loc['Low'], s.loc['Close'], s.loc['Volume'], freq=freq, random_state=seed) parts.append(part) divisions.append(s.name + pd.Timedelta(hours=9)) divisions.append(s.name + pd.Timedelta(hours=12 + 4)) meta = generate_day('2000-01-01', 1, 2, 0, 1, 100) return from_delayed(parts, meta=meta, divisions=divisions) dask-0.16.0/dask/dataframe/io/hdf.py000066400000000000000000000353161320364734500171270ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from fnmatch import fnmatch from glob import glob import os import uuid from warnings import warn import pandas as pd from toolz import merge from .io import _link from ..core import DataFrame, new_dd_object from ... import multiprocessing from ...base import tokenize, compute_as_if_collection from ...bytes.utils import build_name_function from ...compatibility import PY3 from ...context import _globals from ...delayed import Delayed, delayed from ...local import get_sync from ...utils import effective_get, get_scheduler_lock def _pd_to_hdf(pd_to_hdf, lock, args, kwargs=None): """ A wrapper function around pd_to_hdf that enables locking""" if lock: lock.acquire() try: pd_to_hdf(*args, **kwargs) finally: if lock: lock.release() return None def to_hdf(df, path, key, mode='a', append=False, get=None, name_function=None, compute=True, lock=None, dask_kwargs={}, **kwargs): """ Store Dask Dataframe to Hierarchical Data Format (HDF) files This is a parallel version of the Pandas function of the same name. Please see the Pandas docstring for more detailed information about shared keyword arguments. This function differs from the Pandas version by saving the many partitions of a Dask DataFrame in parallel, either to many files, or to many datasets within the same file. You may specify this parallelism with an asterix ``*`` within the filename or datapath, and an optional ``name_function``. 
The asterix will be replaced with an increasing sequence of integers starting from ``0`` or with the result of calling ``name_function`` on each of those integers. This function only supports the Pandas ``'table'`` format, not the more specialized ``'fixed'`` format. Parameters ---------- path: string Path to a target filename. May contain a ``*`` to denote many filenames key: string Datapath within the files. May contain a ``*`` to denote many locations name_function: function A function to convert the ``*`` in the above options to a string. Should take in a number from 0 to the number of partitions and return a string. (see examples below) compute: bool Whether or not to execute immediately. If False then this returns a ``dask.Delayed`` value. lock: Lock, optional Lock to use to prevent concurrency issues. By default a ``threading.Lock``, ``multiprocessing.Lock`` or ``SerializableLock`` will be used depending on your scheduler if a lock is required. See dask.utils.get_scheduler_lock for more information about lock selection. **other: See pandas.to_hdf for more information Examples -------- Save Data to a single file >>> df.to_hdf('output.hdf', '/data') # doctest: +SKIP Save data to multiple datapaths within the same file: >>> df.to_hdf('output.hdf', '/data-*') # doctest: +SKIP Save data to multiple files: >>> df.to_hdf('output-*.hdf', '/data') # doctest: +SKIP Save data to multiple files, using the multiprocessing scheduler: >>> df.to_hdf('output-*.hdf', '/data', get=dask.multiprocessing.get) # doctest: +SKIP Specify custom naming scheme. This writes files as '2000-01-01.hdf', '2000-01-02.hdf', '2000-01-03.hdf', etc.. >>> from datetime import date, timedelta >>> base = date(year=2000, month=1, day=1) >>> def name_function(i): ... ''' Convert integer 0 to n to a string ''' ... return base + timedelta(days=i) >>> df.to_hdf('*.hdf', '/data', name_function=name_function) # doctest: +SKIP Returns ------- None: if compute == True delayed value: if compute == False See Also -------- read_hdf: to_parquet: """ name = 'to-hdf-' + uuid.uuid1().hex pd_to_hdf = getattr(df._partition_type, 'to_hdf') single_file = True single_node = True # if path is string, format using i_name if isinstance(path, str): if path.count('*') + key.count('*') > 1: raise ValueError("A maximum of one asterisk is accepted in file " "path and dataset key") fmt_obj = lambda path, i_name: path.replace('*', i_name) if '*' in path: single_file = False else: if key.count('*') > 1: raise ValueError("A maximum of one asterisk is accepted in " "dataset key") fmt_obj = lambda path, _: path if '*' in key: single_node = False if 'format' in kwargs and kwargs['format'] != 'table': raise ValueError("Dask only support 'table' format in hdf files.") if mode not in ('a', 'w', 'r+'): raise ValueError("Mode must be one of 'a', 'w' or 'r+'") if name_function is None: name_function = build_name_function(df.npartitions - 1) # we guarantee partition order is preserved when its saved and read # so we enforce name_function to maintain the order of its input. if not (single_file and single_node): formatted_names = [name_function(i) for i in range(df.npartitions)] if formatted_names != sorted(formatted_names): warn("To preserve order between partitions name_function " "must preserve the order of its input") # If user did not specify scheduler and write is sequential default to the # sequential scheduler. 
otherwise let the _get method choose the scheduler if get is None and 'get' not in _globals and single_node and single_file: get = get_sync # handle lock default based on whether we're writing to a single entity _actual_get = effective_get(get, df) if lock is None: if not single_node: lock = True elif not single_file and _actual_get is not multiprocessing.get: # if we're writing to multiple files with the multiprocessing # scheduler we don't need to lock lock = True else: lock = False if lock: lock = get_scheduler_lock(get, df) kwargs.update({'format': 'table', 'mode': mode, 'append': append}) dsk = dict() i_name = name_function(0) dsk[(name, 0)] = (_pd_to_hdf, pd_to_hdf, lock, [(df._name, 0), fmt_obj(path, i_name), key.replace('*', i_name)], kwargs) kwargs2 = kwargs.copy() if single_file: kwargs2['mode'] = 'a' if single_node: kwargs2['append'] = True filenames = [] for i in range(0,df.npartitions): i_name = name_function(i) filenames.append(fmt_obj(path, i_name)) for i in range(1, df.npartitions): i_name = name_function(i) task = (_pd_to_hdf, pd_to_hdf, lock, [(df._name, i), fmt_obj(path, i_name), key.replace('*', i_name)], kwargs2) if single_file: link_dep = i - 1 if single_node else 0 task = (_link, (name, link_dep), task) dsk[(name, i)] = task dsk = merge(df.dask, dsk) if single_file and single_node: keys = [(name, df.npartitions - 1)] else: keys = [(name, i) for i in range(df.npartitions)] if compute: compute_as_if_collection(DataFrame, dsk, keys, get=get, **dask_kwargs) return filenames else: return delayed([Delayed(k, dsk) for k in keys]) dont_use_fixed_error_message = """ This HDFStore is not partitionable and can only be use monolithically with pandas. In the future when creating HDFStores use the ``format='table'`` option to ensure that your dataset can be parallelized""" read_hdf_error_msg = """ The start and stop keywords are not supported when reading from more than one file/dataset. The combination is ambiguous because it could be interpreted as the starting and stopping index per file, or starting and stopping index of the global dataset.""" def _read_single_hdf(path, key, start=0, stop=None, columns=None, chunksize=int(1e6), sorted_index=False, lock=None, mode='a'): """ Read a single hdf file into a dask.dataframe. Used for each file in read_hdf. """ def get_keys_stops_divisions(path, key, stop, sorted_index): """ Get the "keys" or group identifiers which match the given key, which can contain wildcards. This uses the hdf file identified by the given path. Also get the index of the last row of data for each matched key. """ with pd.HDFStore(path, mode=mode) as hdf: keys = [k for k in hdf.keys() if fnmatch(k, key)] stops = [] divisions = [] for k in keys: storer = hdf.get_storer(k) if storer.format_type != 'table': raise TypeError(dont_use_fixed_error_message) if stop is None: stops.append(storer.nrows) elif stop > storer.nrows: raise ValueError("Stop keyword exceeds dataset number " "of rows ({})".format(storer.nrows)) else: stops.append(stop) if sorted_index: division_start = storer.read_column('index', start=0, stop=1)[0] division_end = storer.read_column('index', start=storer.nrows - 1, stop=storer.nrows)[0] divisions.append([division_start, division_end]) else: divisions.append(None) return keys, stops, divisions def one_path_one_key(path, key, start, stop, columns, chunksize, division, lock): """ Get the data frame corresponding to one path and one key (which should not contain any wildcards). 
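        Clarifying note added to the original docstring: when no sorted-index
        divisions are available, the key is read in ``chunksize``-row windows
        by passing ``start``/``stop`` to ``pandas.read_hdf`` for each
        partition.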
""" empty = pd.read_hdf(path, key, mode=mode, stop=0) if columns is not None: empty = empty[columns] token = tokenize((path, os.path.getmtime(path), key, start, stop, empty, chunksize, division)) name = 'read-hdf-' + token if empty.ndim == 1: base = {'name': empty.name, 'mode': mode} else: base = {'columns': empty.columns, 'mode': mode} if start >= stop: raise ValueError("Start row number ({}) is above or equal to stop " "row number ({})".format(start, stop)) if division: dsk = {(name, 0): (_pd_read_hdf, path, key, lock, base)} divisions = division else: def update(s): new = base.copy() new.update({'start': s, 'stop': s + chunksize}) return new dsk = dict(((name, i), (_pd_read_hdf, path, key, lock, update(s))) for i, s in enumerate(range(start, stop, chunksize))) divisions = [None] * (len(dsk) + 1) return new_dd_object(dsk, name, empty, divisions) keys, stops, divisions = get_keys_stops_divisions(path, key, stop, sorted_index) if (start != 0 or stop is not None) and len(keys) > 1: raise NotImplementedError(read_hdf_error_msg) from ..multi import concat return concat([one_path_one_key(path, k, start, s, columns, chunksize, d, lock) for k, s, d in zip(keys, stops, divisions)]) def _pd_read_hdf(path, key, lock, kwargs): """ Read from hdf5 file with a lock """ if lock: lock.acquire() try: result = pd.read_hdf(path, key, **kwargs) finally: if lock: lock.release() return result def read_hdf(pattern, key, start=0, stop=None, columns=None, chunksize=1000000, sorted_index=False, lock=True, mode='a'): """ Read HDF files into a Dask DataFrame Read hdf files into a dask dataframe. This function is like ``pandas.read_hdf``, except it can read from a single large file, or from multiple files, or from multiple keys from the same file. Parameters ---------- pattern : string, list File pattern (string), buffer to read from, or list of file paths. Can contain wildcards. key : group identifier in the store. Can contain wildcards start : optional, integer (defaults to 0), row number to start at stop : optional, integer (defaults to None, the last row), row number to stop at columns : list of columns, optional A list of columns that if not None, will limit the return columns (default is None) chunksize : positive integer, optional Maximal number of rows per partition (default is 1000000). sorted_index : boolean, optional Option to specify whether or not the input hdf files have a sorted index (default is False). lock : boolean, optional Option to use a lock to prevent concurrency issues (default is True). mode : {'a', 'r', 'r+'}, default 'a'. Mode to use when opening file(s). 'r' Read-only; no data can be modified. 'a' Append; an existing file is opened for reading and writing, and if the file does not exist it is created. 'r+' It is similar to 'a', but the file must already exist. 
Returns ------- dask.DataFrame Examples -------- Load single file >>> dd.read_hdf('myfile.1.hdf5', '/x') # doctest: +SKIP Load multiple files >>> dd.read_hdf('myfile.*.hdf5', '/x') # doctest: +SKIP >>> dd.read_hdf(['myfile.1.hdf5', 'myfile.2.hdf5'], '/x') # doctest: +SKIP Load multiple datasets >>> dd.read_hdf('myfile.1.hdf5', '/*') # doctest: +SKIP """ if lock is True: lock = get_scheduler_lock() key = key if key.startswith('/') else '/' + key if isinstance(pattern, str): paths = sorted(glob(pattern)) else: paths = pattern if (start != 0 or stop is not None) and len(paths) > 1: raise NotImplementedError(read_hdf_error_msg) if chunksize <= 0: raise ValueError("Chunksize must be a positive integer") if (start != 0 or stop is not None) and sorted_index: raise ValueError("When assuming pre-partitioned data, data must be " "read in its entirety using the same chunksizes") from ..multi import concat return concat([_read_single_hdf(path, key, start=start, stop=stop, columns=columns, chunksize=chunksize, sorted_index=sorted_index, lock=lock, mode=mode) for path in paths]) if PY3: from ..core import _Frame _Frame.to_hdf.__doc__ = to_hdf.__doc__ dask-0.16.0/dask/dataframe/io/io.py000066400000000000000000000457661320364734500170070ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from math import ceil from operator import getitem import os from threading import Lock import pandas as pd import numpy as np from toolz import merge from ...base import tokenize from ...compatibility import unicode, PY3 from ... import array as da from ...delayed import delayed from ..core import DataFrame, Series, new_dd_object from ..shuffle import set_partition from ..utils import insert_meta_param_description, check_meta, make_meta from ...utils import M, ensure_dict lock = Lock() def _meta_from_array(x, columns=None): """ Create empty pd.DataFrame or pd.Series which has correct dtype """ if x.ndim > 2: raise ValueError('from_array does not input more than 2D array, got' ' array with shape %r' % (x.shape,)) if getattr(x.dtype, 'names', None) is not None: # record array has named columns if columns is None: columns = list(x.dtype.names) elif np.isscalar(columns): raise ValueError("For a struct dtype, columns must be a list.") elif not all(i in x.dtype.names for i in columns): extra = sorted(set(columns).difference(x.dtype.names)) raise ValueError("dtype {0} doesn't have fields " "{1}".format(x.dtype, extra)) fields = x.dtype.fields dtypes = [fields[n][0] if n in fields else 'f8' for n in columns] elif x.ndim == 1: if np.isscalar(columns) or columns is None: return pd.Series([], name=columns, dtype=x.dtype) elif len(columns) == 1: return pd.DataFrame(np.array([], dtype=x.dtype), columns=columns) raise ValueError("For a 1d array, columns must be a scalar or single " "element list") else: if np.isnan(x.shape[1]): raise ValueError("Shape along axis 1 must be known") if columns is None: columns = list(range(x.shape[1])) if x.ndim == 2 else [0] elif len(columns) != x.shape[1]: raise ValueError("Number of column names must match width of the " "array. Got {0} names for {1} " "columns".format(len(columns), x.shape[1])) dtypes = [x.dtype] * len(columns) data = {c: np.array([], dtype=dt) for (c, dt) in zip(columns, dtypes)} return pd.DataFrame(data, columns=columns) def from_array(x, chunksize=50000, columns=None): """ Read any slicable array into a Dask Dataframe Uses getitem syntax to pull slices out of the array. 
The array need not be a NumPy array but must support slicing syntax x[50000:100000] and have 2 dimensions: x.ndim == 2 or have a record dtype: x.dtype == [('name', 'O'), ('balance', 'i8')] """ if isinstance(x, da.Array): return from_dask_array(x, columns=columns) meta = _meta_from_array(x, columns) divisions = tuple(range(0, len(x), chunksize)) divisions = divisions + (len(x) - 1,) token = tokenize(x, chunksize, columns) name = 'from_array-' + token dsk = {} for i in range(0, int(ceil(len(x) / chunksize))): data = (getitem, x, slice(i * chunksize, (i + 1) * chunksize)) if isinstance(meta, pd.Series): dsk[name, i] = (pd.Series, data, None, meta.dtype, meta.name) else: dsk[name, i] = (pd.DataFrame, data, None, meta.columns) return new_dd_object(dsk, name, meta, divisions) def from_pandas(data, npartitions=None, chunksize=None, sort=True, name=None): """ Construct a Dask DataFrame from a Pandas DataFrame This splits an in-memory Pandas dataframe into several parts and constructs a dask.dataframe from those parts on which Dask.dataframe can operate in parallel. Note that, despite parallelism, Dask.dataframe may not always be faster than Pandas. We recommend that you stay with Pandas for as long as possible before switching to Dask.dataframe. Parameters ---------- df : pandas.DataFrame or pandas.Series The DataFrame/Series with which to construct a Dask DataFrame/Series npartitions : int, optional The number of partitions of the index to create. Note that depending on the size and index of the dataframe, the output may have fewer partitions than requested. chunksize : int, optional The number of rows per index partition to use. sort: bool Sort input first to obtain cleanly divided partitions or don't sort and don't get cleanly divided partitions name: string, optional An optional keyname for the dataframe. Defaults to hashing the input Returns ------- dask.DataFrame or dask.Series A dask DataFrame/Series partitioned along the index Examples -------- >>> df = pd.DataFrame(dict(a=list('aabbcc'), b=list(range(6))), ... index=pd.date_range(start='20100101', periods=6)) >>> ddf = from_pandas(df, npartitions=3) >>> ddf.divisions # doctest: +NORMALIZE_WHITESPACE (Timestamp('2010-01-01 00:00:00', freq='D'), Timestamp('2010-01-03 00:00:00', freq='D'), Timestamp('2010-01-05 00:00:00', freq='D'), Timestamp('2010-01-06 00:00:00', freq='D')) >>> ddf = from_pandas(df.a, npartitions=3) # Works with Series too! >>> ddf.divisions # doctest: +NORMALIZE_WHITESPACE (Timestamp('2010-01-01 00:00:00', freq='D'), Timestamp('2010-01-03 00:00:00', freq='D'), Timestamp('2010-01-05 00:00:00', freq='D'), Timestamp('2010-01-06 00:00:00', freq='D')) Raises ------ TypeError If something other than a ``pandas.DataFrame`` or ``pandas.Series`` is passed in. 
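    Notes
    -----
    As an illustrative, unofficial variant of the example above, partitioning
    can also be driven by row count rather than by partition count:

    >>> ddf = from_pandas(df, chunksize=2)  # doctest: +SKIP
    >>> ddf.npartitions  # doctest: +SKIP
    3
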
See Also -------- from_array : Construct a dask.DataFrame from an array that has record dtype read_csv : Construct a dask.DataFrame from a CSV file """ if isinstance(getattr(data, 'index', None), pd.MultiIndex): raise NotImplementedError("Dask does not support MultiIndex Dataframes.") if not isinstance(data, (pd.Series, pd.DataFrame)): raise TypeError("Input must be a pandas DataFrame or Series") if ((npartitions is None) == (chunksize is None)): raise ValueError('Exactly one of npartitions and chunksize must be specified.') nrows = len(data) if chunksize is None: chunksize = int(ceil(nrows / npartitions)) else: npartitions = int(ceil(nrows / chunksize)) name = name or ('from_pandas-' + tokenize(data, chunksize)) if not nrows: return new_dd_object({(name, 0): data}, name, data, [None, None]) if sort and not data.index.is_monotonic_increasing: data = data.sort_index(ascending=True) if sort: divisions, locations = sorted_division_locations(data.index, chunksize=chunksize) else: locations = list(range(0, nrows, chunksize)) + [len(data)] divisions = [None] * len(locations) dsk = dict(((name, i), data.iloc[start: stop]) for i, (start, stop) in enumerate(zip(locations[:-1], locations[1:]))) return new_dd_object(dsk, name, data, divisions) def from_bcolz(x, chunksize=None, categorize=True, index=None, lock=lock, **kwargs): """ Read BColz CTable into a Dask Dataframe BColz is a fast on-disk compressed column store with careful attention given to compression. https://bcolz.readthedocs.io/en/latest/ Parameters ---------- x : bcolz.ctable chunksize : int, optional The size(rows) of blocks to pull out from ctable. categorize : bool, defaults to True Automatically categorize all string dtypes index : string, optional Column to make the index lock: bool or Lock Lock to use when reading or False for no lock (not-thread-safe) See Also -------- from_array: more generic function not optimized for bcolz """ if lock is True: lock = Lock() import dask.array as da import bcolz if isinstance(x, (str, unicode)): x = bcolz.ctable(rootdir=x) bc_chunklen = max(x[name].chunklen for name in x.names) if chunksize is None and bc_chunklen > 10000: chunksize = bc_chunklen categories = dict() if categorize: for name in x.names: if (np.issubdtype(x.dtype[name], np.string_) or np.issubdtype(x.dtype[name], np.unicode_) or np.issubdtype(x.dtype[name], np.object_)): a = da.from_array(x[name], chunks=(chunksize * len(x.names),)) categories[name] = da.unique(a).compute() columns = tuple(x.dtype.names) divisions = tuple(range(0, len(x), chunksize)) divisions = divisions + (len(x) - 1,) if x.rootdir: token = tokenize((x.rootdir, os.path.getmtime(x.rootdir)), chunksize, categorize, index, kwargs) else: token = tokenize((id(x), x.shape, x.dtype), chunksize, categorize, index, kwargs) new_name = 'from_bcolz-' + token dsk = dict(((new_name, i), (dataframe_from_ctable, x, (slice(i * chunksize, (i + 1) * chunksize),), columns, categories, lock)) for i in range(0, int(ceil(len(x) / chunksize)))) meta = dataframe_from_ctable(x, slice(0, 0), columns, categories, lock) result = DataFrame(dsk, new_name, meta, divisions) if index: assert index in x.names a = da.from_array(x[index], chunks=(chunksize * len(x.names),)) q = np.linspace(0, 100, len(x) // chunksize + 2) divisions = tuple(da.percentile(a, q).compute()) return set_partition(result, index, divisions, **kwargs) else: return result def dataframe_from_ctable(x, slc, columns=None, categories=None, lock=lock): """ Get DataFrame from bcolz.ctable Parameters ---------- x: bcolz.ctable slc: 
slice columns: list of column names or None >>> import bcolz >>> x = bcolz.ctable([[1, 2, 3, 4], [10, 20, 30, 40]], names=['a', 'b']) >>> dataframe_from_ctable(x, slice(1, 3)) a b 1 2 20 2 3 30 >>> dataframe_from_ctable(x, slice(1, 3), columns=['b']) b 1 20 2 30 >>> dataframe_from_ctable(x, slice(1, 3), columns='b') 1 20 2 30 Name: b, dtype: int... """ import bcolz if columns is None: columns = x.dtype.names if isinstance(columns, tuple): columns = list(columns) x = x[columns] if type(slc) is slice: start = slc.start stop = slc.stop if slc.stop < len(x) else len(x) else: start = slc[0].start stop = slc[0].stop if slc[0].stop < len(x) else len(x) idx = pd.Index(range(start, stop)) if lock: lock.acquire() try: if isinstance(x, bcolz.ctable): chunks = [x[name][slc] for name in columns] if categories is not None: chunks = [pd.Categorical.from_codes( np.searchsorted(categories[name], chunk), categories[name], True) if name in categories else chunk for name, chunk in zip(columns, chunks)] result = pd.DataFrame(dict(zip(columns, chunks)), columns=columns, index=idx) elif isinstance(x, bcolz.carray): chunk = x[slc] if categories is not None and columns and columns in categories: chunk = pd.Categorical.from_codes( np.searchsorted(categories[columns], chunk), categories[columns], True) result = pd.Series(chunk, name=columns, index=idx) finally: if lock: lock.release() return result def from_dask_array(x, columns=None): """ Create a Dask DataFrame from a Dask Array. Converts a 2d array into a DataFrame and a 1d array into a Series. Parameters ---------- x: da.Array columns: list or string list of column names if DataFrame, single string if Series Examples -------- >>> import dask.array as da >>> import dask.dataframe as dd >>> x = da.ones((4, 2), chunks=(2, 2)) >>> df = dd.io.from_dask_array(x, columns=['a', 'b']) >>> df.compute() a b 0 1.0 1.0 1 1.0 1.0 2 1.0 1.0 3 1.0 1.0 See Also -------- dask.bag.to_dataframe: from dask.bag dask.dataframe._Frame.values: Reverse conversion dask.dataframe._Frame.to_records: Reverse conversion """ meta = _meta_from_array(x, columns) if x.ndim == 2 and len(x.chunks[1]) > 1: x = x.rechunk({1: x.shape[1]}) name = 'from-dask-array' + tokenize(x, columns) if np.isnan(sum(x.shape)): divisions = [None] * (len(x.chunks[0]) + 1) index = [None] * len(x.chunks[0]) else: divisions = [0] for c in x.chunks[0]: divisions.append(divisions[-1] + c) index = [(np.arange, a, b, 1, 'i8') for a, b in zip(divisions[:-1], divisions[1:])] divisions[-1] -= 1 dsk = {} for i, (chunk, ind) in enumerate(zip(x.__dask_keys__(), index)): if x.ndim == 2: chunk = chunk[0] if isinstance(meta, pd.Series): dsk[name, i] = (pd.Series, chunk, ind, x.dtype, meta.name) else: dsk[name, i] = (pd.DataFrame, chunk, ind, meta.columns) return new_dd_object(merge(ensure_dict(x.dask), dsk), name, meta, divisions) def _link(token, result): """ A dummy function to link results together in a graph We use this to enforce an artificial sequential ordering on tasks that don't explicitly pass around a shared resource """ return None def _df_to_bag(df, index=False): if isinstance(df, pd.DataFrame): return list(map(tuple, df.itertuples(index))) elif isinstance(df, pd.Series): return list(df.iteritems()) if index else list(df) def to_bag(df, index=False): """Create Dask Bag from a Dask DataFrame Parameters ---------- index : bool, optional If True, the elements are tuples of ``(index, value)``, otherwise they're just the ``value``. Default is False. 
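        For example (illustrative, not from the original docstring), with
        ``index=True`` a dataframe row with index ``0`` and values ``1`` and
        ``'a'`` becomes the bag element ``(0, 1, 'a')``.
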
Examples -------- >>> bag = df.to_bag() # doctest: +SKIP """ from ...bag.core import Bag if not isinstance(df, (DataFrame, Series)): raise TypeError("df must be either DataFrame or Series") name = 'to_bag-' + tokenize(df, index) dsk = dict(((name, i), (_df_to_bag, block, index)) for (i, block) in enumerate(df.__dask_keys__())) dsk.update(df.__dask_optimize__(df.__dask_graph__(), df.__dask_keys__())) return Bag(dsk, name, df.npartitions) def to_records(df): """ Create Dask Array from a Dask Dataframe Warning: This creates a dask.array without precise shape information. Operations that depend on shape information, like slicing or reshaping, will not work. Examples -------- >>> df.to_records() # doctest: +SKIP dask.array See Also -------- dask.dataframe._Frame.values dask.dataframe.from_dask_array """ from ...array.core import Array if not isinstance(df, (DataFrame, Series)): raise TypeError("df must be either DataFrame or Series") name = 'to-records-' + tokenize(df) dsk = {(name, i): (M.to_records, key) for (i, key) in enumerate(df.__dask_keys__())} x = df._meta.to_records() chunks = ((np.nan,) * df.npartitions,) return Array(merge(df.dask, dsk), name, chunks, x.dtype) @insert_meta_param_description def from_delayed(dfs, meta=None, divisions=None, prefix='from-delayed'): """ Create Dask DataFrame from many Dask Delayed objects Parameters ---------- dfs : list of Delayed An iterable of ``dask.delayed.Delayed`` objects, such as come from ``dask.delayed`` These comprise the individual partitions of the resulting dataframe. $META divisions : tuple, str, optional Partition boundaries along the index. For tuple, see http://dask.pydata.org/en/latest/dataframe-design.html#partitions For string 'sorted' will compute the delayed values to find index values. Assumes that the indexes are mutually sorted. If None, then won't use index information prefix : str, optional Prefix to prepend to the keys. 
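    Examples
    --------
    A minimal, illustrative sketch (not part of the original docstring):

    >>> import pandas as pd
    >>> from dask import delayed
    >>> parts = [delayed(pd.DataFrame)({'x': [i, i + 1]}) for i in range(3)]
    >>> ddf = from_delayed(parts, meta=pd.DataFrame({'x': [0]}))  # doctest: +SKIP
    >>> ddf.npartitions  # doctest: +SKIP
    3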
""" from dask.delayed import Delayed if isinstance(dfs, Delayed): dfs = [dfs] dfs = [delayed(df) if not isinstance(df, Delayed) and hasattr(df, 'key') else df for df in dfs] for df in dfs: if not isinstance(df, Delayed): raise TypeError("Expected Delayed object, got %s" % type(df).__name__) if meta is None: meta = dfs[0].compute() meta = make_meta(meta) name = prefix + '-' + tokenize(*dfs) dsk = merge(df.dask for df in dfs) dsk.update({(name, i): (check_meta, df.key, meta, 'from_delayed') for (i, df) in enumerate(dfs)}) if divisions is None or divisions == 'sorted': divs = [None] * (len(dfs) + 1) else: divs = tuple(divisions) if len(divs) != len(dfs) + 1: raise ValueError("divisions should be a tuple of len(dfs) + 1") df = new_dd_object(dsk, name, meta, divs) if divisions == 'sorted': from ..shuffle import compute_divisions divisions = compute_divisions(df) df.divisions = divisions return df def sorted_division_locations(seq, npartitions=None, chunksize=None): """ Find division locations and values in sorted list Examples -------- >>> L = ['A', 'B', 'C', 'D', 'E', 'F'] >>> sorted_division_locations(L, chunksize=2) (['A', 'C', 'E', 'F'], [0, 2, 4, 6]) >>> sorted_division_locations(L, chunksize=3) (['A', 'D', 'F'], [0, 3, 6]) >>> L = ['A', 'A', 'A', 'A', 'B', 'B', 'B', 'C'] >>> sorted_division_locations(L, chunksize=3) (['A', 'B', 'C'], [0, 4, 8]) >>> sorted_division_locations(L, chunksize=2) (['A', 'B', 'C'], [0, 4, 8]) >>> sorted_division_locations(['A'], chunksize=2) (['A', 'A'], [0, 1]) """ if ((npartitions is None) == (chunksize is None)): raise ValueError('Exactly one of npartitions and chunksize must be specified.') if npartitions: chunksize = ceil(len(seq) / npartitions) positions = [0] values = [seq[0]] for pos in list(range(0, len(seq), chunksize)): if pos <= positions[-1]: continue while pos + 1 < len(seq) and seq[pos - 1] == seq[pos]: pos += 1 values.append(seq[pos]) if pos == len(seq) - 1: pos += 1 positions.append(pos) if positions[-1] != len(seq): positions.append(len(seq)) values.append(seq[-1]) return values, positions if PY3: DataFrame.to_records.__doc__ = to_records.__doc__ DataFrame.to_bag.__doc__ = to_bag.__doc__ dask-0.16.0/dask/dataframe/io/parquet.py000066400000000000000000000541601320364734500200450ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import copy import json import warnings import numpy as np import pandas as pd from toolz import unique from ..core import DataFrame, Series from ..utils import UNKNOWN_CATEGORIES from ...base import tokenize, normalize_token from ...compatibility import PY3 from ...delayed import delayed from ...bytes.core import get_fs_paths_myopen __all__ = ('read_parquet', 'to_parquet') def _meta_from_dtypes(to_read_columns, file_columns, file_dtypes, index_cols): meta = pd.DataFrame({c: pd.Series([], dtype=d) for (c, d) in file_dtypes.items()}, columns=[c for c in file_columns if c in file_dtypes]) df = meta[list(to_read_columns)] if not index_cols: return df if not isinstance(index_cols, list): index_cols = [index_cols] df = df.set_index(index_cols) if len(index_cols) == 1 and index_cols[0] == '__index_level_0__': df.index.name = None return df # ---------------------------------------------------------------------- # Fastparquet interface def _read_fastparquet(fs, paths, myopen, columns=None, filters=None, categories=None, index=None, storage_options=None): import fastparquet from fastparquet.util import check_column_names if filters is None: filters = [] if isinstance(columns, list): 
columns = tuple(columns) if len(paths) > 1: pf = fastparquet.ParquetFile(paths, open_with=myopen, sep=myopen.fs.sep) else: try: pf = fastparquet.ParquetFile(paths[0] + fs.sep + '_metadata', open_with=myopen, sep=fs.sep) except Exception: pf = fastparquet.ParquetFile(paths[0], open_with=myopen, sep=fs.sep) check_column_names(pf.columns, categories) name = 'read-parquet-' + tokenize(pf, columns, categories) rgs = [rg for rg in pf.row_groups if not(fastparquet.api.filter_out_stats(rg, filters, pf.schema)) and not(fastparquet.api.filter_out_cats(rg, filters))] if index is False: index_col = None elif index is None: index_col = pf._get_index() else: index_col = index if columns is None: all_columns = tuple(pf.columns + list(pf.cats)) else: all_columns = columns if not isinstance(all_columns, tuple): out_type = Series all_columns = (all_columns,) else: out_type = DataFrame if index_col and index_col not in all_columns: all_columns = all_columns + (index_col,) if categories is None: categories = pf.categories dtypes = pf._dtypes(categories) meta = _meta_from_dtypes(all_columns, tuple(pf.columns + list(pf.cats)), dtypes, index_col) for cat in categories: if cat in meta: meta[cat] = pd.Series(pd.Categorical([], categories=[UNKNOWN_CATEGORIES]), index=meta.index) if out_type == Series: assert len(meta.columns) == 1 meta = meta[meta.columns[0]] dsk = {(name, i): (_read_parquet_row_group, myopen, pf.row_group_filename(rg), index_col, all_columns, rg, out_type == Series, categories, pf.schema, pf.cats, pf.dtypes, pf.file_scheme) for i, rg in enumerate(rgs)} if not dsk: # empty dataframe dsk = {(name, 0): meta} divisions = (None, None) return out_type(dsk, name, meta, divisions) if index_col: minmax = fastparquet.api.sorted_partitioned_columns(pf) if index_col in minmax: divisions = (list(minmax[index_col]['min']) + [minmax[index_col]['max'][-1]]) divisions = [divisions[i] for i, rg in enumerate(pf.row_groups) if rg in rgs] + [divisions[-1]] else: divisions = (None,) * (len(rgs) + 1) else: divisions = (None,) * (len(rgs) + 1) if isinstance(divisions[0], np.datetime64): divisions = [pd.Timestamp(d) for d in divisions] return out_type(dsk, name, meta, divisions) def _read_parquet_row_group(open, fn, index, columns, rg, series, categories, schema, cs, dt, scheme, *args): from fastparquet.api import _pre_allocate from fastparquet.core import read_row_group_file if not isinstance(columns, (tuple, list)): columns = (columns,) series = True if index and index not in columns: columns = columns + type(columns)([index]) df, views = _pre_allocate(rg.num_rows, columns, categories, index, cs, dt) read_row_group_file(fn, rg, columns, categories, schema, cs, open=open, assign=views, scheme=scheme) if series: return df[df.columns[0]] else: return df def _write_partition_fastparquet(df, fs, path, filename, fmd, compression, partition_on): from fastparquet.writer import partition_on_columns, make_part_file # Fastparquet mutates this in a non-threadsafe manner. For now we just copy # it before forwarding to fastparquet. fmd = copy.copy(fmd) if not len(df): # Write nothing for empty partitions rgs = None elif partition_on: rgs = partition_on_columns(df, partition_on, path, filename, fmd, fs.sep, compression, fs.open, fs.mkdirs) else: # Fastparquet current doesn't properly set `num_rows` in the output # metadata. Set it here to fix that. 
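        # (Added note, not from the original source: after patching num_rows
        # below, fastparquet's make_part_file serializes this partition into a
        # single part file and returns its row-group metadata, which
        # _write_metadata later folds into the dataset's shared _metadata.)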
fmd.num_rows = len(df) with fs.open(fs.sep.join([path, filename]), 'wb') as fil: rgs = make_part_file(fil, df, fmd.schema, compression=compression, fmd=fmd) return rgs def _write_fastparquet(df, path, write_index=None, append=False, ignore_divisions=False, partition_on=None, storage_options=None, compression=None, **kwargs): import fastparquet fs, paths, open_with = get_fs_paths_myopen(path, None, 'wb', **(storage_options or {})) fs.mkdirs(path) sep = fs.sep object_encoding = kwargs.pop('object_encoding', 'utf8') if object_encoding == 'infer' or (isinstance(object_encoding, dict) and 'infer' in object_encoding.values()): raise ValueError('"infer" not allowed as object encoding, ' 'because this required data in memory.') divisions = df.divisions if write_index is True or write_index is None and df.known_divisions: df = df.reset_index() index_cols = [df.columns[0]] else: ignore_divisions = True index_cols = [] if append: pf = fastparquet.api.ParquetFile(path, open_with=open_with, sep=sep) if pf.file_scheme not in ['hive', 'empty', 'flat']: raise ValueError('Requested file scheme is hive, ' 'but existing file scheme is not.') elif ((set(pf.columns) != set(df.columns) - set(partition_on)) or (set(partition_on) != set(pf.cats))): raise ValueError('Appended columns not the same.\n' 'New: {} | Previous: {}' .format(pf.columns, list(df.columns))) elif set(pf.dtypes[c] for c in pf.columns) != set(df[pf.columns].dtypes): raise ValueError('Appended dtypes differ.\n{}' .format(set(pf.dtypes.items()) ^ set(df.dtypes.iteritems()))) else: df = df[pf.columns + partition_on] fmd = pf.fmd i_offset = fastparquet.writer.find_max_part(fmd.row_groups) if not ignore_divisions: minmax = fastparquet.api.sorted_partitioned_columns(pf) old_end = minmax[index_cols[0]]['max'][-1] if divisions[0] < old_end: raise ValueError( 'Appended divisions overlapping with the previous ones.\n' 'New: {} | Previous: {}'.format(old_end, divisions[0])) else: fmd = fastparquet.writer.make_metadata(df._meta, object_encoding=object_encoding, index_cols=index_cols, ignore_columns=partition_on, **kwargs) i_offset = 0 filenames = ['part.%i.parquet' % (i + i_offset) for i in range(df.npartitions)] write = delayed(_write_partition_fastparquet) writes = [write(part, fs, path, filename, fmd, compression, partition_on) for filename, part in zip(filenames, df.to_delayed())] return delayed(_write_metadata)(writes, filenames, fmd, path, open_with, sep) def _write_metadata(writes, filenames, fmd, path, open_with, sep): """ Write Parquet metadata after writing all row groups See Also -------- to_parquet """ import fastparquet fmd = copy.copy(fmd) for fn, rg in zip(filenames, writes): if rg is not None: if isinstance(rg, list): for r in rg: fmd.row_groups.append(r) else: for chunk in rg.columns: chunk.file_path = fn fmd.row_groups.append(rg) fn = sep.join([path, '_metadata']) fastparquet.writer.write_common_metadata(fn, fmd, open_with=open_with, no_row_groups=False) fn = sep.join([path, '_common_metadata']) fastparquet.writer.write_common_metadata(fn, fmd, open_with=open_with) # ---------------------------------------------------------------------- # PyArrow interface def _read_pyarrow(fs, paths, file_opener, columns=None, filters=None, categories=None, index=None): from ...bytes.core import get_pyarrow_filesystem import pyarrow.parquet as pq if filters is not None: raise NotImplementedError("Predicate pushdown not implemented") if categories is not None: raise NotImplementedError("Categorical reads not yet implemented") if isinstance(columns, 
tuple): columns = list(columns) dataset = pq.ParquetDataset(paths, filesystem=get_pyarrow_filesystem(fs)) schema = dataset.schema.to_arrow_schema() has_pandas_metadata = schema.metadata is not None and b'pandas' in schema.metadata task_name = 'read-parquet-' + tokenize(dataset, columns) if columns is None: all_columns = schema.names else: all_columns = columns if not isinstance(all_columns, list): out_type = Series all_columns = [all_columns] else: out_type = DataFrame if index is False: index_cols = [] elif index is None: if has_pandas_metadata: pandas_metadata = json.loads(schema.metadata[b'pandas'].decode('utf8')) index_cols = pandas_metadata.get('index_columns', []) else: index_cols = [] else: index_cols = index if isinstance(index, list) else [index] if index_cols: all_columns = list(unique(all_columns + index_cols)) dtypes = _get_pyarrow_dtypes(schema) meta = _meta_from_dtypes(all_columns, schema.names, dtypes, index_cols) if out_type == Series: assert len(meta.columns) == 1 meta = meta[meta.columns[0]] if dataset.pieces: divisions = (None,) * (len(dataset.pieces) + 1) task_plan = { (task_name, i): (_read_pyarrow_parquet_piece, file_opener, piece, all_columns, index_cols, out_type == Series, dataset.partitions) for i, piece in enumerate(dataset.pieces) } else: divisions = (None, None) task_plan = {(task_name, 0): meta} return out_type(task_plan, task_name, meta, divisions) def _get_pyarrow_dtypes(schema): dtypes = {} for i in range(len(schema)): field = schema[i] numpy_dtype = field.type.to_pandas_dtype() dtypes[field.name] = numpy_dtype return dtypes def _read_pyarrow_parquet_piece(open_file_func, piece, columns, index_cols, is_series, partitions): with open_file_func(piece.path, mode='rb') as f: table = piece.read(columns=columns, partitions=partitions, use_pandas_metadata=True, file=f) df = table.to_pandas() if (index_cols and df.index.name is None and len(df.columns.intersection(index_cols))): # Index should be set, but it isn't df = df.set_index(index_cols) elif not index_cols and df.index.name is not None: # Index shouldn't be set, but it is df = df.reset_index(drop=False) if is_series: return df[df.columns[0]] else: return df def _write_pyarrow(df, path, write_index=None, append=False, ignore_divisions=False, partition_on=None, storage_options=None, **kwargs): if append: raise NotImplementedError("`append` not implemented for " "`engine='pyarrow'`") if partition_on: raise NotImplementedError("`partition_on` not implemented for " "`engine='pyarrow'`") if write_index is None and df.known_divisions: write_index = True fs, paths, open_with = get_fs_paths_myopen(path, None, 'wb', **(storage_options or {})) fs.mkdirs(path) template = fs.sep.join([path, 'part.%i.parquet']) write = delayed(_write_partition_pyarrow) first_kwargs = kwargs.copy() first_kwargs['metadata_path'] = fs.sep.join([path, '_metadata']) writes = [write(part, open_with, template % i, write_index, **(kwargs if i else first_kwargs)) for i, part in enumerate(df.to_delayed())] return delayed(writes) def _write_partition_pyarrow(df, open_with, filename, write_index, metadata_path=None, **kwargs): import pyarrow as pa from pyarrow import parquet t = pa.Table.from_pandas(df, preserve_index=write_index) with open_with(filename, 'wb') as fil: parquet.write_table(t, fil, **kwargs) if metadata_path is not None: with open_with(metadata_path, 'wb') as fil: parquet.write_metadata(t.schema, fil, **kwargs) # ---------------------------------------------------------------------- # User API _ENGINES = {} def get_engine(engine): 
"""Get the parquet engine backend implementation. Parameters ---------- engine : {'auto', 'fastparquet', 'pyarrow'}, default 'auto' Parquet reader library to use. Default is first installed in this list. Returns ------- A dict containing a ``'read'`` and ``'write'`` function. """ if engine in _ENGINES: return _ENGINES[engine] if engine == 'auto': for eng in ['fastparquet', 'pyarrow']: try: return get_engine(eng) except ImportError: pass else: raise ImportError("Please install either fastparquet or pyarrow") elif engine == 'fastparquet': try: import fastparquet except ImportError: raise ImportError("fastparquet not installed") @normalize_token.register(fastparquet.ParquetFile) def normalize_ParquetFile(pf): return (type(pf), pf.fn, pf.sep) + normalize_token(pf.open) _ENGINES['fastparquet'] = eng = {'read': _read_fastparquet, 'write': _write_fastparquet} return eng elif engine == 'pyarrow': try: import pyarrow.parquet as pq except ImportError: raise ImportError("pyarrow not installed") @normalize_token.register(pq.ParquetDataset) def normalize_PyArrowParquetDataset(ds): return (type(ds), ds.paths) _ENGINES['pyarrow'] = eng = {'read': _read_pyarrow, 'write': _write_pyarrow} return eng elif engine == 'arrow': warnings.warn("parquet with `engine='arrow'` is deprecated, " "use `engine='pyarrow'` instead") return get_engine('pyarrow') else: raise ValueError('Unsupported engine type: {0}'.format(engine)) def read_parquet(path, columns=None, filters=None, categories=None, index=None, storage_options=None, engine='auto'): """ Read ParquetFile into a Dask DataFrame This reads a directory of Parquet data into a Dask.dataframe, one file per partition. It selects the index among the sorted columns if any exist. Parameters ---------- path : string Source directory for data. May be a glob string. Prepend with protocol like ``s3://`` or ``hdfs://`` for remote data. columns: list or None List of column names to load filters: list List of filters to apply, like ``[('x', '>', 0), ...]``. This implements row-group (partition) -level filtering only, i.e., to prevent the loading of some chunks of the data, and only if relevant statistics have been included in the metadata. index: string or None (default) or False Name of index column to use if that column is sorted; False to force dask to not use any column as the index categories: list, dict or None For any fields listed here, if the parquet encoding is Dictionary, the column will be created with dtype category. Use only if it is guaranteed that the column is encoded as dictionary in all row-groups. If a list, assumes up to 2**16-1 labels; if a dict, specify the number of labels expected; if None, will load categories automatically for data written by dask/fastparquet, not otherwise. storage_options : dict Key/value pairs to be passed on to the file-system backend, if any. engine : {'auto', 'fastparquet', 'pyarrow'}, default 'auto' Parquet reader library to use. 
If only one library is installed, it will use that one; if both, it will use 'fastparquet' Examples -------- >>> df = read_parquet('s3://bucket/my-parquet-data') # doctest: +SKIP See Also -------- to_parquet """ fs, paths, file_opener = get_fs_paths_myopen(path, None, 'rb', **(storage_options or {})) read = get_engine(engine)['read'] return read(fs, paths, file_opener, columns=columns, filters=filters, categories=categories, index=index) def to_parquet(df, path, engine='auto', compression='default', write_index=None, append=False, ignore_divisions=False, partition_on=None, storage_options=None, compute=True, **kwargs): """Store Dask.dataframe to Parquet files Notes ----- Each partition will be written to a separate file. Parameters ---------- df : dask.dataframe.DataFrame path : string Destination directory for data. Prepend with protocol like ``s3://`` or ``hdfs://`` for remote data. engine : {'auto', 'fastparquet', 'pyarrow'}, default 'auto' Parquet library to use. If only one library is installed, it will use that one; if both, it will use 'fastparquet'. compression : string or dict, optional Either a string like ``"snappy"`` or a dictionary mapping column names to compressors like ``{"name": "gzip", "values": "snappy"}``. The default is ``"default"``, which uses the default compression for whichever engine is selected. write_index : boolean, optional Whether or not to write the index. Defaults to True *if* divisions are known. append : bool, optional If False (default), construct data-set from scratch. If True, add new row-group(s) to an existing data-set. In the latter case, the data-set must exist, and the schema must match the input data. ignore_divisions : bool, optional If False (default) raises error when previous divisions overlap with the new appended divisions. Ignored if append=False. partition_on : list, optional Construct directory-based partitioning by splitting on these fields' values. Each dask partition will result in one or more datafiles, there will be no global groupby. storage_options : dict, optional Key/value pairs to be passed on to the file-system backend, if any. compute : bool, optional If True (default) then the result is computed immediately. If False then a ``dask.delayed`` object is returned for future computation. **kwargs Extra options to be passed on to the specific backend. Examples -------- >>> df = dd.read_csv(...) # doctest: +SKIP >>> to_parquet('/path/to/output/', df, compression='snappy') # doctest: +SKIP See Also -------- read_parquet: Read parquet data to dask.dataframe """ # TODO: remove once deprecation cycle is finished if isinstance(path, DataFrame): warnings.warn("DeprecationWarning: The order of `df` and `path` in " "`dd.to_parquet` has switched, please update your code") df, path = path, df partition_on = partition_on or [] if set(partition_on) - set(df.columns): raise ValueError('Partitioning on non-existent column') if compression != 'default': kwargs['compression'] = compression write = get_engine(engine)['write'] out = write(df, path, write_index=write_index, append=append, ignore_divisions=ignore_divisions, partition_on=partition_on, storage_options=storage_options, **kwargs) if compute: out.compute() return None return out if PY3: DataFrame.to_parquet.__doc__ = to_parquet.__doc__ dask-0.16.0/dask/dataframe/io/sql.py000066400000000000000000000123541320364734500171620ustar00rootroot00000000000000import numpy as np import pandas as pd import six from ... 
import delayed from .io import from_delayed def read_sql_table(table, uri, index_col, divisions=None, npartitions=None, limits=None, columns=None, bytes_per_chunk=256 * 2**20, **kwargs): """ Create dataframe from an SQL table. If neither divisions or npartitions is given, the memory footprint of the first five rows will be determined, and partitions of size ~256MB will be used. Parameters ---------- table : string or sqlalchemy expression Select columns from here. uri : string Full sqlalchemy URI for the database connection index_col : string Column which becomes the index, and defines the partitioning. Should be a indexed column in the SQL server, and numerical. Could be a function to return a value, e.g., ``sql.func.abs(sql.column('value')).label('abs(value)')``. Labeling columns created by functions or arithmetic operations is required. divisions: sequence Values of the index column to split the table by. npartitions : int Number of partitions, if divisions is not given. Will split the values of the index column linearly between limits, if given, or the column max/min. limits: 2-tuple or None Manually give upper and lower range of values for use with npartitions; if None, first fetches max/min from the DB. Upper limit, if given, is inclusive. columns : list of strings or None Which columns to select; if None, gets all; can include sqlalchemy functions, e.g., ``sql.func.abs(sql.column('value')).label('abs(value)')``. Labeling columns created by functions or arithmetic operations is recommended. bytes_per_chunk: int If both divisions and npartitions is None, this is the target size of each partition, in bytes kwargs : dict Additional parameters to pass to `pd.read_sql()` Returns ------- dask.dataframe Examples -------- >>> df = dd.read_sql('accounts', 'sqlite:///path/to/bank.db', ... 
npartitions=10, index_col='id') # doctest: +SKIP """ import sqlalchemy as sa from sqlalchemy import sql from sqlalchemy.sql import elements if index_col is None: raise ValueError("Must specify index column to partition on") engine = sa.create_engine(uri) meta = sa.MetaData() if isinstance(table, six.string_types): schema = kwargs.pop('schema', None) table = sa.Table(table, meta, autoload=True, autoload_with=engine, schema=schema) index = (table.columns[index_col] if isinstance(index_col, six.string_types) else index_col) if not isinstance(index_col, six.string_types + (elements.Label,)): raise ValueError('Use label when passing an SQLAlchemy instance' ' as the index (%s)' % index) if divisions and npartitions: raise TypeError('Must supply either divisions or npartitions, not both') columns = ([(table.columns[c] if isinstance(c, six.string_types) else c) for c in columns] if columns else list(table.columns)) if index_col not in columns: columns.append(table.columns[index_col] if isinstance(index_col, six.string_types) else index_col) if isinstance(index_col, six.string_types): kwargs['index_col'] = index_col else: # function names get pandas auto-named kwargs['index_col'] = index_col.name q = sql.select(columns).limit(5).select_from(table) head = pd.read_sql(q, engine, **kwargs) if divisions is None: if limits is None: # calculate max and min for given index q = sql.select([sql.func.max(index), sql.func.min(index)] ).select_from(table) minmax = pd.read_sql(q, engine) maxi, mini = minmax.iloc[0] dtype = minmax.dtypes['max_1'] else: mini, maxi = limits dtype = pd.Series(limits).dtype if npartitions is None: q = sql.select([sql.func.count(index)]).select_from(table) count = pd.read_sql(q, engine)['count_1'][0] bytes_per_row = (head.memory_usage(deep=True, index=True)).sum() / 5 npartitions = round(count * bytes_per_row / bytes_per_chunk) or 1 if dtype.kind == "M": divisions = pd.date_range( start=mini, end=maxi, freq='%iS' % ( (maxi - mini) / npartitions).total_seconds()).tolist() divisions[0] = mini divisions[-1] = maxi else: divisions = np.linspace(mini, maxi, npartitions + 1).tolist() parts = [] lowers, uppers = divisions[:-1], divisions[1:] for i, (lower, upper) in enumerate(zip(lowers, uppers)): cond = index <= upper if i == len(lowers) - 1 else index < upper q = sql.select(columns).where(sql.and_(index >= lower, cond) ).select_from(table) parts.append(delayed(pd.read_sql)(q, uri, **kwargs)) return from_delayed(parts, head, divisions=divisions) dask-0.16.0/dask/dataframe/io/tests/000077500000000000000000000000001320364734500171465ustar00rootroot00000000000000dask-0.16.0/dask/dataframe/io/tests/__init__.py000066400000000000000000000000001320364734500212450ustar00rootroot00000000000000dask-0.16.0/dask/dataframe/io/tests/test_csv.py000066400000000000000000001021231320364734500213510ustar00rootroot00000000000000from __future__ import print_function, division, absolute_import from io import BytesIO import os import gzip from time import sleep import pytest pd = pytest.importorskip('pandas') dd = pytest.importorskip('dask.dataframe') from toolz import partition_all, valmap import pandas.util.testing as tm import dask import dask.dataframe as dd from dask.base import compute_as_if_collection from dask.dataframe.io.csv import (text_blocks_to_pandas, pandas_read_text, auto_blocksize) from dask.dataframe.utils import assert_eq, has_known_categories, PANDAS_VERSION from dask.bytes.core import read_bytes from dask.utils import filetexts, filetext, tmpfile, tmpdir from dask.bytes.compression import 
compress, files as cfiles, seekable_files fmt_bs = [(fmt, None) for fmt in cfiles] + [(fmt, 10) for fmt in seekable_files] def normalize_text(s): return '\n'.join(map(str.strip, s.strip().split('\n'))) csv_text = """ name,amount Alice,100 Bob,-200 Charlie,300 Dennis,400 Edith,-500 Frank,600 Alice,200 Frank,-200 Bob,600 Alice,400 Frank,200 Alice,300 Edith,600 """.strip() tsv_text = csv_text.replace(',', '\t') tsv_text2 = """ name amount Alice 100 Bob -200 Charlie 300 Dennis 400 Edith -500 Frank 600 Alice 200 Frank -200 Bob 600 Alice 400 Frank 200 Alice 300 Edith 600 """.strip() timeseries = """ Date,Open,High,Low,Close,Volume,Adj Close 2015-08-28,198.50,199.839996,197.919998,199.240005,143298900,199.240005 2015-08-27,197.020004,199.419998,195.210007,199.160004,266244700,199.160004 2015-08-26,192.080002,194.789993,188.369995,194.679993,328058100,194.679993 2015-08-25,195.429993,195.449997,186.919998,187.229996,353966700,187.229996 2015-08-24,197.630005,197.630005,182.399994,189.550003,478672400,189.550003 2015-08-21,201.729996,203.940002,197.520004,197.630005,328271500,197.630005 2015-08-20,206.509995,208.289993,203.899994,204.009995,185865600,204.009995 2015-08-19,209.089996,210.009995,207.350006,208.279999,167316300,208.279999 2015-08-18,210.259995,210.679993,209.699997,209.929993,70043800,209.929993 """.strip() csv_files = {'2014-01-01.csv': (b'name,amount,id\n' b'Alice,100,1\n' b'Bob,200,2\n' b'Charlie,300,3\n'), '2014-01-02.csv': (b'name,amount,id\n'), '2014-01-03.csv': (b'name,amount,id\n' b'Dennis,400,4\n' b'Edith,500,5\n' b'Frank,600,6\n')} tsv_files = {k: v.replace(b',', b'\t') for (k, v) in csv_files.items()} expected = pd.concat([pd.read_csv(BytesIO(csv_files[k])) for k in sorted(csv_files)]) comment_header = b"""# some header lines # that may be present # in a data file # before any data""" csv_and_table = pytest.mark.parametrize('reader,files', [(pd.read_csv, csv_files), (pd.read_table, tsv_files)]) @csv_and_table def test_pandas_read_text(reader, files): b = files['2014-01-01.csv'] df = pandas_read_text(reader, b, b'', {}) assert list(df.columns) == ['name', 'amount', 'id'] assert len(df) == 3 assert df.id.sum() == 1 + 2 + 3 @csv_and_table def test_pandas_read_text_kwargs(reader, files): b = files['2014-01-01.csv'] df = pandas_read_text(reader, b, b'', {'usecols': ['name', 'id']}) assert list(df.columns) == ['name', 'id'] @csv_and_table def test_pandas_read_text_dtype_coercion(reader, files): b = files['2014-01-01.csv'] df = pandas_read_text(reader, b, b'', {}, {'amount': 'float'}) assert df.amount.dtype == 'float' @csv_and_table def test_pandas_read_text_with_header(reader, files): b = files['2014-01-01.csv'] header, b = b.split(b'\n', 1) header = header + b'\n' df = pandas_read_text(reader, b, header, {}) assert list(df.columns) == ['name', 'amount', 'id'] assert len(df) == 3 assert df.id.sum() == 1 + 2 + 3 @csv_and_table def test_text_blocks_to_pandas_simple(reader, files): blocks = [[files[k]] for k in sorted(files)] kwargs = {} head = pandas_read_text(reader, files['2014-01-01.csv'], b'', {}) header = files['2014-01-01.csv'].split(b'\n')[0] + b'\n' df = text_blocks_to_pandas(reader, blocks, header, head, kwargs, collection=True) assert isinstance(df, dd.DataFrame) assert list(df.columns) == ['name', 'amount', 'id'] values = text_blocks_to_pandas(reader, blocks, header, head, kwargs, collection=False) assert isinstance(values, list) assert len(values) == 3 assert all(hasattr(item, 'dask') for item in values) result = df.amount.sum().compute(get=dask.get) assert result == 
(100 + 200 + 300 + 400 + 500 + 600) @csv_and_table def test_text_blocks_to_pandas_kwargs(reader, files): blocks = [files[k] for k in sorted(files)] blocks = [[b] for b in blocks] kwargs = {'usecols': ['name', 'id']} head = pandas_read_text(reader, files['2014-01-01.csv'], b'', kwargs) header = files['2014-01-01.csv'].split(b'\n')[0] + b'\n' df = text_blocks_to_pandas(reader, blocks, header, head, kwargs, collection=True) assert list(df.columns) == ['name', 'id'] result = df.compute() assert (result.columns == df.columns).all() @csv_and_table def test_text_blocks_to_pandas_blocked(reader, files): header = files['2014-01-01.csv'].split(b'\n')[0] + b'\n' blocks = [] for k in sorted(files): b = files[k] lines = b.split(b'\n') blocks.append([b'\n'.join(bs) for bs in partition_all(2, lines)]) df = text_blocks_to_pandas(reader, blocks, header, expected.head(), {}) assert_eq(df.compute().reset_index(drop=True), expected.reset_index(drop=True), check_dtype=False) expected2 = expected[['name', 'id']] df = text_blocks_to_pandas(reader, blocks, header, expected2.head(), {'usecols': ['name', 'id']}) assert_eq(df.compute().reset_index(drop=True), expected2.reset_index(drop=True), check_dtype=False) @pytest.mark.parametrize('dd_read,pd_read,files', [(dd.read_csv, pd.read_csv, csv_files), (dd.read_table, pd.read_table, tsv_files)]) def test_skiprows(dd_read, pd_read, files): files = {name: comment_header + b'\n' + content for name, content in files.items()} skip = len(comment_header.splitlines()) with filetexts(files, mode='b'): df = dd_read('2014-01-*.csv', skiprows=skip) expected_df = pd.concat([pd_read(n, skiprows=skip) for n in sorted(files)]) assert_eq(df, expected_df, check_dtype=False) csv_blocks = [[b'aa,bb\n1,1.0\n2,2.0', b'10,20\n30,40'], [b'aa,bb\n1,1.0\n2,2.0', b'10,20\n30,40']] tsv_blocks = [[b'aa\tbb\n1\t1.0\n2\t2.0', b'10\t20\n30\t40'], [b'aa\tbb\n1\t1.0\n2\t2.0', b'10\t20\n30\t40']] @pytest.mark.parametrize('reader,blocks', [(pd.read_csv, csv_blocks), (pd.read_table, tsv_blocks)]) def test_enforce_dtypes(reader, blocks): head = reader(BytesIO(blocks[0][0]), header=0) header = blocks[0][0].split(b'\n')[0] + b'\n' dfs = text_blocks_to_pandas(reader, blocks, header, head, {}, collection=False) dfs = dask.compute(*dfs, get=dask.get) assert all(df.dtypes.to_dict() == head.dtypes.to_dict() for df in dfs) @pytest.mark.parametrize('reader,blocks', [(pd.read_csv, csv_blocks), (pd.read_table, tsv_blocks)]) def test_enforce_columns(reader, blocks): # Replace second header with different column name blocks = [blocks[0], [blocks[1][0].replace(b'a', b'A'), blocks[1][1]]] head = reader(BytesIO(blocks[0][0]), header=0) header = blocks[0][0].split(b'\n')[0] + b'\n' with pytest.raises(ValueError): dfs = text_blocks_to_pandas(reader, blocks, header, head, {}, collection=False, enforce=True) dask.compute(*dfs, get=dask.get) ############################# # read_csv and read_table # ############################# @pytest.mark.parametrize('dd_read,pd_read,text,sep', [(dd.read_csv, pd.read_csv, csv_text, ','), (dd.read_table, pd.read_table, tsv_text, '\t'), (dd.read_table, pd.read_table, tsv_text2, '\s+')]) def test_read_csv(dd_read, pd_read, text, sep): with filetext(text) as fn: f = dd_read(fn, blocksize=30, lineterminator=os.linesep, sep=sep) assert list(f.columns) == ['name', 'amount'] # index may be different result = f.compute(get=dask.get).reset_index(drop=True) assert_eq(result, pd_read(fn, sep=sep)) @pytest.mark.parametrize('dd_read,pd_read,files', [(dd.read_csv, pd.read_csv, csv_files), (dd.read_table, 
pd.read_table, tsv_files)]) def test_read_csv_files(dd_read, pd_read, files): with filetexts(files, mode='b'): df = dd_read('2014-01-*.csv') assert_eq(df, expected, check_dtype=False) fn = '2014-01-01.csv' df = dd_read(fn) expected2 = pd_read(BytesIO(files[fn])) assert_eq(df, expected2, check_dtype=False) # After this point, we test just using read_csv, as all functionality # for both is implemented using the same code. def test_read_csv_index(): with filetext(csv_text) as fn: f = dd.read_csv(fn, blocksize=20).set_index('amount') result = f.compute(get=dask.get) assert result.index.name == 'amount' blocks = compute_as_if_collection(dd.DataFrame, f.dask, f.__dask_keys__(), get=dask.get) for i, block in enumerate(blocks): if i < len(f.divisions) - 2: assert (block.index < f.divisions[i + 1]).all() if i > 0: assert (block.index >= f.divisions[i]).all() expected = pd.read_csv(fn).set_index('amount') assert_eq(result, expected) def test_usecols(): with filetext(timeseries) as fn: df = dd.read_csv(fn, blocksize=30, usecols=['High', 'Low']) expected = pd.read_csv(fn, usecols=['High', 'Low']) assert (df.compute().values == expected.values).all() def test_skipinitialspace(): text = normalize_text(""" name, amount Alice,100 Bob,-200 Charlie,300 Dennis,400 Edith,-500 Frank,600 """) with filetext(text) as fn: df = dd.read_csv(fn, skipinitialspace=True, blocksize=20) assert 'amount' in df.columns assert df.amount.max().compute() == 600 def test_consistent_dtypes(): text = normalize_text(""" name,amount Alice,100.5 Bob,-200.5 Charlie,300 Dennis,400 Edith,-500 Frank,600 """) with filetext(text) as fn: df = dd.read_csv(fn, blocksize=30) assert df.amount.compute().dtype == float def test_consistent_dtypes_2(): text1 = normalize_text(""" name,amount Alice,100 Bob,-200 Charlie,300 """) text2 = normalize_text(""" name,amount 1,400 2,-500 Frank,600 """) with filetexts({'foo.1.csv': text1, 'foo.2.csv': text2}): df = dd.read_csv('foo.*.csv', blocksize=25) assert df.name.dtype == object assert df.name.compute().dtype == object @pytest.mark.skipif(PANDAS_VERSION < '0.19.2', reason="Not available in pandas <= 0.19.2") def test_categorical_dtypes(): text1 = normalize_text(""" fruit,count apple,10 apple,25 pear,100 orange,15 """) text2 = normalize_text(""" fruit,count apple,200 banana,300 orange,400 banana,10 """) with filetexts({'foo.1.csv': text1, 'foo.2.csv': text2}): df = dd.read_csv('foo.*.csv', dtype={'fruit': 'category'}, blocksize=25) assert df.fruit.dtype == 'category' assert not has_known_categories(df.fruit) res = df.compute() assert res.fruit.dtype == 'category' assert (sorted(res.fruit.cat.categories) == ['apple', 'banana', 'orange', 'pear']) @pytest.mark.slow def test_compression_multiple_files(): with tmpdir() as tdir: f = gzip.open(os.path.join(tdir, 'a.csv.gz'), 'wb') f.write(csv_text.encode()) f.close() f = gzip.open(os.path.join(tdir, 'b.csv.gz'), 'wb') f.write(csv_text.encode()) f.close() with tm.assert_produces_warning(UserWarning): df = dd.read_csv(os.path.join(tdir, '*.csv.gz'), compression='gzip') assert len(df.compute()) == (len(csv_text.split('\n')) - 1) * 2 def test_empty_csv_file(): with filetext('a,b') as fn: df = dd.read_csv(fn, header=0) assert len(df.compute()) == 0 assert list(df.columns) == ['a', 'b'] def test_read_csv_sensitive_to_enforce(): with filetexts(csv_files, mode='b'): a = dd.read_csv('2014-01-*.csv', enforce=True) b = dd.read_csv('2014-01-*.csv', enforce=False) assert a._name != b._name @pytest.mark.parametrize('fmt,blocksize', fmt_bs) def test_read_csv_compression(fmt, 
blocksize): files2 = valmap(compress[fmt], csv_files) with filetexts(files2, mode='b'): df = dd.read_csv('2014-01-*.csv', compression=fmt, blocksize=blocksize) assert_eq(df.compute(get=dask.get).reset_index(drop=True), expected.reset_index(drop=True), check_dtype=False) def test_warn_non_seekable_files(): files2 = valmap(compress['gzip'], csv_files) with filetexts(files2, mode='b'): with pytest.warns(UserWarning) as w: df = dd.read_csv('2014-01-*.csv', compression='gzip') assert df.npartitions == 3 assert len(w) == 1 msg = str(w[0].message) assert 'gzip' in msg assert 'blocksize=None' in msg with pytest.warns(None) as w: df = dd.read_csv('2014-01-*.csv', compression='gzip', blocksize=None) assert len(w) == 0 with pytest.raises(NotImplementedError): with pytest.warns(UserWarning): # needed for pytest df = dd.read_csv('2014-01-*.csv', compression='foo') def test_windows_line_terminator(): text = 'a,b\r\n1,2\r\n2,3\r\n3,4\r\n4,5\r\n5,6\r\n6,7' with filetext(text) as fn: df = dd.read_csv(fn, blocksize=5, lineterminator='\r\n') assert df.b.sum().compute() == 2 + 3 + 4 + 5 + 6 + 7 assert df.a.sum().compute() == 1 + 2 + 3 + 4 + 5 + 6 def test_header_None(): with filetexts({'.tmp.1.csv': '1,2', '.tmp.2.csv': '', '.tmp.3.csv': '3,4'}): df = dd.read_csv('.tmp.*.csv', header=None) expected = pd.DataFrame({0: [1, 3], 1: [2, 4]}) assert_eq(df.compute().reset_index(drop=True), expected) def test_auto_blocksize(): assert isinstance(auto_blocksize(3000, 15), int) assert auto_blocksize(3000, 3) == 100 assert auto_blocksize(5000, 2) == 250 def test_auto_blocksize_max64mb(): blocksize = auto_blocksize(1000000000000, 3) assert blocksize == int(64e6) assert isinstance(blocksize, int) def test_auto_blocksize_csv(monkeypatch): psutil = pytest.importorskip('psutil') try: from unittest import mock except ImportError: mock = pytest.importorskip('mock') total_memory = psutil.virtual_memory().total cpu_count = psutil.cpu_count() mock_read_bytes = mock.Mock(wraps=read_bytes) monkeypatch.setattr(dask.dataframe.io.csv, 'read_bytes', mock_read_bytes) expected_block_size = auto_blocksize(total_memory, cpu_count) with filetexts(csv_files, mode='b'): dd.read_csv('2014-01-01.csv') assert mock_read_bytes.called assert mock_read_bytes.call_args[1]['blocksize'] == expected_block_size def test_head_partial_line_fix(): files = {'.overflow1.csv': ('a,b\n' '0,"abcdefghijklmnopqrstuvwxyz"\n' '1,"abcdefghijklmnopqrstuvwxyz"'), '.overflow2.csv': ('a,b\n' '111111,-11111\n' '222222,-22222\n' '333333,-33333\n')} with filetexts(files): # 64 byte file, 52 characters is mid-quote; this should not cause exception in head-handling code. 
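# note: `sample` counts bytes, so the cutoff can land inside a quoted field; read_csv is expected to extend the sampled bytes to the end of the line rather than raise.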
dd.read_csv('.overflow1.csv', sample=52) # 35 characters is cuts off before the second number on the last line # Should sample to end of line, otherwise pandas will infer `b` to be # a float dtype df = dd.read_csv('.overflow2.csv', sample=35) assert (df.dtypes == 'i8').all() def test_read_csv_raises_on_no_files(): fn = '.not.a.real.file.csv' try: dd.read_csv(fn) assert False except (OSError, IOError) as e: assert fn in str(e) def test_read_csv_has_deterministic_name(): with filetext(csv_text) as fn: a = dd.read_csv(fn) b = dd.read_csv(fn) assert a._name == b._name assert sorted(a.dask.keys(), key=str) == sorted(b.dask.keys(), key=str) assert isinstance(a._name, str) c = dd.read_csv(fn, skiprows=1, na_values=[0]) assert a._name != c._name def test_multiple_read_csv_has_deterministic_name(): with filetexts({'_foo.1.csv': csv_text, '_foo.2.csv': csv_text}): a = dd.read_csv('_foo.*.csv') b = dd.read_csv('_foo.*.csv') assert sorted(a.dask.keys(), key=str) == sorted(b.dask.keys(), key=str) def test_csv_with_integer_names(): with filetext('alice,1\nbob,2') as fn: df = dd.read_csv(fn, header=None) assert list(df.columns) == [0, 1] @pytest.mark.slow def test_read_csv_of_modified_file_has_different_name(): with filetext(csv_text) as fn: sleep(1) a = dd.read_csv(fn) sleep(1) with open(fn, 'a') as f: f.write('\nGeorge,700') os.fsync(f) b = dd.read_csv(fn) assert sorted(a.dask, key=str) != sorted(b.dask, key=str) def test_late_dtypes(): text = 'numbers,names,more_numbers,integers,dates\n' for i in range(1000): text += '1,,2,3,2017-10-31 00:00:00\n' text += '1.5,bar,2.5,3,4998-01-01 00:00:00\n' date_msg = ("\n" "\n" "-------------------------------------------------------------\n" "\n" "The following columns also failed to properly parse as dates:\n" "\n" "- dates\n" "\n" "This is usually due to an invalid value in that column. 
To\n" "diagnose and fix it's recommended to drop these columns from the\n" "`parse_dates` keyword, and manually convert them to dates later\n" "using `dd.to_datetime`.") with filetext(text) as fn: sol = pd.read_csv(fn) msg = ("Mismatched dtypes found in `pd.read_csv`/`pd.read_table`.\n" "\n" "+--------------+---------+----------+\n" "| Column | Found | Expected |\n" "+--------------+---------+----------+\n" "| more_numbers | float64 | int64 |\n" "| names | object | float64 |\n" "| numbers | float64 | int64 |\n" "+--------------+---------+----------+\n" "\n" "- names\n" " ValueError(.*)\n" "\n" "Usually this is due to dask's dtype inference failing, and\n" "*may* be fixed by specifying dtypes manually by adding:\n" "\n" "dtype={'more_numbers': 'float64',\n" " 'names': 'object',\n" " 'numbers': 'float64'}\n" "\n" "to the call to `read_csv`/`read_table`.") with pytest.raises(ValueError) as e: dd.read_csv(fn, sample=50, parse_dates=['dates']).compute(get=dask.get) assert e.match(msg + date_msg) with pytest.raises(ValueError) as e: dd.read_csv(fn, sample=50).compute(get=dask.get) assert e.match(msg) msg = ("Mismatched dtypes found in `pd.read_csv`/`pd.read_table`.\n" "\n" "+--------------+---------+----------+\n" "| Column | Found | Expected |\n" "+--------------+---------+----------+\n" "| more_numbers | float64 | int64 |\n" "| numbers | float64 | int64 |\n" "+--------------+---------+----------+\n" "\n" "Usually this is due to dask's dtype inference failing, and\n" "*may* be fixed by specifying dtypes manually by adding:\n" "\n" "dtype={'more_numbers': 'float64',\n" " 'numbers': 'float64'}\n" "\n" "to the call to `read_csv`/`read_table`.\n" "\n" "Alternatively, provide `assume_missing=True` to interpret\n" "all unspecified integer columns as floats.") with pytest.raises(ValueError) as e: dd.read_csv(fn, sample=50, dtype={'names': 'O'}).compute(get=dask.get) assert str(e.value) == msg with pytest.raises(ValueError) as e: dd.read_csv(fn, sample=50, parse_dates=['dates'], dtype={'names': 'O'}).compute(get=dask.get) assert str(e.value) == msg + date_msg msg = ("Mismatched dtypes found in `pd.read_csv`/`pd.read_table`.\n" "\n" "The following columns failed to properly parse as dates:\n" "\n" "- dates\n" "\n" "This is usually due to an invalid value in that column. 
To\n" "diagnose and fix it's recommended to drop these columns from the\n" "`parse_dates` keyword, and manually convert them to dates later\n" "using `dd.to_datetime`.") with pytest.raises(ValueError) as e: dd.read_csv(fn, sample=50, parse_dates=['dates'], dtype={'more_numbers': float, 'names': object, 'numbers': float}).compute(get=dask.get) assert str(e.value) == msg # Specifying dtypes works res = dd.read_csv(fn, sample=50, dtype={'more_numbers': float, 'names': object, 'numbers': float}) assert_eq(res, sol) def test_assume_missing(): text = 'numbers,names,more_numbers,integers\n' for i in range(1000): text += '1,foo,2,3\n' text += '1.5,bar,2.5,3\n' with filetext(text) as fn: sol = pd.read_csv(fn) # assume_missing affects all columns res = dd.read_csv(fn, sample=50, assume_missing=True) assert_eq(res, sol.astype({'integers': float})) # assume_missing doesn't override specified dtypes res = dd.read_csv(fn, sample=50, assume_missing=True, dtype={'integers': 'int64'}) assert_eq(res, sol) # assume_missing works with dtype=None res = dd.read_csv(fn, sample=50, assume_missing=True, dtype=None) assert_eq(res, sol.astype({'integers': float})) text = 'numbers,integers\n' for i in range(1000): text += '1,2\n' text += '1.5,2\n' with filetext(text) as fn: sol = pd.read_csv(fn) # assume_missing ignored when all dtypes specified df = dd.read_csv(fn, sample=30, dtype='int64', assume_missing=True) assert df.numbers.dtype == 'int64' def test_index_col(): with filetext(csv_text) as fn: try: dd.read_csv(fn, blocksize=30, index_col='name') assert False except ValueError as e: assert 'set_index' in str(e) def test_read_csv_with_datetime_index_partitions_one(): with filetext(timeseries) as fn: df = pd.read_csv(fn, index_col=0, header=0, usecols=[0, 4], parse_dates=['Date']) # blocksize explicitly set to a single chunk ddf = dd.read_csv(fn, header=0, usecols=[0, 4], parse_dates=['Date'], blocksize=10000000).set_index('Date') assert_eq(df, ddf) # because fn is so small, by default, this will only be one chunk ddf = dd.read_csv(fn, header=0, usecols=[0, 4], parse_dates=['Date']).set_index('Date') assert_eq(df, ddf) def test_read_csv_with_datetime_index_partitions_n(): with filetext(timeseries) as fn: df = pd.read_csv(fn, index_col=0, header=0, usecols=[0, 4], parse_dates=['Date']) # because fn is so small, set a small blocksize to force multiple partitions ddf = dd.read_csv(fn, header=0, usecols=[0, 4], parse_dates=['Date'], blocksize=400).set_index('Date') assert_eq(df, ddf) @pytest.mark.parametrize('encoding', ['utf-16', 'utf-16-le', 'utf-16-be']) def test_encoding_gh601(encoding): ar = pd.Series(range(0, 100)) br = ar % 7 cr = br * 3.3 dr = br / 1.9836 test_df = pd.DataFrame({'a': ar, 'b': br, 'c': cr, 'd': dr}) with tmpfile('.csv') as fn: test_df.to_csv(fn, encoding=encoding, index=False) a = pd.read_csv(fn, encoding=encoding) d = dd.read_csv(fn, encoding=encoding, blocksize=1000) d = d.compute() d.index = range(len(d.index)) assert_eq(d, a) def test_read_csv_header_issue_823(): text = '''a b c-d\n1 2 3\n4 5 6'''.replace(' ', '\t') with filetext(text) as fn: df = dd.read_csv(fn, sep='\t') assert_eq(df, pd.read_csv(fn, sep='\t')) df = dd.read_csv(fn, delimiter='\t') assert_eq(df, pd.read_csv(fn, delimiter='\t')) def test_none_usecols(): with filetext(csv_text) as fn: df = dd.read_csv(fn, usecols=None) assert_eq(df, pd.read_csv(fn, usecols=None)) def test_parse_dates_multi_column(): pdmc_text = normalize_text(""" ID,date,time 10,2003-11-04,180036 11,2003-11-05,125640 12,2003-11-01,2519 13,2003-10-22,142559
14,2003-10-24,163113 15,2003-10-20,170133 16,2003-11-11,160448 17,2003-11-03,171759 18,2003-11-07,190928 19,2003-10-21,84623 20,2003-10-25,192207 21,2003-11-13,180156 22,2003-11-15,131037 """) with filetext(pdmc_text) as fn: ddf = dd.read_csv(fn, parse_dates=[['date', 'time']]) df = pd.read_csv(fn, parse_dates=[['date', 'time']]) assert (df.columns == ddf.columns).all() assert len(df) == len(ddf) def test_read_csv_sep(): sep_text = normalize_text(""" name###amount alice###100 bob###200 charlie###300""") with filetext(sep_text) as fn: ddf = dd.read_csv(fn, sep="###", engine="python") df = pd.read_csv(fn, sep="###", engine="python") assert (df.columns == ddf.columns).all() assert len(df) == len(ddf) def test_read_csv_slash_r(): data = b'0,my\n1,data\n' * 1000 + b'2,foo\rbar' with filetext(data, mode='wb') as fn: dd.read_csv(fn, header=None, sep=',', lineterminator='\n', names=['a', 'b'], blocksize=200).compute(get=dask.get) def test_read_csv_singleton_dtype(): data = b'a,b\n1,2\n3,4\n5,6' with filetext(data, mode='wb') as fn: assert_eq(pd.read_csv(fn, dtype=float), dd.read_csv(fn, dtype=float)) def test_robust_column_mismatch(): files = csv_files.copy() k = sorted(files)[-1] files[k] = files[k].replace(b'name', b'Name') with filetexts(files, mode='b'): ddf = dd.read_csv('2014-01-*.csv') df = pd.read_csv('2014-01-01.csv') assert (df.columns == ddf.columns).all() assert_eq(ddf, ddf) def test_error_if_sample_is_too_small(): text = ('AAAAA,BBBBB,CCCCC,DDDDD,EEEEE\n' '1,2,3,4,5\n' '6,7,8,9,10\n' '11,12,13,14,15') with filetext(text) as fn: # Sample size stops mid header row sample = 20 with pytest.raises(ValueError): dd.read_csv(fn, sample=sample) # Saying no header means this is fine assert_eq(dd.read_csv(fn, sample=sample, header=None), pd.read_csv(fn, header=None)) skiptext = ('# skip\n' '# these\n' '# lines\n') text = skiptext + text with filetext(text) as fn: # Sample size stops mid header row sample = 20 + len(skiptext) with pytest.raises(ValueError): dd.read_csv(fn, sample=sample, skiprows=3) # Saying no header means this is fine assert_eq(dd.read_csv(fn, sample=sample, header=None, skiprows=3), pd.read_csv(fn, header=None, skiprows=3)) ############ # to_csv # ############ def test_to_csv(): df = pd.DataFrame({'x': ['a', 'b', 'c', 'd'], 'y': [1, 2, 3, 4]}) for npartitions in [1, 2]: a = dd.from_pandas(df, npartitions) with tmpdir() as dn: a.to_csv(dn, index=False) result = dd.read_csv(os.path.join(dn, '*')).compute().reset_index(drop=True) assert_eq(result, df) with tmpdir() as dn: r = a.to_csv(dn, index=False, compute=False) dask.compute(*r, get=dask.get) result = dd.read_csv(os.path.join(dn, '*')).compute().reset_index(drop=True) assert_eq(result, df) with tmpdir() as dn: fn = os.path.join(dn, 'data_*.csv') a.to_csv(fn, index=False) result = dd.read_csv(fn).compute().reset_index(drop=True) assert_eq(result, df) def test_to_csv_multiple_files_cornercases(): df = pd.DataFrame({'x': ['a', 'b', 'c', 'd'], 'y': [1, 2, 3, 4]}) a = dd.from_pandas(df, 2) with tmpdir() as dn: with pytest.raises(ValueError): fn = os.path.join(dn, "data_*_*.csv") a.to_csv(fn) df16 = pd.DataFrame({'x': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p'], 'y': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}) a = dd.from_pandas(df16, 16) with tmpdir() as dn: fn = os.path.join(dn, 'data_*.csv') a.to_csv(fn, index=False) result = dd.read_csv(fn).compute().reset_index(drop=True) assert_eq(result, df16) # test handling existing files when links are optimized out a = dd.from_pandas(df, 
2) with tmpdir() as dn: a.to_csv(dn, index=False) fn = os.path.join(dn, 'data_*.csv') a.to_csv(fn, mode='w', index=False) result = dd.read_csv(fn).compute().reset_index(drop=True) assert_eq(result, df) # test handling existing files when links are optimized out a = dd.from_pandas(df16, 16) with tmpdir() as dn: a.to_csv(dn, index=False) fn = os.path.join(dn, 'data_*.csv') a.to_csv(fn, mode='w', index=False) result = dd.read_csv(fn).compute().reset_index(drop=True) assert_eq(result, df16) @pytest.mark.xfail(reason="to_csv does not support compression") def test_to_csv_gzip(): df = pd.DataFrame({'x': ['a', 'b', 'c', 'd'], 'y': [1, 2, 3, 4]}, index=[1., 2., 3., 4.]) for npartitions in [1, 2]: a = dd.from_pandas(df, npartitions) with tmpfile('csv') as fn: a.to_csv(fn, compression='gzip') result = pd.read_csv(fn, index_col=0, compression='gzip') tm.assert_frame_equal(result, df) def test_to_csv_simple(): df0 = pd.DataFrame({'x': ['a', 'b', 'c', 'd'], 'y': [1, 2, 3, 4]}, index=[1., 2., 3., 4.]) df = dd.from_pandas(df0, npartitions=2) with tmpdir() as dir: dir = str(dir) df.to_csv(dir) assert os.listdir(dir) result = dd.read_csv(os.path.join(dir, '*')).compute() assert (result.x.values == df0.x.values).all() def test_to_csv_series(): df0 = pd.Series(['a', 'b', 'c', 'd'], index=[1., 2., 3., 4.]) df = dd.from_pandas(df0, npartitions=2) with tmpdir() as dir: dir = str(dir) df.to_csv(dir) assert os.listdir(dir) result = dd.read_csv(os.path.join(dir, '*'), header=None, names=['x']).compute() assert (result.x == df0).all() def test_to_csv_with_get(): from dask.multiprocessing import get as mp_get flag = [False] def my_get(*args, **kwargs): flag[0] = True return mp_get(*args, **kwargs) df = pd.DataFrame({'x': ['a', 'b', 'c', 'd'], 'y': [1, 2, 3, 4]}) ddf = dd.from_pandas(df, npartitions=2) with tmpdir() as dn: ddf.to_csv(dn, index=False, get=my_get) assert flag[0] result = dd.read_csv(os.path.join(dn, '*')).compute().reset_index(drop=True) assert_eq(result, df) def test_to_csv_paths(): df = pd.DataFrame({"A": range(10)}) ddf = dd.from_pandas(df, npartitions=2) assert ddf.to_csv("foo*.csv") == ['foo0.csv', 'foo1.csv'] os.remove('foo0.csv') os.remove('foo1.csv') dask-0.16.0/dask/dataframe/io/tests/test_demo.py000066400000000000000000000046241320364734500215110ustar00rootroot00000000000000import pandas.util.testing as tm import pandas as pd import pytest import dask.dataframe as dd from dask.dataframe.utils import assert_eq def test_make_timeseries(): df = dd.demo.make_timeseries('2000', '2015', {'A': float, 'B': int, 'C': str}, freq='2D', partition_freq='6M') assert df.divisions[0] == pd.Timestamp('2000-01-31', freq='6M') assert df.divisions[-1] == pd.Timestamp('2014-07-31', freq='6M') tm.assert_index_equal(df.columns, pd.Index(['A', 'B', 'C'])) assert df['A'].head().dtype == float assert df['B'].head().dtype == int assert df['C'].head().dtype == object assert df.divisions == tuple(pd.DatetimeIndex(start='2000', end='2015', freq='6M')) tm.assert_frame_equal(df.head(), df.head()) a = dd.demo.make_timeseries('2000', '2015', {'A': float, 'B': int, 'C': str}, freq='2D', partition_freq='6M', seed=123) b = dd.demo.make_timeseries('2000', '2015', {'A': float, 'B': int, 'C': str}, freq='2D', partition_freq='6M', seed=123) c = dd.demo.make_timeseries('2000', '2015', {'A': float, 'B': int, 'C': str}, freq='2D', partition_freq='6M', seed=456) d = dd.demo.make_timeseries('2000', '2015', {'A': float, 'B': int, 'C': str}, freq='2D', partition_freq='3M', seed=123) e = dd.demo.make_timeseries('2000', '2015', {'A': float, 
'B': int, 'C': str}, freq='1D', partition_freq='6M', seed=123) tm.assert_frame_equal(a.head(), b.head()) assert not (a.head(10) == c.head(10)).all().all() assert a._name == b._name assert a._name != c._name assert a._name != d._name assert a._name != e._name def test_no_overlaps(): df = dd.demo.make_timeseries('2000', '2001', {'A': float}, freq='3H', partition_freq='3M') assert all(df.get_partition(i).index.max().compute() < df.get_partition(i + 1).index.min().compute() for i in range(df.npartitions - 2)) @pytest.mark.xfail @pytest.mark.network def test_daily_stock(): pytest.importorskip('pandas_datareader') df = dd.demo.daily_stock('GOOG', start='2010-01-01', stop='2010-01-30', freq='1h') assert isinstance(df, dd.DataFrame) assert 10 < df.npartitions < 31 assert_eq(df, df) dask-0.16.0/dask/dataframe/io/tests/test_hdf.py000066400000000000000000000454231320364734500213300ustar00rootroot00000000000000import numpy as np import pandas as pd import pandas.util.testing as tm import sys import os import dask import pytest from time import sleep import dask.dataframe as dd from dask.utils import tmpfile, tmpdir, dependency_depth from dask.dataframe.utils import assert_eq def test_to_hdf(): pytest.importorskip('tables') df = pd.DataFrame({'x': ['a', 'b', 'c', 'd'], 'y': [1, 2, 3, 4]}, index=[1., 2., 3., 4.]) a = dd.from_pandas(df, 2) with tmpfile('h5') as fn: a.to_hdf(fn, '/data') out = pd.read_hdf(fn, '/data') tm.assert_frame_equal(df, out[:]) with tmpfile('h5') as fn: a.x.to_hdf(fn, '/data') out = pd.read_hdf(fn, '/data') tm.assert_series_equal(df.x, out[:]) a = dd.from_pandas(df, 1) with tmpfile('h5') as fn: a.to_hdf(fn, '/data') out = pd.read_hdf(fn, '/data') tm.assert_frame_equal(df, out[:]) # test compute = False with tmpfile('h5') as fn: r = a.to_hdf(fn, '/data', compute=False) r.compute() out = pd.read_hdf(fn, '/data') tm.assert_frame_equal(df, out[:]) def test_to_hdf_multiple_nodes(): pytest.importorskip('tables') df = pd.DataFrame({'x': ['a', 'b', 'c', 'd'], 'y': [1, 2, 3, 4]}, index=[1., 2., 3., 4.]) a = dd.from_pandas(df, 2) df16 = pd.DataFrame({'x': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p'], 'y': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}, index=[1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.]) b = dd.from_pandas(df16, 16) # saving to multiple nodes with tmpfile('h5') as fn: a.to_hdf(fn, '/data*') out = dd.read_hdf(fn, '/data*') assert_eq(df, out) # saving to multiple nodes making sure order is kept with tmpfile('h5') as fn: b.to_hdf(fn, '/data*') out = dd.read_hdf(fn, '/data*') assert_eq(df16, out) # saving to multiple datasets with custom name_function with tmpfile('h5') as fn: a.to_hdf(fn, '/data_*', name_function=lambda i: 'a' * (i + 1)) out = dd.read_hdf(fn, '/data_*') assert_eq(df, out) out = pd.read_hdf(fn, '/data_a') tm.assert_frame_equal(out, df.iloc[:2]) out = pd.read_hdf(fn, '/data_aa') tm.assert_frame_equal(out, df.iloc[2:]) # test multiple nodes with hdf object with tmpfile('h5') as fn: with pd.HDFStore(fn) as hdf: b.to_hdf(hdf, '/data*') out = dd.read_hdf(fn, '/data*') assert_eq(df16, out) def test_to_hdf_multiple_files(): pytest.importorskip('tables') df = pd.DataFrame({'x': ['a', 'b', 'c', 'd'], 'y': [1, 2, 3, 4]}, index=[1., 2., 3., 4.]) a = dd.from_pandas(df, 2) df16 = pd.DataFrame({'x': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p'], 'y': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}, index=[1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 
14., 15., 16.]) b = dd.from_pandas(df16, 16) # saving to multiple files with tmpdir() as dn: fn = os.path.join(dn, 'data_*.h5') a.to_hdf(fn, '/data') out = dd.read_hdf(fn, '/data') assert_eq(df, out) # saving to multiple files making sure order is kept with tmpdir() as dn: fn = os.path.join(dn, 'data_*.h5') b.to_hdf(fn, '/data') out = dd.read_hdf(fn, '/data') assert_eq(df16, out) # saving to multiple files with custom name_function with tmpdir() as dn: fn = os.path.join(dn, 'data_*.h5') a.to_hdf(fn, '/data', name_function=lambda i: 'a' * (i + 1)) out = dd.read_hdf(fn, '/data') assert_eq(df, out) out = pd.read_hdf(os.path.join(dn, 'data_a.h5'), '/data') tm.assert_frame_equal(out, df.iloc[:2]) out = pd.read_hdf(os.path.join(dn, 'data_aa.h5'), '/data') tm.assert_frame_equal(out, df.iloc[2:]) # test hdf object with tmpfile('h5') as fn: with pd.HDFStore(fn) as hdf: a.to_hdf(hdf, '/data*') out = dd.read_hdf(fn, '/data*') assert_eq(df, out) def test_to_hdf_modes_multiple_nodes(): pytest.importorskip('tables') df = pd.DataFrame({'x': ['a', 'b', 'c', 'd'], 'y': [1, 2, 3, 4]}, index=[1., 2., 3., 4.]) # appending a single partition to existing data a = dd.from_pandas(df, 1) with tmpfile('h5') as fn: a.to_hdf(fn, '/data2') a.to_hdf(fn, '/data*', mode='a') out = dd.read_hdf(fn, '/data*') assert_eq(df.append(df), out) # overwriting a file with a single partition a = dd.from_pandas(df, 1) with tmpfile('h5') as fn: a.to_hdf(fn, '/data2') a.to_hdf(fn, '/data*', mode='w') out = dd.read_hdf(fn, '/data*') assert_eq(df, out) # appending two partitions to existing data a = dd.from_pandas(df, 2) with tmpfile('h5') as fn: a.to_hdf(fn, '/data2') a.to_hdf(fn, '/data*', mode='a') out = dd.read_hdf(fn, '/data*') assert_eq(df.append(df), out) # overwriting a file with two partitions a = dd.from_pandas(df, 2) with tmpfile('h5') as fn: a.to_hdf(fn, '/data2') a.to_hdf(fn, '/data*', mode='w') out = dd.read_hdf(fn, '/data*') assert_eq(df, out) # overwriting a single partition, keeping other partitions a = dd.from_pandas(df, 2) with tmpfile('h5') as fn: a.to_hdf(fn, '/data1') a.to_hdf(fn, '/data2') a.to_hdf(fn, '/data*', mode='a', append=False) out = dd.read_hdf(fn, '/data*') assert_eq(df.append(df), out) def test_to_hdf_modes_multiple_files(): pytest.importorskip('tables') df = pd.DataFrame({'x': ['a', 'b', 'c', 'd'], 'y': [1, 2, 3, 4]}, index=[1., 2., 3., 4.]) # appending a single partition to existing data a = dd.from_pandas(df, 1) with tmpdir() as dn: fn = os.path.join(dn, 'data*') a.to_hdf(os.path.join(dn, 'data2'), '/data') a.to_hdf(fn, '/data', mode='a') out = dd.read_hdf(fn, '/data*') assert_eq(df.append(df), out) # appending two partitions to existing data a = dd.from_pandas(df, 2) with tmpdir() as dn: fn = os.path.join(dn, 'data*') a.to_hdf(os.path.join(dn, 'data2'), '/data') a.to_hdf(fn, '/data', mode='a') out = dd.read_hdf(fn, '/data') assert_eq(df.append(df), out) # overwriting a file with two partitions a = dd.from_pandas(df, 2) with tmpdir() as dn: fn = os.path.join(dn, 'data*') a.to_hdf(os.path.join(dn, 'data1'), '/data') a.to_hdf(fn, '/data', mode='w') out = dd.read_hdf(fn, '/data') assert_eq(df, out) # overwriting a single partition, keeping other partitions a = dd.from_pandas(df, 2) with tmpdir() as dn: fn = os.path.join(dn, 'data*') a.to_hdf(os.path.join(dn, 'data1'), '/data') a.to_hdf(fn, '/data', mode='a', append=False) out = dd.read_hdf(fn, '/data') assert_eq(df.append(df), out) def test_to_hdf_link_optimizations(): """testing dask link levels is correct by calculating the depth of the dask graph""" 
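# note: dependency_depth (imported above from dask.utils) measures the longest chain of task dependencies in the graph; it is used here as a proxy for how the to_hdf writes were linked together.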
pytest.importorskip('tables') df16 = pd.DataFrame({'x': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p'], 'y': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}, index=[1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.]) a = dd.from_pandas(df16, 16) # saving to multiple hdf files, no links are needed # expected layers: from_pandas, to_hdf, list = depth of 3 with tmpdir() as dn: fn = os.path.join(dn, 'data*') d = a.to_hdf(fn, '/data', compute=False) assert dependency_depth(d.dask) == 3 # saving to a single hdf file with multiple nodes # all subsequent nodes depend on the first # expected layers: from_pandas, first to_hdf(creates file+node), subsequent to_hdfs, list = 4 with tmpfile() as fn: d = a.to_hdf(fn, '/data*', compute=False) assert dependency_depth(d.dask) == 4 # saving to a single hdf file with a single node # every node depends on the previous node # expected layers: from_pandas, to_hdf times npartitions(15), list = 2 + npartitions = 17 with tmpfile() as fn: d = a.to_hdf(fn, '/data', compute=False) assert dependency_depth(d.dask) == 2 + a.npartitions @pytest.mark.slow def test_to_hdf_lock_delays(): pytest.importorskip('tables') df16 = pd.DataFrame({'x': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p'], 'y': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}, index=[1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.]) a = dd.from_pandas(df16, 16) # adding artificial delays to make sure the last tasks finish first, # simulating partitions completing out of order def delayed_nop(i): if i[1] < 10: sleep(0.1 * (10 - i[1])) return i # saving to multiple hdf nodes with tmpfile() as fn: a = a.apply(delayed_nop, axis=1, meta=a) a.to_hdf(fn, '/data*') out = dd.read_hdf(fn, '/data*') assert_eq(df16, out) # saving to multiple hdf files # adding artificial delays to make sure the last tasks finish first with tmpdir() as dn: fn = os.path.join(dn, 'data*') a = a.apply(delayed_nop, axis=1, meta=a) a.to_hdf(fn, '/data') out = dd.read_hdf(fn, '/data') assert_eq(df16, out) def test_to_hdf_exceptions(): pytest.importorskip('tables') df = pd.DataFrame({'x': ['a', 'b', 'c', 'd'], 'y': [1, 2, 3, 4]}, index=[1., 2., 3., 4.]) a = dd.from_pandas(df, 1) # triggering too many asterisks error with tmpdir() as dn: with pytest.raises(ValueError): fn = os.path.join(dn, 'data_*.h5') a.to_hdf(fn, '/data_*') # triggering too many asterisks error with tmpfile() as fn: with pd.HDFStore(fn) as hdf: with pytest.raises(ValueError): a.to_hdf(hdf, '/data_*_*') @pytest.mark.parametrize('get', [dask.get, dask.threaded.get, dask.multiprocessing.get]) @pytest.mark.parametrize('npartitions', [1, 4, 10]) def test_to_hdf_schedulers(get, npartitions): pytest.importorskip('tables') df = pd.DataFrame({'x': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p'], 'y': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}, index=[1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.]) a = dd.from_pandas(df, npartitions=npartitions) # test single file single node with tmpfile('h5') as fn: a.to_hdf(fn, '/data', get=get) out = pd.read_hdf(fn, '/data') assert_eq(df, out) # test multiple files single node with tmpdir() as dn: fn = os.path.join(dn, 'data_*.h5') a.to_hdf(fn, '/data', get=get) out = dd.read_hdf(fn, '/data') assert_eq(df, out) # test single file multiple nodes with tmpfile('h5') as fn: a.to_hdf(fn, '/data*', get=get) out = dd.read_hdf(fn, '/data*') assert_eq(df, out) def
test_to_hdf_kwargs(): pytest.importorskip('tables') df = pd.DataFrame({'A': ['a', 'aaaa']}) ddf = dd.from_pandas(df, npartitions=2) with tmpfile('h5') as fn: ddf.to_hdf(fn, 'foo4', format='table', min_itemsize=4) df2 = pd.read_hdf(fn, 'foo4') tm.assert_frame_equal(df, df2) @pytest.mark.skipif(sys.version_info[:2] == (3, 3), reason="Python3.3 uses pytest2.7.2, w/o warns method") def test_to_fmt_warns(): pytest.importorskip('tables') df16 = pd.DataFrame({'x': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p'], 'y': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}, index=[1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.]) a = dd.from_pandas(df16, 16) # testing warning when breaking order with tmpfile('h5') as fn: with pytest.warns(None): a.to_hdf(fn, '/data*', name_function=str) # testing warning when breaking order with tmpdir() as dn: with pytest.warns(None): fn = os.path.join(dn, "data_*.csv") a.to_csv(fn, name_function=str) @pytest.mark.parametrize('data, compare', [ (pd.DataFrame({'x': ['a', 'b', 'c', 'd'], 'y': [1, 2, 3, 4]}, index=[1., 2., 3., 4.]), tm.assert_frame_equal), (pd.Series([1, 2, 3, 4], name='a'), tm.assert_series_equal), ]) def test_read_hdf(data, compare): pytest.importorskip('tables') with tmpfile('h5') as fn: data.to_hdf(fn, '/data') try: dd.read_hdf(fn, 'data', chunksize=2, mode='r') assert False except TypeError as e: assert "format='table'" in str(e) with tmpfile('h5') as fn: data.to_hdf(fn, '/data', format='table') a = dd.read_hdf(fn, '/data', chunksize=2, mode='r') assert a.npartitions == 2 compare(a.compute(), data) compare(dd.read_hdf(fn, '/data', chunksize=2, start=1, stop=3, mode='r').compute(), pd.read_hdf(fn, '/data', start=1, stop=3)) assert (sorted(dd.read_hdf(fn, '/data', mode='r').dask) == sorted(dd.read_hdf(fn, '/data', mode='r').dask)) def test_read_hdf_multiply_open(): """Test that we can read from a file that's already opened elsewhere in read-only mode.""" pytest.importorskip('tables') df = pd.DataFrame({'x': ['a', 'b', 'c', 'd'], 'y': [1, 2, 3, 4]}, index=[1., 2., 3., 4.]) with tmpfile('h5') as fn: df.to_hdf(fn, '/data', format='table') with pd.HDFStore(fn, mode='r'): dd.read_hdf(fn, '/data', chunksize=2, mode='r') def test_read_hdf_multiple(): pytest.importorskip('tables') df = pd.DataFrame({'x': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p'], 'y': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}, index=[1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.]) a = dd.from_pandas(df, 16) with tmpfile('h5') as fn: a.to_hdf(fn, '/data*') r = dd.read_hdf(fn, '/data*', sorted_index=True) assert a.npartitions == r.npartitions assert a.divisions == r.divisions assert_eq(a, r) def test_read_hdf_start_stop_values(): pytest.importorskip('tables') df = pd.DataFrame({'x': ['a', 'b', 'c', 'd'], 'y': [1, 2, 3, 4]}, index=[1., 2., 3., 4.]) with tmpfile('h5') as fn: df.to_hdf(fn, '/data', format='table') with pytest.raises(ValueError) as e: dd.read_hdf(fn, '/data', stop=10) assert 'number of rows' in str(e) with pytest.raises(ValueError) as e: dd.read_hdf(fn, '/data', start=10) assert 'is above or equal to' in str(e) with pytest.raises(ValueError) as e: dd.read_hdf(fn, '/data', chunksize=-1) assert 'positive integer' in str(e) def test_hdf_globbing(): pytest.importorskip('tables') df = pd.DataFrame({'x': ['a', 'b', 'c', 'd'], 'y': [1, 2, 3, 4]}, index=[1., 2., 3., 4.]) with tmpdir() as tdir: df.to_hdf(os.path.join(tdir, 'one.h5'), '/foo/data', 
format='table') df.to_hdf(os.path.join(tdir, 'two.h5'), '/bar/data', format='table') df.to_hdf(os.path.join(tdir, 'two.h5'), '/foo/data', format='table') with dask.set_options(get=dask.get): res = dd.read_hdf(os.path.join(tdir, 'one.h5'), '/*/data', chunksize=2) assert res.npartitions == 2 tm.assert_frame_equal(res.compute(), df) res = dd.read_hdf(os.path.join(tdir, 'one.h5'), '/*/data', chunksize=2, start=1, stop=3) expected = pd.read_hdf(os.path.join(tdir, 'one.h5'), '/foo/data', start=1, stop=3) tm.assert_frame_equal(res.compute(), expected) res = dd.read_hdf(os.path.join(tdir, 'two.h5'), '/*/data', chunksize=2) assert res.npartitions == 2 + 2 tm.assert_frame_equal(res.compute(), pd.concat([df] * 2)) res = dd.read_hdf(os.path.join(tdir, '*.h5'), '/foo/data', chunksize=2) assert res.npartitions == 2 + 2 tm.assert_frame_equal(res.compute(), pd.concat([df] * 2)) res = dd.read_hdf(os.path.join(tdir, '*.h5'), '/*/data', chunksize=2) assert res.npartitions == 2 + 2 + 2 tm.assert_frame_equal(res.compute(), pd.concat([df] * 3)) def test_hdf_file_list(): pytest.importorskip('tables') df = pd.DataFrame({'x': ['a', 'b', 'c', 'd'], 'y': [1, 2, 3, 4]}, index=[1., 2., 3., 4.]) with tmpdir() as tdir: df.iloc[:2].to_hdf(os.path.join(tdir, 'one.h5'), 'dataframe', format='table') df.iloc[2:].to_hdf(os.path.join(tdir, 'two.h5'), 'dataframe', format='table') with dask.set_options(get=dask.get): input_files = [os.path.join(tdir, 'one.h5'), os.path.join(tdir, 'two.h5')] res = dd.read_hdf(input_files, 'dataframe') tm.assert_frame_equal(res.compute(), df) def test_read_hdf_doesnt_segfault(): pytest.importorskip('tables') with tmpfile('h5') as fn: N = 40 df = pd.DataFrame(np.random.randn(N, 3)) with pd.HDFStore(fn, mode='w') as store: store.append('/x', df) ddf = dd.read_hdf(fn, '/x', chunksize=2) assert len(ddf) == N def test_hdf_filenames(): df = pd.DataFrame({'x': ['a', 'b', 'c', 'd'], 'y': [1, 2, 3, 4]}, index=[1., 2., 3., 4.]) ddf = dd.from_pandas(df, npartitions=2) assert ddf.to_hdf("foo*.hdf5", "key") == ["foo0.hdf5", "foo1.hdf5"] os.remove("foo0.hdf5") os.remove("foo1.hdf5") dask-0.16.0/dask/dataframe/io/tests/test_io.py000066400000000000000000000440251320364734500211730ustar00rootroot00000000000000import numpy as np import pandas as pd import pandas.util.testing as tm import pytest from threading import Lock from multiprocessing.pool import ThreadPool import dask.array as da import dask.dataframe as dd from dask.dataframe.io.io import _meta_from_array from dask.delayed import Delayed, delayed from dask.utils import tmpfile from dask.local import get_sync from dask.dataframe.utils import assert_eq, is_categorical_dtype #################### # Arrays and BColz # #################### def test_meta_from_array(): x = np.array([[1, 2], [3, 4]], dtype=np.int64) res = _meta_from_array(x) assert isinstance(res, pd.DataFrame) assert res[0].dtype == np.int64 assert res[1].dtype == np.int64 tm.assert_index_equal(res.columns, pd.Index([0, 1])) x = np.array([[1., 2.], [3., 4.]], dtype=np.float64) res = _meta_from_array(x, columns=['a', 'b']) assert isinstance(res, pd.DataFrame) assert res['a'].dtype == np.float64 assert res['b'].dtype == np.float64 tm.assert_index_equal(res.columns, pd.Index(['a', 'b'])) with pytest.raises(ValueError): _meta_from_array(x, columns=['a', 'b', 'c']) np.random.seed(42) x = np.random.rand(201, 2) x = dd.from_array(x, chunksize=50, columns=['a', 'b']) assert len(x.divisions) == 6 # Should be 5 partitions and the end def test_meta_from_1darray(): x = np.array([1., 2., 3.], dtype=np.float64) 
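# _meta_from_array is expected to return an empty pandas object (a Series for 1-d input) whose dtype describes the eventual result; the assertions below only check type, dtype and name metadata.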
res = _meta_from_array(x) assert isinstance(res, pd.Series) assert res.dtype == np.float64 x = np.array([1, 2, 3], dtype=np.object_) res = _meta_from_array(x, columns='x') assert isinstance(res, pd.Series) assert res.name == 'x' assert res.dtype == np.object_ x = np.array([1, 2, 3], dtype=np.object_) res = _meta_from_array(x, columns=['x']) assert isinstance(res, pd.DataFrame) assert res['x'].dtype == np.object_ tm.assert_index_equal(res.columns, pd.Index(['x'])) with pytest.raises(ValueError): _meta_from_array(x, columns=['a', 'b']) def test_meta_from_recarray(): x = np.array([(i, i * 10) for i in range(10)], dtype=[('a', np.float64), ('b', np.int64)]) res = _meta_from_array(x) assert isinstance(res, pd.DataFrame) assert res['a'].dtype == np.float64 assert res['b'].dtype == np.int64 tm.assert_index_equal(res.columns, pd.Index(['a', 'b'])) res = _meta_from_array(x, columns=['b', 'a']) assert isinstance(res, pd.DataFrame) assert res['a'].dtype == np.float64 assert res['b'].dtype == np.int64 tm.assert_index_equal(res.columns, pd.Index(['b', 'a'])) with pytest.raises(ValueError): _meta_from_array(x, columns=['a', 'b', 'c']) def test_from_array(): x = np.arange(10 * 3).reshape(10, 3) d = dd.from_array(x, chunksize=4) assert isinstance(d, dd.DataFrame) tm.assert_index_equal(d.columns, pd.Index([0, 1, 2])) assert d.divisions == (0, 4, 8, 9) assert (d.compute().values == x).all() d = dd.from_array(x, chunksize=4, columns=list('abc')) assert isinstance(d, dd.DataFrame) tm.assert_index_equal(d.columns, pd.Index(['a', 'b', 'c'])) assert d.divisions == (0, 4, 8, 9) assert (d.compute().values == x).all() with pytest.raises(ValueError): dd.from_array(np.ones(shape=(10, 10, 10))) def test_from_array_with_record_dtype(): x = np.array([(i, i * 10) for i in range(10)], dtype=[('a', 'i4'), ('b', 'i4')]) d = dd.from_array(x, chunksize=4) assert isinstance(d, dd.DataFrame) assert list(d.columns) == ['a', 'b'] assert d.divisions == (0, 4, 8, 9) assert (d.compute().to_records(index=False) == x).all() def test_from_bcolz_multiple_threads(): bcolz = pytest.importorskip('bcolz') pool = ThreadPool(processes=5) def check(i): t = bcolz.ctable([[1, 2, 3], [1., 2., 3.], ['a', 'b', 'a']], names=['x', 'y', 'a']) d = dd.from_bcolz(t, chunksize=2) assert d.npartitions == 2 assert is_categorical_dtype(d.dtypes['a']) assert list(d.x.compute(get=get_sync)) == [1, 2, 3] assert list(d.a.compute(get=get_sync)) == ['a', 'b', 'a'] d = dd.from_bcolz(t, chunksize=2, index='x') L = list(d.index.compute(get=get_sync)) assert L == [1, 2, 3] or L == [1, 3, 2] # Names assert (sorted(dd.from_bcolz(t, chunksize=2).dask) == sorted(dd.from_bcolz(t, chunksize=2).dask)) assert (sorted(dd.from_bcolz(t, chunksize=2).dask) != sorted(dd.from_bcolz(t, chunksize=3).dask)) pool.map(check, range(5)) def test_from_bcolz(): bcolz = pytest.importorskip('bcolz') t = bcolz.ctable([[1, 2, 3], [1., 2., 3.], ['a', 'b', 'a']], names=['x', 'y', 'a']) d = dd.from_bcolz(t, chunksize=2) assert d.npartitions == 2 assert is_categorical_dtype(d.dtypes['a']) assert list(d.x.compute(get=get_sync)) == [1, 2, 3] assert list(d.a.compute(get=get_sync)) == ['a', 'b', 'a'] L = list(d.index.compute(get=get_sync)) assert L == [0, 1, 2] d = dd.from_bcolz(t, chunksize=2, index='x') L = list(d.index.compute(get=get_sync)) assert L == [1, 2, 3] or L == [1, 3, 2] # Names assert (sorted(dd.from_bcolz(t, chunksize=2).dask) == sorted(dd.from_bcolz(t, chunksize=2).dask)) assert (sorted(dd.from_bcolz(t, chunksize=2).dask) != sorted(dd.from_bcolz(t, chunksize=3).dask)) dsk = 
dd.from_bcolz(t, chunksize=3).dask t.append((4, 4., 'b')) t.flush() assert (sorted(dd.from_bcolz(t, chunksize=2).dask) != sorted(dsk)) def test_from_bcolz_no_lock(): bcolz = pytest.importorskip('bcolz') locktype = type(Lock()) t = bcolz.ctable([[1, 2, 3], [1., 2., 3.], ['a', 'b', 'a']], names=['x', 'y', 'a'], chunklen=2) a = dd.from_bcolz(t, chunksize=2) b = dd.from_bcolz(t, chunksize=2, lock=True) c = dd.from_bcolz(t, chunksize=2, lock=False) assert_eq(a, b) assert_eq(a, c) assert not any(isinstance(item, locktype) for v in c.dask.values() for item in v) def test_from_bcolz_filename(): bcolz = pytest.importorskip('bcolz') with tmpfile('.bcolz') as fn: t = bcolz.ctable([[1, 2, 3], [1., 2., 3.], ['a', 'b', 'a']], names=['x', 'y', 'a'], rootdir=fn) t.flush() d = dd.from_bcolz(fn, chunksize=2) assert list(d.x.compute()) == [1, 2, 3] def test_from_bcolz_column_order(): bcolz = pytest.importorskip('bcolz') t = bcolz.ctable([[1, 2, 3], [1., 2., 3.], ['a', 'b', 'a']], names=['x', 'y', 'a']) df = dd.from_bcolz(t, chunksize=2) assert list(df.loc[0].compute().columns) == ['x', 'y', 'a'] def test_from_pandas_dataframe(): a = list('aaaaaaabbbbbbbbccccccc') df = pd.DataFrame(dict(a=a, b=np.random.randn(len(a))), index=pd.date_range(start='20120101', periods=len(a))) ddf = dd.from_pandas(df, 3) assert len(ddf.dask) == 3 assert len(ddf.divisions) == len(ddf.dask) + 1 assert isinstance(ddf.divisions[0], type(df.index[0])) tm.assert_frame_equal(df, ddf.compute()) ddf = dd.from_pandas(df, chunksize=8) msg = 'Exactly one of npartitions and chunksize must be specified.' with pytest.raises(ValueError) as err: dd.from_pandas(df, npartitions=2, chunksize=2) assert msg in str(err.value) with pytest.raises((ValueError, AssertionError)) as err: dd.from_pandas(df) assert msg in str(err.value) assert len(ddf.dask) == 3 assert len(ddf.divisions) == len(ddf.dask) + 1 assert isinstance(ddf.divisions[0], type(df.index[0])) tm.assert_frame_equal(df, ddf.compute()) def test_from_pandas_small(): df = pd.DataFrame({'x': [1, 2, 3]}) for i in [1, 2, 30]: a = dd.from_pandas(df, i) assert len(a.compute()) == 3 assert a.divisions[0] == 0 assert a.divisions[-1] == 2 a = dd.from_pandas(df, chunksize=i) assert len(a.compute()) == 3 assert a.divisions[0] == 0 assert a.divisions[-1] == 2 for sort in [True, False]: for i in [0, 2]: df = pd.DataFrame({'x': [0] * i}) ddf = dd.from_pandas(df, npartitions=5, sort=sort) assert_eq(df, ddf) s = pd.Series([0] * i, name='x') ds = dd.from_pandas(s, npartitions=5, sort=sort) assert_eq(s, ds) @pytest.mark.xfail(reason="") def test_from_pandas_npartitions_is_accurate(): df = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6], 'y': list('abdabd')}, index=[10, 20, 30, 40, 50, 60]) for n in [1, 2, 4, 5]: assert dd.from_pandas(df, npartitions=n).npartitions == n def test_from_pandas_series(): n = 20 s = pd.Series(np.random.randn(n), index=pd.date_range(start='20120101', periods=n)) ds = dd.from_pandas(s, 3) assert len(ds.dask) == 3 assert len(ds.divisions) == len(ds.dask) + 1 assert isinstance(ds.divisions[0], type(s.index[0])) tm.assert_series_equal(s, ds.compute()) ds = dd.from_pandas(s, chunksize=8) assert len(ds.dask) == 3 assert len(ds.divisions) == len(ds.dask) + 1 assert isinstance(ds.divisions[0], type(s.index[0])) tm.assert_series_equal(s, ds.compute()) def test_from_pandas_non_sorted(): df = pd.DataFrame({'x': [1, 2, 3]}, index=[3, 1, 2]) ddf = dd.from_pandas(df, npartitions=2, sort=False) assert not ddf.known_divisions assert_eq(df, ddf) ddf = dd.from_pandas(df, chunksize=2, sort=False) assert not 
ddf.known_divisions assert_eq(df, ddf) def test_from_pandas_single_row(): df = pd.DataFrame({'x': [1]}, index=[1]) ddf = dd.from_pandas(df, npartitions=1) assert ddf.divisions == (1, 1) assert_eq(ddf, df) @pytest.mark.skipif(np.__version__ < '1.11', reason='datetime unit unsupported in NumPy < 1.11') def test_from_pandas_with_datetime_index(): df = pd.DataFrame({"Date": ["2015-08-28", "2015-08-27", "2015-08-26", "2015-08-25", "2015-08-24", "2015-08-21", "2015-08-20", "2015-08-19", "2015-08-18"], "Val": list(range(9))}) df.Date = df.Date.astype('datetime64[ns]') ddf = dd.from_pandas(df, 2) assert_eq(df, ddf) ddf = dd.from_pandas(df, chunksize=2) assert_eq(df, ddf) def test_DataFrame_from_dask_array(): x = da.ones((10, 3), chunks=(4, 2)) df = dd.from_dask_array(x, ['a', 'b', 'c']) assert isinstance(df, dd.DataFrame) tm.assert_index_equal(df.columns, pd.Index(['a', 'b', 'c'])) assert list(df.divisions) == [0, 4, 8, 9] assert (df.compute(get=get_sync).values == x.compute(get=get_sync)).all() # dd.from_array should re-route to from_dask_array df2 = dd.from_array(x, columns=['a', 'b', 'c']) assert isinstance(df, dd.DataFrame) tm.assert_index_equal(df2.columns, df.columns) assert df2.divisions == df.divisions def test_Series_from_dask_array(): x = da.ones(10, chunks=4) ser = dd.from_dask_array(x, 'a') assert isinstance(ser, dd.Series) assert ser.name == 'a' assert list(ser.divisions) == [0, 4, 8, 9] assert (ser.compute(get=get_sync).values == x.compute(get=get_sync)).all() ser = dd.from_dask_array(x) assert isinstance(ser, dd.Series) assert ser.name is None # dd.from_array should re-route to from_dask_array ser2 = dd.from_array(x) assert isinstance(ser2, dd.Series) assert_eq(ser, ser2) def test_from_dask_array_compat_numpy_array(): x = da.ones((3, 3, 3), chunks=2) with pytest.raises(ValueError): dd.from_dask_array(x) # dask with pytest.raises(ValueError): dd.from_array(x.compute()) # numpy x = da.ones((10, 3), chunks=(3, 3)) d1 = dd.from_dask_array(x) # dask assert isinstance(d1, dd.DataFrame) assert (d1.compute().values == x.compute()).all() tm.assert_index_equal(d1.columns, pd.Index([0, 1, 2])) d2 = dd.from_array(x.compute()) # numpy assert isinstance(d1, dd.DataFrame) assert (d2.compute().values == x.compute()).all() tm.assert_index_equal(d2.columns, pd.Index([0, 1, 2])) with pytest.raises(ValueError): dd.from_dask_array(x, columns=['a']) # dask with pytest.raises(ValueError): dd.from_array(x.compute(), columns=['a']) # numpy d1 = dd.from_dask_array(x, columns=['a', 'b', 'c']) # dask assert isinstance(d1, dd.DataFrame) assert (d1.compute().values == x.compute()).all() tm.assert_index_equal(d1.columns, pd.Index(['a', 'b', 'c'])) d2 = dd.from_array(x.compute(), columns=['a', 'b', 'c']) # numpy assert isinstance(d1, dd.DataFrame) assert (d2.compute().values == x.compute()).all() tm.assert_index_equal(d2.columns, pd.Index(['a', 'b', 'c'])) def test_from_dask_array_compat_numpy_array_1d(): x = da.ones(10, chunks=3) d1 = dd.from_dask_array(x) # dask assert isinstance(d1, dd.Series) assert (d1.compute().values == x.compute()).all() assert d1.name is None d2 = dd.from_array(x.compute()) # numpy assert isinstance(d1, dd.Series) assert (d2.compute().values == x.compute()).all() assert d2.name is None d1 = dd.from_dask_array(x, columns='name') # dask assert isinstance(d1, dd.Series) assert (d1.compute().values == x.compute()).all() assert d1.name == 'name' d2 = dd.from_array(x.compute(), columns='name') # numpy assert isinstance(d1, dd.Series) assert (d2.compute().values == x.compute()).all() assert 
d2.name == 'name' # passing list via columns results in DataFrame d1 = dd.from_dask_array(x, columns=['name']) # dask assert isinstance(d1, dd.DataFrame) assert (d1.compute().values == x.compute()).all() tm.assert_index_equal(d1.columns, pd.Index(['name'])) d2 = dd.from_array(x.compute(), columns=['name']) # numpy assert isinstance(d1, dd.DataFrame) assert (d2.compute().values == x.compute()).all() tm.assert_index_equal(d2.columns, pd.Index(['name'])) def test_from_dask_array_struct_dtype(): x = np.array([(1, 'a'), (2, 'b')], dtype=[('a', 'i4'), ('b', 'object')]) y = da.from_array(x, chunks=(1,)) df = dd.from_dask_array(y) tm.assert_index_equal(df.columns, pd.Index(['a', 'b'])) assert_eq(df, pd.DataFrame(x)) assert_eq(dd.from_dask_array(y, columns=['b', 'a']), pd.DataFrame(x, columns=['b', 'a'])) def test_from_dask_array_unknown_chunks(): # Series dx = da.Array({('x', 0): np.arange(5), ('x', 1): np.arange(5, 11)}, 'x', ((np.nan, np.nan,),), np.arange(1).dtype) df = dd.from_dask_array(dx) assert isinstance(df, dd.Series) assert not df.known_divisions assert_eq(df, pd.Series(np.arange(11)), check_index=False) # DataFrame dsk = {('x', 0, 0): np.random.random((2, 3)), ('x', 1, 0): np.random.random((5, 3))} dx = da.Array(dsk, 'x', ((np.nan, np.nan,), (3,)), np.float64) df = dd.from_dask_array(dx) assert isinstance(df, dd.DataFrame) assert not df.known_divisions assert_eq(df, pd.DataFrame(dx.compute()), check_index=False) # Unknown width dx = da.Array(dsk, 'x', ((np.nan, np.nan,), (np.nan,)), np.float64) with pytest.raises(ValueError): df = dd.from_dask_array(dx) def test_to_bag(): pytest.importorskip('dask.bag') a = pd.DataFrame({'x': ['a', 'b', 'c', 'd'], 'y': [2, 3, 4, 5]}, index=pd.Index([1., 2., 3., 4.], name='ind')) ddf = dd.from_pandas(a, 2) assert ddf.to_bag().compute() == list(a.itertuples(False)) assert ddf.to_bag(True).compute() == list(a.itertuples(True)) assert ddf.x.to_bag(True).compute() == list(a.x.iteritems()) assert ddf.x.to_bag().compute() == list(a.x) def test_to_records(): pytest.importorskip('dask.array') from dask.array.utils import assert_eq df = pd.DataFrame({'x': ['a', 'b', 'c', 'd'], 'y': [2, 3, 4, 5]}, index=pd.Index([1., 2., 3., 4.], name='ind')) ddf = dd.from_pandas(df, 2) assert_eq(df.to_records(), ddf.to_records()) def test_from_delayed(): df = pd.DataFrame(data=np.random.normal(size=(10, 4)), columns=list('abcd')) parts = [df.iloc[:1], df.iloc[1:3], df.iloc[3:6], df.iloc[6:10]] dfs = [delayed(parts.__getitem__)(i) for i in range(4)] meta = dfs[0].compute() my_len = lambda x: pd.Series([len(x)]) for divisions in [None, [0, 1, 3, 6, 10]]: ddf = dd.from_delayed(dfs, meta=meta, divisions=divisions) assert_eq(ddf, df) assert list(ddf.map_partitions(my_len).compute()) == [1, 2, 3, 4] assert ddf.known_divisions == (divisions is not None) s = dd.from_delayed([d.a for d in dfs], meta=meta.a, divisions=divisions) assert_eq(s, df.a) assert list(s.map_partitions(my_len).compute()) == [1, 2, 3, 4] assert ddf.known_divisions == (divisions is not None) meta2 = [(c, 'f8') for c in df.columns] assert_eq(dd.from_delayed(dfs, meta=meta2), df) assert_eq(dd.from_delayed([d.a for d in dfs], meta=('a', 'f8')), df.a) with pytest.raises(ValueError): dd.from_delayed(dfs, meta=meta, divisions=[0, 1, 3, 6]) with pytest.raises(ValueError) as e: dd.from_delayed(dfs, meta=meta.a).compute() assert str(e.value).startswith('Metadata mismatch found in `from_delayed`') def test_from_delayed_sorted(): a = pd.DataFrame({'x': [1, 2]}, index=[1, 10]) b = pd.DataFrame({'x': [4, 1]}, index=[100, 200]) A 
= dd.from_delayed([delayed(a), delayed(b)], divisions='sorted') assert A.known_divisions assert A.divisions == (1, 100, 200) def test_to_delayed(): df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [10, 20, 30, 40]}) ddf = dd.from_pandas(df, npartitions=2) a, b = ddf.to_delayed() assert isinstance(a, Delayed) assert isinstance(b, Delayed) assert_eq(a.compute(), df.iloc[:2]) def test_to_delayed_optimizes(): df = pd.DataFrame({'x': list(range(20))}) ddf = dd.from_pandas(df, npartitions=20) x = (ddf + 1).loc[:2] d = x.to_delayed()[0] assert len(d.dask) < 20 dask-0.16.0/dask/dataframe/io/tests/test_parquet.py000066400000000000000000000503401320364734500222420ustar00rootroot00000000000000from __future__ import (absolute_import, division, print_function, unicode_literals) import os import numpy as np import pandas as pd import pandas.util.testing as tm import pytest import dask import dask.multiprocessing import dask.dataframe as dd from dask.dataframe.utils import assert_eq try: import fastparquet except ImportError: fastparquet = False try: import pyarrow.parquet as pq except ImportError: pq = False df = pd.DataFrame({'x': [6, 2, 3, 4, 5], 'y': [1.0, 2.0, 1.0, 2.0, 1.0]}, index=pd.Index([10, 20, 30, 40, 50], name='myindex')) ddf = dd.from_pandas(df, npartitions=3) @pytest.fixture(params=[pytest.mark.skipif(not fastparquet, 'fastparquet', reason='fastparquet not found'), pytest.mark.skipif(not pq, 'pyarrow', reason='pyarrow not found')]) def engine(request): return request.param def check_fastparquet(): if not fastparquet: pytest.skip('fastparquet not found') def check_pyarrow(): if not pq: pytest.skip('pyarrow not found') def write_read_engines(xfail_arrow_to_fastparquet=True): if xfail_arrow_to_fastparquet: xfail = (pytest.mark.xfail(reason="Can't read arrow directories with fastparquet"),) else: xfail = () ff = () if fastparquet else (pytest.mark.skip(reason='fastparquet not found'),) aa = () if pq else (pytest.mark.skip(reason='pyarrow not found'),) engines = [pytest.param('fastparquet', 'fastparquet', marks=ff), pytest.param('pyarrow', 'pyarrow', marks=aa), pytest.param('fastparquet', 'pyarrow', marks=ff + aa), pytest.param('pyarrow', 'fastparquet', marks=ff + aa + xfail)] return pytest.mark.parametrize(('write_engine', 'read_engine'), engines) write_read_engines_xfail = write_read_engines(xfail_arrow_to_fastparquet=True) @write_read_engines_xfail def test_local(tmpdir, write_engine, read_engine): tmp = str(tmpdir) data = pd.DataFrame({'i32': np.arange(1000, dtype=np.int32), 'i64': np.arange(1000, dtype=np.int64), 'f': np.arange(1000, dtype=np.float64), 'bhello': np.random.choice(['hello', 'yo', 'people'], size=1000).astype("O")}) df = dd.from_pandas(data, chunksize=500) df.to_parquet(tmp, write_index=False, engine=write_engine) files = os.listdir(tmp) assert '_metadata' in files assert 'part.0.parquet' in files df2 = dd.read_parquet(tmp, index=False, engine=read_engine) assert len(df2.divisions) > 1 out = df2.compute(get=dask.get).reset_index() for column in df.columns: assert (data[column] == out[column]).all() @write_read_engines_xfail def test_index(tmpdir, write_engine, read_engine): fn = str(tmpdir) ddf.to_parquet(fn, engine=write_engine) ddf2 = dd.read_parquet(fn, engine=read_engine) assert_eq(df, ddf2) @pytest.mark.parametrize('index', [False, True]) @write_read_engines_xfail def test_empty(tmpdir, write_engine, read_engine, index): fn = str(tmpdir) df = pd.DataFrame({'a': ['a', 'b', 'b'], 'b': [4, 5, 6]})[:0] if index: df.set_index('a', inplace=True, drop=True) ddf = dd.from_pandas(df, 
npartitions=2) ddf.to_parquet(fn, write_index=index, engine=write_engine) read_df = dd.read_parquet(fn, engine=read_engine) assert_eq(df, read_df) @write_read_engines(xfail_arrow_to_fastparquet=False) def test_read_glob(tmpdir, write_engine, read_engine): fn = str(tmpdir) ddf.to_parquet(fn, engine=write_engine) os.unlink(os.path.join(fn, '_metadata')) files = os.listdir(fn) assert '_metadata' not in files ddf2 = dd.read_parquet(os.path.join(fn, '*'), engine=read_engine) assert_eq(df, ddf2) @write_read_engines_xfail def test_auto_add_index(tmpdir, write_engine, read_engine): fn = str(tmpdir) ddf.to_parquet(fn, engine=write_engine) ddf2 = dd.read_parquet(fn, columns=['x'], index='myindex', engine=read_engine) assert_eq(df[['x']], ddf2) @write_read_engines_xfail def test_index_column_false_index(tmpdir, write_engine, read_engine): fn = str(tmpdir) ddf.to_parquet(fn, engine=write_engine) ddf2 = dd.read_parquet(fn, columns=['myindex'], index=False, engine=read_engine) assert_eq(pd.DataFrame(df.index), ddf2, check_index=False) @pytest.mark.parametrize("columns", [['myindex'], []]) @pytest.mark.parametrize("index", ['myindex', None]) @write_read_engines_xfail def test_columns_index(tmpdir, write_engine, read_engine, columns, index): fn = str(tmpdir) ddf.to_parquet(fn, engine=write_engine) ddf2 = dd.read_parquet(fn, columns=columns, index=index, engine=read_engine) assert_eq(df[[]], ddf2) @write_read_engines_xfail def test_no_index(tmpdir, write_engine, read_engine): fn = str(tmpdir) df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) ddf = dd.from_pandas(df, npartitions=2) ddf.to_parquet(fn, write_index=False, engine=write_engine) ddf2 = dd.read_parquet(fn, engine=read_engine) assert_eq(df, ddf2, check_index=False) def test_read_series(tmpdir, engine): fn = str(tmpdir) ddf.to_parquet(fn, engine=engine) ddf2 = dd.read_parquet(fn, columns=['x'], engine=engine) assert_eq(df[['x']], ddf2) ddf2 = dd.read_parquet(fn, columns='x', index='myindex', engine=engine) assert_eq(df.x, ddf2) def test_names(tmpdir, engine): fn = str(tmpdir) ddf.to_parquet(fn, engine=engine) def read(fn, **kwargs): return dd.read_parquet(fn, engine=engine, **kwargs) assert (set(read(fn).dask) == set(read(fn).dask)) assert (set(read(fn).dask) != set(read(fn, columns=['x']).dask)) assert (set(read(fn, columns=('x',)).dask) == set(read(fn, columns=['x']).dask)) @pytest.mark.parametrize('c', [['x'], 'x', ['x', 'y'], []]) def test_optimize(tmpdir, c): check_fastparquet() fn = str(tmpdir) ddf.to_parquet(fn) ddf2 = dd.read_parquet(fn) assert_eq(df[c], ddf2[c]) x = ddf2[c] dsk = x.__dask_optimize__(x.dask, x.__dask_keys__()) assert len(dsk) == x.npartitions assert all(v[4] == c for v in dsk.values()) @pytest.mark.skipif(not hasattr(pd.DataFrame, 'to_parquet'), reason="no to_parquet method") @write_read_engines(False) def test_roundtrip_from_pandas(tmpdir, write_engine, read_engine): fn = str(tmpdir.join('test.parquet')) df = pd.DataFrame({'x': [1, 2, 3]}) df.to_parquet(fn, engine=write_engine) ddf = dd.read_parquet(fn, engine=read_engine) assert_eq(df, ddf) def test_categorical(tmpdir): check_fastparquet() tmp = str(tmpdir) df = pd.DataFrame({'x': ['a', 'b', 'c'] * 100}, dtype='category') ddf = dd.from_pandas(df, npartitions=3) dd.to_parquet(ddf, tmp) ddf2 = dd.read_parquet(tmp, categories=['x']) assert ddf2.compute().x.cat.categories.tolist() == ['a', 'b', 'c'] # autocat ddf2 = dd.read_parquet(tmp) assert ddf2.compute().x.cat.categories.tolist() == ['a', 'b', 'c'] ddf2.loc[:1000].compute() df.index.name = 'index' # defaults to 'index' 
in this case assert assert_eq(df, ddf2) # dereference cats ddf2 = dd.read_parquet(tmp, categories=[]) ddf2.loc[:1000].compute() assert (df.x == ddf2.x).all() def test_append(tmpdir, engine): """Test that appended parquet equal to the original one.""" check_fastparquet() tmp = str(tmpdir) df = pd.DataFrame({'i32': np.arange(1000, dtype=np.int32), 'i64': np.arange(1000, dtype=np.int64), 'f': np.arange(1000, dtype=np.float64), 'bhello': np.random.choice(['hello', 'yo', 'people'], size=1000).astype("O")}) df.index.name = 'index' half = len(df) // 2 ddf1 = dd.from_pandas(df.iloc[:half], chunksize=100) ddf2 = dd.from_pandas(df.iloc[half:], chunksize=100) ddf1.to_parquet(tmp) ddf2.to_parquet(tmp, append=True) ddf3 = dd.read_parquet(tmp, engine=engine) assert_eq(df, ddf3) def test_append_with_partition(tmpdir): check_fastparquet() tmp = str(tmpdir) df0 = pd.DataFrame({'lat': np.arange(0, 10), 'lon': np.arange(10, 20), 'value': np.arange(100, 110)}) df0.index.name = 'index' df1 = pd.DataFrame({'lat': np.arange(10, 20), 'lon': np.arange(10, 20), 'value': np.arange(120, 130)}) df1.index.name = 'index' dd_df0 = dd.from_pandas(df0, npartitions=1) dd_df1 = dd.from_pandas(df1, npartitions=1) dd.to_parquet(dd_df0, tmp, partition_on=['lon']) dd.to_parquet(dd_df1, tmp, partition_on=['lon'], append=True, ignore_divisions=True) out = dd.read_parquet(tmp).compute() out['lon'] = out.lon.astype('int64') # just to pass assert # sort required since partitioning breaks index order assert_eq(out.sort_values('value'), pd.concat([df0, df1])[out.columns], check_index=False) def test_append_wo_index(tmpdir): """Test append with write_index=False.""" check_fastparquet() tmp = str(tmpdir.join('tmp1.parquet')) df = pd.DataFrame({'i32': np.arange(1000, dtype=np.int32), 'i64': np.arange(1000, dtype=np.int64), 'f': np.arange(1000, dtype=np.float64), 'bhello': np.random.choice(['hello', 'yo', 'people'], size=1000).astype("O")}) half = len(df) // 2 ddf1 = dd.from_pandas(df.iloc[:half], chunksize=100) ddf2 = dd.from_pandas(df.iloc[half:], chunksize=100) ddf1.to_parquet(tmp) with pytest.raises(ValueError) as excinfo: ddf2.to_parquet(tmp, write_index=False, append=True) assert 'Appended columns' in str(excinfo.value) tmp = str(tmpdir.join('tmp2.parquet')) ddf1.to_parquet(tmp, write_index=False) ddf2.to_parquet(tmp, write_index=False, append=True) ddf3 = dd.read_parquet(tmp, index='f') assert_eq(df.set_index('f'), ddf3) def test_append_overlapping_divisions(tmpdir): """Test raising of error when divisions overlapping.""" check_fastparquet() tmp = str(tmpdir) df = pd.DataFrame({'i32': np.arange(1000, dtype=np.int32), 'i64': np.arange(1000, dtype=np.int64), 'f': np.arange(1000, dtype=np.float64), 'bhello': np.random.choice(['hello', 'yo', 'people'], size=1000).astype("O")}) half = len(df) // 2 ddf1 = dd.from_pandas(df.iloc[:half], chunksize=100) ddf2 = dd.from_pandas(df.iloc[half - 10:], chunksize=100) ddf1.to_parquet(tmp) with pytest.raises(ValueError) as excinfo: ddf2.to_parquet(tmp, append=True) assert 'Appended divisions' in str(excinfo.value) ddf2.to_parquet(tmp, append=True, ignore_divisions=True) def test_append_different_columns(tmpdir): """Test raising of error when non equal columns.""" check_fastparquet() tmp = str(tmpdir) df1 = pd.DataFrame({'i32': np.arange(100, dtype=np.int32)}) df2 = pd.DataFrame({'i64': np.arange(100, dtype=np.int64)}) df3 = pd.DataFrame({'i32': np.arange(100, dtype=np.int64)}) ddf1 = dd.from_pandas(df1, chunksize=2) ddf2 = dd.from_pandas(df2, chunksize=2) ddf3 = dd.from_pandas(df3, chunksize=2) 
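    # df2 replaces the 'i32' column with a new 'i64' column, while df3 keeps
    # the 'i32' name but stores int64 values, so the appends below are
    # expected to fail with "Appended columns" and "Appended dtypes" errors
    # respectively.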
ddf1.to_parquet(tmp) with pytest.raises(ValueError) as excinfo: ddf2.to_parquet(tmp, append=True) assert 'Appended columns' in str(excinfo.value) with pytest.raises(ValueError) as excinfo: ddf3.to_parquet(tmp, append=True) assert 'Appended dtypes' in str(excinfo.value) def test_ordering(tmpdir): check_fastparquet() tmp = str(tmpdir) df = pd.DataFrame({'a': [1, 2, 3], 'b': [10, 20, 30], 'c': [100, 200, 300]}, index=pd.Index([-1, -2, -3], name='myindex'), columns=['c', 'a', 'b']) ddf = dd.from_pandas(df, npartitions=2) dd.to_parquet(ddf, tmp) pf = fastparquet.ParquetFile(tmp) assert pf.columns == ['myindex', 'c', 'a', 'b'] ddf2 = dd.read_parquet(tmp, index='myindex') assert_eq(ddf, ddf2) def test_read_parquet_custom_columns(tmpdir, engine): tmp = str(tmpdir) data = pd.DataFrame({'i32': np.arange(1000, dtype=np.int32), 'f': np.arange(1000, dtype=np.float64)}) df = dd.from_pandas(data, chunksize=50) df.to_parquet(tmp) df2 = dd.read_parquet(tmp, columns=['i32', 'f'], engine=engine) assert_eq(df2, df2, check_index=False) df3 = dd.read_parquet(tmp, columns=['f', 'i32'], engine=engine) assert_eq(df3, df3, check_index=False) @pytest.mark.parametrize('df,write_kwargs,read_kwargs', [ (pd.DataFrame({'x': [3, 2, 1]}), {}, {}), (pd.DataFrame({'x': ['c', 'a', 'b']}), {'object_encoding': 'utf8'}, {}), (pd.DataFrame({'x': ['cc', 'a', 'bbb']}), {'object_encoding': 'utf8'}, {}), (pd.DataFrame({'x': [b'a', b'b', b'c']}), {'object_encoding': 'bytes'}, {}), (pd.DataFrame({'x': pd.Categorical(['a', 'b', 'a'])}), {'object_encoding': 'utf8'}, {'categories': ['x']}), (pd.DataFrame({'x': pd.Categorical([1, 2, 1])}), {}, {'categories': ['x']}), (pd.DataFrame({'x': list(map(pd.Timestamp, [3000, 2000, 1000]))}), {}, {}), (pd.DataFrame({'x': [3000, 2000, 1000]}).astype('M8[ns]'), {}, {}), pytest.mark.xfail((pd.DataFrame({'x': [3, 2, 1]}).astype('M8[ns]'), {}, {}), reason="Parquet doesn't support nanosecond precision"), (pd.DataFrame({'x': [3, 2, 1]}).astype('M8[us]'), {}, {}), (pd.DataFrame({'x': [3, 2, 1]}).astype('M8[ms]'), {}, {}), (pd.DataFrame({'x': [3, 2, 1]}).astype('uint16'), {}, {}), (pd.DataFrame({'x': [3, 2, 1]}).astype('float32'), {}, {}), (pd.DataFrame({'x': [3, 1, 2]}, index=[3, 2, 1]), {}, {}), (pd.DataFrame({'x': [3, 1, 5]}, index=pd.Index([1, 2, 3], name='foo')), {}, {}), (pd.DataFrame({'x': [1, 2, 3], 'y': [3, 2, 1]}), {}, {}), (pd.DataFrame({'x': [1, 2, 3], 'y': [3, 2, 1]}, columns=['y', 'x']), {}, {}), (pd.DataFrame({'0': [3, 2, 1]}), {}, {}), (pd.DataFrame({'x': [3, 2, None]}), {}, {}), (pd.DataFrame({'-': [3., 2., None]}), {}, {}), (pd.DataFrame({'.': [3., 2., None]}), {}, {}), (pd.DataFrame({' ': [3., 2., None]}), {}, {}), ]) def test_roundtrip(tmpdir, df, write_kwargs, read_kwargs): check_fastparquet() tmp = str(tmpdir) if df.index.name is None: df.index.name = 'index' ddf = dd.from_pandas(df, npartitions=2) dd.to_parquet(ddf, tmp, **write_kwargs) ddf2 = dd.read_parquet(tmp, index=df.index.name, **read_kwargs) assert_eq(ddf, ddf2) def test_categories(tmpdir): check_fastparquet() fn = str(tmpdir) df = pd.DataFrame({'x': [1, 2, 3, 4, 5], 'y': list('caaab')}) ddf = dd.from_pandas(df, npartitions=2) ddf['y'] = ddf.y.astype('category') ddf.to_parquet(fn) ddf2 = dd.read_parquet(fn, categories=['y']) with pytest.raises(NotImplementedError): ddf2.y.cat.categories assert set(ddf2.y.compute().cat.categories) == {'a', 'b', 'c'} cats_set = ddf2.map_partitions(lambda x: x.y.cat.categories).compute() assert cats_set.tolist() == ['a', 'c', 'a', 'b'] assert_eq(ddf.y, ddf2.y, check_names=False) with 
pytest.raises(TypeError): # attempt to load as category that which is not so encoded ddf2 = dd.read_parquet(fn, categories=['x']).compute() with pytest.raises(ValueError): # attempt to load as category unknown column ddf2 = dd.read_parquet(fn, categories=['foo']) def test_empty_partition(tmpdir, engine): fn = str(tmpdir) df = pd.DataFrame({"a": range(10), "b": range(10)}) ddf = dd.from_pandas(df, npartitions=5) ddf2 = ddf[ddf.a <= 5] ddf2.to_parquet(fn, engine=engine) ddf3 = dd.read_parquet(fn, engine=engine) sol = ddf2.compute() assert_eq(sol, ddf3, check_names=False, check_index=False) def test_timestamp_index(tmpdir, engine): fn = str(tmpdir) df = tm.makeTimeDataFrame() df.index.name = 'foo' ddf = dd.from_pandas(df, npartitions=5) ddf.to_parquet(fn, engine=engine) ddf2 = dd.read_parquet(fn, engine=engine) assert_eq(df, ddf2) def test_to_parquet_default_writes_nulls(tmpdir): check_fastparquet() check_pyarrow() fn = str(tmpdir.join('test.parquet')) df = pd.DataFrame({'c1': [1., np.nan, 2, np.nan, 3]}) ddf = dd.from_pandas(df, npartitions=1) ddf.to_parquet(fn) table = pq.read_table(fn) assert table[1].null_count == 2 def test_partition_on(tmpdir): check_fastparquet() tmpdir = str(tmpdir) df = pd.DataFrame({'a': np.random.choice(['A', 'B', 'C'], size=100), 'b': np.random.random(size=100), 'c': np.random.randint(1, 5, size=100)}) d = dd.from_pandas(df, npartitions=2) d.to_parquet(tmpdir, partition_on=['a']) out = dd.read_parquet(tmpdir, engine='fastparquet').compute() for val in df.a.unique(): assert set(df.b[df.a == val]) == set(out.b[out.a == val]) def test_filters(tmpdir): check_fastparquet() fn = str(tmpdir) df = pd.DataFrame({'at': ['ab', 'aa', 'ba', 'da', 'bb']}) ddf = dd.from_pandas(df, npartitions=1) # Ok with 1 partition and filters ddf.repartition(npartitions=1, force=True).to_parquet(fn, write_index=False) ddf2 = dd.read_parquet(fn, index=False, filters=[('at', '==', 'aa')]).compute() assert_eq(ddf2, ddf) # with >1 partition and no filters ddf.repartition(npartitions=2, force=True).to_parquet(fn) dd.read_parquet(fn).compute() assert_eq(ddf2, ddf) # with >1 partition and filters using base fastparquet ddf.repartition(npartitions=2, force=True).to_parquet(fn) df2 = fastparquet.ParquetFile(fn).to_pandas(filters=[('at', '==', 'aa')]) assert len(df2) > 0 # with >1 partition and filters ddf.repartition(npartitions=2, force=True).to_parquet(fn) dd.read_parquet(fn, filters=[('at', '==', 'aa')]).compute() assert len(ddf2) > 0 @pytest.mark.parametrize('get', [dask.threaded.get, dask.multiprocessing.get]) def test_to_parquet_lazy(tmpdir, get): check_fastparquet() tmpdir = str(tmpdir) df = pd.DataFrame({'a': [1, 2, 3, 4], 'b': [1., 2., 3., 4.]}) df.index.name = 'index' ddf = dd.from_pandas(df, npartitions=2) value = ddf.to_parquet(tmpdir, compute=False) assert hasattr(value, 'dask') value.compute(get=get) assert os.path.exists(tmpdir) ddf2 = dd.read_parquet(tmpdir) assert_eq(ddf, ddf2) def test_timestamp96(tmpdir): check_fastparquet() fn = str(tmpdir) df = pd.DataFrame({'a': ['now']}, dtype='M8[ns]') ddf = dd.from_pandas(df, 1) ddf.to_parquet(fn, write_index=False, times='int96') pf = fastparquet.ParquetFile(fn) assert pf._schema[1].type == fastparquet.parquet_thrift.Type.INT96 out = dd.read_parquet(fn).compute() assert_eq(out, df) def test_drill_scheme(tmpdir): check_fastparquet() fn = str(tmpdir) N = 5 df1 = pd.DataFrame({c: np.random.random(N) for i, c in enumerate(['a', 'b', 'c'])}) df2 = pd.DataFrame({c: np.random.random(N) for i, c in enumerate(['a', 'b', 'c'])}) files = [] for d in 
['test_data1', 'test_data2']: dn = os.path.join(fn, d) if not os.path.exists(dn): os.mkdir(dn) files.append(os.path.join(dn, 'data1.parq')) fastparquet.write(files[0], df1) fastparquet.write(files[1], df2) df = dd.read_parquet(files) assert 'dir0' in df.columns out = df.compute() assert 'dir0' in out assert (np.unique(out.dir0) == ['test_data1', 'test_data2']).all() def test_parquet_select_cats(tmpdir): check_fastparquet() fn = str(tmpdir) df = pd.DataFrame({ 'categories': pd.Series( np.random.choice(['a', 'b', 'c', 'd', 'e', 'f'], size=100), dtype='category'), 'ints': pd.Series(list(range(0, 100)), dtype='int'), 'floats': pd.Series(list(range(0, 100)), dtype='float')}) ddf = dd.from_pandas(df, 1) ddf.to_parquet(fn) rddf = dd.read_parquet(fn, columns=['ints']) assert list(rddf.columns) == ['ints'] rddf = dd.read_parquet(fn) assert list(rddf.columns) == list(df) dask-0.16.0/dask/dataframe/io/tests/test_sql.py000066400000000000000000000114571320364734500213660ustar00rootroot00000000000000from __future__ import (print_function, division, absolute_import, unicode_literals) import io import pytest from dask.dataframe.io.sql import read_sql_table from dask.utils import tmpfile from dask.dataframe.utils import assert_eq pd = pytest.importorskip('pandas') dd = pytest.importorskip('dask.dataframe') pytest.importorskip('sqlalchemy') pytest.importorskip('sqlite3') data = """ name,number,age,negish Alice,0,33,-5 Bob,1,40,-3 Chris,2,22,3 Dora,3,16,5 Edith,4,53,0 Francis,5,30,0 Garreth,6,20,0 """ df = pd.read_csv(io.StringIO(data), index_col='number') @pytest.yield_fixture def db(): with tmpfile() as f: uri = 'sqlite:///%s' % f df.to_sql('test', uri, index=True, if_exists='replace') yield uri def test_simple(db): # single chunk data = read_sql_table('test', db, npartitions=2, index_col='number' ).compute() assert (data.name == df.name).all() assert data.index.name == 'number' assert_eq(data, df) def test_npartitions(db): data = read_sql_table('test', db, columns=list(df.columns), npartitions=2, index_col='number') assert len(data.divisions) == 3 assert (data.name.compute() == df.name).all() data = read_sql_table('test', db, columns=['name'], npartitions=6, index_col="number") assert_eq(data, df[['name']]) data = read_sql_table('test', db, columns=list(df.columns), bytes_per_chunk=2**30, index_col='number') assert data.npartitions == 1 assert (data.name.compute() == df.name).all() def test_divisions(db): data = read_sql_table('test', db, columns=['name'], divisions=[0, 2, 4], index_col="number") assert data.divisions == (0, 2, 4) assert data.index.max().compute() == 4 assert_eq(data, df[['name']][df.index <= 4]) def test_division_or_partition(db): with pytest.raises(TypeError): read_sql_table('test', db, columns=['name'], index_col="number", divisions=[0, 2, 4], npartitions=3) out = read_sql_table('test', db, index_col="number", bytes_per_chunk=100) m = out.map_partitions(lambda d: d.memory_usage( deep=True, index=True).sum()).compute() assert (50 < m).all() and (m < 200).all() assert_eq(out, df) def test_range(db): data = read_sql_table('test', db, npartitions=2, index_col='number', limits=[1, 4]) assert data.index.min().compute() == 1 assert data.index.max().compute() == 4 def test_datetimes(): import datetime now = datetime.datetime.now() d = datetime.timedelta(seconds=1) df = pd.DataFrame({'a': list('ghjkl'), 'b': [now + i * d for i in range(2, -3, -1)]}) with tmpfile() as f: uri = 'sqlite:///%s' % f df.to_sql('test', uri, index=False, if_exists='replace') data = read_sql_table('test', uri, 
npartitions=2, index_col='b') assert data.index.dtype.kind == "M" assert data.divisions[0] == df.b.min() df2 = df.set_index('b') assert_eq(data.map_partitions(lambda x: x.sort_index()), df2.sort_index()) def test_with_func(db): from sqlalchemy import sql index = sql.func.abs(sql.column('negish')).label('abs') # function for the index, get all columns data = read_sql_table('test', db, npartitions=2, index_col=index) assert data.divisions[0] == 0 part = data.get_partition(0).compute() assert (part.index == 0).all() # now an arith op for one column too; it's name will be 'age' data = read_sql_table('test', db, npartitions=2, index_col=index, columns=[index, -sql.column('age')]) assert (data.age.compute() < 0).all() # a column that would have no name, give it a label index = (-sql.column('negish')).label('index') data = read_sql_table('test', db, npartitions=2, index_col=index, columns=['negish', 'age']) d = data.compute() assert (-d.index == d['negish']).all() def test_no_nameless_index(db): from sqlalchemy import sql index = (-sql.column('negish')) with pytest.raises(ValueError): read_sql_table('test', db, npartitions=2, index_col=index, columns=['negish', 'age', index]) index = sql.func.abs(sql.column('negish')) # function for the index, get all columns with pytest.raises(ValueError): read_sql_table('test', db, npartitions=2, index_col=index) def test_select_from_select(db): from sqlalchemy import sql s1 = sql.select([sql.column('number'), sql.column('name')] ).select_from(sql.table('test')) out = read_sql_table(s1, db, npartitions=2, index_col='number') assert_eq(out, df[['name']]) dask-0.16.0/dask/dataframe/methods.py000066400000000000000000000253331320364734500174200ustar00rootroot00000000000000from __future__ import print_function, absolute_import, division import warnings import numpy as np import pandas as pd from pandas.api.types import is_categorical_dtype from toolz import partition from .utils import PANDAS_VERSION if PANDAS_VERSION >= '0.20.0': from pandas.api.types import union_categoricals else: from pandas.types.concat import union_categoricals # --------------------------------- # indexing # --------------------------------- def loc(df, iindexer, cindexer=None): """ .loc for known divisions """ if cindexer is None: return df.loc[iindexer] else: return df.loc[iindexer, cindexer] def try_loc(df, iindexer, cindexer=None): """ .loc for unknown divisions """ try: return loc(df, iindexer, cindexer) except KeyError: return df.head(0).loc[:, cindexer] def boundary_slice(df, start, stop, right_boundary=True, left_boundary=True, kind='loc'): """Index slice start/stop. Can switch include/exclude boundaries. >>> df = pd.DataFrame({'x': [10, 20, 30, 40, 50]}, index=[1, 2, 2, 3, 4]) >>> boundary_slice(df, 2, None) x 2 20 2 30 3 40 4 50 >>> boundary_slice(df, 1, 3) x 1 10 2 20 2 30 3 40 >>> boundary_slice(df, 1, 3, right_boundary=False) x 1 10 2 20 2 30 """ if kind == 'loc' and not df.index.is_monotonic: # Pandas treats missing keys differently for label-slicing # on monotonic vs. non-monotonic indexes # If the index is monotonic, `df.loc[start:stop]` is fine. 
# If it's not, `df.loc[start:stop]` raises when `start` is missing if start is not None: if left_boundary: df = df[df.index >= start] else: df = df[df.index > start] if stop is not None: if right_boundary: df = df[df.index <= stop] else: df = df[df.index < stop] return df else: result = getattr(df, kind)[start:stop] if not right_boundary: right_index = result.index.get_slice_bound(stop, 'left', kind) result = result.iloc[:right_index] if not left_boundary: left_index = result.index.get_slice_bound(start, 'right', kind) result = result.iloc[left_index:] return result def index_count(x): # Workaround since Index doesn't implement `.count` return pd.notnull(x).sum() def mean_aggregate(s, n): try: return s / n except ZeroDivisionError: return np.float64(np.nan) def var_aggregate(x2, x, n, ddof): try: result = (x2 / n) - (x / n)**2 if ddof != 0: result = result * n / (n - ddof) return result except ZeroDivisionError: return np.float64(np.nan) def describe_aggregate(values): assert len(values) == 6 count, mean, std, min, q, max = values typ = pd.DataFrame if isinstance(count, pd.Series) else pd.Series part1 = typ([count, mean, std, min], index=['count', 'mean', 'std', 'min']) q.index = ['25%', '50%', '75%'] part3 = typ([max], index=['max']) return pd.concat([part1, q, part3]) def cummin_aggregate(x, y): if isinstance(x, (pd.Series, pd.DataFrame)): return x.where((x < y) | x.isnull(), y, axis=x.ndim - 1) else: # scalar return x if x < y else y def cummax_aggregate(x, y): if isinstance(x, (pd.Series, pd.DataFrame)): return x.where((x > y) | x.isnull(), y, axis=x.ndim - 1) else: # scalar return x if x > y else y def assign(df, *pairs): kwargs = dict(partition(2, pairs)) return df.assign(**kwargs) def unique(x, series_name=None): # unique returns np.ndarray, it must be wrapped return pd.Series(x.unique(), name=series_name) def value_counts_combine(x): return x.groupby(level=0).sum() def value_counts_aggregate(x): return x.groupby(level=0).sum().sort_values(ascending=False) def nbytes(x): return x.nbytes def size(x): return x.size def sample(df, state, frac, replace): rs = np.random.RandomState(state) return df.sample(random_state=rs, frac=frac, replace=replace) if len(df) > 0 else df def drop_columns(df, columns, dtype): df = df.drop(columns, axis=1) df.columns = df.columns.astype(dtype) return df def fillna_check(df, method, check=True): out = df.fillna(method=method) if check and out.isnull().values.all(axis=0).any(): raise ValueError("All NaN partition encountered in `fillna`. Try " "using ``df.repartition`` to increase the partition " "size, or specify `limit` in `fillna`.") return out # --------------------------------- # reshape # --------------------------------- def pivot_agg(df): return df.groupby(level=0).sum() def pivot_sum(df, index, columns, values): return pd.pivot_table(df, index=index, columns=columns, values=values, aggfunc='sum') def pivot_count(df, index, columns, values): # we cannot determine dtype until concatenationg all partitions. 
# make dtype deterministic, always coerce to np.float64 return pd.pivot_table(df, index=index, columns=columns, values=values, aggfunc='count').astype(np.float64) # --------------------------------- # concat # --------------------------------- if PANDAS_VERSION < '0.20.0': def _get_level_values(x, n): return x.get_level_values(n) else: def _get_level_values(x, n): return x._get_level_values(n) def concat(dfs, axis=0, join='outer', uniform=False): """Concatenate, handling some edge cases: - Unions categoricals between partitions - Ignores empty partitions Parameters ---------- dfs : list of DataFrame, Series, or Index axis : int or str, optional join : str, optional uniform : bool, optional Whether to treat ``dfs[0]`` as representative of ``dfs[1:]``. Set to True if all arguments have the same columns and dtypes (but not necessarily categories). Default is False. """ if axis == 1: return pd.concat(dfs, axis=axis, join=join) if len(dfs) == 1: return dfs[0] # Support concatenating indices along axis 0 if isinstance(dfs[0], pd.Index): if isinstance(dfs[0], pd.CategoricalIndex): return pd.CategoricalIndex(union_categoricals(dfs), name=dfs[0].name) elif isinstance(dfs[0], pd.MultiIndex): first, rest = dfs[0], dfs[1:] if all((isinstance(o, pd.MultiIndex) and o.nlevels >= first.nlevels) for o in rest): arrays = [concat([_get_level_values(i, n) for i in dfs]) for n in range(first.nlevels)] return pd.MultiIndex.from_arrays(arrays, names=first.names) to_concat = (first.values, ) + tuple(k._values for k in rest) new_tuples = np.concatenate(to_concat) try: return pd.MultiIndex.from_tuples(new_tuples, names=first.names) except Exception: return pd.Index(new_tuples) return dfs[0].append(dfs[1:]) # Handle categorical index separately dfs0_index = dfs[0].index if (isinstance(dfs0_index, pd.CategoricalIndex) or (isinstance(dfs0_index, pd.MultiIndex) and any(isinstance(i, pd.CategoricalIndex) for i in dfs0_index.levels))): dfs2 = [df.reset_index(drop=True) for df in dfs] ind = concat([df.index for df in dfs]) else: dfs2 = dfs ind = None # Concatenate the partitions together, handling categories as needed if (isinstance(dfs2[0], pd.DataFrame) if uniform else any(isinstance(df, pd.DataFrame) for df in dfs2)): if uniform: dfs3 = dfs2 cat_mask = dfs2[0].dtypes == 'category' else: # When concatenating mixed dataframes and series on axis 1, Pandas # converts series to dataframes with a single column named 0, then # concatenates. 
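            # For example, a pd.Series named 's' is normalised by the comprehension
            # below into a one-column DataFrame whose column label is rewritten to 0,
            # matching what pandas itself produces for an unnamed Series.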
dfs3 = [df if isinstance(df, pd.DataFrame) else df.to_frame().rename(columns={df.name: 0}) for df in dfs2] # pandas may raise a RuntimeWarning for comparing ints and strs with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) cat_mask = pd.concat([(df.dtypes == 'category').to_frame().T for df in dfs3], join=join).any() if cat_mask.any(): not_cat = cat_mask[~cat_mask].index out = pd.concat([df[df.columns.intersection(not_cat)] for df in dfs3], join=join) for col in cat_mask.index.difference(not_cat): # Find an example of categoricals in this column for df in dfs3: sample = df.get(col) if sample is not None: break # Extract partitions, subbing in missing if needed parts = [] for df in dfs3: if col in df.columns: parts.append(df[col]) else: codes = np.full(len(df), -1, dtype='i8') data = pd.Categorical.from_codes(codes, sample.cat.categories, sample.cat.ordered) parts.append(data) out[col] = union_categoricals(parts) out = out.reindex(columns=cat_mask.index) else: # pandas may raise a RuntimeWarning for comparing ints and strs with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) out = pd.concat(dfs3, join=join) else: if is_categorical_dtype(dfs2[0].dtype): if ind is None: ind = concat([df.index for df in dfs2]) return pd.Series(union_categoricals(dfs2), index=ind, name=dfs2[0].name) out = pd.concat(dfs2, join=join) # Re-add the index if needed if ind is not None: out.index = ind return out def merge(left, right, how, left_on, right_on, left_index, right_index, indicator, suffixes, default_left, default_right): if not len(left): left = default_left if not len(right): right = default_right return pd.merge(left, right, how=how, left_on=left_on, right_on=right_on, left_index=left_index, right_index=right_index, suffixes=suffixes, indicator=indicator) dask-0.16.0/dask/dataframe/multi.py000066400000000000000000000557001320364734500171100ustar00rootroot00000000000000""" Algorithms that Involve Multiple DataFrames =========================================== The pandas operations ``concat``, ``join``, and ``merge`` combine multiple DataFrames. This module contains analogous algorithms in the parallel case. There are two important cases: 1. We combine along a partitioned index 2. We combine along an unpartitioned index or other column In the first case we know which partitions of each dataframe interact with which others. This lets uss be significantly more clever and efficient. In the second case each partition from one dataset interacts with all partitions from the other. We handle this through a shuffle operation. Partitioned Joins ----------------- In the first case where we join along a partitioned index we proceed in the following stages. 1. Align the partitions of all inputs to be the same. This involves a call to ``dd.repartition`` which will split up and concat existing partitions as necessary. After this step all inputs have partitions that align with each other. This step is relatively cheap. See the function ``align_partitions``. 2. Remove unnecessary partitions based on the type of join we perform (left, right, inner, outer). We can do this at the partition level before any computation happens. We'll do it again on each partition when we call the in-memory function. See the function ``require``. 3. Embarrassingly parallel calls to ``pd.concat``, ``pd.join``, or ``pd.merge``. Now that the data is aligned and unnecessary blocks have been removed we can rely on the fast in-memory Pandas join machinery to execute joins per-partition. 
We know that all intersecting records exist within the same partition Hash Joins via Shuffle ---------------------- When we join along an unpartitioned index or along an arbitrary column any partition from one input might interact with any partition in another. In this case we perform a hash-join by shuffling data in each input by that column. This results in new inputs with the same partition structure cleanly separated along that column. We proceed with hash joins in the following stages: 1. Shuffle each input on the specified column. See the function ``dask.dataframe.shuffle.shuffle``. 2. Perform embarrassingly parallel join across shuffled inputs. """ from __future__ import absolute_import, division, print_function from functools import wraps, partial from warnings import warn from toolz import merge_sorted, unique, first import toolz import pandas as pd from ..base import tokenize from ..compatibility import apply from .core import (_Frame, DataFrame, Series, map_partitions, Index, _maybe_from_pandas, new_dd_object, is_broadcastable) from .io import from_pandas from . import methods from .shuffle import shuffle, rearrange_by_divisions from .utils import strip_unknown_categories def align_partitions(*dfs): """ Mutually partition and align DataFrame blocks This serves as precursor to multi-dataframe operations like join, concat, or merge. Parameters ---------- dfs: sequence of dd.DataFrame, dd.Series and dd.base.Scalar Sequence of dataframes to be aligned on their index Returns ------- dfs: sequence of dd.DataFrame, dd.Series and dd.base.Scalar These must have consistent divisions with each other divisions: tuple Full divisions sequence of the entire result result: list A list of lists of keys that show which data exist on which divisions """ _is_broadcastable = partial(is_broadcastable, dfs) dfs1 = [df for df in dfs if isinstance(df, _Frame) and not _is_broadcastable(df)] if len(dfs) == 0: raise ValueError("dfs contains no DataFrame and Series") if not all(df.known_divisions for df in dfs1): raise ValueError("Not all divisions are known, can't align " "partitions. Please use `set_index` " "to set the index.") divisions = list(unique(merge_sorted(*[df.divisions for df in dfs1]))) if len(divisions) == 1: # single value for index divisions = (divisions[0], divisions[0]) dfs2 = [df.repartition(divisions, force=True) if isinstance(df, _Frame) else df for df in dfs] result = list() inds = [0 for df in dfs] for d in divisions[:-1]: L = list() for i, df in enumerate(dfs2): if isinstance(df, _Frame): j = inds[i] divs = df.divisions if j < len(divs) - 1 and divs[j] == d: L.append((df._name, inds[i])) inds[i] += 1 else: L.append(None) else: # Scalar has no divisions L.append(None) result.append(L) return dfs2, tuple(divisions), result def _maybe_align_partitions(args): """Align DataFrame blocks if divisions are different. Note that if all divisions are unknown, but have equal npartitions, then they will be passed through unchanged. 
This is different than `align_partitions`, which will fail if divisions aren't all known""" _is_broadcastable = partial(is_broadcastable, args) dfs = [df for df in args if isinstance(df, _Frame) and not _is_broadcastable(df)] if not dfs: return args divisions = dfs[0].divisions if not all(df.divisions == divisions for df in dfs): dfs2 = iter(align_partitions(*dfs)[0]) return [a if not isinstance(a, _Frame) else next(dfs2) for a in args] return args def require(divisions, parts, required=None): """ Clear out divisions where required components are not present In left, right, or inner joins we exclude portions of the dataset if one side or the other is not present. We can achieve this at the partition level as well >>> divisions = [1, 3, 5, 7, 9] >>> parts = [(('a', 0), None), ... (('a', 1), ('b', 0)), ... (('a', 2), ('b', 1)), ... (None, ('b', 2))] >>> divisions2, parts2 = require(divisions, parts, required=[0]) >>> divisions2 (1, 3, 5, 7) >>> parts2 # doctest: +NORMALIZE_WHITESPACE ((('a', 0), None), (('a', 1), ('b', 0)), (('a', 2), ('b', 1))) >>> divisions2, parts2 = require(divisions, parts, required=[1]) >>> divisions2 (3, 5, 7, 9) >>> parts2 # doctest: +NORMALIZE_WHITESPACE ((('a', 1), ('b', 0)), (('a', 2), ('b', 1)), (None, ('b', 2))) >>> divisions2, parts2 = require(divisions, parts, required=[0, 1]) >>> divisions2 (3, 5, 7) >>> parts2 # doctest: +NORMALIZE_WHITESPACE ((('a', 1), ('b', 0)), (('a', 2), ('b', 1))) """ if not required: return divisions, parts for i in required: present = [j for j, p in enumerate(parts) if p[i] is not None] divisions = tuple(divisions[min(present): max(present) + 2]) parts = tuple(parts[min(present): max(present) + 1]) return divisions, parts ############################################################### # Join / Merge ############################################################### required = {'left': [0], 'right': [1], 'inner': [0, 1], 'outer': []} def merge_indexed_dataframes(lhs, rhs, how='left', lsuffix='', rsuffix='', indicator=False): """ Join two partitioned dataframes along their index """ (lhs, rhs), divisions, parts = align_partitions(lhs, rhs) divisions, parts = require(divisions, parts, required[how]) left_empty = lhs._meta right_empty = rhs._meta name = 'join-indexed-' + tokenize(lhs, rhs, how, lsuffix, rsuffix, indicator) dsk = dict() for i, (a, b) in enumerate(parts): if a is None and how in ('right', 'outer'): a = left_empty if b is None and how in ('left', 'outer'): b = right_empty dsk[(name, i)] = (methods.merge, a, b, how, None, None, True, True, indicator, (lsuffix, rsuffix), left_empty, right_empty) meta = pd.merge(lhs._meta_nonempty, rhs._meta_nonempty, how=how, left_index=True, right_index=True, suffixes=(lsuffix, rsuffix), indicator=indicator) return new_dd_object(toolz.merge(lhs.dask, rhs.dask, dsk), name, meta, divisions) shuffle_func = shuffle # name sometimes conflicts with keyword argument def hash_join(lhs, left_on, rhs, right_on, how='inner', npartitions=None, suffixes=('_x', '_y'), shuffle=None, indicator=False): """ Join two DataFrames on particular columns with hash join This shuffles both datasets on the joined column and then performs an embarrassingly parallel join partition-by-partition >>> hash_join(a, 'id', rhs, 'id', how='left', npartitions=10) # doctest: +SKIP """ if npartitions is None: npartitions = max(lhs.npartitions, rhs.npartitions) lhs2 = shuffle_func(lhs, left_on, npartitions=npartitions, shuffle=shuffle) rhs2 = shuffle_func(rhs, right_on, npartitions=npartitions, shuffle=shuffle) if isinstance(left_on, 
Index): left_on = None left_index = True else: left_index = False if isinstance(right_on, Index): right_on = None right_index = True else: right_index = False # dummy result meta = pd.merge(lhs._meta_nonempty, rhs._meta_nonempty, how=how, left_on=left_on, right_on=right_on, left_index=left_index, right_index=right_index, suffixes=suffixes, indicator=indicator) if isinstance(left_on, list): left_on = (list, tuple(left_on)) if isinstance(right_on, list): right_on = (list, tuple(right_on)) token = tokenize(lhs2, left_on, rhs2, right_on, left_index, right_index, how, npartitions, suffixes, shuffle, indicator) name = 'hash-join-' + token dsk = dict(((name, i), (methods.merge, (lhs2._name, i), (rhs2._name, i), how, left_on, right_on, left_index, right_index, indicator, suffixes, lhs._meta, rhs._meta)) for i in range(npartitions)) divisions = [None] * (npartitions + 1) return new_dd_object(toolz.merge(lhs2.dask, rhs2.dask, dsk), name, meta, divisions) def single_partition_join(left, right, **kwargs): # if the merge is perfomed on_index, divisions can be kept, otherwise the # new index will not necessarily correspond the current divisions meta = pd.merge(left._meta_nonempty, right._meta_nonempty, **kwargs) name = 'merge-' + tokenize(left, right, **kwargs) if left.npartitions == 1: left_key = first(left.__dask_keys__()) dsk = {(name, i): (apply, pd.merge, [left_key, right_key], kwargs) for i, right_key in enumerate(right.__dask_keys__())} if kwargs.get('right_index'): divisions = right.divisions else: divisions = [None for _ in right.divisions] elif right.npartitions == 1: right_key = first(right.__dask_keys__()) dsk = {(name, i): (apply, pd.merge, [left_key, right_key], kwargs) for i, left_key in enumerate(left.__dask_keys__())} if kwargs.get('left_index'): divisions = left.divisions else: divisions = [None for _ in left.divisions] return new_dd_object(toolz.merge(dsk, left.dask, right.dask), name, meta, divisions) @wraps(pd.merge) def merge(left, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, suffixes=('_x', '_y'), indicator=False, npartitions=None, shuffle=None, max_branch=None): for o in [on, left_on, right_on]: if isinstance(o, _Frame): raise NotImplementedError( "Dask collections not currently allowed in merge columns") if not on and not left_on and not right_on and not left_index and not right_index: on = [c for c in left.columns if c in right.columns] if not on: left_index = right_index = True if on and not left_on and not right_on: left_on = right_on = on on = None if (isinstance(left, (pd.Series, pd.DataFrame)) and isinstance(right, (pd.Series, pd.DataFrame))): return pd.merge(left, right, how=how, on=on, left_on=left_on, right_on=right_on, left_index=left_index, right_index=right_index, suffixes=suffixes, indicator=indicator) # Transform pandas objects into dask.dataframe objects if isinstance(left, (pd.Series, pd.DataFrame)): if right_index and left_on: # change to join on index left = left.set_index(left[left_on]) left_on = False left_index = True left = from_pandas(left, npartitions=1) # turn into DataFrame if isinstance(right, (pd.Series, pd.DataFrame)): if left_index and right_on: # change to join on index right = right.set_index(right[right_on]) right_on = False right_index = True right = from_pandas(right, npartitions=1) # turn into DataFrame # Both sides are now dd.DataFrame or dd.Series objects # Both sides indexed if (left_index and left.known_divisions and right_index and right.known_divisions): # Do indexed join return 
merge_indexed_dataframes(left, right, how=how, lsuffix=suffixes[0], rsuffix=suffixes[1], indicator=indicator) # Single partition on one side elif (left.npartitions == 1 and how in ('inner', 'right') or right.npartitions == 1 and how in ('inner', 'left')): return single_partition_join(left, right, how=how, right_on=right_on, left_on=left_on, left_index=left_index, right_index=right_index, suffixes=suffixes, indicator=indicator) # One side is indexed, the other not elif (left_index and left.known_divisions and not right_index or right_index and right.known_divisions and not left_index): left_empty = left._meta_nonempty right_empty = right._meta_nonempty meta = pd.merge(left_empty, right_empty, how=how, on=on, left_on=left_on, right_on=right_on, left_index=left_index, right_index=right_index, suffixes=suffixes, indicator=indicator) if left_index and left.known_divisions: right = rearrange_by_divisions(right, right_on, left.divisions, max_branch, shuffle=shuffle) left = left.clear_divisions() elif right_index and right.known_divisions: left = rearrange_by_divisions(left, left_on, right.divisions, max_branch, shuffle=shuffle) right = right.clear_divisions() return map_partitions(pd.merge, left, right, meta=meta, how=how, on=on, left_on=left_on, right_on=right_on, left_index=left_index, right_index=right_index, suffixes=suffixes, indicator=indicator) # Catch all hash join else: return hash_join(left, left.index if left_index else left_on, right, right.index if right_index else right_on, how, npartitions, suffixes, shuffle=shuffle, indicator=indicator) ############################################################### # Concat ############################################################### def concat_and_check(dfs): if len(set(map(len, dfs))) != 1: raise ValueError("Concatenated DataFrames of different lengths") return pd.concat(dfs, axis=1) def concat_unindexed_dataframes(dfs): name = 'concat-' + tokenize(*dfs) dsk = {(name, i): (concat_and_check, [(df._name, i) for df in dfs]) for i in range(dfs[0].npartitions)} meta = pd.concat([df._meta for df in dfs], axis=1) return new_dd_object(toolz.merge(dsk, *[df.dask for df in dfs]), name, meta, dfs[0].divisions) def concat_indexed_dataframes(dfs, axis=0, join='outer'): """ Concatenate indexed dataframes together along the index """ meta = methods.concat([df._meta for df in dfs], axis=axis, join=join) empties = [strip_unknown_categories(df._meta) for df in dfs] dfs2, divisions, parts = align_partitions(*dfs) name = 'concat-indexed-' + tokenize(join, *dfs) parts2 = [[df if df is not None else empty for df, empty in zip(part, empties)] for part in parts] dsk = dict(((name, i), (methods.concat, part, axis, join)) for i, part in enumerate(parts2)) for df in dfs2: dsk.update(df.dask) return new_dd_object(dsk, name, meta, divisions) def stack_partitions(dfs, divisions, join='outer'): """Concatenate partitions on axis=0 by doing a simple stack""" meta = methods.concat([df._meta for df in dfs], join=join) empty = strip_unknown_categories(meta) name = 'concat-{0}'.format(tokenize(*dfs)) dsk = {} i = 0 for df in dfs: dsk.update(df.dask) # An error will be raised if the schemas or categories don't match. In # this case we need to pass along the meta object to transform each # partition, so they're all equivalent. 
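        # The equality check in the try block below is only a probe: comparing an
        # incompatible partition meta against the combined meta raises
        # ValueError/TypeError, and any such partition is then concatenated with
        # the empty meta so that every output partition carries the same schema.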
try: df._meta == meta match = True except (ValueError, TypeError): match = False for key in df.__dask_keys__(): if match: dsk[(name, i)] = key else: dsk[(name, i)] = (methods.concat, [empty, key], 0, join) i += 1 return new_dd_object(dsk, name, meta, divisions) def concat(dfs, axis=0, join='outer', interleave_partitions=False): """ Concatenate DataFrames along rows. - When axis=0 (default), concatenate DataFrames row-wise: - If all divisions are known and ordered, concatenate DataFrames keeping divisions. When divisions are not ordered, specifying interleave_partition=True allows concatenate divisions each by each. - If any of division is unknown, concatenate DataFrames resetting its division to unknown (None) - When axis=1, concatenate DataFrames column-wise: - Allowed if all divisions are known. - If any of division is unknown, it raises ValueError. Parameters ---------- dfs : list List of dask.DataFrames to be concatenated axis : {0, 1, 'index', 'columns'}, default 0 The axis to concatenate along join : {'inner', 'outer'}, default 'outer' How to handle indexes on other axis interleave_partitions : bool, default False Whether to concatenate DataFrames ignoring its order. If True, every divisions are concatenated each by each. Examples -------- If all divisions are known and ordered, divisions are kept. >>> a # doctest: +SKIP dd.DataFrame >>> b # doctest: +SKIP dd.DataFrame >>> dd.concat([a, b]) # doctest: +SKIP dd.DataFrame Unable to concatenate if divisions are not ordered. >>> a # doctest: +SKIP dd.DataFrame >>> b # doctest: +SKIP dd.DataFrame >>> dd.concat([a, b]) # doctest: +SKIP ValueError: All inputs have known divisions which cannot be concatenated in order. Specify interleave_partitions=True to ignore order Specify interleave_partitions=True to ignore the division order. >>> dd.concat([a, b], interleave_partitions=True) # doctest: +SKIP dd.DataFrame If any of division is unknown, the result division will be unknown >>> a # doctest: +SKIP dd.DataFrame >>> b # doctest: +SKIP dd.DataFrame >>> dd.concat([a, b]) # doctest: +SKIP dd.DataFrame """ if not isinstance(dfs, list): raise TypeError("dfs must be a list of DataFrames/Series objects") if len(dfs) == 0: raise ValueError('No objects to concatenate') if len(dfs) == 1: if axis == 1 and isinstance(dfs[0], Series): return dfs[0].to_frame() else: return dfs[0] if join not in ('inner', 'outer'): raise ValueError("'join' must be 'inner' or 'outer'") axis = DataFrame._validate_axis(axis) dasks = [df for df in dfs if isinstance(df, _Frame)] dfs = _maybe_from_pandas(dfs) if axis == 1: if all(df.known_divisions for df in dasks): return concat_indexed_dataframes(dfs, axis=axis, join=join) elif (len(dasks) == len(dfs) and all(not df.known_divisions for df in dfs) and len({df.npartitions for df in dasks}) == 1): warn("Concatenating dataframes with unknown divisions.\n" "We're assuming that the indexes of each dataframes are \n" "aligned. 
This assumption is not generally safe.") return concat_unindexed_dataframes(dfs) else: raise ValueError('Unable to concatenate DataFrame with unknown ' 'division specifying axis=1') else: if all(df.known_divisions for df in dasks): # each DataFrame's division must be greater than previous one if all(dfs[i].divisions[-1] < dfs[i + 1].divisions[0] for i in range(len(dfs) - 1)): divisions = [] for df in dfs[:-1]: # remove last to concatenate with next divisions += df.divisions[:-1] divisions += dfs[-1].divisions return stack_partitions(dfs, divisions, join=join) elif interleave_partitions: return concat_indexed_dataframes(dfs, join=join) else: raise ValueError('All inputs have known divisions which ' 'cannot be concatenated in order. Specify ' 'interleave_partitions=True to ignore order') else: divisions = [None] * (sum([df.npartitions for df in dfs]) + 1) return stack_partitions(dfs, divisions, join=join) dask-0.16.0/dask/dataframe/optimize.py000066400000000000000000000015711320364734500176130ustar00rootroot00000000000000""" Dataframe optimizations """ from __future__ import absolute_import, division, print_function from ..optimize import cull, fuse_getitem, fuse from ..context import _globals from .. import core try: import fastparquet # noqa: F401 except ImportError: fastparquet = False def optimize(dsk, keys, **kwargs): from .io import dataframe_from_ctable if isinstance(keys, list): dsk, dependencies = cull(dsk, list(core.flatten(keys))) else: dsk, dependencies = cull(dsk, [keys]) dsk = fuse_getitem(dsk, dataframe_from_ctable, 3) if fastparquet: from .io.parquet import _read_parquet_row_group dsk = fuse_getitem(dsk, _read_parquet_row_group, 4) dsk, dependencies = fuse(dsk, keys, dependencies=dependencies, ave_width=_globals.get('fuse_ave_width', 0)) dsk, _ = cull(dsk, keys) return dsk dask-0.16.0/dask/dataframe/partitionquantiles.py000066400000000000000000000446621320364734500217220ustar00rootroot00000000000000"""Determine new partition divisions using approximate percentiles. We use a custom algorithm to calculate approximate, evenly-distributed percentiles of arbitrarily-ordered data for any dtype in a distributed fashion with one pass over the data. This is used to determine new partition divisions when changing the index of a dask.dataframe. We claim no statistical guarantees, but we use a variety of heuristics to try to provide reliable, robust results that are "good enough" and can scale to large number of partitions. Our approach is similar to standard approaches such as t- and q-digest, GK, and sampling-based algorithms, which consist of three parts: 1. **Summarize:** create summaries of subsets of data 2. **Merge:** combine summaries to make a new summary 3. **Compress:** periodically compress a summary into a smaller summary We summarize the data in each partition by calculating several percentiles. The value at each percentile is given a weight proportional to the length of the partition and the differences between the current percentile and the adjacent percentiles. Merging summaries is simply a ``merge_sorted`` of the values and their weights, which we do with a reduction tree. Percentiles is a good choice for our case, because we are given a numpy array of the partition's data, and percentiles is a relatively cheap operation. Moreover, percentiles are, by definition, much less susceptible to the underlying distribution of the data, so the weights given to each value--even across partitions--should be comparable. Let us describe this to a child of five. 
We are given many small cubes (of equal size) with numbers on them. Split these into many piles. This is like the original data. Let's sort and stack the cubes from one of the piles. Next, we are given a bunch of unlabeled blocks of different sizes, and most are much larger than the the original cubes. Stack these blocks until they're the same height as our first stack. Let's write a number on each block of the new stack. To do this, choose the number of the cube in the first stack that is located in the middle of an unlabeled block. We are finished with this stack once all blocks have a number written on them. Repeat this for all the piles of cubes. Finished already? Great! Now take all the stacks of the larger blocks you wrote on and throw them into a single pile. We'll be sorting these blocks next, which may be easier if you carefully move the blocks over and organize... ah, nevermind--too late. Okay, sort and stack all the blocks from that amazing, disorganized pile you just made. This will be very tall, so we had better stack it sideways on the floor like so. This will also make it easier for us to split the stack into groups of approximately equal size, which is our final task... This, in a nutshell, is the algorithm we deploy. The main difference is that we don't always assign a block the number at its median (ours fluctuates around the median). The numbers at the edges of the final groups is what we use as divisions for repartitioning. We also need the overall min and max, so we take the 0th and 100th percentile of each partition, and another sample near each edge so we don't give disproportionate weights to extreme values. Choosing appropriate percentiles to take in each partition is where things get interesting. The data is arbitrarily ordered, which means it may be sorted, random, or follow some pathological distribution--who knows. We hope all partitions are of similar length, but we ought to expect some variation in lengths. The number of partitions may also be changing significantly, which could affect the optimal choice of percentiles. For improved robustness, we use both evenly-distributed and random percentiles. If the number of partitions isn't changing, then the total number of percentiles across all partitions scales as ``npartitions**1.5``. Although we only have a simple compression operation (step 3 above) that combines weights of equal values, a more sophisticated one could be added if needed, such as for extremely large ``npartitions`` or if we find we need to increase the sample size for each partition. """ from __future__ import absolute_import, division, print_function import math import numpy as np import pandas as pd from toolz import merge, merge_sorted, take from ..utils import random_state_data from ..base import tokenize from .core import Series from .utils import is_categorical_dtype from dask.compatibility import zip def sample_percentiles(num_old, num_new, chunk_length, upsample=1.0, random_state=None): """Construct percentiles for a chunk for repartitioning. Adapt the number of total percentiles calculated based on the number of current and new partitions. Returned percentiles include equally spaced percentiles between [0, 100], and random percentiles. See detailed discussion below. 
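    A rough illustration, assuming a fixed random seed purely for the sake of
    the example (the result mixes fixed and random percentiles, so the exact
    values are not checked):

    >>> qs = sample_percentiles(10, 10, 1000, random_state=0)  # doctest: +SKIP
    >>> qs.min() >= 0 and qs.max() <= 100  # doctest: +SKIP
    True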
Parameters ---------- num_old: int Number of partitions of the current object num_new: int Number of partitions of the new object chunk_length: int Number of rows of the partition upsample : float Multiplicative factor to increase the number of samples Returns ------- qs : numpy.ndarray of sorted percentiles between 0, 100 Constructing ordered (i.e., not hashed) partitions is hard. Calculating approximate percentiles for generic objects in an out-of-core fashion is also hard. Fortunately, partition boundaries don't need to be perfect in order for partitioning to be effective, so we strive for a "good enough" method that can scale to many partitions and is reasonably well-behaved for a wide variety of scenarios. Two similar approaches come to mind: (1) take a subsample of every partition, then find the best new partitions for the combined subsamples; and (2) calculate equally-spaced percentiles on every partition (a relatively cheap operation), then merge the results. We do both, but instead of random samples, we use random percentiles. If the number of partitions isn't changing, then the ratio of fixed percentiles to random percentiles is 2 to 1. If repartitioning goes from a very high number of partitions to a very low number of partitions, then we use more random percentiles, because a stochastic approach will be more stable to potential correlations in the data that may cause a few equally- spaced partitions to under-sample the data. The more partitions there are, then the more total percentiles will get calculated across all partitions. Squaring the number of partitions approximately doubles the number of total percentiles calculated, so num_total_percentiles ~ sqrt(num_partitions). We assume each partition is approximately the same length. This should provide adequate resolution and allow the number of partitions to scale. For numeric data, one could instead use T-Digest for floats and Q-Digest for ints to calculate approximate percentiles. Our current method works for any dtype. """ # *waves hands* random_percentage = 1 / (1 + (4 * num_new / num_old)**0.5) num_percentiles = upsample * num_new * (num_old + 22)**0.55 / num_old num_fixed = int(num_percentiles * (1 - random_percentage)) + 2 num_random = int(num_percentiles * random_percentage) + 2 if num_fixed + num_random + 5 >= chunk_length: return np.linspace(0, 100, chunk_length + 1) if not isinstance(random_state, np.random.RandomState): random_state = np.random.RandomState(random_state) q_fixed = np.linspace(0, 100, num_fixed) q_random = random_state.rand(num_random) * 100 q_edges = [60 / (num_fixed - 1), 100 - 60 / (num_fixed - 1)] qs = np.concatenate([q_fixed, q_random, q_edges, [0, 100]]) qs.sort() # Make the divisions between percentiles a little more even qs = 0.5 * (qs[:-1] + qs[1:]) return qs def tree_width(N, to_binary=False): """Generate tree width suitable for ``merge_sorted`` given N inputs The larger N is, the more tasks are reduced in a single task. In theory, this is designed so all tasks are of comparable effort. """ if N < 32: group_size = 2 else: group_size = int(math.log(N)) num_groups = N // group_size if to_binary or num_groups < 16: return 2**int(math.log(N / group_size, 2)) else: return num_groups def tree_groups(N, num_groups): """Split an integer N into evenly sized and spaced groups. >>> tree_groups(16, 6) [3, 2, 3, 3, 2, 3] """ # Bresenham, you so smooth! 
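    # Bresenham-style error accumulation: the remainder left over from the
    # integer division N // num_groups is handed out one extra element at a
    # time, so the larger groups end up evenly spaced across the result
    # (see the doctest above).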
group_size = N // num_groups dx = num_groups dy = N - group_size * num_groups D = 2 * dy - dx rv = [] for _ in range(num_groups): if D < 0: rv.append(group_size) else: rv.append(group_size + 1) D -= 2 * dx D += 2 * dy return rv def create_merge_tree(func, keys, token): """Create a task tree that merges all the keys with a reduction function. Parameters ---------- func: callable Reduction function that accepts a single list of values to reduce. keys: iterable Keys to reduce from the source dask graph. token: object Included in each key of the returned dict. This creates a k-ary tree where k depends on the current level and is greater the further away a node is from the root node. This reduces the total number of nodes (thereby reducing scheduler overhead), but still has beneficial properties of trees. For reasonable numbers of keys, N < 1e5, the total number of nodes in the tree is roughly ``N**0.78``. For 1e5 < N < 2e5, is it roughly ``N**0.8``. """ level = 0 prev_width = len(keys) prev_keys = iter(keys) rv = {} while prev_width > 1: width = tree_width(prev_width) groups = tree_groups(prev_width, width) keys = [(token, level, i) for i in range(width)] rv.update((key, (func, list(take(num, prev_keys)))) for num, key in zip(groups, keys)) prev_width = width prev_keys = iter(keys) level += 1 return rv def percentiles_to_weights(qs, vals, length): """Weigh percentile values by length and the difference between percentiles >>> percentiles = np.array([0, 25, 50, 90, 100]) >>> values = np.array([2, 3, 5, 8, 13]) >>> length = 10 >>> percentiles_to_weights(percentiles, values, length) ([2, 3, 5, 8, 13], [125.0, 250.0, 325.0, 250.0, 50.0]) The weight of the first element, ``2``, is determined by the difference between the first and second percentiles, and then scaled by length: >>> 0.5 * length * (percentiles[1] - percentiles[0]) 125.0 The second weight uses the difference of percentiles on both sides, so it will be twice the first weight if the percentiles are equally spaced: >>> 0.5 * length * (percentiles[2] - percentiles[0]) 250.0 """ if length == 0: return () diff = np.ediff1d(qs, 0.0, 0.0) weights = 0.5 * length * (diff[1:] + diff[:-1]) return vals.tolist(), weights.tolist() def merge_and_compress_summaries(vals_and_weights): """Merge and sort percentile summaries that are already sorted. Each item is a tuple like ``(vals, weights)`` where vals and weights are lists. We sort both by vals. Equal values will be combined, their weights summed together. """ vals_and_weights = [x for x in vals_and_weights if x] if not vals_and_weights: return () it = merge_sorted(*[zip(x, y) for x, y in vals_and_weights]) vals = [] weights = [] vals_append = vals.append weights_append = weights.append val, weight = prev_val, prev_weight = next(it) for val, weight in it: if val == prev_val: prev_weight += weight else: vals_append(prev_val) weights_append(prev_weight) prev_val, prev_weight = val, weight if val == prev_val: vals_append(prev_val) weights_append(prev_weight) return vals, weights def process_val_weights(vals_and_weights, npartitions, dtype_info): """Calculate final approximate percentiles given weighted vals ``vals_and_weights`` is assumed to be sorted. We take a cumulative sum of the weights, which makes them percentile-like (their scale is [0, N] instead of [0, 100]). Next we find the divisions to create partitions of approximately equal size. It is possible for adjacent values of the result to be the same. Since these determine the divisions of the new partitions, some partitions may be empty. 
This can happen if we under-sample the data, or if there aren't enough unique values in the column. Increasing ``upsample`` keyword argument in ``df.set_index`` may help. """ dtype, info = dtype_info if not vals_and_weights: try: return np.array(None, dtype=dtype) except Exception: # dtype does not support None value so allow it to change return np.array(None, dtype=np.float_) vals, weights = vals_and_weights vals = np.array(vals) weights = np.array(weights) # We want to create exactly `npartition` number of groups of `vals` that # are approximately the same weight and non-empty if possible. We use a # simple approach (more accurate algorithms exist): # 1. Remove all the values with weights larger than the relative # percentile width from consideration (these are `jumbo`s) # 2. Calculate percentiles with "interpolation=left" of percentile-like # weights of the remaining values. These are guaranteed to be unique. # 3. Concatenate the values from (1) and (2), sort, and return. # # We assume that all values are unique, which happens in the previous # step `merge_and_compress_summaries`. if len(vals) == npartitions + 1: rv = vals elif len(vals) < npartitions + 1: # The data is under-sampled if np.issubdtype(vals.dtype, np.number): # Interpolate extra divisions q_weights = np.cumsum(weights) q_target = np.linspace(q_weights[0], q_weights[-1], npartitions + 1) rv = np.interp(q_target, q_weights, vals) else: # Distribute the empty partitions duplicated_index = np.linspace( 0, len(vals) - 1, npartitions - len(vals) + 1, dtype=int ) duplicated_vals = vals[duplicated_index] rv = np.concatenate([vals, duplicated_vals]) rv.sort() else: target_weight = weights.sum() / npartitions jumbo_mask = weights >= target_weight jumbo_vals = vals[jumbo_mask] trimmed_vals = vals[~jumbo_mask] trimmed_weights = weights[~jumbo_mask] trimmed_npartitions = npartitions - len(jumbo_vals) # percentile-like, but scaled by weights q_weights = np.cumsum(trimmed_weights) q_target = np.linspace(0, q_weights[-1], trimmed_npartitions + 1) left = np.searchsorted(q_weights, q_target, side='left') right = np.searchsorted(q_weights, q_target, side='right') - 1 # stay inbounds np.maximum(right, 0, right) lower = np.minimum(left, right) trimmed = trimmed_vals[lower] rv = np.concatenate([trimmed, jumbo_vals]) rv.sort() if is_categorical_dtype(dtype): rv = pd.Categorical.from_codes(rv, info[0], info[1]) elif 'datetime64' in str(dtype): rv = pd.DatetimeIndex(rv, dtype=dtype) elif rv.dtype != dtype: rv = rv.astype(dtype) return rv def percentiles_summary(df, num_old, num_new, upsample, state): """Summarize data using percentiles and derived weights. These summaries can be merged, compressed, and converted back into approximate percentiles. Parameters ---------- df: pandas.Series Data to summarize num_old: int Number of partitions of the current object num_new: int Number of partitions of the new object upsample: float Scale factor to increase the number of percentiles calculated in each partition. Use to improve accuracy. 
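    A sketch of a typical call; the values are seed-dependent, so the output
    is not verified here:

    >>> vals, weights = percentiles_summary(pd.Series(range(100)), 4, 4,
    ...                                     upsample=1.0, state=0)  # doctest: +SKIP
    >>> len(vals) == len(weights)  # doctest: +SKIP
    True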
""" from dask.array.percentile import _percentile length = len(df) if length == 0: return () random_state = np.random.RandomState(state) qs = sample_percentiles(num_old, num_new, length, upsample, random_state) data = df.values interpolation = 'linear' if is_categorical_dtype(data): data = data.codes interpolation = 'nearest' vals = _percentile(data, qs, interpolation=interpolation) if interpolation == 'linear' and np.issubdtype(data.dtype, np.integer): vals = np.round(vals).astype(data.dtype) vals_and_weights = percentiles_to_weights(qs, vals, length) return vals_and_weights def dtype_info(df): info = None if is_categorical_dtype(df): data = df.values info = (data.categories, data.ordered) return df.dtype, info def partition_quantiles(df, npartitions, upsample=1.0, random_state=None): """ Approximate quantiles of Series used for repartitioning """ assert isinstance(df, Series) # currently, only Series has quantile method # Index.quantile(list-like) must be pd.Series, not pd.Index return_type = Series qs = np.linspace(0, 1, npartitions + 1) token = tokenize(df, qs, upsample) if random_state is None: random_state = hash(token) % np.iinfo(np.int32).max state_data = random_state_data(df.npartitions, random_state) df_keys = df.__dask_keys__() name0 = 're-quantiles-0-' + token dtype_dsk = {(name0, 0): (dtype_info, df_keys[0])} name1 = 're-quantiles-1-' + token val_dsk = {(name1, i): (percentiles_summary, key, df.npartitions, npartitions, upsample, state) for i, (state, key) in enumerate(zip(state_data, df_keys))} name2 = 're-quantiles-2-' + token merge_dsk = create_merge_tree(merge_and_compress_summaries, sorted(val_dsk), name2) if not merge_dsk: # Compress the data even if we only have one partition merge_dsk = {(name2, 0, 0): (merge_and_compress_summaries, [list(val_dsk)[0]])} merged_key = max(merge_dsk) name3 = 're-quantiles-3-' + token last_dsk = {(name3, 0): (pd.Series, (process_val_weights, merged_key, npartitions, (name0, 0)), qs, None, df.name)} dsk = merge(df.dask, dtype_dsk, val_dsk, merge_dsk, last_dsk) new_divisions = [0.0, 1.0] return return_type(dsk, name3, df._meta, new_divisions) dask-0.16.0/dask/dataframe/reshape.py000066400000000000000000000153531320364734500174050ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import numpy as np import pandas as pd from .core import Series, DataFrame, map_partitions, apply_concat_apply from . import methods from .utils import is_categorical_dtype, is_scalar, has_known_categories ############################################################### # Dummies ############################################################### def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, columns=None, sparse=False, drop_first=False): """ Convert categorical variable into dummy/indicator variables. Data must have category dtype to infer result's ``columns`` Parameters ---------- data : Series or DataFrame with category dtype prefix : string, list of strings, or dict of strings, default None String to append DataFrame column names Pass a list with length equal to the number of columns when calling get_dummies on a DataFrame. Alternativly, `prefix` can be a dictionary mapping column names to prefixes. prefix_sep : string, default '_' If appending prefix, separator/delimiter to use. Or pass a list or dictionary as with `prefix.` dummy_na : bool, default False Add a column to indicate NaNs, if False NaNs are ignored. columns : list-like, default None Column names in the DataFrame to be encoded. 
If `columns` is None then all the columns with `category` dtype will be converted. drop_first : bool, default False Whether to get k-1 dummies out of k categorical levels by removing the first level. Returns ------- dummies : DataFrame """ if isinstance(data, (pd.Series, pd.DataFrame)): return pd.get_dummies(data, prefix=prefix, prefix_sep=prefix_sep, dummy_na=dummy_na, columns=columns, sparse=sparse, drop_first=drop_first) not_cat_msg = ("`get_dummies` with non-categorical dtypes is not " "supported. Please use `df.categorize()` beforehand to " "convert to categorical dtype.") unknown_cat_msg = ("`get_dummies` with unknown categories is not " "supported. Please use `column.cat.as_known()` or " "`df.categorize()` beforehand to ensure known " "categories") if isinstance(data, Series): if not is_categorical_dtype(data): raise NotImplementedError(not_cat_msg) if not has_known_categories(data): raise NotImplementedError(unknown_cat_msg) elif isinstance(data, DataFrame): if columns is None: if (data.dtypes == 'object').any(): raise NotImplementedError(not_cat_msg) columns = data._meta.select_dtypes(include=['category']).columns else: if not all(is_categorical_dtype(data[c]) for c in columns): raise NotImplementedError(not_cat_msg) if not all(has_known_categories(data[c]) for c in columns): raise NotImplementedError(unknown_cat_msg) if sparse: raise NotImplementedError('sparse=True is not supported') return map_partitions(pd.get_dummies, data, prefix=prefix, prefix_sep=prefix_sep, dummy_na=dummy_na, columns=columns, sparse=sparse, drop_first=drop_first) ############################################################### # Pivot table ############################################################### def pivot_table(df, index=None, columns=None, values=None, aggfunc='mean'): """ Create a spreadsheet-style pivot table as a DataFrame. Target ``columns`` must have category dtype to infer result's ``columns``. ``index``, ``columns``, ``values`` and ``aggfunc`` must be all scalar. Parameters ---------- data : DataFrame values : scalar column to aggregate index : scalar column to be index columns : scalar column to be columns aggfunc : {'mean', 'sum', 'count'}, default 'mean' Returns ------- table : DataFrame """ if not is_scalar(index) or index is None: raise ValueError("'index' must be the name of an existing column") if not is_scalar(columns) or columns is None: raise ValueError("'columns' must be the name of an existing column") if not is_categorical_dtype(df[columns]): raise ValueError("'columns' must be category dtype") if not has_known_categories(df[columns]): raise ValueError("'columns' must have known categories. 
Please use " "`df[columns].cat.as_known()` beforehand to ensure " "known categories") if not is_scalar(values) or values is None: raise ValueError("'values' must be the name of an existing column") if not is_scalar(aggfunc) or aggfunc not in ('mean', 'sum', 'count'): raise ValueError("aggfunc must be either 'mean', 'sum' or 'count'") # _emulate can't work for empty data # the result must have CategoricalIndex columns new_columns = pd.CategoricalIndex(df[columns].cat.categories, name=columns) meta = pd.DataFrame(columns=new_columns, dtype=np.float64) meta.index.name = index kwargs = {'index': index, 'columns': columns, 'values': values} pv_sum = apply_concat_apply([df], chunk=methods.pivot_sum, aggregate=methods.pivot_agg, meta=meta, token='pivot_table_sum', chunk_kwargs=kwargs) pv_count = apply_concat_apply([df], chunk=methods.pivot_count, aggregate=methods.pivot_agg, meta=meta, token='pivot_table_count', chunk_kwargs=kwargs) if aggfunc == 'sum': return pv_sum elif aggfunc == 'count': return pv_count elif aggfunc == 'mean': return pv_sum / pv_count else: raise ValueError ############################################################### # Melt ############################################################### def melt(frame, id_vars=None, value_vars=None, var_name=None, value_name='value', col_level=None): from dask.dataframe.core import no_default return frame.map_partitions(pd.melt, meta=no_default, id_vars=id_vars, value_vars=value_vars, var_name=var_name, value_name=value_name, col_level=col_level, token='melt') dask-0.16.0/dask/dataframe/rolling.py000066400000000000000000000305431320364734500174220ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import datetime import warnings from functools import wraps import pandas as pd from pandas.core.window import Rolling as pd_Rolling from ..base import tokenize from ..utils import M, funcname, derived_from from .core import _emulate from .utils import make_meta def overlap_chunk(func, prev_part, current_part, next_part, before, after, args, kwargs): msg = ("Partition size is less than overlapping " "window size. Try using ``df.repartition`` " "to increase the partition size.") if prev_part is not None and isinstance(before, int): if prev_part.shape[0] != before: raise NotImplementedError(msg) if next_part is not None and isinstance(after, int): if next_part.shape[0] != after: raise NotImplementedError(msg) # We validate that the window isn't too large for tiemdeltas in map_overlap parts = [p for p in (prev_part, current_part, next_part) if p is not None] combined = pd.concat(parts) out = func(combined, *args, **kwargs) if prev_part is None: before = None if isinstance(before, datetime.timedelta): before = len(prev_part) if next_part is None: return out.iloc[before:] if isinstance(after, datetime.timedelta): after = len(next_part) return out.iloc[before:-after] def map_overlap(func, df, before, after, *args, **kwargs): """Apply a function to each partition, sharing rows with adjacent partitions. Parameters ---------- func : function Function applied to each partition. df : dd.DataFrame, dd.Series before : int or timedelta The rows to prepend to partition ``i`` from the end of partition ``i - 1``. after : int or timedelta The rows to append to partition ``i`` from the beginning of partition ``i + 1``. args, kwargs : Arguments and keywords to pass to the function. The partition will be the first argument, and these will be passed *after*. 
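
    Examples
    --------
    A minimal, illustrative sketch (marked ``+SKIP`` so it is not executed
    as a doctest): a window-2 rolling sum only needs one extra row from the
    end of the previous partition, so ``before=1`` and ``after=0``.

    >>> import pandas as pd
    >>> import dask.dataframe as dd
    >>> pdf = pd.DataFrame({'x': range(6)})
    >>> ddf = dd.from_pandas(pdf, npartitions=3)
    >>> map_overlap(lambda part: part.rolling(2).sum(),
    ...             ddf, 1, 0).compute()  # doctest: +SKIP
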
See Also -------- dd.DataFrame.map_overlap """ if (isinstance(before, datetime.timedelta) or isinstance(after, datetime.timedelta)): if not df.index._meta_nonempty.is_all_dates: raise TypeError("Must have a `DatetimeIndex` when using string offset " "for `before` and `after`") else: if not (isinstance(before, int) and before >= 0 and isinstance(after, int) and after >= 0): raise ValueError("before and after must be positive integers") if 'token' in kwargs: func_name = kwargs.pop('token') token = tokenize(df, before, after, *args, **kwargs) else: func_name = 'overlap-' + funcname(func) token = tokenize(func, df, before, after, *args, **kwargs) if 'meta' in kwargs: meta = kwargs.pop('meta') else: meta = _emulate(func, df, *args, **kwargs) meta = make_meta(meta) name = '{0}-{1}'.format(func_name, token) name_a = 'overlap-prepend-' + tokenize(df, before) name_b = 'overlap-append-' + tokenize(df, after) df_name = df._name dsk = df.dask.copy() # Have to do the checks for too large windows in the time-delta case # here instead of in `overlap_chunk`, since we can't rely on fix-frequency # index timedelta_partition_message = ( "Partition size is less than specified window. " "Try using ``df.repartition`` to increase the partition size" ) if before and isinstance(before, int): dsk.update({(name_a, i): (M.tail, (df_name, i), before) for i in range(df.npartitions - 1)}) prevs = [None] + [(name_a, i) for i in range(df.npartitions - 1)] elif isinstance(before, datetime.timedelta): # Assumes monotonic (increasing?) index deltas = pd.Series(df.divisions).diff().iloc[1:-1] if (before > deltas).any(): raise ValueError(timedelta_partition_message) dsk.update({(name_a, i): (_tail_timedelta, (df_name, i), (df_name, i + 1), before) for i in range(df.npartitions - 1)}) prevs = [None] + [(name_a, i) for i in range(df.npartitions - 1)] else: prevs = [None] * df.npartitions if after and isinstance(after, int): dsk.update({(name_b, i): (M.head, (df_name, i), after) for i in range(1, df.npartitions)}) nexts = [(name_b, i) for i in range(1, df.npartitions)] + [None] elif isinstance(after, datetime.timedelta): # TODO: Do we have a use-case for this? 
Pandas doesn't allow negative rolling windows deltas = pd.Series(df.divisions).diff().iloc[1:-1] if (after > deltas).any(): raise ValueError(timedelta_partition_message) dsk.update({(name_b, i): (_head_timedelta, (df_name, i - 0), (df_name, i), after) for i in range(1, df.npartitions)}) nexts = [(name_b, i) for i in range(1, df.npartitions)] + [None] else: nexts = [None] * df.npartitions for i, (prev, current, next) in enumerate(zip(prevs, df.__dask_keys__(), nexts)): dsk[(name, i)] = (overlap_chunk, func, prev, current, next, before, after, args, kwargs) return df._constructor(dsk, name, meta, df.divisions) def wrap_rolling(func, method_name): """Create a chunked version of a pandas.rolling_* function""" @wraps(func) def rolling(arg, window, *args, **kwargs): # pd.rolling_* functions are deprecated warnings.warn(("DeprecationWarning: dd.rolling_{0} is deprecated and " "will be removed in a future version, replace with " "df.rolling(...).{0}(...)").format(method_name)) rolling_kwargs = {} method_kwargs = {} for k, v in kwargs.items(): if k in {'min_periods', 'center', 'win_type', 'axis', 'freq'}: rolling_kwargs[k] = v else: method_kwargs[k] = v rolling = arg.rolling(window, **rolling_kwargs) return getattr(rolling, method_name)(*args, **method_kwargs) return rolling def _head_timedelta(current, next_, after): """Return rows of ``next_`` whose index is before the last observation in ``current`` + ``after``. Parameters ---------- current : DataFrame next_ : DataFrame after : timedelta Returns ------- overlapped : DataFrame """ return next_[next_.index < (current.index.max() + after)] def _tail_timedelta(prev, current, before): """Return rows of ``prev`` whose index is after the first observation in ``current`` - ``before``. Parameters ---------- current : DataFrame next_ : DataFrame before : timedelta Returns ------- overlapped : DataFrame """ return prev[prev.index > (current.index.min() - before)] rolling_count = wrap_rolling(pd.rolling_count, 'count') rolling_sum = wrap_rolling(pd.rolling_sum, 'sum') rolling_mean = wrap_rolling(pd.rolling_mean, 'mean') rolling_median = wrap_rolling(pd.rolling_median, 'median') rolling_min = wrap_rolling(pd.rolling_min, 'min') rolling_max = wrap_rolling(pd.rolling_max, 'max') rolling_std = wrap_rolling(pd.rolling_std, 'std') rolling_var = wrap_rolling(pd.rolling_var, 'var') rolling_skew = wrap_rolling(pd.rolling_skew, 'skew') rolling_kurt = wrap_rolling(pd.rolling_kurt, 'kurt') rolling_quantile = wrap_rolling(pd.rolling_quantile, 'quantile') rolling_apply = wrap_rolling(pd.rolling_apply, 'apply') @wraps(pd.rolling_window) def rolling_window(arg, window, **kwargs): if kwargs.pop('mean', True): return rolling_mean(arg, window, **kwargs) return rolling_sum(arg, window, **kwargs) def pandas_rolling_method(df, rolling_kwargs, name, *args, **kwargs): rolling = df.rolling(**rolling_kwargs) return getattr(rolling, name)(*args, **kwargs) class Rolling(object): """Provides rolling window calculations.""" def __init__(self, obj, window=None, min_periods=None, freq=None, center=False, win_type=None, axis=0): if freq is not None: msg = 'The deprecated freq argument is not supported.' raise NotImplementedError(msg) self.obj = obj # dataframe or series self.window = window self.min_periods = min_periods self.center = center self.axis = axis self.win_type = win_type # Allow pandas to raise if appropriate pd_roll = obj._meta.rolling(**self._rolling_kwargs()) # Using .rolling(window='2s'), pandas will convert the # offset str to a window in nanoseconds. 
But pandas doesn't
        # accept the integer window with win_type='freq', so we store
        # that information here.
        # See https://github.com/pandas-dev/pandas/issues/15969
        self._window = pd_roll.window
        self._win_type = pd_roll.win_type
        self._min_periods = pd_roll.min_periods

    def _rolling_kwargs(self):
        return {'window': self.window,
                'min_periods': self.min_periods,
                'center': self.center,
                'win_type': self.win_type,
                'axis': self.axis}

    @property
    def _has_single_partition(self):
        """
        Indicator for whether the object has a single partition (True)
        or multiple (False).
        """
        return (self.axis in (1, 'columns') or
                (isinstance(self.window, int) and self.window <= 1) or
                self.obj.npartitions == 1)

    def _call_method(self, method_name, *args, **kwargs):
        rolling_kwargs = self._rolling_kwargs()
        meta = pandas_rolling_method(self.obj._meta_nonempty, rolling_kwargs,
                                     method_name, *args, **kwargs)

        if self._has_single_partition:
            # There's no overlap, just use map_partitions
            return self.obj.map_partitions(pandas_rolling_method,
                                           rolling_kwargs, method_name,
                                           *args, token=method_name,
                                           meta=meta, **kwargs)
        # Convert window to overlap
        if self.center:
            before = self.window // 2
            after = self.window - before - 1
        elif self._win_type == 'freq':
            before = pd.Timedelta(self.window)
            after = 0
        else:
            before = self.window - 1
            after = 0
        return map_overlap(pandas_rolling_method, self.obj, before, after,
                           rolling_kwargs, method_name, *args,
                           token=method_name, meta=meta, **kwargs)

    @derived_from(pd_Rolling)
    def count(self):
        return self._call_method('count')

    @derived_from(pd_Rolling)
    def sum(self):
        return self._call_method('sum')

    @derived_from(pd_Rolling)
    def mean(self):
        return self._call_method('mean')

    @derived_from(pd_Rolling)
    def median(self):
        return self._call_method('median')

    @derived_from(pd_Rolling)
    def min(self):
        return self._call_method('min')

    @derived_from(pd_Rolling)
    def max(self):
        return self._call_method('max')

    @derived_from(pd_Rolling)
    def std(self, ddof=1):
        # forward the caller's ddof to pandas
        return self._call_method('std', ddof=ddof)

    @derived_from(pd_Rolling)
    def var(self, ddof=1):
        # forward the caller's ddof to pandas
        return self._call_method('var', ddof=ddof)

    @derived_from(pd_Rolling)
    def skew(self):
        return self._call_method('skew')

    @derived_from(pd_Rolling)
    def kurt(self):
        return self._call_method('kurt')

    @derived_from(pd_Rolling)
    def quantile(self, quantile):
        return self._call_method('quantile', quantile)

    @derived_from(pd_Rolling)
    def apply(self, func, args=(), kwargs={}):
        return self._call_method('apply', func, args=args, kwargs=kwargs)

    def __repr__(self):

        def order(item):
            k, v = item
            _order = {'window': 0, 'min_periods': 1, 'center': 2,
                      'win_type': 3, 'axis': 4}
            return _order[k]

        rolling_kwargs = self._rolling_kwargs()
        # pandas translates the '2S' offset to nanoseconds
        rolling_kwargs['window'] = self._window
        rolling_kwargs['win_type'] = self._win_type
        return 'Rolling [{}]'.format(','.join(
            '{}={}'.format(k, v)
            for k, v in sorted(rolling_kwargs.items(), key=order)
            if v is not None))
dask-0.16.0/dask/dataframe/shuffle.py000066400000000000000000000432611320364734500174110ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function

import math
from operator import getitem
import uuid

import numpy as np
import pandas as pd
from toolz import merge

from .methods import drop_columns
from .core import DataFrame, Series, _Frame, _concat, map_partitions
from .hashing import hash_pandas_object
from .utils import PANDAS_VERSION
from ..
import base from ..base import tokenize, compute, compute_as_if_collection from ..context import _globals from ..delayed import delayed from ..sizeof import sizeof from ..utils import digit, insert, M if PANDAS_VERSION >= '0.20.0': from pandas._libs.algos import groupsort_indexer else: from pandas.algos import groupsort_indexer def set_index(df, index, npartitions=None, shuffle=None, compute=False, drop=True, upsample=1.0, divisions=None, partition_size=128e6, **kwargs): """ See _Frame.set_index for docstring """ if (isinstance(index, Series) and index._name == df.index._name): return df if isinstance(index, (DataFrame, tuple, list)): raise NotImplementedError( "Dask dataframe does not yet support multi-indexes.\n" "You tried to index with this index: %s\n" "Indexes must be single columns only." % str(index)) if npartitions == 'auto': repartition = True npartitions = max(100, df.npartitions) else: if npartitions is None: npartitions = df.npartitions repartition = False if not isinstance(index, Series): index2 = df[index] else: index2 = index if divisions is None: divisions = index2._repartition_quantiles(npartitions, upsample=upsample) if repartition: parts = df.to_delayed() sizes = [delayed(sizeof)(part) for part in parts] else: sizes = [] iparts = index2.to_delayed() mins = [ipart.min() for ipart in iparts] maxes = [ipart.max() for ipart in iparts] divisions, sizes, mins, maxes = base.compute(divisions, sizes, mins, maxes) divisions = divisions.tolist() empty_dataframe_detected = pd.isnull(divisions).all() if repartition or empty_dataframe_detected: total = sum(sizes) npartitions = max(math.ceil(total / partition_size), 1) npartitions = min(npartitions, df.npartitions) n = len(divisions) try: divisions = np.interp(x=np.linspace(0, n - 1, npartitions + 1), xp=np.linspace(0, n - 1, n), fp=divisions).tolist() except (TypeError, ValueError): # str type indexes = np.linspace(0, n - 1, npartitions + 1).astype(int) divisions = [divisions[i] for i in indexes] mins = remove_nans(mins) maxes = remove_nans(maxes) if (mins == sorted(mins) and maxes == sorted(maxes) and all(mx < mn for mx, mn in zip(maxes[:-1], mins[1:]))): divisions = mins + [maxes[-1]] result = set_sorted_index(df, index, drop=drop, divisions=divisions) # There are cases where this still may not be sorted # so sort_index to be sure. https://github.com/dask/dask/issues/2288 return result.map_partitions(M.sort_index) return set_partition(df, index, divisions, shuffle=shuffle, drop=drop, compute=compute, **kwargs) def remove_nans(divisions): """ Remove nans from divisions These sometime pop up when we call min/max on an empty partition Examples -------- >>> remove_nans((np.nan, 1, 2)) [1, 1, 2] >>> remove_nans((1, np.nan, 2)) [1, 2, 2] >>> remove_nans((1, 2, np.nan)) [1, 2, 2] """ divisions = list(divisions) for i in range(len(divisions) - 2, -1, -1): if pd.isnull(divisions[i]): divisions[i] = divisions[i + 1] for i in range(len(divisions) - 1, -1, -1): if not pd.isnull(divisions[i]): for j in range(i + 1, len(divisions)): divisions[j] = divisions[i] break return divisions def set_partition(df, index, divisions, max_branch=32, drop=True, shuffle=None, compute=None): """ Group DataFrame by index Sets a new index and partitions data along that index according to divisions. Divisions are often found by computing approximate quantiles. The function ``set_index`` will do both of these steps. 
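
    Internally this happens in three steps: each row is tagged with the
    number of the output partition its index value falls into
    (``set_partitions_pre``), rows are shuffled to those partitions
    (``rearrange_by_column``, via either the disk-based or task-based
    shuffle), and each output partition then sets and sorts its new index
    (``set_index_post_scalar`` / ``set_index_post_series``).
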
Parameters ---------- df: DataFrame/Series Data that we want to re-partition index: string or Series Column to become the new index divisions: list Values to form new divisions between partitions drop: bool, default True Whether to delete columns to be used as the new index shuffle: str (optional) Either 'disk' for an on-disk shuffle or 'tasks' to use the task scheduling framework. Use 'disk' if you are on a single machine and 'tasks' if you are on a distributed cluster. max_branch: int (optional) If using the task-based shuffle, the amount of splitting each partition undergoes. Increase this for fewer copies but more scheduler overhead. See Also -------- set_index shuffle partd """ if np.isscalar(index): partitions = df[index].map_partitions(set_partitions_pre, divisions=divisions, meta=pd.Series([0])) df2 = df.assign(_partitions=partitions) else: partitions = index.map_partitions(set_partitions_pre, divisions=divisions, meta=pd.Series([0])) df2 = df.assign(_partitions=partitions, _index=index) df3 = rearrange_by_column(df2, '_partitions', max_branch=max_branch, npartitions=len(divisions) - 1, shuffle=shuffle, compute=compute) if np.isscalar(index): df4 = df3.map_partitions(set_index_post_scalar, index_name=index, drop=drop, column_dtype=df.columns.dtype) else: df4 = df3.map_partitions(set_index_post_series, index_name=index.name, drop=drop, column_dtype=df.columns.dtype) df4.divisions = divisions return df4.map_partitions(M.sort_index) def shuffle(df, index, shuffle=None, npartitions=None, max_branch=32, compute=None): """ Group DataFrame by index Hash grouping of elements. After this operation all elements that have the same index will be in the same partition. Note that this requires full dataset read, serialization and shuffle. This is expensive. If possible you should avoid shuffles. This does not preserve a meaningful index/partitioning scheme. This is not deterministic if done in parallel. See Also -------- set_index set_partition shuffle_disk shuffle_tasks """ if not isinstance(index, _Frame): index = df[index] partitions = index.map_partitions(partitioning_index, npartitions=npartitions or df.npartitions, meta=pd.Series([0])) df2 = df.assign(_partitions=partitions) df3 = rearrange_by_column(df2, '_partitions', npartitions=npartitions, max_branch=max_branch, shuffle=shuffle, compute=compute) df4 = df3.map_partitions(drop_columns, '_partitions', df.columns.dtype) return df4 def rearrange_by_divisions(df, column, divisions, max_branch=None, shuffle=None): """ Shuffle dataframe so that column separates along divisions """ partitions = df[column].map_partitions(set_partitions_pre, divisions=divisions, meta=pd.Series([0])) df2 = df.assign(_partitions=partitions) df3 = rearrange_by_column(df2, '_partitions', max_branch=max_branch, npartitions=len(divisions) - 1, shuffle=shuffle) df4 = df3.map_partitions(drop_columns, '_partitions', df.columns.dtype) return df4 def rearrange_by_column(df, col, npartitions=None, max_branch=None, shuffle=None, compute=None): shuffle = shuffle or _globals.get('shuffle', 'disk') if shuffle == 'disk': return rearrange_by_column_disk(df, col, npartitions, compute=compute) elif shuffle == 'tasks': return rearrange_by_column_tasks(df, col, max_branch, npartitions) else: raise NotImplementedError("Unknown shuffle method %s" % shuffle) class maybe_buffered_partd(object): """If serialized, will return non-buffered partd. 
Otherwise returns a buffered partd""" def __init__(self, buffer=True, tempdir=None): self.tempdir = tempdir or _globals.get('temporary_directory') self.buffer = buffer def __reduce__(self): if self.tempdir: return (maybe_buffered_partd, (False, self.tempdir)) else: return (maybe_buffered_partd, (False,)) def __call__(self, *args, **kwargs): import partd if self.tempdir: file = partd.File(dir=self.tempdir) else: file = partd.File() if self.buffer: return partd.PandasBlocks(partd.Buffer(partd.Dict(), file)) else: return partd.PandasBlocks(file) def rearrange_by_column_disk(df, column, npartitions=None, compute=False): """ Shuffle using local disk """ if npartitions is None: npartitions = df.npartitions token = tokenize(df, column, npartitions) always_new_token = uuid.uuid1().hex p = ('zpartd-' + always_new_token,) dsk1 = {p: (maybe_buffered_partd(),)} # Partition data on disk name = 'shuffle-partition-' + always_new_token dsk2 = {(name, i): (shuffle_group_3, key, column, npartitions, p) for i, key in enumerate(df.__dask_keys__())} dsk = merge(df.dask, dsk1, dsk2) if compute: keys = [p, sorted(dsk2)] pp, values = compute_as_if_collection(DataFrame, dsk, keys) dsk1 = {p: pp} dsk = dict(zip(sorted(dsk2), values)) # Barrier barrier_token = 'barrier-' + always_new_token dsk3 = {barrier_token: (barrier, list(dsk2))} # Collect groups name = 'shuffle-collect-' + token dsk4 = {(name, i): (collect, p, i, df._meta, barrier_token) for i in range(npartitions)} divisions = (None,) * (npartitions + 1) dsk = merge(dsk, dsk1, dsk3, dsk4) return DataFrame(dsk, name, df._meta, divisions) def rearrange_by_column_tasks(df, column, max_branch=32, npartitions=None): """ Order divisions of DataFrame so that all values within column align This enacts a task-based shuffle See also: rearrange_by_column_disk set_partitions_tasks shuffle_tasks """ max_branch = max_branch or 32 n = df.npartitions stages = int(math.ceil(math.log(n) / math.log(max_branch))) if stages > 1: k = int(math.ceil(n ** (1 / stages))) else: k = n groups = [] splits = [] joins = [] inputs = [tuple(digit(i, j, k) for j in range(stages)) for i in range(k**stages)] token = tokenize(df, column, max_branch) start = dict((('shuffle-join-' + token, 0, inp), (df._name, i) if i < df.npartitions else df._meta) for i, inp in enumerate(inputs)) for stage in range(1, stages + 1): group = dict((('shuffle-group-' + token, stage, inp), (shuffle_group, ('shuffle-join-' + token, stage - 1, inp), column, stage - 1, k, n)) for inp in inputs) split = dict((('shuffle-split-' + token, stage, i, inp), (getitem, ('shuffle-group-' + token, stage, inp), i)) for i in range(k) for inp in inputs) join = dict((('shuffle-join-' + token, stage, inp), (_concat, [('shuffle-split-' + token, stage, inp[stage - 1], insert(inp, stage - 1, j)) for j in range(k)])) for inp in inputs) groups.append(group) splits.append(split) joins.append(join) end = dict((('shuffle-' + token, i), ('shuffle-join-' + token, stages, inp)) for i, inp in enumerate(inputs)) dsk = merge(df.dask, start, end, *(groups + splits + joins)) df2 = DataFrame(dsk, 'shuffle-' + token, df, df.divisions) if npartitions is not None and npartitions != df.npartitions: parts = [i % df.npartitions for i in range(npartitions)] token = tokenize(df2, npartitions) dsk = {('repartition-group-' + token, i): (shuffle_group_2, k, column) for i, k in enumerate(df2.__dask_keys__())} for p in range(npartitions): dsk[('repartition-get-' + token, p)] = \ (shuffle_group_get, ('repartition-group-' + token, parts[p]), p) df3 = 
DataFrame(merge(df2.dask, dsk), 'repartition-get-' + token, df2, [None] * (npartitions + 1)) else: df3 = df2 df3.divisions = (None,) * (df.npartitions + 1) return df3 ######################################################## # Various convenience functions to be run by the above # ######################################################## def partitioning_index(df, npartitions): """ Computes a deterministic index mapping each record to a partition. Identical rows are mapped to the same partition. Parameters ---------- df : DataFrame/Series/Index npartitions : int The number of partitions to group into. Returns ------- partitions : ndarray An array of int64 values mapping each record to a partition. """ return hash_pandas_object(df, index=False) % int(npartitions) def barrier(args): list(args) return 0 def collect(p, part, meta, barrier_token): """ Collect partitions from partd, yield dataframes """ res = p.get(part) return res if len(res) > 0 else meta def set_partitions_pre(s, divisions): partitions = pd.Series(divisions).searchsorted(s, side='right') - 1 partitions[(s >= divisions[-1]).values] = len(divisions) - 2 return partitions def shuffle_group_2(df, col): if not len(df): return {}, df ind = df[col]._values.astype(np.int64) n = ind.max() + 1 indexer, locations = groupsort_indexer(ind.view(np.int64), n) df2 = df.take(indexer) locations = locations.cumsum() parts = [df2.iloc[a:b] for a, b in zip(locations[:-1], locations[1:])] result2 = dict(zip(range(n), parts)) return result2, df.iloc[:0] def shuffle_group_get(g_head, i): g, head = g_head if i in g: return g[i] else: return head def shuffle_group(df, col, stage, k, npartitions): if col == '_partitions': ind = df[col] else: ind = hash_pandas_object(df[col], index=False) c = ind._values typ = np.min_scalar_type(npartitions * 2) c = c.astype(typ) npartitions, k, stage = [np.array(x, dtype=np.min_scalar_type(x))[()] for x in [npartitions, k, stage]] c = np.mod(c, npartitions, out=c) c = np.floor_divide(c, k ** stage, out=c) c = np.mod(c, k, out=c) indexer, locations = groupsort_indexer(c.astype(np.int64), k) df2 = df.take(indexer) locations = locations.cumsum() parts = [df2.iloc[a:b] for a, b in zip(locations[:-1], locations[1:])] return dict(zip(range(k), parts)) def shuffle_group_3(df, col, npartitions, p): g = df.groupby(col) d = {i: g.get_group(i) for i in g.groups} p.append(d, fsync=True) def set_index_post_scalar(df, index_name, drop, column_dtype): df2 = df.drop('_partitions', axis=1).set_index(index_name, drop=drop) df2.columns = df2.columns.astype(column_dtype) return df2 def set_index_post_series(df, index_name, drop, column_dtype): df2 = df.drop('_partitions', axis=1).set_index('_index', drop=True) df2.index.name = index_name df2.columns = df2.columns.astype(column_dtype) return df2 def set_sorted_index(df, index, drop=True, divisions=None, **kwargs): if not isinstance(index, Series): meta = df._meta.set_index(index, drop=drop) else: meta = df._meta.set_index(index._meta, drop=drop) result = map_partitions(M.set_index, df, index, drop=drop, meta=meta) if not divisions: divisions = compute_divisions(result, **kwargs) elif len(divisions) != len(df.divisions): msg = ("When doing `df.set_index(col, sorted=True, divisions=...)`, " "divisions indicates known splits in the index column. 
In this " "case divisions must be the same length as the existing " "divisions in `df`\n\n" "If the intent is to repartition into new divisions after " "setting the index, you probably want:\n\n" "`df.set_index(col, sorted=True).repartition(divisions=divisions)`") raise ValueError(msg) result.divisions = tuple(divisions) return result def compute_divisions(df, **kwargs): mins = df.index.map_partitions(M.min, meta=df.index) maxes = df.index.map_partitions(M.max, meta=df.index) mins, maxes = compute(mins, maxes, **kwargs) if (sorted(mins) != list(mins) or sorted(maxes) != list(maxes) or any(a > b for a, b in zip(mins, maxes))): raise ValueError("Partitions must be sorted ascending with the index", mins, maxes) divisions = tuple(mins) + (list(maxes)[-1],) return divisions dask-0.16.0/dask/dataframe/tests/000077500000000000000000000000001320364734500165375ustar00rootroot00000000000000dask-0.16.0/dask/dataframe/tests/__init__.py000066400000000000000000000000001320364734500206360ustar00rootroot00000000000000dask-0.16.0/dask/dataframe/tests/test_arithmetics_reduction.py000066400000000000000000001234331320364734500245460ustar00rootroot00000000000000from datetime import datetime import pytest import numpy as np import pandas as pd import dask.dataframe as dd from dask.dataframe.utils import assert_eq, assert_dask_graph, make_meta @pytest.mark.slow def test_arithmetics(): dsk = {('x', 0): pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=[0, 1, 3]), ('x', 1): pd.DataFrame({'a': [4, 5, 6], 'b': [3, 2, 1]}, index=[5, 6, 8]), ('x', 2): pd.DataFrame({'a': [7, 8, 9], 'b': [0, 0, 0]}, index=[9, 9, 9])} meta = make_meta({'a': 'i8', 'b': 'i8'}, index=pd.Index([], 'i8')) ddf1 = dd.DataFrame(dsk, 'x', meta, [0, 4, 9, 9]) pdf1 = ddf1.compute() pdf2 = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7, 8], 'b': [5, 6, 7, 8, 1, 2, 3, 4]}) pdf3 = pd.DataFrame({'a': [5, 6, 7, 8, 4, 3, 2, 1], 'b': [2, 4, 5, 3, 4, 2, 1, 0]}) ddf2 = dd.from_pandas(pdf2, 3) ddf3 = dd.from_pandas(pdf3, 2) dsk4 = {('y', 0): pd.DataFrame({'a': [3, 2, 1], 'b': [7, 8, 9]}, index=[0, 1, 3]), ('y', 1): pd.DataFrame({'a': [5, 2, 8], 'b': [4, 2, 3]}, index=[5, 6, 8]), ('y', 2): pd.DataFrame({'a': [1, 4, 10], 'b': [1, 0, 5]}, index=[9, 9, 9])} ddf4 = dd.DataFrame(dsk4, 'y', meta, [0, 4, 9, 9]) pdf4 = ddf4.compute() # Arithmetics cases = [(ddf1, ddf1, pdf1, pdf1), (ddf1, ddf1.repartition([0, 1, 3, 6, 9]), pdf1, pdf1), (ddf2, ddf3, pdf2, pdf3), (ddf2.repartition([0, 3, 6, 7]), ddf3.repartition([0, 7]), pdf2, pdf3), (ddf2.repartition([0, 7]), ddf3.repartition([0, 2, 4, 5, 7]), pdf2, pdf3), (ddf1, ddf4, pdf1, pdf4), (ddf1, ddf4.repartition([0, 9]), pdf1, pdf4), (ddf1.repartition([0, 3, 9]), ddf4.repartition([0, 5, 9]), pdf1, pdf4), # dask + pandas (ddf1, pdf4, pdf1, pdf4), (ddf2, pdf3, pdf2, pdf3)] for (l, r, el, er) in cases: check_series_arithmetics(l.a, r.b, el.a, er.b) check_frame_arithmetics(l, r, el, er) # different index, pandas raises ValueError in comparison ops pdf5 = pd.DataFrame({'a': [3, 2, 1, 5, 2, 8, 1, 4, 10], 'b': [7, 8, 9, 4, 2, 3, 1, 0, 5]}, index=[0, 1, 3, 5, 6, 8, 9, 9, 9]) ddf5 = dd.from_pandas(pdf5, 2) pdf6 = pd.DataFrame({'a': [3, 2, 1, 5, 2, 8, 1, 4, 10], 'b': [7, 8, 9, 5, 7, 8, 4, 2, 5]}, index=[0, 1, 2, 3, 4, 5, 6, 7, 9]) ddf6 = dd.from_pandas(pdf6, 4) pdf7 = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7, 8], 'b': [5, 6, 7, 8, 1, 2, 3, 4]}, index=list('aaabcdeh')) pdf8 = pd.DataFrame({'a': [5, 6, 7, 8, 4, 3, 2, 1], 'b': [2, 4, 5, 3, 4, 2, 1, 0]}, index=list('abcdefgh')) ddf7 = dd.from_pandas(pdf7, 3) ddf8 = dd.from_pandas(pdf8, 4) pdf9 
= pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7, 8], 'b': [5, 6, 7, 8, 1, 2, 3, 4], 'c': [5, 6, 7, 8, 1, 2, 3, 4]}, index=list('aaabcdeh')) pdf10 = pd.DataFrame({'b': [5, 6, 7, 8, 4, 3, 2, 1], 'c': [2, 4, 5, 3, 4, 2, 1, 0], 'd': [2, 4, 5, 3, 4, 2, 1, 0]}, index=list('abcdefgh')) ddf9 = dd.from_pandas(pdf9, 3) ddf10 = dd.from_pandas(pdf10, 4) # Arithmetics with different index cases = [(ddf5, ddf6, pdf5, pdf6), (ddf5.repartition([0, 9]), ddf6, pdf5, pdf6), (ddf5.repartition([0, 5, 9]), ddf6.repartition([0, 7, 9]), pdf5, pdf6), (ddf7, ddf8, pdf7, pdf8), (ddf7.repartition(['a', 'c', 'h']), ddf8.repartition(['a', 'h']), pdf7, pdf8), (ddf7.repartition(['a', 'b', 'e', 'h']), ddf8.repartition(['a', 'e', 'h']), pdf7, pdf8), (ddf9, ddf10, pdf9, pdf10), (ddf9.repartition(['a', 'c', 'h']), ddf10.repartition(['a', 'h']), pdf9, pdf10), # dask + pandas (ddf5, pdf6, pdf5, pdf6), (ddf7, pdf8, pdf7, pdf8), (ddf9, pdf10, pdf9, pdf10)] for (l, r, el, er) in cases: check_series_arithmetics(l.a, r.b, el.a, er.b, allow_comparison_ops=False) check_frame_arithmetics(l, r, el, er, allow_comparison_ops=False) def test_deterministic_arithmetic_names(): df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [5, 6, 7, 8]}) a = dd.from_pandas(df, npartitions=2) assert sorted((a.x + a.y ** 2).dask) == sorted((a.x + a.y ** 2).dask) assert sorted((a.x + a.y ** 2).dask) != sorted((a.x + a.y ** 3).dask) assert sorted((a.x + a.y ** 2).dask) != sorted((a.x - a.y ** 2).dask) @pytest.mark.slow def test_arithmetics_different_index(): # index are different, but overwraps pdf1 = pd.DataFrame({'a': [1, 2, 3, 4, 5], 'b': [3, 5, 2, 5, 7]}, index=[1, 2, 3, 4, 5]) ddf1 = dd.from_pandas(pdf1, 2) pdf2 = pd.DataFrame({'a': [3, 2, 6, 7, 8], 'b': [9, 4, 2, 6, 2]}, index=[3, 4, 5, 6, 7]) ddf2 = dd.from_pandas(pdf2, 2) # index are not overwrapped pdf3 = pd.DataFrame({'a': [1, 2, 3, 4, 5], 'b': [3, 5, 2, 5, 7]}, index=[1, 2, 3, 4, 5]) ddf3 = dd.from_pandas(pdf3, 2) pdf4 = pd.DataFrame({'a': [3, 2, 6, 7, 8], 'b': [9, 4, 2, 6, 2]}, index=[10, 11, 12, 13, 14]) ddf4 = dd.from_pandas(pdf4, 2) # index is included in another pdf5 = pd.DataFrame({'a': [1, 2, 3, 4, 5], 'b': [3, 5, 2, 5, 7]}, index=[1, 3, 5, 7, 9]) ddf5 = dd.from_pandas(pdf5, 2) pdf6 = pd.DataFrame({'a': [3, 2, 6, 7, 8], 'b': [9, 4, 2, 6, 2]}, index=[2, 3, 4, 5, 6]) ddf6 = dd.from_pandas(pdf6, 2) cases = [(ddf1, ddf2, pdf1, pdf2), (ddf2, ddf1, pdf2, pdf1), (ddf1.repartition([1, 3, 5]), ddf2.repartition([3, 4, 7]), pdf1, pdf2), (ddf2.repartition([3, 4, 5, 7]), ddf1.repartition([1, 2, 4, 5]), pdf2, pdf1), (ddf3, ddf4, pdf3, pdf4), (ddf4, ddf3, pdf4, pdf3), (ddf3.repartition([1, 2, 3, 4, 5]), ddf4.repartition([10, 11, 12, 13, 14]), pdf3, pdf4), (ddf4.repartition([10, 14]), ddf3.repartition([1, 3, 4, 5]), pdf4, pdf3), (ddf5, ddf6, pdf5, pdf6), (ddf6, ddf5, pdf6, pdf5), (ddf5.repartition([1, 7, 8, 9]), ddf6.repartition([2, 3, 4, 6]), pdf5, pdf6), (ddf6.repartition([2, 6]), ddf5.repartition([1, 3, 7, 9]), pdf6, pdf5), # dask + pandas (ddf1, pdf2, pdf1, pdf2), (ddf2, pdf1, pdf2, pdf1), (ddf3, pdf4, pdf3, pdf4), (ddf4, pdf3, pdf4, pdf3), (ddf5, pdf6, pdf5, pdf6), (ddf6, pdf5, pdf6, pdf5)] for (l, r, el, er) in cases: check_series_arithmetics(l.a, r.b, el.a, er.b, allow_comparison_ops=False) check_frame_arithmetics(l, r, el, er, allow_comparison_ops=False) pdf7 = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7, 8], 'b': [5, 6, 7, 8, 1, 2, 3, 4]}, index=[0, 2, 4, 8, 9, 10, 11, 13]) pdf8 = pd.DataFrame({'a': [5, 6, 7, 8, 4, 3, 2, 1], 'b': [2, 4, 5, 3, 4, 2, 1, 0]}, index=[1, 3, 4, 8, 9, 11, 12, 13]) ddf7 = 
dd.from_pandas(pdf7, 3) ddf8 = dd.from_pandas(pdf8, 2) pdf9 = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7, 8], 'b': [5, 6, 7, 8, 1, 2, 3, 4]}, index=[0, 2, 4, 8, 9, 10, 11, 13]) pdf10 = pd.DataFrame({'a': [5, 6, 7, 8, 4, 3, 2, 1], 'b': [2, 4, 5, 3, 4, 2, 1, 0]}, index=[0, 3, 4, 8, 9, 11, 12, 13]) ddf9 = dd.from_pandas(pdf9, 3) ddf10 = dd.from_pandas(pdf10, 2) cases = [(ddf7, ddf8, pdf7, pdf8), (ddf8, ddf7, pdf8, pdf7), # (ddf7.repartition([0, 13]), # ddf8.repartition([0, 4, 11, 14], force=True), # pdf7, pdf8), (ddf8.repartition([-5, 10, 15], force=True), ddf7.repartition([-1, 4, 11, 14], force=True), pdf8, pdf7), (ddf7.repartition([0, 8, 12, 13]), ddf8.repartition([0, 2, 8, 12, 13], force=True), pdf7, pdf8), (ddf8.repartition([-5, 0, 10, 20], force=True), ddf7.repartition([-1, 4, 11, 13], force=True), pdf8, pdf7), (ddf9, ddf10, pdf9, pdf10), (ddf10, ddf9, pdf10, pdf9), # dask + pandas (ddf7, pdf8, pdf7, pdf8), (ddf8, pdf7, pdf8, pdf7), (ddf9, pdf10, pdf9, pdf10), (ddf10, pdf9, pdf10, pdf9)] for (l, r, el, er) in cases: check_series_arithmetics(l.a, r.b, el.a, er.b, allow_comparison_ops=False) check_frame_arithmetics(l, r, el, er, allow_comparison_ops=False) def check_series_arithmetics(l, r, el, er, allow_comparison_ops=True): assert isinstance(l, dd.Series) assert isinstance(r, (dd.Series, pd.Series)) assert isinstance(el, pd.Series) assert isinstance(er, pd.Series) # l, r may be repartitioned, test whether repartition keeps original data assert_eq(l, el) assert_eq(r, er) assert_eq(l + r, el + er) assert_eq(l * r, el * er) assert_eq(l - r, el - er) assert_eq(l / r, el / er) assert_eq(l // r, el // er) assert_eq(l ** r, el ** er) assert_eq(l % r, el % er) if allow_comparison_ops: # comparison is allowed if data have same index assert_eq(l & r, el & er) assert_eq(l | r, el | er) assert_eq(l ^ r, el ^ er) assert_eq(l > r, el > er) assert_eq(l < r, el < er) assert_eq(l >= r, el >= er) assert_eq(l <= r, el <= er) assert_eq(l == r, el == er) assert_eq(l != r, el != er) assert_eq(l.lt(r), el.lt(er)) assert_eq(l.gt(r), el.gt(er)) assert_eq(l.le(r), el.le(er)) assert_eq(l.ge(r), el.ge(er)) assert_eq(l.ne(r), el.ne(er)) assert_eq(l.eq(r), el.eq(er)) assert_eq(l + 2, el + 2) assert_eq(l * 2, el * 2) assert_eq(l - 2, el - 2) assert_eq(l / 2, el / 2) assert_eq(l & True, el & True) assert_eq(l | True, el | True) assert_eq(l ^ True, el ^ True) assert_eq(l // 2, el // 2) assert_eq(l ** 2, el ** 2) assert_eq(l % 2, el % 2) assert_eq(l > 2, el > 2) assert_eq(l < 2, el < 2) assert_eq(l >= 2, el >= 2) assert_eq(l <= 2, el <= 2) assert_eq(l == 2, el == 2) assert_eq(l != 2, el != 2) assert_eq(2 + r, 2 + er) assert_eq(2 * r, 2 * er) assert_eq(2 - r, 2 - er) assert_eq(2 / r, 2 / er) assert_eq(True & r, True & er) assert_eq(True | r, True | er) assert_eq(True ^ r, True ^ er) assert_eq(2 // r, 2 // er) assert_eq(2 ** r, 2 ** er) assert_eq(2 % r, 2 % er) assert_eq(2 > r, 2 > er) assert_eq(2 < r, 2 < er) assert_eq(2 >= r, 2 >= er) assert_eq(2 <= r, 2 <= er) assert_eq(2 == r, 2 == er) assert_eq(2 != r, 2 != er) assert_eq(l.lt(2), el.lt(2)) assert_eq(l.gt(2), el.gt(2)) assert_eq(l.le(2), el.le(2)) assert_eq(l.ge(2), el.ge(2)) assert_eq(l.ne(2), el.ne(2)) assert_eq(l.eq(2), el.eq(2)) assert_eq(-l, -el) assert_eq(abs(l), abs(el)) if allow_comparison_ops: # comparison is allowed if data have same index assert_eq(~(l == r), ~(el == er)) def check_frame_arithmetics(l, r, el, er, allow_comparison_ops=True): assert isinstance(l, dd.DataFrame) assert isinstance(r, (dd.DataFrame, pd.DataFrame)) assert isinstance(el, pd.DataFrame) 
assert isinstance(er, pd.DataFrame) # l, r may be repartitioned, test whether repartition keeps original data assert_eq(l, el) assert_eq(r, er) assert_eq(l + r, el + er) assert_eq(l * r, el * er) assert_eq(l - r, el - er) assert_eq(l / r, el / er) assert_eq(l // r, el // er) assert_eq(l ** r, el ** er) assert_eq(l % r, el % er) if allow_comparison_ops: # comparison is allowed if data have same index assert_eq(l & r, el & er) assert_eq(l | r, el | er) assert_eq(l ^ r, el ^ er) assert_eq(l > r, el > er) assert_eq(l < r, el < er) assert_eq(l >= r, el >= er) assert_eq(l <= r, el <= er) assert_eq(l == r, el == er) assert_eq(l != r, el != er) assert_eq(l.lt(r), el.lt(er)) assert_eq(l.gt(r), el.gt(er)) assert_eq(l.le(r), el.le(er)) assert_eq(l.ge(r), el.ge(er)) assert_eq(l.ne(r), el.ne(er)) assert_eq(l.eq(r), el.eq(er)) assert_eq(l + 2, el + 2) assert_eq(l * 2, el * 2) assert_eq(l - 2, el - 2) assert_eq(l / 2, el / 2) assert_eq(l & True, el & True) assert_eq(l | True, el | True) assert_eq(l ^ True, el ^ True) assert_eq(l // 2, el // 2) assert_eq(l ** 2, el ** 2) assert_eq(l % 2, el % 2) assert_eq(l > 2, el > 2) assert_eq(l < 2, el < 2) assert_eq(l >= 2, el >= 2) assert_eq(l <= 2, el <= 2) assert_eq(l == 2, el == 2) assert_eq(l != 2, el != 2) assert_eq(2 + l, 2 + el) assert_eq(2 * l, 2 * el) assert_eq(2 - l, 2 - el) assert_eq(2 / l, 2 / el) assert_eq(True & l, True & el) assert_eq(True | l, True | el) assert_eq(True ^ l, True ^ el) assert_eq(2 // l, 2 // el) assert_eq(2 ** l, 2 ** el) assert_eq(2 % l, 2 % el) assert_eq(2 > l, 2 > el) assert_eq(2 < l, 2 < el) assert_eq(2 >= l, 2 >= el) assert_eq(2 <= l, 2 <= el) assert_eq(2 == l, 2 == el) assert_eq(2 != l, 2 != el) assert_eq(l.lt(2), el.lt(2)) assert_eq(l.gt(2), el.gt(2)) assert_eq(l.le(2), el.le(2)) assert_eq(l.ge(2), el.ge(2)) assert_eq(l.ne(2), el.ne(2)) assert_eq(l.eq(2), el.eq(2)) assert_eq(-l, -el) assert_eq(abs(l), abs(el)) if allow_comparison_ops: # comparison is allowed if data have same index assert_eq(~(l == r), ~(el == er)) def test_scalar_arithmetics(): el = np.int64(10) er = np.int64(4) l = dd.core.Scalar({('l', 0): el}, 'l', 'i8') r = dd.core.Scalar({('r', 0): er}, 'r', 'i8') assert isinstance(l, dd.core.Scalar) assert isinstance(r, dd.core.Scalar) assert_eq(l, el) assert_eq(r, er) assert_eq(l + r, el + er) assert_eq(l * r, el * er) assert_eq(l - r, el - er) assert_eq(l / r, el / er) assert_eq(l // r, el // er) assert_eq(l ** r, el ** er) assert_eq(l % r, el % er) assert_eq(l & r, el & er) assert_eq(l | r, el | er) assert_eq(l ^ r, el ^ er) assert_eq(l > r, el > er) assert_eq(l < r, el < er) assert_eq(l >= r, el >= er) assert_eq(l <= r, el <= er) assert_eq(l == r, el == er) assert_eq(l != r, el != er) assert_eq(l + 2, el + 2) assert_eq(l * 2, el * 2) assert_eq(l - 2, el - 2) assert_eq(l / 2, el / 2) assert_eq(l & True, el & True) assert_eq(l | True, el | True) assert_eq(l ^ True, el ^ True) assert_eq(l // 2, el // 2) assert_eq(l ** 2, el ** 2) assert_eq(l % 2, el % 2) assert_eq(l > 2, el > 2) assert_eq(l < 2, el < 2) assert_eq(l >= 2, el >= 2) assert_eq(l <= 2, el <= 2) assert_eq(l == 2, el == 2) assert_eq(l != 2, el != 2) assert_eq(2 + r, 2 + er) assert_eq(2 * r, 2 * er) assert_eq(2 - r, 2 - er) assert_eq(2 / r, 2 / er) assert_eq(True & r, True & er) assert_eq(True | r, True | er) assert_eq(True ^ r, True ^ er) assert_eq(2 // r, 2 // er) assert_eq(2 ** r, 2 ** er) assert_eq(2 % r, 2 % er) assert_eq(2 > r, 2 > er) assert_eq(2 < r, 2 < er) assert_eq(2 >= r, 2 >= er) assert_eq(2 <= r, 2 <= er) assert_eq(2 == r, 2 == er) assert_eq(2 != 
r, 2 != er) assert_eq(-l, -el) assert_eq(abs(l), abs(el)) assert_eq(~(l == r), ~(el == er)) def test_scalar_arithmetics_with_dask_instances(): s = dd.core.Scalar({('s', 0): 10}, 's', 'i8') e = 10 pds = pd.Series([1, 2, 3, 4, 5, 6, 7]) dds = dd.from_pandas(pds, 2) pdf = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7], 'b': [7, 6, 5, 4, 3, 2, 1]}) ddf = dd.from_pandas(pdf, 2) # pandas Series result = pds + s # this result pd.Series (automatically computed) assert isinstance(result, pd.Series) assert_eq(result, pds + e) result = s + pds # this result dd.Series assert isinstance(result, dd.Series) assert_eq(result, pds + e) # dask Series result = dds + s # this result dd.Series assert isinstance(result, dd.Series) assert_eq(result, pds + e) result = s + dds # this result dd.Series assert isinstance(result, dd.Series) assert_eq(result, pds + e) # pandas DataFrame result = pdf + s # this result pd.DataFrame (automatically computed) assert isinstance(result, pd.DataFrame) assert_eq(result, pdf + e) result = s + pdf # this result dd.DataFrame assert isinstance(result, dd.DataFrame) assert_eq(result, pdf + e) # dask DataFrame result = ddf + s # this result dd.DataFrame assert isinstance(result, dd.DataFrame) assert_eq(result, pdf + e) result = s + ddf # this result dd.DataFrame assert isinstance(result, dd.DataFrame) assert_eq(result, pdf + e) def test_frame_series_arithmetic_methods(): pdf1 = pd.DataFrame({'A': np.arange(10), 'B': [np.nan, 1, 2, 3, 4] * 2, 'C': [np.nan] * 10, 'D': np.arange(10)}, index=list('abcdefghij'), columns=list('ABCD')) pdf2 = pd.DataFrame(np.random.randn(10, 4), index=list('abcdefghjk'), columns=list('ABCX')) ps1 = pdf1.A ps2 = pdf2.A ps3 = pd.Series(np.random.randn(10), index=list('ABCDXabcde')) ddf1 = dd.from_pandas(pdf1, 2) ddf2 = dd.from_pandas(pdf2, 2) ds1 = ddf1.A ds2 = ddf2.A s = dd.core.Scalar({('s', 0): 4}, 's', 'i8') for l, r, el, er in [(ddf1, ddf2, pdf1, pdf2), (ds1, ds2, ps1, ps2), (ddf1.repartition(['a', 'f', 'j']), ddf2, pdf1, pdf2), (ds1.repartition(['a', 'b', 'f', 'j']), ds2, ps1, ps2), (ddf1, ddf2.repartition(['a', 'k']), pdf1, pdf2), (ds1, ds2.repartition(['a', 'b', 'd', 'h', 'k']), ps1, ps2), (ddf1, 3, pdf1, 3), (ds1, 3, ps1, 3), (ddf1, s, pdf1, 4), (ds1, s, ps1, 4)]: # l, r may be repartitioned, test whether repartition keeps original data assert_eq(l, el) assert_eq(r, er) assert_eq(l.add(r, fill_value=0), el.add(er, fill_value=0)) assert_eq(l.sub(r, fill_value=0), el.sub(er, fill_value=0)) assert_eq(l.mul(r, fill_value=0), el.mul(er, fill_value=0)) assert_eq(l.div(r, fill_value=0), el.div(er, fill_value=0)) assert_eq(l.truediv(r, fill_value=0), el.truediv(er, fill_value=0)) assert_eq(l.floordiv(r, fill_value=1), el.floordiv(er, fill_value=1)) assert_eq(l.mod(r, fill_value=0), el.mod(er, fill_value=0)) assert_eq(l.pow(r, fill_value=0), el.pow(er, fill_value=0)) assert_eq(l.radd(r, fill_value=0), el.radd(er, fill_value=0)) assert_eq(l.rsub(r, fill_value=0), el.rsub(er, fill_value=0)) assert_eq(l.rmul(r, fill_value=0), el.rmul(er, fill_value=0)) assert_eq(l.rdiv(r, fill_value=0), el.rdiv(er, fill_value=0)) assert_eq(l.rtruediv(r, fill_value=0), el.rtruediv(er, fill_value=0)) assert_eq(l.rfloordiv(r, fill_value=1), el.rfloordiv(er, fill_value=1)) assert_eq(l.rmod(r, fill_value=0), el.rmod(er, fill_value=0)) assert_eq(l.rpow(r, fill_value=0), el.rpow(er, fill_value=0)) for l, r, el, er in [(ddf1, ds2, pdf1, ps2), (ddf1, ddf2.X, pdf1, pdf2.X)]: assert_eq(l, el) assert_eq(r, er) # must specify axis=0 to add Series to each column # axis=1 is not supported (add to 
each row) assert_eq(l.add(r, axis=0), el.add(er, axis=0)) assert_eq(l.sub(r, axis=0), el.sub(er, axis=0)) assert_eq(l.mul(r, axis=0), el.mul(er, axis=0)) assert_eq(l.div(r, axis=0), el.div(er, axis=0)) assert_eq(l.truediv(r, axis=0), el.truediv(er, axis=0)) assert_eq(l.floordiv(r, axis=0), el.floordiv(er, axis=0)) assert_eq(l.mod(r, axis=0), el.mod(er, axis=0)) assert_eq(l.pow(r, axis=0), el.pow(er, axis=0)) assert_eq(l.radd(r, axis=0), el.radd(er, axis=0)) assert_eq(l.rsub(r, axis=0), el.rsub(er, axis=0)) assert_eq(l.rmul(r, axis=0), el.rmul(er, axis=0)) assert_eq(l.rdiv(r, axis=0), el.rdiv(er, axis=0)) assert_eq(l.rtruediv(r, axis=0), el.rtruediv(er, axis=0)) assert_eq(l.rfloordiv(r, axis=0), el.rfloordiv(er, axis=0)) assert_eq(l.rmod(r, axis=0), el.rmod(er, axis=0)) assert_eq(l.rpow(r, axis=0), el.rpow(er, axis=0)) pytest.raises(ValueError, lambda: l.add(r, axis=1)) for l, r, el, er in [(ddf1, pdf2, pdf1, pdf2), (ddf1, ps3, pdf1, ps3)]: assert_eq(l, el) assert_eq(r, er) for axis in [0, 1, 'index', 'columns']: assert_eq(l.add(r, axis=axis), el.add(er, axis=axis)) assert_eq(l.sub(r, axis=axis), el.sub(er, axis=axis)) assert_eq(l.mul(r, axis=axis), el.mul(er, axis=axis)) assert_eq(l.div(r, axis=axis), el.div(er, axis=axis)) assert_eq(l.truediv(r, axis=axis), el.truediv(er, axis=axis)) assert_eq(l.floordiv(r, axis=axis), el.floordiv(er, axis=axis)) assert_eq(l.mod(r, axis=axis), el.mod(er, axis=axis)) assert_eq(l.pow(r, axis=axis), el.pow(er, axis=axis)) assert_eq(l.radd(r, axis=axis), el.radd(er, axis=axis)) assert_eq(l.rsub(r, axis=axis), el.rsub(er, axis=axis)) assert_eq(l.rmul(r, axis=axis), el.rmul(er, axis=axis)) assert_eq(l.rdiv(r, axis=axis), el.rdiv(er, axis=axis)) assert_eq(l.rtruediv(r, axis=axis), el.rtruediv(er, axis=axis)) assert_eq(l.rfloordiv(r, axis=axis), el.rfloordiv(er, axis=axis)) assert_eq(l.rmod(r, axis=axis), el.rmod(er, axis=axis)) assert_eq(l.rpow(r, axis=axis), el.rpow(er, axis=axis)) @pytest.mark.parametrize('split_every', [False, 2]) def test_reductions(split_every): dsk = {('x', 0): pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=[0, 1, 3]), ('x', 1): pd.DataFrame({'a': [4, 5, 6], 'b': [3, 2, 1]}, index=[5, 6, 8]), ('x', 2): pd.DataFrame({'a': [7, 8, 9], 'b': [0, 0, 0]}, index=[9, 9, 9])} meta = make_meta({'a': 'i8', 'b': 'i8'}, index=pd.Index([], 'i8')) ddf1 = dd.DataFrame(dsk, 'x', meta, [0, 4, 9, 9]) pdf1 = ddf1.compute() nans1 = pd.Series([1] + [np.nan] * 4 + [2] + [np.nan] * 3) nands1 = dd.from_pandas(nans1, 2) nans2 = pd.Series([1] + [np.nan] * 8) nands2 = dd.from_pandas(nans2, 2) nans3 = pd.Series([np.nan] * 9) nands3 = dd.from_pandas(nans3, 2) bools = pd.Series([True, False, True, False, True], dtype=bool) boolds = dd.from_pandas(bools, 2) for dds, pds in [(ddf1.b, pdf1.b), (ddf1.a, pdf1.a), (ddf1['a'], pdf1['a']), (ddf1['b'], pdf1['b']), (nands1, nans1), (nands2, nans2), (nands3, nans3), (boolds, bools)]: assert isinstance(dds, dd.Series) assert isinstance(pds, pd.Series) assert_eq(dds.sum(split_every=split_every), pds.sum()) assert_eq(dds.prod(split_every=split_every), pds.prod()) assert_eq(dds.min(split_every=split_every), pds.min()) assert_eq(dds.max(split_every=split_every), pds.max()) assert_eq(dds.count(split_every=split_every), pds.count()) with pytest.warns(None): # runtime warnings; https://github.com/dask/dask/issues/2381 assert_eq(dds.std(split_every=split_every), pds.std()) with pytest.warns(None): # runtime warnings; https://github.com/dask/dask/issues/2381 assert_eq(dds.var(split_every=split_every), pds.var()) with 
pytest.warns(None): # runtime warnings; https://github.com/dask/dask/issues/2381 assert_eq(dds.sem(split_every=split_every), pds.sem()) assert_eq(dds.std(ddof=0, split_every=split_every), pds.std(ddof=0)) assert_eq(dds.var(ddof=0, split_every=split_every), pds.var(ddof=0)) assert_eq(dds.sem(ddof=0, split_every=split_every), pds.sem(ddof=0)) assert_eq(dds.mean(split_every=split_every), pds.mean()) assert_eq(dds.nunique(split_every=split_every), pds.nunique()) assert_eq(dds.sum(skipna=False, split_every=split_every), pds.sum(skipna=False)) assert_eq(dds.prod(skipna=False, split_every=split_every), pds.prod(skipna=False)) assert_eq(dds.min(skipna=False, split_every=split_every), pds.min(skipna=False)) assert_eq(dds.max(skipna=False, split_every=split_every), pds.max(skipna=False)) assert_eq(dds.std(skipna=False, split_every=split_every), pds.std(skipna=False)) assert_eq(dds.var(skipna=False, split_every=split_every), pds.var(skipna=False)) assert_eq(dds.sem(skipna=False, split_every=split_every), pds.sem(skipna=False)) assert_eq(dds.std(skipna=False, ddof=0, split_every=split_every), pds.std(skipna=False, ddof=0)) assert_eq(dds.var(skipna=False, ddof=0, split_every=split_every), pds.var(skipna=False, ddof=0)) assert_eq(dds.sem(skipna=False, ddof=0, split_every=split_every), pds.sem(skipna=False, ddof=0)) assert_eq(dds.mean(skipna=False, split_every=split_every), pds.mean(skipna=False)) assert_dask_graph(ddf1.b.sum(split_every=split_every), 'series-sum') assert_dask_graph(ddf1.b.prod(split_every=split_every), 'series-prod') assert_dask_graph(ddf1.b.min(split_every=split_every), 'series-min') assert_dask_graph(ddf1.b.max(split_every=split_every), 'series-max') assert_dask_graph(ddf1.b.count(split_every=split_every), 'series-count') assert_dask_graph(ddf1.b.std(split_every=split_every), 'series-std') assert_dask_graph(ddf1.b.var(split_every=split_every), 'series-var') assert_dask_graph(ddf1.b.sem(split_every=split_every), 'series-sem') assert_dask_graph(ddf1.b.std(ddof=0, split_every=split_every), 'series-std') assert_dask_graph(ddf1.b.var(ddof=0, split_every=split_every), 'series-var') assert_dask_graph(ddf1.b.sem(ddof=0, split_every=split_every), 'series-sem') assert_dask_graph(ddf1.b.mean(split_every=split_every), 'series-mean') # nunique is performed using drop-duplicates assert_dask_graph(ddf1.b.nunique(split_every=split_every), 'drop-duplicates') assert_eq(ddf1.index.min(split_every=split_every), pdf1.index.min()) assert_eq(ddf1.index.max(split_every=split_every), pdf1.index.max()) assert_eq(ddf1.index.count(split_every=split_every), pd.notnull(pdf1.index).sum()) @pytest.mark.parametrize('split_every', [False, 2]) def test_allany(split_every): df = pd.DataFrame(np.random.choice([True, False], size=(100, 4)), columns=['A', 'B', 'C', 'D']) df['E'] = list('abcde') * 20 ddf = dd.from_pandas(df, 10) assert_eq(ddf.all(split_every=split_every), df.all()) assert_eq(ddf.all(axis=1, split_every=split_every), df.all(axis=1)) assert_eq(ddf.all(axis=0, split_every=split_every), df.all(axis=0)) assert_eq(ddf.any(split_every=split_every), df.any()) assert_eq(ddf.any(axis=1, split_every=split_every), df.any(axis=1)) assert_eq(ddf.any(axis=0, split_every=split_every), df.any(axis=0)) assert_eq(ddf.A.all(split_every=split_every), df.A.all()) assert_eq(ddf.A.any(split_every=split_every), df.A.any()) @pytest.mark.parametrize('split_every', [False, 2]) def test_deterministic_reduction_names(split_every): df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [5, 6, 7, 8]}) ddf = dd.from_pandas(df, npartitions=2) for x in 
[ddf, ddf.x]: assert (x.sum(split_every=split_every)._name == x.sum(split_every=split_every)._name) assert (x.prod(split_every=split_every)._name == x.prod(split_every=split_every)._name) assert (x.min(split_every=split_every)._name == x.min(split_every=split_every)._name) assert (x.max(split_every=split_every)._name == x.max(split_every=split_every)._name) assert (x.count(split_every=split_every)._name == x.count(split_every=split_every)._name) assert (x.std(split_every=split_every)._name == x.std(split_every=split_every)._name) assert (x.var(split_every=split_every)._name == x.var(split_every=split_every)._name) assert (x.sem(split_every=split_every)._name == x.sem(split_every=split_every)._name) assert (x.mean(split_every=split_every)._name == x.mean(split_every=split_every)._name) assert (ddf.x.nunique(split_every=split_every)._name == ddf.x.nunique(split_every=split_every)._name) def test_reduction_series_invalid_axis(): dsk = {('x', 0): pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=[0, 1, 3]), ('x', 1): pd.DataFrame({'a': [4, 5, 6], 'b': [3, 2, 1]}, index=[5, 6, 8]), ('x', 2): pd.DataFrame({'a': [7, 8, 9], 'b': [0, 0, 0]}, index=[9, 9, 9])} meta = make_meta({'a': 'i8', 'b': 'i8'}, index=pd.Index([], 'i8')) ddf1 = dd.DataFrame(dsk, 'x', meta, [0, 4, 9, 9]) pdf1 = ddf1.compute() for axis in [1, 'columns']: for s in [ddf1.a, pdf1.a]: # both must behave the same pytest.raises(ValueError, lambda: s.sum(axis=axis)) pytest.raises(ValueError, lambda: s.prod(axis=axis)) pytest.raises(ValueError, lambda: s.min(axis=axis)) pytest.raises(ValueError, lambda: s.max(axis=axis)) # only count doesn't have axis keyword pytest.raises(TypeError, lambda: s.count(axis=axis)) pytest.raises(ValueError, lambda: s.std(axis=axis)) pytest.raises(ValueError, lambda: s.var(axis=axis)) pytest.raises(ValueError, lambda: s.sem(axis=axis)) pytest.raises(ValueError, lambda: s.mean(axis=axis)) def test_reductions_non_numeric_dtypes(): # test non-numeric blocks def check_raises(d, p, func): pytest.raises((TypeError, ValueError), lambda: getattr(d, func)().compute()) pytest.raises((TypeError, ValueError), lambda: getattr(p, func)()) pds = pd.Series(['a', 'b', 'c', 'd', 'e']) dds = dd.from_pandas(pds, 2) assert_eq(dds.sum(), pds.sum()) check_raises(dds, pds, 'prod') assert_eq(dds.min(), pds.min()) assert_eq(dds.max(), pds.max()) assert_eq(dds.count(), pds.count()) check_raises(dds, pds, 'std') check_raises(dds, pds, 'var') check_raises(dds, pds, 'sem') check_raises(dds, pds, 'mean') assert_eq(dds.nunique(), pds.nunique()) for pds in [pd.Series(pd.Categorical([1, 2, 3, 4, 5], ordered=True)), pd.Series(pd.Categorical(list('abcde'), ordered=True)), pd.Series(pd.date_range('2011-01-01', freq='D', periods=5))]: dds = dd.from_pandas(pds, 2) check_raises(dds, pds, 'sum') check_raises(dds, pds, 'prod') assert_eq(dds.min(), pds.min()) assert_eq(dds.max(), pds.max()) assert_eq(dds.count(), pds.count()) check_raises(dds, pds, 'std') check_raises(dds, pds, 'var') check_raises(dds, pds, 'sem') check_raises(dds, pds, 'mean') assert_eq(dds.nunique(), pds.nunique()) pds = pd.Series(pd.timedelta_range('1 days', freq='D', periods=5)) dds = dd.from_pandas(pds, 2) assert_eq(dds.sum(), pds.sum()) assert_eq(dds.min(), pds.min()) assert_eq(dds.max(), pds.max()) assert_eq(dds.count(), pds.count()) # ToDo: pandas supports timedelta std, but dask raises: # incompatible type for a datetime/timedelta operation [__pow__] # assert_eq(dds.std(), pds.std()) # assert_eq(dds.var(), pds.var()) # ToDo: pandas supports timedelta mean, but
dask raises: # TypeError: unsupported operand type(s) for *: 'float' and 'Timedelta' # assert_eq(dds.mean(), pds.mean()) assert_eq(dds.nunique(), pds.nunique()) @pytest.mark.parametrize('split_every', [False, 2]) def test_reductions_frame(split_every): dsk = {('x', 0): pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=[0, 1, 3]), ('x', 1): pd.DataFrame({'a': [4, 5, 6], 'b': [3, 2, 1]}, index=[5, 6, 8]), ('x', 2): pd.DataFrame({'a': [7, 8, 9], 'b': [0, 0, 0]}, index=[9, 9, 9])} meta = make_meta({'a': 'i8', 'b': 'i8'}, index=pd.Index([], 'i8')) ddf1 = dd.DataFrame(dsk, 'x', meta, [0, 4, 9, 9]) pdf1 = ddf1.compute() assert_eq(ddf1.sum(split_every=split_every), pdf1.sum()) assert_eq(ddf1.prod(split_every=split_every), pdf1.prod()) assert_eq(ddf1.min(split_every=split_every), pdf1.min()) assert_eq(ddf1.max(split_every=split_every), pdf1.max()) assert_eq(ddf1.count(split_every=split_every), pdf1.count()) assert_eq(ddf1.std(split_every=split_every), pdf1.std()) assert_eq(ddf1.var(split_every=split_every), pdf1.var()) assert_eq(ddf1.sem(split_every=split_every), pdf1.sem()) assert_eq(ddf1.std(ddof=0, split_every=split_every), pdf1.std(ddof=0)) assert_eq(ddf1.var(ddof=0, split_every=split_every), pdf1.var(ddof=0)) assert_eq(ddf1.sem(ddof=0, split_every=split_every), pdf1.sem(ddof=0)) assert_eq(ddf1.mean(split_every=split_every), pdf1.mean()) for axis in [0, 1, 'index', 'columns']: assert_eq(ddf1.sum(axis=axis, split_every=split_every), pdf1.sum(axis=axis)) assert_eq(ddf1.prod(axis=axis, split_every=split_every), pdf1.prod(axis=axis)) assert_eq(ddf1.min(axis=axis, split_every=split_every), pdf1.min(axis=axis)) assert_eq(ddf1.max(axis=axis, split_every=split_every), pdf1.max(axis=axis)) assert_eq(ddf1.count(axis=axis, split_every=split_every), pdf1.count(axis=axis)) assert_eq(ddf1.std(axis=axis, split_every=split_every), pdf1.std(axis=axis)) assert_eq(ddf1.var(axis=axis, split_every=split_every), pdf1.var(axis=axis)) assert_eq(ddf1.sem(axis=axis, split_every=split_every), pdf1.sem(axis=axis)) assert_eq(ddf1.std(axis=axis, ddof=0, split_every=split_every), pdf1.std(axis=axis, ddof=0)) assert_eq(ddf1.var(axis=axis, ddof=0, split_every=split_every), pdf1.var(axis=axis, ddof=0)) assert_eq(ddf1.sem(axis=axis, ddof=0, split_every=split_every), pdf1.sem(axis=axis, ddof=0)) assert_eq(ddf1.mean(axis=axis, split_every=split_every), pdf1.mean(axis=axis)) pytest.raises(ValueError, lambda: ddf1.sum(axis='incorrect').compute()) # axis=0 assert_dask_graph(ddf1.sum(split_every=split_every), 'dataframe-sum') assert_dask_graph(ddf1.prod(split_every=split_every), 'dataframe-prod') assert_dask_graph(ddf1.min(split_every=split_every), 'dataframe-min') assert_dask_graph(ddf1.max(split_every=split_every), 'dataframe-max') assert_dask_graph(ddf1.count(split_every=split_every), 'dataframe-count') # std, var, sem, and mean consist of sum and count operations assert_dask_graph(ddf1.std(split_every=split_every), 'dataframe-sum') assert_dask_graph(ddf1.std(split_every=split_every), 'dataframe-count') assert_dask_graph(ddf1.var(split_every=split_every), 'dataframe-sum') assert_dask_graph(ddf1.var(split_every=split_every), 'dataframe-count') assert_dask_graph(ddf1.sem(split_every=split_every), 'dataframe-sum') assert_dask_graph(ddf1.sem(split_every=split_every), 'dataframe-count') assert_dask_graph(ddf1.mean(split_every=split_every), 'dataframe-sum') assert_dask_graph(ddf1.mean(split_every=split_every), 'dataframe-count') # axis=1 assert_dask_graph(ddf1.sum(axis=1, split_every=split_every), 'dataframe-sum') 
assert_dask_graph(ddf1.prod(axis=1, split_every=split_every), 'dataframe-prod') assert_dask_graph(ddf1.min(axis=1, split_every=split_every), 'dataframe-min') assert_dask_graph(ddf1.max(axis=1, split_every=split_every), 'dataframe-max') assert_dask_graph(ddf1.count(axis=1, split_every=split_every), 'dataframe-count') assert_dask_graph(ddf1.std(axis=1, split_every=split_every), 'dataframe-std') assert_dask_graph(ddf1.var(axis=1, split_every=split_every), 'dataframe-var') assert_dask_graph(ddf1.sem(axis=1, split_every=split_every), 'dataframe-sem') assert_dask_graph(ddf1.mean(axis=1, split_every=split_every), 'dataframe-mean') def test_reductions_frame_dtypes(): df = pd.DataFrame({'int': [1, 2, 3, 4, 5, 6, 7, 8], 'float': [1., 2., 3., 4., np.nan, 6., 7., 8.], 'dt': [pd.NaT] + [datetime(2011, i, 1) for i in range(1, 8)], 'str': list('abcdefgh')}) ddf = dd.from_pandas(df, 3) assert_eq(df.sum(), ddf.sum()) assert_eq(df.prod(), ddf.prod()) assert_eq(df.min(), ddf.min()) assert_eq(df.max(), ddf.max()) assert_eq(df.count(), ddf.count()) assert_eq(df.std(), ddf.std()) assert_eq(df.var(), ddf.var()) assert_eq(df.sem(), ddf.sem()) assert_eq(df.std(ddof=0), ddf.std(ddof=0)) assert_eq(df.var(ddof=0), ddf.var(ddof=0)) assert_eq(df.sem(ddof=0), ddf.sem(ddof=0)) assert_eq(df.mean(), ddf.mean()) assert_eq(df._get_numeric_data(), ddf._get_numeric_data()) numerics = ddf[['int', 'float']] assert numerics._get_numeric_data().dask == numerics.dask @pytest.mark.parametrize('split_every', [False, 2]) def test_reductions_frame_nan(split_every): df = pd.DataFrame({'a': [1, 2, np.nan, 4, 5, 6, 7, 8], 'b': [1, 2, np.nan, np.nan, np.nan, 5, np.nan, np.nan], 'c': [np.nan] * 8}) ddf = dd.from_pandas(df, 3) assert_eq(df.sum(), ddf.sum(split_every=split_every)) assert_eq(df.prod(), ddf.prod(split_every=split_every)) assert_eq(df.min(), ddf.min(split_every=split_every)) assert_eq(df.max(), ddf.max(split_every=split_every)) assert_eq(df.count(), ddf.count(split_every=split_every)) assert_eq(df.std(), ddf.std(split_every=split_every)) assert_eq(df.var(), ddf.var(split_every=split_every)) assert_eq(df.sem(), ddf.sem(split_every=split_every)) assert_eq(df.std(ddof=0), ddf.std(ddof=0, split_every=split_every)) assert_eq(df.var(ddof=0), ddf.var(ddof=0, split_every=split_every)) assert_eq(df.sem(ddof=0), ddf.sem(ddof=0, split_every=split_every)) assert_eq(df.mean(), ddf.mean(split_every=split_every)) assert_eq(df.sum(skipna=False), ddf.sum(skipna=False, split_every=split_every)) assert_eq(df.prod(skipna=False), ddf.prod(skipna=False, split_every=split_every)) assert_eq(df.min(skipna=False), ddf.min(skipna=False, split_every=split_every)) assert_eq(df.max(skipna=False), ddf.max(skipna=False, split_every=split_every)) assert_eq(df.std(skipna=False), ddf.std(skipna=False, split_every=split_every)) assert_eq(df.var(skipna=False), ddf.var(skipna=False, split_every=split_every)) assert_eq(df.sem(skipna=False), ddf.sem(skipna=False, split_every=split_every)) assert_eq(df.std(skipna=False, ddof=0), ddf.std(skipna=False, ddof=0, split_every=split_every)) assert_eq(df.var(skipna=False, ddof=0), ddf.var(skipna=False, ddof=0, split_every=split_every)) assert_eq(df.sem(skipna=False, ddof=0), ddf.sem(skipna=False, ddof=0, split_every=split_every)) assert_eq(df.mean(skipna=False), ddf.mean(skipna=False, split_every=split_every)) assert_eq(df.sum(axis=1, skipna=False), ddf.sum(axis=1, skipna=False, split_every=split_every)) assert_eq(df.prod(axis=1, skipna=False), ddf.prod(axis=1, skipna=False, split_every=split_every)) assert_eq(df.min(axis=1, 
skipna=False), ddf.min(axis=1, skipna=False, split_every=split_every)) assert_eq(df.max(axis=1, skipna=False), ddf.max(axis=1, skipna=False, split_every=split_every)) assert_eq(df.std(axis=1, skipna=False), ddf.std(axis=1, skipna=False, split_every=split_every)) assert_eq(df.var(axis=1, skipna=False), ddf.var(axis=1, skipna=False, split_every=split_every)) assert_eq(df.sem(axis=1, skipna=False), ddf.sem(axis=1, skipna=False, split_every=split_every)) assert_eq(df.std(axis=1, skipna=False, ddof=0), ddf.std(axis=1, skipna=False, ddof=0, split_every=split_every)) assert_eq(df.var(axis=1, skipna=False, ddof=0), ddf.var(axis=1, skipna=False, ddof=0, split_every=split_every)) assert_eq(df.sem(axis=1, skipna=False, ddof=0), ddf.sem(axis=1, skipna=False, ddof=0, split_every=split_every)) assert_eq(df.mean(axis=1, skipna=False), ddf.mean(axis=1, skipna=False, split_every=split_every)) dask-0.16.0/dask/dataframe/tests/test_categorical.py000066400000000000000000000306321320364734500224310ustar00rootroot00000000000000import operator import numpy as np import pandas as pd import pandas.util.testing as tm import pytest import dask import dask.dataframe as dd from dask.dataframe.core import _concat from dask.dataframe.utils import make_meta, assert_eq, is_categorical_dtype, clear_known_categories # Generate a list of categorical series and indices cat_series = [] for ordered in [True, False]: s = pd.Series(pd.Categorical(list('bacbac'), ordered=ordered)) ds = dd.from_pandas(s, npartitions=2) cat_series.append((s, ds)) s = pd.Series(range(6), index=pd.Categorical(list('bacbac'))) ds = dd.from_pandas(s, npartitions=2) cat_series.append((ds.compute().index, ds.index)) a = pd.DataFrame({'v': list('abcde'), 'w': list('xxxxx'), 'x': np.arange(5), 'y': list('abcbc'), 'z': np.arange(5, dtype='f8')}) b = pd.DataFrame({'v': list('fghij'), 'w': list('yyyyy'), 'x': np.arange(5, 10), 'y': list('abbba'), 'z': np.arange(5, 10, dtype='f8')}) c = pd.DataFrame({'v': list('klmno'), 'w': list('zzzzz'), 'x': np.arange(10, 15), 'y': list('bcbcc'), 'z': np.arange(10, 15, dtype='f8')}) frames = [a, b, c] frames2 = [] for df in frames: df.w = df.w.astype('category') df.y = df.y.astype('category') frames2.append(df.assign(w=df.w.cat.set_categories(list('xyz')), y=df.y.cat.set_categories(list('abc')))) frames3 = [i.set_index(i.y) for i in frames] frames4 = [i.set_index(i.y) for i in frames2] frames5 = [i.set_index([i.y, i.x]) for i in frames] frames6 = [i.set_index([i.y, i.x]) for i in frames2] def test_concat_unions_categoricals(): # Categorical DataFrame, regular index tm.assert_frame_equal(_concat(frames), pd.concat(frames2)) # Categorical Series, regular index tm.assert_series_equal(_concat([i.y for i in frames]), pd.concat([i.y for i in frames2])) # Categorical Index tm.assert_index_equal(_concat([i.index for i in frames3]), pd.concat([i for i in frames4]).index) # Categorical DataFrame, Categorical Index tm.assert_frame_equal(_concat(frames3), pd.concat(frames4)) # Non-categorical DataFrame, Categorical Index tm.assert_frame_equal(_concat([i[['x', 'z']] for i in frames3]), pd.concat([i[['x', 'z']] for i in frames4])) # Categorical Series, Categorical Index tm.assert_series_equal(_concat([i.z for i in frames3]), pd.concat([i.z for i in frames4])) # Non-categorical Series, Categorical Index tm.assert_series_equal(_concat([i.x for i in frames3]), pd.concat([i.x for i in frames4])) # MultiIndex with Categorical Index tm.assert_index_equal(_concat([i.index for i in frames5]), pd.concat([i for i in frames6]).index) # DataFrame, 
MultiIndex with CategoricalIndex tm.assert_frame_equal(_concat(frames5), pd.concat(frames6)) def test_unknown_categoricals(): ddf = dd.DataFrame({('unknown', i): df for (i, df) in enumerate(frames)}, 'unknown', make_meta({'v': 'object', 'w': 'category', 'x': 'i8', 'y': 'category', 'z': 'f8'}), [None] * 4) # Compute df = ddf.compute() assert_eq(ddf.w.value_counts(), df.w.value_counts()) assert_eq(ddf.w.nunique(), df.w.nunique()) assert_eq(ddf.groupby(ddf.w).sum(), df.groupby(df.w).sum()) assert_eq(ddf.groupby(ddf.w).y.nunique(), df.groupby(df.w).y.nunique()) assert_eq(ddf.y.groupby(ddf.w).count(), df.y.groupby(df.w).count()) def test_is_categorical_dtype(): df = pd.DataFrame({'cat': pd.Categorical([1, 2, 3, 4]), 'x': [1, 2, 3, 4]}) assert is_categorical_dtype(df['cat']) assert not is_categorical_dtype(df['x']) ddf = dd.from_pandas(df, 2) assert is_categorical_dtype(ddf['cat']) assert not is_categorical_dtype(ddf['x']) def test_categorize(): meta = clear_known_categories(frames4[0]) ddf = dd.DataFrame({('unknown', i): df for (i, df) in enumerate(frames3)}, 'unknown', meta, [None] * 4) ddf = ddf.assign(w=ddf.w.cat.set_categories(['x', 'y', 'z'])) assert ddf.w.cat.known assert not ddf.y.cat.known assert not ddf.index.cat.known df = ddf.compute() for index in [None, True, False]: known_index = index is not False # By default categorize object and unknown cat columns ddf2 = ddf.categorize(index=index) assert ddf2.y.cat.known assert ddf2.v.cat.known assert ddf2.index.cat.known == known_index assert_eq(ddf2, df.astype({'v': 'category'}), check_categorical=False) # Specifying split_every works ddf2 = ddf.categorize(index=index, split_every=2) assert ddf2.y.cat.known assert ddf2.v.cat.known assert ddf2.index.cat.known == known_index assert_eq(ddf2, df.astype({'v': 'category'}), check_categorical=False) # Specifying one column doesn't affect others ddf2 = ddf.categorize('v', index=index) assert not ddf2.y.cat.known assert ddf2.v.cat.known assert ddf2.index.cat.known == known_index assert_eq(ddf2, df.astype({'v': 'category'}), check_categorical=False) ddf2 = ddf.categorize('y', index=index) assert ddf2.y.cat.known assert ddf2.v.dtype == 'object' assert ddf2.index.cat.known == known_index assert_eq(ddf2, df) ddf_known_index = ddf.categorize(columns=[], index=True) assert ddf_known_index.index.cat.known assert_eq(ddf_known_index, df) # Specifying known categorical or no columns is a no-op: assert ddf.categorize(['w'], index=False) is ddf assert ddf.categorize([], index=False) is ddf assert ddf_known_index.categorize(['w']) is ddf_known_index assert ddf_known_index.categorize([]) is ddf_known_index # Bad split_every fails with pytest.raises(ValueError): ddf.categorize(split_every=1) with pytest.raises(ValueError): ddf.categorize(split_every='foo') def test_categorize_index(): # Object dtype ddf = dd.from_pandas(tm.makeDataFrame(), npartitions=5) df = ddf.compute() ddf2 = ddf.categorize() assert ddf2.index.cat.known assert_eq(ddf2, df.set_index(pd.CategoricalIndex(df.index)), check_divisions=False, check_categorical=False) assert ddf.categorize(index=False) is ddf # Non-object dtype ddf = dd.from_pandas(df.set_index(df.A), npartitions=5) df = ddf.compute() ddf2 = ddf.categorize(index=True) assert ddf2.index.cat.known assert_eq(ddf2, df.set_index(pd.CategoricalIndex(df.index)), check_divisions=False, check_categorical=False) assert ddf.categorize() is ddf @pytest.mark.parametrize('shuffle', ['disk', 'tasks']) def test_categorical_set_index(shuffle): df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': ['a', 'b', 
'b', 'c']}) df['y'] = pd.Categorical(df['y'], categories=['a', 'b', 'c'], ordered=True) a = dd.from_pandas(df, npartitions=2) with dask.set_options(get=dask.get, shuffle=shuffle): b = a.set_index('y', npartitions=a.npartitions) d1, d2 = b.get_partition(0), b.get_partition(1) assert list(d1.index.compute()) == ['a'] assert list(sorted(d2.index.compute())) == ['b', 'b', 'c'] b = a.set_index(a.y, npartitions=a.npartitions) d1, d2 = b.get_partition(0), b.get_partition(1) assert list(d1.index.compute()) == ['a'] assert list(sorted(d2.index.compute())) == ['b', 'b', 'c'] b = a.set_index('y', divisions=['a', 'b', 'c'], npartitions=a.npartitions) d1, d2 = b.get_partition(0), b.get_partition(1) assert list(d1.index.compute()) == ['a'] assert list(sorted(d2.index.compute())) == ['b', 'b', 'c'] @pytest.mark.parametrize('npartitions', [1, 4]) def test_repartition_on_categoricals(npartitions): df = pd.DataFrame({'x': range(10), 'y': list('abababcbcb')}) ddf = dd.from_pandas(df, npartitions=2) ddf['y'] = ddf['y'].astype('category') ddf2 = ddf.repartition(npartitions=npartitions) df = df.copy() df['y'] = df['y'].astype('category') assert_eq(df, ddf) assert_eq(df, ddf2) def test_categorical_accessor_presence(): df = pd.DataFrame({'x': list('a' * 5 + 'b' * 5 + 'c' * 5), 'y': range(15)}) df.x = df.x.astype('category') ddf = dd.from_pandas(df, npartitions=2) assert 'cat' in dir(ddf.x) assert 'cat' not in dir(ddf.y) assert hasattr(ddf.x, 'cat') assert not hasattr(ddf.y, 'cat') df2 = df.set_index(df.x) ddf2 = dd.from_pandas(df2, npartitions=2, sort=False) assert hasattr(ddf2.index, 'categories') assert not hasattr(ddf.index, 'categories') def test_categorize_nan(): df = dd.from_pandas(pd.DataFrame({"A": ['a', 'b', 'a', float('nan')]}), npartitions=2) with pytest.warns(None) as record: df.categorize().compute() assert len(record) == 0 def get_cat(x): return x if isinstance(x, pd.CategoricalIndex) else x.cat def assert_array_index_eq(left, right): """left and right are equal, treating index and array as equivalent""" assert_eq(left, pd.Index(right) if isinstance(right, np.ndarray) else right) class TestCategoricalAccessor: @pytest.mark.parametrize('series', cat_series) @pytest.mark.parametrize('prop, compare', [ ('categories', assert_array_index_eq), ('ordered', assert_eq), ('codes', assert_array_index_eq) ]) def test_properties(self, series, prop, compare): s, ds = series expected = getattr(get_cat(s), prop) result = getattr(get_cat(ds), prop) compare(result, expected) @pytest.mark.parametrize('series', cat_series) @pytest.mark.parametrize('method, kwargs', [ ('add_categories', dict(new_categories=['d', 'e'])), ('as_ordered', {}), ('as_unordered', {}), ('as_ordered', {}), ('remove_categories', dict(removals=['a'])), ('rename_categories', dict(new_categories=['d', 'e', 'f'])), ('reorder_categories', dict(new_categories=['a', 'b', 'c'])), ('set_categories', dict(new_categories=['a', 'e', 'b'])), ('remove_unused_categories', {}), ]) def test_callable(self, series, method, kwargs): op = operator.methodcaller(method, **kwargs) # Series s, ds = series expected = op(get_cat(s)) result = op(get_cat(ds)) assert_eq(result, expected) assert_eq(get_cat(result._meta).categories, get_cat(expected).categories) assert_eq(get_cat(result._meta).ordered, get_cat(expected).ordered) def test_categorical_empty(self): # GH 1705 def make_empty(): return pd.DataFrame({"A": pd.Categorical([np.nan, np.nan])}) def make_full(): return pd.DataFrame({"A": pd.Categorical(['a', 'a'])}) a = dd.from_delayed([dask.delayed(make_empty)(), 
dask.delayed(make_full)()]) # Used to raise an IndexError a.A.cat.categories @pytest.mark.parametrize('series', cat_series) def test_unknown_categories(self, series): a, da = series assert da.cat.known da = da.cat.as_unknown() assert not da.cat.known with pytest.raises(NotImplementedError): da.cat.categories with pytest.raises(NotImplementedError): da.cat.codes db = da.cat.set_categories(['a', 'b', 'c']) assert db.cat.known tm.assert_index_equal(db.cat.categories, get_cat(a).categories) assert_array_index_eq(db.cat.codes, get_cat(a).codes) db = da.cat.as_known() assert db.cat.known res = db.compute() tm.assert_index_equal(db.cat.categories, get_cat(res).categories) assert_array_index_eq(db.cat.codes, get_cat(res).codes) def test_categorical_string_ops(self): a = pd.Series(['a', 'a', 'b'], dtype='category') da = dd.from_pandas(a, 2) result = da.str.upper() expected = a.str.upper() assert_eq(result, expected) def test_categorical_non_string_raises(self): a = pd.Series([1, 2, 3], dtype='category') da = dd.from_pandas(a, 2) with pytest.raises(AttributeError): da.str.upper() dask-0.16.0/dask/dataframe/tests/test_dataframe.py000066400000000000000000002727661320364734500221200ustar00rootroot00000000000000import sys from operator import add from itertools import product import pandas as pd import pandas.util.testing as tm import numpy as np import pytest import dask import dask.dataframe as dd from dask.base import compute_as_if_collection from dask.utils import ignoring, put_lines from dask.dataframe.core import repartition_divisions, aca, _concat, Scalar from dask.dataframe import methods from dask.dataframe.utils import (assert_eq, make_meta, assert_max_deps, PANDAS_VERSION) if PANDAS_VERSION >= '0.20.0': from pandas.io.formats import format as pandas_format else: from pandas.formats import format as pandas_format dsk = {('x', 0): pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=[0, 1, 3]), ('x', 1): pd.DataFrame({'a': [4, 5, 6], 'b': [3, 2, 1]}, index=[5, 6, 8]), ('x', 2): pd.DataFrame({'a': [7, 8, 9], 'b': [0, 0, 0]}, index=[9, 9, 9])} meta = make_meta({'a': 'i8', 'b': 'i8'}, index=pd.Index([], 'i8')) d = dd.DataFrame(dsk, 'x', meta, [0, 5, 9, 9]) full = d.compute() def test_Dataframe(): expected = pd.Series([2, 3, 4, 5, 6, 7, 8, 9, 10], index=[0, 1, 3, 5, 6, 8, 9, 9, 9], name='a') assert_eq(d['a'] + 1, expected) tm.assert_index_equal(d.columns, pd.Index(['a', 'b'])) assert_eq(d[d['b'] > 2], full[full['b'] > 2]) assert_eq(d[['a', 'b']], full[['a', 'b']]) assert_eq(d.a, full.a) assert d.b.mean().compute() == full.b.mean() assert np.allclose(d.b.var().compute(), full.b.var()) assert np.allclose(d.b.std().compute(), full.b.std()) assert d.index._name == d.index._name # this is deterministic assert repr(d) def test_head_tail(): assert_eq(d.head(2), full.head(2)) assert_eq(d.head(3), full.head(3)) assert_eq(d.head(2), dsk[('x', 0)].head(2)) assert_eq(d['a'].head(2), full['a'].head(2)) assert_eq(d['a'].head(3), full['a'].head(3)) assert_eq(d['a'].head(2), dsk[('x', 0)]['a'].head(2)) assert (sorted(d.head(2, compute=False).dask) == sorted(d.head(2, compute=False).dask)) assert (sorted(d.head(2, compute=False).dask) != sorted(d.head(3, compute=False).dask)) assert_eq(d.tail(2), full.tail(2)) assert_eq(d.tail(3), full.tail(3)) assert_eq(d.tail(2), dsk[('x', 2)].tail(2)) assert_eq(d['a'].tail(2), full['a'].tail(2)) assert_eq(d['a'].tail(3), full['a'].tail(3)) assert_eq(d['a'].tail(2), dsk[('x', 2)]['a'].tail(2)) assert (sorted(d.tail(2, compute=False).dask) == sorted(d.tail(2, compute=False).dask)) 
assert (sorted(d.tail(2, compute=False).dask) != sorted(d.tail(3, compute=False).dask)) def test_head_npartitions(): assert_eq(d.head(5, npartitions=2), full.head(5)) assert_eq(d.head(5, npartitions=2, compute=False), full.head(5)) assert_eq(d.head(5, npartitions=-1), full.head(5)) assert_eq(d.head(7, npartitions=-1), full.head(7)) assert_eq(d.head(2, npartitions=-1), full.head(2)) with pytest.raises(ValueError): d.head(2, npartitions=5) @pytest.mark.skipif(sys.version_info[:2] == (3, 3), reason="Python3.3 uses pytest2.7.2, w/o warns method") def test_head_npartitions_warn(): with pytest.warns(None): d.head(100) with pytest.warns(None): d.head(7) with pytest.warns(None): d.head(7, npartitions=2) def test_index_head(): assert_eq(d.index.head(2), full.index[:2]) assert_eq(d.index.head(3), full.index[:3]) def test_Series(): assert isinstance(d.a, dd.Series) assert isinstance(d.a + 1, dd.Series) assert_eq((d + 1), full + 1) def test_Index(): for case in [pd.DataFrame(np.random.randn(10, 5), index=list('abcdefghij')), pd.DataFrame(np.random.randn(10, 5), index=pd.date_range('2011-01-01', freq='D', periods=10))]: ddf = dd.from_pandas(case, 3) assert_eq(ddf.index, case.index) pytest.raises(AttributeError, lambda: ddf.index.index) def test_Scalar(): val = np.int64(1) s = Scalar({('a', 0): val}, 'a', 'i8') assert hasattr(s, 'dtype') assert 'dtype' in dir(s) assert_eq(s, val) assert repr(s) == "dd.Scalar" val = pd.Timestamp('2001-01-01') s = Scalar({('a', 0): val}, 'a', val) assert not hasattr(s, 'dtype') assert 'dtype' not in dir(s) assert_eq(s, val) assert repr(s) == "dd.Scalar" def test_attributes(): assert 'a' in dir(d) assert 'foo' not in dir(d) pytest.raises(AttributeError, lambda: d.foo) df = dd.from_pandas(pd.DataFrame({'a b c': [1, 2, 3]}), npartitions=2) assert 'a b c' not in dir(df) df = dd.from_pandas(pd.DataFrame({'a': [1, 2], 5: [1, 2]}), npartitions=2) assert 'a' in dir(df) assert 5 not in dir(df) df = dd.from_pandas(tm.makeTimeDataFrame(), npartitions=3) pytest.raises(AttributeError, lambda: df.foo) def test_column_names(): tm.assert_index_equal(d.columns, pd.Index(['a', 'b'])) tm.assert_index_equal(d[['b', 'a']].columns, pd.Index(['b', 'a'])) assert d['a'].name == 'a' assert (d['a'] + 1).name == 'a' assert (d['a'] + d['b']).name is None def test_index_names(): assert d.index.name is None idx = pd.Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], name='x') df = pd.DataFrame(np.random.randn(10, 5), idx) ddf = dd.from_pandas(df, 3) assert ddf.index.name == 'x' assert ddf.index.compute().name == 'x' @pytest.mark.parametrize( 'npartitions', [1, pytest.mark.xfail(2, reason='pandas join removes freq')] ) def test_timezone_freq(npartitions): s_naive = pd.Series(pd.date_range('20130101', periods=10)) s_aware = pd.Series(pd.date_range('20130101', periods=10, tz='US/Eastern')) pdf = pd.DataFrame({'tz': s_aware, 'notz': s_naive}) ddf = dd.from_pandas(pdf, npartitions=npartitions) assert pdf.tz[0].freq == ddf.compute().tz[0].freq == ddf.tz.compute()[0].freq def test_rename_columns(): # GH 819 df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7], 'b': [7, 6, 5, 4, 3, 2, 1]}) ddf = dd.from_pandas(df, 2) ddf.columns = ['x', 'y'] df.columns = ['x', 'y'] tm.assert_index_equal(ddf.columns, pd.Index(['x', 'y'])) tm.assert_index_equal(ddf._meta.columns, pd.Index(['x', 'y'])) assert_eq(ddf, df) msg = r"Length mismatch: Expected axis has 2 elements, new values have 4 elements" with pytest.raises(ValueError) as err: ddf.columns = [1, 2, 3, 4] assert msg in str(err.value) # Multi-index columns df = pd.DataFrame({('A', '0') : 
[1, 2, 2, 3], ('B', 1) : [1, 2, 3, 4]}) ddf = dd.from_pandas(df, npartitions=2) df.columns = ['x', 'y'] ddf.columns = ['x', 'y'] tm.assert_index_equal(ddf.columns, pd.Index(['x', 'y'])) tm.assert_index_equal(ddf._meta.columns, pd.Index(['x', 'y'])) assert_eq(ddf, df) def test_rename_series(): # GH 819 s = pd.Series([1, 2, 3, 4, 5, 6, 7], name='x') ds = dd.from_pandas(s, 2) s.name = 'renamed' ds.name = 'renamed' assert s.name == 'renamed' assert_eq(ds, s) ind = s.index dind = ds.index ind.name = 'renamed' dind.name = 'renamed' assert ind.name == 'renamed' assert_eq(dind, ind) def test_describe(): # prepare test case which approx quantiles will be the same as actuals s = pd.Series(list(range(20)) * 4) df = pd.DataFrame({'a': list(range(20)) * 4, 'b': list(range(4)) * 20}) ds = dd.from_pandas(s, 4) ddf = dd.from_pandas(df, 4) assert_eq(s.describe(), ds.describe()) assert_eq(df.describe(), ddf.describe()) assert_eq(s.describe(), ds.describe(split_every=2)) assert_eq(df.describe(), ddf.describe(split_every=2)) assert ds.describe(split_every=2)._name != ds.describe()._name assert ddf.describe(split_every=2)._name != ddf.describe()._name # remove string columns df = pd.DataFrame({'a': list(range(20)) * 4, 'b': list(range(4)) * 20, 'c': list('abcd') * 20}) ddf = dd.from_pandas(df, 4) assert_eq(df.describe(), ddf.describe()) assert_eq(df.describe(), ddf.describe(split_every=2)) def test_describe_empty(): # https://github.com/dask/dask/issues/2326 ddf = dd.from_pandas(pd.DataFrame({"A": ['a', 'b']}), 2) with pytest.raises(ValueError) as rec: ddf.describe() assert 'DataFrame contains only non-numeric data.' in str(rec) with pytest.raises(ValueError) as rec: ddf.A.describe() assert 'Cannot compute ``describe`` on object dtype.' in str(rec) def test_cumulative(): df = pd.DataFrame(np.random.randn(100, 5), columns=list('abcde')) ddf = dd.from_pandas(df, 5) assert_eq(ddf.cumsum(), df.cumsum()) assert_eq(ddf.cumprod(), df.cumprod()) assert_eq(ddf.cummin(), df.cummin()) assert_eq(ddf.cummax(), df.cummax()) assert_eq(ddf.cumsum(axis=1), df.cumsum(axis=1)) assert_eq(ddf.cumprod(axis=1), df.cumprod(axis=1)) assert_eq(ddf.cummin(axis=1), df.cummin(axis=1)) assert_eq(ddf.cummax(axis=1), df.cummax(axis=1)) assert_eq(ddf.a.cumsum(), df.a.cumsum()) assert_eq(ddf.a.cumprod(), df.a.cumprod()) assert_eq(ddf.a.cummin(), df.a.cummin()) assert_eq(ddf.a.cummax(), df.a.cummax()) # With NaNs df = pd.DataFrame({'a': [1, 2, np.nan, 4, 5, 6, 7, 8], 'b': [1, 2, np.nan, np.nan, np.nan, 5, np.nan, np.nan], 'c': [np.nan] * 8}) ddf = dd.from_pandas(df, 3) assert_eq(df.cumsum(), ddf.cumsum()) assert_eq(df.cummin(), ddf.cummin()) assert_eq(df.cummax(), ddf.cummax()) assert_eq(df.cumprod(), ddf.cumprod()) assert_eq(df.cumsum(skipna=False), ddf.cumsum(skipna=False)) assert_eq(df.cummin(skipna=False), ddf.cummin(skipna=False)) assert_eq(df.cummax(skipna=False), ddf.cummax(skipna=False)) assert_eq(df.cumprod(skipna=False), ddf.cumprod(skipna=False)) assert_eq(df.cumsum(axis=1), ddf.cumsum(axis=1)) assert_eq(df.cummin(axis=1), ddf.cummin(axis=1)) assert_eq(df.cummax(axis=1), ddf.cummax(axis=1)) assert_eq(df.cumprod(axis=1), ddf.cumprod(axis=1)) assert_eq(df.cumsum(axis=1, skipna=False), ddf.cumsum(axis=1, skipna=False)) assert_eq(df.cummin(axis=1, skipna=False), ddf.cummin(axis=1, skipna=False)) assert_eq(df.cummax(axis=1, skipna=False), ddf.cummax(axis=1, skipna=False)) assert_eq(df.cumprod(axis=1, skipna=False), ddf.cumprod(axis=1, skipna=False)) def test_dropna(): df = pd.DataFrame({'x': [np.nan, 2, 3, 4, np.nan, 6], 'y': [1, 2, 
np.nan, 4, np.nan, np.nan], 'z': [1, 2, 3, 4, np.nan, np.nan]}, index=[10, 20, 30, 40, 50, 60]) ddf = dd.from_pandas(df, 3) assert_eq(ddf.x.dropna(), df.x.dropna()) assert_eq(ddf.y.dropna(), df.y.dropna()) assert_eq(ddf.z.dropna(), df.z.dropna()) assert_eq(ddf.dropna(), df.dropna()) assert_eq(ddf.dropna(how='all'), df.dropna(how='all')) assert_eq(ddf.dropna(subset=['x']), df.dropna(subset=['x'])) assert_eq(ddf.dropna(subset=['y', 'z']), df.dropna(subset=['y', 'z'])) assert_eq(ddf.dropna(subset=['y', 'z'], how='all'), df.dropna(subset=['y', 'z'], how='all')) @pytest.mark.parametrize('lower, upper', [(2, 5), (2.5, 3.5)]) def test_clip(lower, upper): df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7, 8, 9], 'b': [3, 5, 2, 5, 7, 2, 4, 2, 4]}) ddf = dd.from_pandas(df, 3) s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9]) ds = dd.from_pandas(s, 3) assert_eq(ddf.clip(lower=lower, upper=upper), df.clip(lower=lower, upper=upper)) assert_eq(ddf.clip(lower=lower), df.clip(lower=lower)) assert_eq(ddf.clip(upper=upper), df.clip(upper=upper)) assert_eq(ds.clip(lower=lower, upper=upper), s.clip(lower=lower, upper=upper)) assert_eq(ds.clip(lower=lower), s.clip(lower=lower)) assert_eq(ds.clip(upper=upper), s.clip(upper=upper)) assert_eq(ddf.clip_lower(lower), df.clip_lower(lower)) assert_eq(ddf.clip_lower(upper), df.clip_lower(upper)) assert_eq(ddf.clip_upper(lower), df.clip_upper(lower)) assert_eq(ddf.clip_upper(upper), df.clip_upper(upper)) assert_eq(ds.clip_lower(lower), s.clip_lower(lower)) assert_eq(ds.clip_lower(upper), s.clip_lower(upper)) assert_eq(ds.clip_upper(lower), s.clip_upper(lower)) assert_eq(ds.clip_upper(upper), s.clip_upper(upper)) def test_where_mask(): pdf1 = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7, 8, 9], 'b': [3, 5, 2, 5, 7, 2, 4, 2, 4]}) ddf1 = dd.from_pandas(pdf1, 2) pdf2 = pd.DataFrame({'a': [True, False, True] * 3, 'b': [False, False, True] * 3}) ddf2 = dd.from_pandas(pdf2, 2) # different index pdf3 = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7, 8, 9], 'b': [3, 5, 2, 5, 7, 2, 4, 2, 4]}, index=[0, 1, 2, 3, 4, 5, 6, 7, 8]) ddf3 = dd.from_pandas(pdf3, 2) pdf4 = pd.DataFrame({'a': [True, False, True] * 3, 'b': [False, False, True] * 3}, index=[5, 6, 7, 8, 9, 10, 11, 12, 13]) ddf4 = dd.from_pandas(pdf4, 2) # different columns pdf5 = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7, 8, 9], 'b': [9, 4, 2, 6, 2, 3, 1, 6, 2], 'c': [5, 6, 7, 8, 9, 10, 11, 12, 13]}, index=[0, 1, 2, 3, 4, 5, 6, 7, 8]) ddf5 = dd.from_pandas(pdf5, 2) pdf6 = pd.DataFrame({'a': [True, False, True] * 3, 'b': [False, False, True] * 3, 'c': [False] * 9, 'd': [True] * 9}, index=[5, 6, 7, 8, 9, 10, 11, 12, 13]) ddf6 = dd.from_pandas(pdf6, 2) cases = [(ddf1, ddf2, pdf1, pdf2), (ddf1.repartition([0, 3, 6, 8]), ddf2, pdf1, pdf2), (ddf1, ddf4, pdf3, pdf4), (ddf3.repartition([0, 4, 6, 8]), ddf4.repartition([5, 9, 10, 13]), pdf3, pdf4), (ddf5, ddf6, pdf5, pdf6), (ddf5.repartition([0, 4, 7, 8]), ddf6, pdf5, pdf6), # use pd.DataFrame as cond (ddf1, pdf2, pdf1, pdf2), (ddf1, pdf4, pdf3, pdf4), (ddf5, pdf6, pdf5, pdf6)] for ddf, ddcond, pdf, pdcond in cases: assert isinstance(ddf, dd.DataFrame) assert isinstance(ddcond, (dd.DataFrame, pd.DataFrame)) assert isinstance(pdf, pd.DataFrame) assert isinstance(pdcond, pd.DataFrame) assert_eq(ddf.where(ddcond), pdf.where(pdcond)) assert_eq(ddf.mask(ddcond), pdf.mask(pdcond)) assert_eq(ddf.where(ddcond, -ddf), pdf.where(pdcond, -pdf)) assert_eq(ddf.mask(ddcond, -ddf), pdf.mask(pdcond, -pdf)) assert_eq(ddf.where(ddcond.a, -ddf), pdf.where(pdcond.a, -pdf)) assert_eq(ddf.mask(ddcond.a, -ddf), pdf.mask(pdcond.a, 
-pdf)) assert_eq(ddf.a.where(ddcond.a), pdf.a.where(pdcond.a)) assert_eq(ddf.a.mask(ddcond.a), pdf.a.mask(pdcond.a)) assert_eq(ddf.a.where(ddcond.a, -ddf.a), pdf.a.where(pdcond.a, -pdf.a)) assert_eq(ddf.a.mask(ddcond.a, -ddf.a), pdf.a.mask(pdcond.a, -pdf.a)) def test_map_partitions_multi_argument(): assert_eq(dd.map_partitions(lambda a, b: a + b, d.a, d.b), full.a + full.b) assert_eq(dd.map_partitions(lambda a, b, c: a + b + c, d.a, d.b, 1), full.a + full.b + 1) def test_map_partitions(): assert_eq(d.map_partitions(lambda df: df, meta=d), full) assert_eq(d.map_partitions(lambda df: df), full) result = d.map_partitions(lambda df: df.sum(axis=1)) assert_eq(result, full.sum(axis=1)) assert_eq(d.map_partitions(lambda df: 1), pd.Series([1, 1, 1], dtype=np.int64), check_divisions=False) x = Scalar({('x', 0): 1}, 'x', int) result = dd.map_partitions(lambda x: 2, x) assert result.dtype in (np.int32, np.int64) and result.compute() == 2 result = dd.map_partitions(lambda x: 4.0, x) assert result.dtype == np.float64 and result.compute() == 4.0 def test_map_partitions_names(): func = lambda x: x assert (sorted(dd.map_partitions(func, d, meta=d).dask) == sorted(dd.map_partitions(func, d, meta=d).dask)) assert (sorted(dd.map_partitions(lambda x: x, d, meta=d, token=1).dask) == sorted(dd.map_partitions(lambda x: x, d, meta=d, token=1).dask)) func = lambda x, y: x assert (sorted(dd.map_partitions(func, d, d, meta=d).dask) == sorted(dd.map_partitions(func, d, d, meta=d).dask)) def test_map_partitions_column_info(): df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [5, 6, 7, 8]}) a = dd.from_pandas(df, npartitions=2) b = dd.map_partitions(lambda x: x, a, meta=a) tm.assert_index_equal(b.columns, a.columns) assert_eq(df, b) b = dd.map_partitions(lambda x: x, a.x, meta=a.x) assert b.name == a.x.name assert_eq(df.x, b) b = dd.map_partitions(lambda x: x, a.x, meta=a.x) assert b.name == a.x.name assert_eq(df.x, b) b = dd.map_partitions(lambda df: df.x + df.y, a) assert isinstance(b, dd.Series) assert b.dtype == 'i8' b = dd.map_partitions(lambda df: df.x + 1, a, meta=('x', 'i8')) assert isinstance(b, dd.Series) assert b.name == 'x' assert b.dtype == 'i8' def test_map_partitions_method_names(): df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [5, 6, 7, 8]}) a = dd.from_pandas(df, npartitions=2) b = a.map_partitions(lambda x: x) assert isinstance(b, dd.DataFrame) tm.assert_index_equal(b.columns, a.columns) b = a.map_partitions(lambda df: df.x + 1) assert isinstance(b, dd.Series) assert b.dtype == 'i8' b = a.map_partitions(lambda df: df.x + 1, meta=('x', 'i8')) assert isinstance(b, dd.Series) assert b.name == 'x' assert b.dtype == 'i8' def test_map_partitions_keeps_kwargs_in_dict(): df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [5, 6, 7, 8]}) a = dd.from_pandas(df, npartitions=2) def f(s, x=1): return s + x b = a.x.map_partitions(f, x=5) assert "'x': 5" in str(b.dask) assert_eq(df.x + 5, b) assert a.x.map_partitions(f, x=5)._name != a.x.map_partitions(f, x=6)._name def test_drop_duplicates(): res = d.drop_duplicates() res2 = d.drop_duplicates(split_every=2) sol = full.drop_duplicates() assert_eq(res, sol) assert_eq(res2, sol) assert res._name != res2._name res = d.a.drop_duplicates() res2 = d.a.drop_duplicates(split_every=2) sol = full.a.drop_duplicates() assert_eq(res, sol) assert_eq(res2, sol) assert res._name != res2._name res = d.index.drop_duplicates() res2 = d.index.drop_duplicates(split_every=2) sol = full.index.drop_duplicates() assert_eq(res, sol) assert_eq(res2, sol) assert res._name != res2._name with 
pytest.raises(NotImplementedError): d.drop_duplicates(keep=False) def test_drop_duplicates_subset(): df = pd.DataFrame({'x': [1, 2, 3, 1, 2, 3], 'y': ['a', 'a', 'b', 'b', 'c', 'c']}) ddf = dd.from_pandas(df, npartitions=2) for kwarg in [{'keep': 'first'}, {'keep': 'last'}]: assert_eq(df.x.drop_duplicates(**kwarg), ddf.x.drop_duplicates(**kwarg)) for ss in [['x'], 'y', ['x', 'y']]: assert_eq(df.drop_duplicates(subset=ss, **kwarg), ddf.drop_duplicates(subset=ss, **kwarg)) def test_get_partition(): pdf = pd.DataFrame(np.random.randn(10, 5), columns=list('abcde')) ddf = dd.from_pandas(pdf, 3) assert ddf.divisions == (0, 4, 8, 9) # DataFrame div1 = ddf.get_partition(0) assert isinstance(div1, dd.DataFrame) assert_eq(div1, pdf.loc[0:3]) div2 = ddf.get_partition(1) assert_eq(div2, pdf.loc[4:7]) div3 = ddf.get_partition(2) assert_eq(div3, pdf.loc[8:9]) assert len(div1) + len(div2) + len(div3) == len(pdf) # Series div1 = ddf.a.get_partition(0) assert isinstance(div1, dd.Series) assert_eq(div1, pdf.a.loc[0:3]) div2 = ddf.a.get_partition(1) assert_eq(div2, pdf.a.loc[4:7]) div3 = ddf.a.get_partition(2) assert_eq(div3, pdf.a.loc[8:9]) assert len(div1) + len(div2) + len(div3) == len(pdf.a) with pytest.raises(ValueError): ddf.get_partition(-1) with pytest.raises(ValueError): ddf.get_partition(3) def test_ndim(): assert (d.ndim == 2) assert (d.a.ndim == 1) assert (d.index.ndim == 1) def test_dtype(): assert (d.dtypes == full.dtypes).all() def test_value_counts(): df = pd.DataFrame({'x': [1, 2, 1, 3, 3, 1, 4]}) ddf = dd.from_pandas(df, npartitions=3) result = ddf.x.value_counts() expected = df.x.value_counts() assert_eq(result, expected) result2 = ddf.x.value_counts(split_every=2) assert_eq(result2, expected) assert result._name != result2._name def test_unique(): pdf = pd.DataFrame({'x': [1, 2, 1, 3, 3, 1, 4, 2, 3, 1], 'y': ['a', 'c', 'b', np.nan, 'c', 'b', 'a', 'd', np.nan, 'a']}) ddf = dd.from_pandas(pdf, npartitions=3) assert_eq(ddf.x.unique(), pd.Series(pdf.x.unique(), name='x')) assert_eq(ddf.y.unique(), pd.Series(pdf.y.unique(), name='y')) assert_eq(ddf.x.unique(split_every=2), pd.Series(pdf.x.unique(), name='x')) assert_eq(ddf.y.unique(split_every=2), pd.Series(pdf.y.unique(), name='y')) assert ddf.x.unique(split_every=2)._name != ddf.x.unique()._name def test_isin(): # Series test assert_eq(d.a.isin([0, 1, 2]), full.a.isin([0, 1, 2])) assert_eq(d.a.isin(pd.Series([0, 1, 2])), full.a.isin(pd.Series([0, 1, 2]))) # DataFrame test assert_eq(d.isin([0, 1, 2]), full.isin([0, 1, 2])) def test_len(): assert len(d) == len(full) assert len(d.a) == len(full.a) def test_size(): assert_eq(d.size, full.size) assert_eq(d.a.size, full.a.size) assert_eq(d.index.size, full.index.size) def test_nbytes(): assert_eq(d.a.nbytes, full.a.nbytes) assert_eq(d.index.nbytes, full.index.nbytes) def test_quantile(): # series / multiple result = d.b.quantile([.3, .7]) exp = full.b.quantile([.3, .7]) # result may differ assert len(result) == 2 assert result.divisions == (.3, .7) assert_eq(result.index, exp.index) assert isinstance(result, dd.Series) result = result.compute() assert isinstance(result, pd.Series) assert result.iloc[0] == 0 assert 5 < result.iloc[1] < 6 # index s = pd.Series(np.arange(10), index=np.arange(10)) ds = dd.from_pandas(s, 2) result = ds.index.quantile([.3, .7]) exp = s.quantile([.3, .7]) assert len(result) == 2 assert result.divisions == (.3, .7) assert_eq(result.index, exp.index) assert isinstance(result, dd.Series) result = result.compute() assert isinstance(result, pd.Series) assert 1 <
result.iloc[0] < 2 assert 7 < result.iloc[1] < 8 # series / single result = d.b.quantile(.5) exp = full.b.quantile(.5) # result may differ assert isinstance(result, dd.core.Scalar) result = result.compute() assert 4 < result < 6 def test_quantile_missing(): df = pd.DataFrame({"A": [0, np.nan, 2]}) ddf = dd.from_pandas(df, 2) expected = df.quantile() result = ddf.quantile() assert_eq(result, expected) expected = df.A.quantile() result = ddf.A.quantile() assert_eq(result, expected) def test_empty_quantile(): result = d.b.quantile([]) exp = full.b.quantile([]) assert result.divisions == (None, None) assert result.name == 'b' assert result.compute().name == 'b' assert_eq(result, exp) def test_dataframe_quantile(): # column X is for testing column order and result division df = pd.DataFrame({'A': np.arange(20), 'X': np.arange(20, 40), 'B': np.arange(10, 30), 'C': ['a', 'b', 'c', 'd'] * 5}, columns=['A', 'X', 'B', 'C']) ddf = dd.from_pandas(df, 3) result = ddf.quantile() assert result.npartitions == 1 assert result.divisions == ('A', 'X') result = result.compute() assert isinstance(result, pd.Series) assert result.name == 0.5 tm.assert_index_equal(result.index, pd.Index(['A', 'X', 'B'])) assert (result > pd.Series([16, 36, 26], index=['A', 'X', 'B'])).all() assert (result < pd.Series([17, 37, 27], index=['A', 'X', 'B'])).all() result = ddf.quantile([0.25, 0.75]) assert result.npartitions == 1 assert result.divisions == (0.25, 0.75) result = result.compute() assert isinstance(result, pd.DataFrame) tm.assert_index_equal(result.index, pd.Index([0.25, 0.75])) tm.assert_index_equal(result.columns, pd.Index(['A', 'X', 'B'])) minexp = pd.DataFrame([[1, 21, 11], [17, 37, 27]], index=[0.25, 0.75], columns=['A', 'X', 'B']) assert (result > minexp).all().all() maxexp = pd.DataFrame([[2, 22, 12], [18, 38, 28]], index=[0.25, 0.75], columns=['A', 'X', 'B']) assert (result < maxexp).all().all() assert_eq(ddf.quantile(axis=1), df.quantile(axis=1)) pytest.raises(ValueError, lambda: ddf.quantile([0.25, 0.75], axis=1)) def test_index(): assert_eq(d.index, full.index) def test_assign(): d_unknown = dd.from_pandas(full, npartitions=3, sort=False) assert not d_unknown.known_divisions res = d.assign(c=1, d='string', e=d.a.sum(), f=d.a + d.b, g=lambda x: x.a + x.b) res_unknown = d_unknown.assign(c=1, d='string', e=d_unknown.a.sum(), f=d_unknown.a + d_unknown.b, g=lambda x: x.a + x.b) sol = full.assign(c=1, d='string', e=full.a.sum(), f=full.a + full.b, g=lambda x: x.a + x.b) assert_eq(res, sol) assert_eq(res_unknown, sol) res = d.assign(c=full.a + 1) assert_eq(res, full.assign(c=full.a + 1)) # divisions unknown won't work with pandas with pytest.raises(ValueError): d_unknown.assign(c=full.a + 1) # unsupported type with pytest.raises(TypeError): d.assign(c=list(range(9))) # Fails when assigning known divisions to unknown divisions with pytest.raises(ValueError): d_unknown.assign(foo=d.a) # Fails when assigning unknown divisions to known divisions with pytest.raises(ValueError): d.assign(foo=d_unknown.a) def test_map(): assert_eq(d.a.map(lambda x: x + 1), full.a.map(lambda x: x + 1)) lk = dict((v, v + 1) for v in full.a.values) assert_eq(d.a.map(lk), full.a.map(lk)) assert_eq(d.b.map(lk), full.b.map(lk)) lk = pd.Series(lk) assert_eq(d.a.map(lk), full.a.map(lk)) assert_eq(d.b.map(lk), full.b.map(lk)) assert_eq(d.b.map(lk, meta=d.b), full.b.map(lk)) assert_eq(d.b.map(lk, meta=('b', 'i8')), full.b.map(lk)) pytest.raises(TypeError, lambda: d.a.map(d.b)) def test_concat(): x = _concat([pd.DataFrame(columns=['a', 'b']),
pd.DataFrame(columns=['a', 'b'])]) assert list(x.columns) == ['a', 'b'] assert len(x) == 0 def test_args(): e = d.assign(c=d.a + 1) f = type(e)(*e._args) assert_eq(e, f) assert_eq(d.a, type(d.a)(*d.a._args)) assert_eq(d.a.sum(), type(d.a.sum())(*d.a.sum()._args)) def test_known_divisions(): assert d.known_divisions df = dd.DataFrame(dsk, 'x', meta, divisions=[None, None, None]) assert not df.known_divisions def test_unknown_divisions(): dsk = {('x', 0): pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}), ('x', 1): pd.DataFrame({'a': [4, 5, 6], 'b': [3, 2, 1]}), ('x', 2): pd.DataFrame({'a': [7, 8, 9], 'b': [0, 0, 0]})} meta = make_meta({'a': 'i8', 'b': 'i8'}) d = dd.DataFrame(dsk, 'x', meta, [None, None, None, None]) full = d.compute(get=dask.get) assert_eq(d.a.sum(), full.a.sum()) assert_eq(d.a + d.b + 1, full.a + full.b + 1) @pytest.mark.parametrize('join', ['inner', 'outer', 'left', 'right']) def test_align(join): df1a = pd.DataFrame({'A': np.random.randn(10), 'B': np.random.randn(10)}, index=[1, 12, 5, 6, 3, 9, 10, 4, 13, 11]) df1b = pd.DataFrame({'A': np.random.randn(10), 'B': np.random.randn(10)}, index=[0, 3, 2, 10, 5, 6, 7, 8, 12, 13]) ddf1a = dd.from_pandas(df1a, 3) ddf1b = dd.from_pandas(df1b, 3) # DataFrame res1, res2 = ddf1a.align(ddf1b, join=join) exp1, exp2 = df1a.align(df1b, join=join) assert assert_eq(res1, exp1) assert assert_eq(res2, exp2) # Series res1, res2 = ddf1a['A'].align(ddf1b['B'], join=join) exp1, exp2 = df1a['A'].align(df1b['B'], join=join) assert assert_eq(res1, exp1) assert assert_eq(res2, exp2) # DataFrame with fill_value res1, res2 = ddf1a.align(ddf1b, join=join, fill_value=1) exp1, exp2 = df1a.align(df1b, join=join, fill_value=1) assert assert_eq(res1, exp1) assert assert_eq(res2, exp2) # Series res1, res2 = ddf1a['A'].align(ddf1b['B'], join=join, fill_value=1) exp1, exp2 = df1a['A'].align(df1b['B'], join=join, fill_value=1) assert assert_eq(res1, exp1) assert assert_eq(res2, exp2) @pytest.mark.parametrize('join', ['inner', 'outer', 'left', 'right']) def test_align_axis(join): df1a = pd.DataFrame({'A': np.random.randn(10), 'B': np.random.randn(10), 'C': np.random.randn(10)}, index=[1, 12, 5, 6, 3, 9, 10, 4, 13, 11]) df1b = pd.DataFrame({'B': np.random.randn(10), 'C': np.random.randn(10), 'D': np.random.randn(10)}, index=[0, 3, 2, 10, 5, 6, 7, 8, 12, 13]) ddf1a = dd.from_pandas(df1a, 3) ddf1b = dd.from_pandas(df1b, 3) res1, res2 = ddf1a.align(ddf1b, join=join, axis=0) exp1, exp2 = df1a.align(df1b, join=join, axis=0) assert assert_eq(res1, exp1) assert assert_eq(res2, exp2) res1, res2 = ddf1a.align(ddf1b, join=join, axis=1) exp1, exp2 = df1a.align(df1b, join=join, axis=1) assert assert_eq(res1, exp1) assert assert_eq(res2, exp2) res1, res2 = ddf1a.align(ddf1b, join=join, axis='index') exp1, exp2 = df1a.align(df1b, join=join, axis='index') assert assert_eq(res1, exp1) assert assert_eq(res2, exp2) res1, res2 = ddf1a.align(ddf1b, join=join, axis='columns') exp1, exp2 = df1a.align(df1b, join=join, axis='columns') assert assert_eq(res1, exp1) assert assert_eq(res2, exp2) # invalid with pytest.raises(ValueError): ddf1a.align(ddf1b, join=join, axis='XXX') with pytest.raises(ValueError): ddf1a['A'].align(ddf1b['B'], join=join, axis=1) def test_combine(): df1 = pd.DataFrame({'A': np.random.choice([1, 2, np.nan], 100), 'B': np.random.choice(['a', 'b', np.nan], 100)}) df2 = pd.DataFrame({'A': np.random.choice([1, 2, 3], 100), 'B': np.random.choice(['a', 'b', 'c'], 100)}) ddf1 = dd.from_pandas(df1, 4) ddf2 = dd.from_pandas(df2, 5) first = lambda a, b: a # DataFrame for da, 
db, a, b in [(ddf1, ddf2, df1, df2), (ddf1.A, ddf2.A, df1.A, df2.A), (ddf1.B, ddf2.B, df1.B, df2.B)]: for func, fill_value in [(add, None), (add, 100), (first, None)]: sol = a.combine(b, func, fill_value=fill_value) assert_eq(da.combine(db, func, fill_value=fill_value), sol) assert_eq(da.combine(b, func, fill_value=fill_value), sol) assert_eq(ddf1.combine(ddf2, add, overwrite=False), df1.combine(df2, add, overwrite=False)) assert da.combine(db, add)._name == da.combine(db, add)._name def test_combine_first(): df1 = pd.DataFrame({'A': np.random.choice([1, 2, np.nan], 100), 'B': np.random.choice(['a', 'b', np.nan], 100)}) df2 = pd.DataFrame({'A': np.random.choice([1, 2, 3], 100), 'B': np.random.choice(['a', 'b', 'c'], 100)}) ddf1 = dd.from_pandas(df1, 4) ddf2 = dd.from_pandas(df2, 5) # DataFrame assert_eq(ddf1.combine_first(ddf2), df1.combine_first(df2)) assert_eq(ddf1.combine_first(df2), df1.combine_first(df2)) # Series assert_eq(ddf1.A.combine_first(ddf2.A), df1.A.combine_first(df2.A)) assert_eq(ddf1.A.combine_first(df2.A), df1.A.combine_first(df2.A)) assert_eq(ddf1.B.combine_first(ddf2.B), df1.B.combine_first(df2.B)) assert_eq(ddf1.B.combine_first(df2.B), df1.B.combine_first(df2.B)) def test_dataframe_picklable(): from pickle import loads, dumps cloudpickle = pytest.importorskip('cloudpickle') cp_dumps = cloudpickle.dumps d = tm.makeTimeDataFrame() df = dd.from_pandas(d, npartitions=3) df = df + 2 # dataframe df2 = loads(dumps(df)) assert_eq(df, df2) df2 = loads(cp_dumps(df)) assert_eq(df, df2) # series a2 = loads(dumps(df.A)) assert_eq(df.A, a2) a2 = loads(cp_dumps(df.A)) assert_eq(df.A, a2) # index i2 = loads(dumps(df.index)) assert_eq(df.index, i2) i2 = loads(cp_dumps(df.index)) assert_eq(df.index, i2) # scalar # lambdas are present, so only test cloudpickle s = df.A.sum() s2 = loads(cp_dumps(s)) assert_eq(s, s2) def test_random_partitions(): a, b = d.random_split([0.5, 0.5], 42) assert isinstance(a, dd.DataFrame) assert isinstance(b, dd.DataFrame) assert a._name != b._name assert len(a.compute()) + len(b.compute()) == len(full) a2, b2 = d.random_split([0.5, 0.5], 42) assert a2._name == a._name assert b2._name == b._name parts = d.random_split([0.4, 0.5, 0.1], 42) names = set([p._name for p in parts]) names.update([a._name, b._name]) assert len(names) == 5 with pytest.raises(ValueError): d.random_split([0.4, 0.5], 42) def test_series_round(): ps = pd.Series([1.123, 2.123, 3.123, 1.234, 2.234, 3.234], name='a') s = dd.from_pandas(ps, npartitions=3) assert_eq(s.round(), ps.round()) @pytest.mark.slow def test_repartition(): def _check_split_data(orig, d): """Check data is split properly""" keys = [k for k in d.dask if k[0].startswith('repartition-split')] keys = sorted(keys) sp = pd.concat([compute_as_if_collection(dd.DataFrame, d.dask, k) for k in keys]) assert_eq(orig, sp) assert_eq(orig, d) df = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6], 'y': list('abdabd')}, index=[10, 20, 30, 40, 50, 60]) a = dd.from_pandas(df, 2) b = a.repartition(divisions=[10, 20, 50, 60]) assert b.divisions == (10, 20, 50, 60) assert_eq(a, b) assert_eq(compute_as_if_collection(dd.DataFrame, b.dask, (b._name, 0)), df.iloc[:1]) for div in [[20, 60], [10, 50], [1], # first / last element mismatch [0, 60], [10, 70], # do not allow to expand divisions by default [10, 50, 20, 60], # not sorted [10, 10, 20, 60]]: # not unique (last element can be duplicated) pytest.raises(ValueError, lambda: a.repartition(divisions=div)) pdf = pd.DataFrame(np.random.randn(7, 5), columns=list('abxyz')) for p in range(1, 7): ddf = 
dd.from_pandas(pdf, p) assert_eq(ddf, pdf) for div in [[0, 6], [0, 6, 6], [0, 5, 6], [0, 4, 6, 6], [0, 2, 6], [0, 2, 6, 6], [0, 2, 3, 6, 6], [0, 1, 2, 3, 4, 5, 6, 6]]: rddf = ddf.repartition(divisions=div) _check_split_data(ddf, rddf) assert rddf.divisions == tuple(div) assert_eq(pdf, rddf) rds = ddf.x.repartition(divisions=div) _check_split_data(ddf.x, rds) assert rds.divisions == tuple(div) assert_eq(pdf.x, rds) # expand divisions for div in [[-5, 10], [-2, 3, 5, 6], [0, 4, 5, 9, 10]]: rddf = ddf.repartition(divisions=div, force=True) _check_split_data(ddf, rddf) assert rddf.divisions == tuple(div) assert_eq(pdf, rddf) rds = ddf.x.repartition(divisions=div, force=True) _check_split_data(ddf.x, rds) assert rds.divisions == tuple(div) assert_eq(pdf.x, rds) pdf = pd.DataFrame({'x': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'y': [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]}, index=list('abcdefghij')) for p in range(1, 7): ddf = dd.from_pandas(pdf, p) assert_eq(ddf, pdf) for div in [list('aj'), list('ajj'), list('adj'), list('abfj'), list('ahjj'), list('acdj'), list('adfij'), list('abdefgij'), list('abcdefghij')]: rddf = ddf.repartition(divisions=div) _check_split_data(ddf, rddf) assert rddf.divisions == tuple(div) assert_eq(pdf, rddf) rds = ddf.x.repartition(divisions=div) _check_split_data(ddf.x, rds) assert rds.divisions == tuple(div) assert_eq(pdf.x, rds) # expand divisions for div in [list('Yadijm'), list('acmrxz'), list('Yajz')]: rddf = ddf.repartition(divisions=div, force=True) _check_split_data(ddf, rddf) assert rddf.divisions == tuple(div) assert_eq(pdf, rddf) rds = ddf.x.repartition(divisions=div, force=True) _check_split_data(ddf.x, rds) assert rds.divisions == tuple(div) assert_eq(pdf.x, rds) def test_repartition_divisions(): result = repartition_divisions([0, 6], [0, 6, 6], 'a', 'b', 'c') assert result == {('b', 0): (methods.boundary_slice, ('a', 0), 0, 6, False), ('b', 1): (methods.boundary_slice, ('a', 0), 6, 6, True), ('c', 0): ('b', 0), ('c', 1): ('b', 1)} result = repartition_divisions([1, 3, 7], [1, 4, 6, 7], 'a', 'b', 'c') assert result == {('b', 0): (methods.boundary_slice, ('a', 0), 1, 3, False), ('b', 1): (methods.boundary_slice, ('a', 1), 3, 4, False), ('b', 2): (methods.boundary_slice, ('a', 1), 4, 6, False), ('b', 3): (methods.boundary_slice, ('a', 1), 6, 7, True), ('c', 0): (methods.concat, [('b', 0), ('b', 1)]), ('c', 1): ('b', 2), ('c', 2): ('b', 3)} def test_repartition_on_pandas_dataframe(): df = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6], 'y': list('abdabd')}, index=[10, 20, 30, 40, 50, 60]) ddf = dd.repartition(df, divisions=[10, 20, 50, 60]) assert isinstance(ddf, dd.DataFrame) assert ddf.divisions == (10, 20, 50, 60) assert_eq(ddf, df) ddf = dd.repartition(df.y, divisions=[10, 20, 50, 60]) assert isinstance(ddf, dd.Series) assert ddf.divisions == (10, 20, 50, 60) assert_eq(ddf, df.y) @pytest.mark.parametrize('use_index', [True, False]) @pytest.mark.parametrize('n', [1, 2, 4, 5]) @pytest.mark.parametrize('k', [1, 2, 4, 5]) @pytest.mark.parametrize('dtype', [int, float, 'M8[ns]']) @pytest.mark.parametrize('transform', [lambda df: df, lambda df: df.x]) def test_repartition_npartitions(use_index, n, k, dtype, transform): df = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6] * 10, 'y': list('abdabd') * 10}, index=pd.Series([10, 20, 30, 40, 50, 60] * 10, dtype=dtype)) df = transform(df) a = dd.from_pandas(df, npartitions=n, sort=use_index) b = a.repartition(npartitions=k) assert_eq(a, b) assert b.npartitions == k parts = dask.get(b.dask, b.__dask_keys__()) assert all(map(len, parts)) def 
test_repartition_npartitions_same_limits(): df = pd.DataFrame({'x': [1, 2, 3]}, index=[pd.Timestamp('2017-05-09 00:00:00.006000'), pd.Timestamp('2017-05-09 02:45:00.017999'), pd.Timestamp('2017-05-09 05:59:58.938999')]) ddf = dd.from_pandas(df, npartitions=2) ddf.repartition(npartitions=10) def test_repartition_object_index(): df = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6] * 10}, index=list('abdabd') * 10) a = dd.from_pandas(df, npartitions=5) b = a.repartition(npartitions=2) assert b.npartitions == 2 assert_eq(b, df) b = a.repartition(npartitions=10) assert b.npartitions == 10 assert_eq(b, df) assert not b.known_divisions @pytest.mark.slow @pytest.mark.parametrize('npartitions', [1, 20, 243]) @pytest.mark.parametrize('freq', ['1D', '7D', '28h', '1h']) @pytest.mark.parametrize('end', ['2000-04-15', '2000-04-15 12:37:01', '2000-01-01 12:37:00']) @pytest.mark.parametrize('start', ['2000-01-01', '2000-01-01 12:30:00', '2000-01-01 12:30:00']) def test_repartition_freq(npartitions, freq, start, end): start = pd.Timestamp(start) end = pd.Timestamp(end) ind = pd.DatetimeIndex(start=start, end=end, freq='60s') df = pd.DataFrame({'x': np.arange(len(ind))}, index=ind) ddf = dd.from_pandas(df, npartitions=npartitions, name='x') ddf2 = ddf.repartition(freq=freq) assert_eq(ddf2, df) def test_repartition_freq_divisions(): df = pd.DataFrame({'x': np.random.random(10)}, index=pd.DatetimeIndex(np.random.random(10) * 100e9)) ddf = dd.from_pandas(df, npartitions=3) ddf2 = ddf.repartition(freq='15s') for div in ddf2.divisions[1:-1]: assert div == div.round('15s') assert ddf2.divisions[0] == df.index.min() assert ddf2.divisions[-1] == df.index.max() assert_eq(ddf2, ddf2) def test_repartition_freq_errors(): df = pd.DataFrame({'x': [1, 2, 3]}) ddf = dd.from_pandas(df, npartitions=1) with pytest.raises(TypeError) as info: ddf.repartition(freq='1s') assert 'only' in str(info.value) assert 'timeseries' in str(info.value) def test_embarrassingly_parallel_operations(): df = pd.DataFrame({'x': [1, 2, 3, 4, None, 6], 'y': list('abdabd')}, index=[10, 20, 30, 40, 50, 60]) a = dd.from_pandas(df, 2) assert_eq(a.x.astype('float32'), df.x.astype('float32')) assert a.x.astype('float32').compute().dtype == 'float32' assert_eq(a.x.dropna(), df.x.dropna()) assert_eq(a.x.between(2, 4), df.x.between(2, 4)) assert_eq(a.x.clip(2, 4), df.x.clip(2, 4)) assert_eq(a.x.notnull(), df.x.notnull()) assert_eq(a.x.isnull(), df.x.isnull()) assert_eq(a.notnull(), df.notnull()) assert_eq(a.isnull(), df.isnull()) assert len(a.sample(0.5).compute()) < len(df) def test_fillna(): df = tm.makeMissingDataframe(0.8, 42) ddf = dd.from_pandas(df, npartitions=5, sort=False) assert_eq(ddf.fillna(100), df.fillna(100)) assert_eq(ddf.A.fillna(100), df.A.fillna(100)) assert_eq(ddf.fillna(method='pad'), df.fillna(method='pad')) assert_eq(ddf.A.fillna(method='pad'), df.A.fillna(method='pad')) assert_eq(ddf.fillna(method='bfill'), df.fillna(method='bfill')) assert_eq(ddf.A.fillna(method='bfill'), df.A.fillna(method='bfill')) assert_eq(ddf.fillna(method='pad', limit=2), df.fillna(method='pad', limit=2)) assert_eq(ddf.A.fillna(method='pad', limit=2), df.A.fillna(method='pad', limit=2)) assert_eq(ddf.fillna(method='bfill', limit=2), df.fillna(method='bfill', limit=2)) assert_eq(ddf.A.fillna(method='bfill', limit=2), df.A.fillna(method='bfill', limit=2)) assert_eq(ddf.fillna(100, axis=1), df.fillna(100, axis=1)) assert_eq(ddf.fillna(method='pad', axis=1), df.fillna(method='pad', axis=1)) assert_eq(ddf.fillna(method='pad', limit=2, axis=1), df.fillna(method='pad', 
limit=2, axis=1)) pytest.raises(ValueError, lambda: ddf.A.fillna(0, axis=1)) pytest.raises(NotImplementedError, lambda: ddf.fillna(0, limit=10)) pytest.raises(NotImplementedError, lambda: ddf.fillna(0, limit=10, axis=1)) df = tm.makeMissingDataframe(0.2, 42) ddf = dd.from_pandas(df, npartitions=5, sort=False) pytest.raises(ValueError, lambda: ddf.fillna(method='pad').compute()) assert_eq(df.fillna(method='pad', limit=3), ddf.fillna(method='pad', limit=3)) def test_fillna_multi_dataframe(): df = tm.makeMissingDataframe(0.8, 42) ddf = dd.from_pandas(df, npartitions=5, sort=False) assert_eq(ddf.A.fillna(ddf.B), df.A.fillna(df.B)) assert_eq(ddf.B.fillna(ddf.A), df.B.fillna(df.A)) def test_ffill_bfill(): df = tm.makeMissingDataframe(0.8, 42) ddf = dd.from_pandas(df, npartitions=5, sort=False) assert_eq(ddf.ffill(), df.ffill()) assert_eq(ddf.bfill(), df.bfill()) assert_eq(ddf.ffill(axis=1), df.ffill(axis=1)) assert_eq(ddf.bfill(axis=1), df.bfill(axis=1)) def test_fillna_series_types(): # https://github.com/dask/dask/issues/2809 df = pd.DataFrame({"A": [1, np.nan, 3], "B": [1, np.nan, 3]}) ddf = dd.from_pandas(df, npartitions=2) fill_value = pd.Series([1, 10], index=['A', 'C']) assert_eq(ddf.fillna(fill_value), df.fillna(fill_value)) def test_sample(): df = pd.DataFrame({'x': [1, 2, 3, 4, None, 6], 'y': list('abdabd')}, index=[10, 20, 30, 40, 50, 60]) a = dd.from_pandas(df, 2) b = a.sample(0.5) assert_eq(b, b) c = a.sample(0.5, random_state=1234) d = a.sample(0.5, random_state=1234) assert_eq(c, d) assert a.sample(0.5)._name != a.sample(0.5)._name def test_sample_without_replacement(): df = pd.DataFrame({'x': [1, 2, 3, 4, None, 6], 'y': list('abdabd')}, index=[10, 20, 30, 40, 50, 60]) a = dd.from_pandas(df, 2) b = a.sample(0.7, replace=False) bb = b.index.compute() assert len(bb) == len(set(bb)) def test_datetime_accessor(): df = pd.DataFrame({'x': [1, 2, 3, 4]}) df['x'] = df.x.astype('M8[us]') a = dd.from_pandas(df, 2) assert 'date' in dir(a.x.dt) # pandas loses Series.name via datetime accessor # see https://github.com/pydata/pandas/issues/10712 assert_eq(a.x.dt.date, df.x.dt.date, check_names=False) # to_pydatetime returns a numpy array in pandas, but a Series in dask assert_eq(a.x.dt.to_pydatetime(), pd.Series(df.x.dt.to_pydatetime(), index=df.index, dtype=object)) assert set(a.x.dt.date.dask) == set(a.x.dt.date.dask) assert set(a.x.dt.to_pydatetime().dask) == set(a.x.dt.to_pydatetime().dask) def test_str_accessor(): df = pd.DataFrame({'x': ['abc', 'bcd', 'cdef', 'DEFG'], 'y': [1, 2, 3, 4]}, index=['e', 'f', 'g', 'H']) a = dd.from_pandas(df, 2, sort=False) # Check that str not in dir/hasattr for non-object columns assert 'str' not in dir(a.y) assert not hasattr(a.y, 'str') # not implemented methods don't show up assert 'get_dummies' not in dir(a.x.str) assert not hasattr(a.x.str, 'get_dummies') assert 'upper' in dir(a.x.str) assert_eq(a.x.str.upper(), df.x.str.upper()) assert set(a.x.str.upper().dask) == set(a.x.str.upper().dask) assert 'upper' in dir(a.index.str) assert_eq(a.index.str.upper(), df.index.str.upper()) assert set(a.index.str.upper().dask) == set(a.index.str.upper().dask) # make sure to pass thru args & kwargs assert 'contains' in dir(a.x.str) assert_eq(a.x.str.contains('a'), df.x.str.contains('a')) assert set(a.x.str.contains('a').dask) == set(a.x.str.contains('a').dask) assert_eq(a.x.str.contains('d', case=False), df.x.str.contains('d', case=False)) assert set(a.x.str.contains('d', case=False).dask) == set(a.x.str.contains('d', case=False).dask) for na in [True, False]: 
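        # ``na`` is the fill value pandas uses for missing entries in
        # ``.str.contains``; trying both True and False below checks that dask
        # simply forwards the keyword (and its effect on the result) to pandas.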
assert_eq(a.x.str.contains('a', na=na), df.x.str.contains('a', na=na)) assert set(a.x.str.contains('a', na=na).dask) == set(a.x.str.contains('a', na=na).dask) for regex in [True, False]: assert_eq(a.x.str.contains('a', regex=regex), df.x.str.contains('a', regex=regex)) assert set(a.x.str.contains('a', regex=regex).dask) == set(a.x.str.contains('a', regex=regex).dask) assert_eq(df.x.str[:2], df.x.str[:2]) assert_eq(a.x.str[1], a.x.str[1]) def test_empty_max(): meta = make_meta({'x': 'i8'}) a = dd.DataFrame({('x', 0): pd.DataFrame({'x': [1]}), ('x', 1): pd.DataFrame({'x': []})}, 'x', meta, [None, None, None]) assert_eq(a.x.max(), 1) def test_query(): df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [5, 6, 7, 8]}) a = dd.from_pandas(df, npartitions=2) q = a.query('x**2 > y') with ignoring(ImportError): assert_eq(q, df.query('x**2 > y')) def test_eval(): p = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [5, 6, 7, 8]}) d = dd.from_pandas(p, npartitions=2) with ignoring(ImportError): assert_eq(p.eval('x + y'), d.eval('x + y')) assert_eq(p.eval('z = x + y', inplace=False), d.eval('z = x + y', inplace=False)) with pytest.raises(NotImplementedError): d.eval('z = x + y', inplace=True) # catch FutureWarning from pandas about assignment in eval with pytest.warns(None): if PANDAS_VERSION < '0.21.0': if p.eval('z = x + y', inplace=None) is None: with pytest.raises(NotImplementedError): d.eval('z = x + y', inplace=None) @pytest.mark.parametrize('include, exclude', [ ([int], None), (None, [int]), ([np.number, object], [float]), (['datetime'], None) ]) def test_select_dtypes(include, exclude): n = 10 df = pd.DataFrame({'cint': [1] * n, 'cstr': ['a'] * n, 'clfoat': [1.] * n, 'cdt': pd.date_range('2016-01-01', periods=n)}) a = dd.from_pandas(df, npartitions=2) result = a.select_dtypes(include=include, exclude=exclude) expected = df.select_dtypes(include=include, exclude=exclude) assert_eq(result, expected) # count dtypes tm.assert_series_equal(a.get_dtype_counts(), df.get_dtype_counts()) tm.assert_series_equal(a.get_ftype_counts(), df.get_ftype_counts()) tm.assert_series_equal(result.get_dtype_counts(), expected.get_dtype_counts()) tm.assert_series_equal(result.get_ftype_counts(), expected.get_ftype_counts()) def test_deterministic_apply_concat_apply_names(): df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [5, 6, 7, 8]}) a = dd.from_pandas(df, npartitions=2) assert sorted(a.x.nlargest(2).dask) == sorted(a.x.nlargest(2).dask) assert sorted(a.x.nlargest(2).dask) != sorted(a.x.nlargest(3).dask) assert (sorted(a.x.drop_duplicates().dask) == sorted(a.x.drop_duplicates().dask)) assert (sorted(a.groupby('x').y.mean().dask) == sorted(a.groupby('x').y.mean().dask)) # Test aca without passing in token string f = lambda a: a.nlargest(5) f2 = lambda a: a.nlargest(3) assert (sorted(aca(a.x, f, f, a.x._meta).dask) != sorted(aca(a.x, f2, f2, a.x._meta).dask)) assert (sorted(aca(a.x, f, f, a.x._meta).dask) == sorted(aca(a.x, f, f, a.x._meta).dask)) # Test aca with keywords def chunk(x, c_key=0, both_key=0): return x.sum() + c_key + both_key def agg(x, a_key=0, both_key=0): return pd.Series(x).sum() + a_key + both_key c_key = 2 a_key = 3 both_key = 4 res = aca(a.x, chunk=chunk, aggregate=agg, chunk_kwargs={'c_key': c_key}, aggregate_kwargs={'a_key': a_key}, both_key=both_key) assert (sorted(res.dask) == sorted(aca(a.x, chunk=chunk, aggregate=agg, chunk_kwargs={'c_key': c_key}, aggregate_kwargs={'a_key': a_key}, both_key=both_key).dask)) assert (sorted(res.dask) != sorted(aca(a.x, chunk=chunk, aggregate=agg, chunk_kwargs={'c_key': c_key}, 
aggregate_kwargs={'a_key': a_key}, both_key=0).dask)) assert_eq(res, df.x.sum() + 2 * (c_key + both_key) + a_key + both_key) def test_aca_meta_infer(): df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [5, 6, 7, 8]}) ddf = dd.from_pandas(df, npartitions=2) def chunk(x, y, constant=1.0): return (x + y + constant).head() def agg(x): return x.head() res = aca([ddf, 2.0], chunk=chunk, aggregate=agg, chunk_kwargs=dict(constant=2.0)) sol = (df + 2.0 + 2.0).head() assert_eq(res, sol) # Should infer as a scalar res = aca([ddf.x], chunk=lambda x: pd.Series([x.sum()]), aggregate=lambda x: x.sum()) assert isinstance(res, Scalar) assert res.compute() == df.x.sum() def test_aca_split_every(): df = pd.DataFrame({'x': [1] * 60}) ddf = dd.from_pandas(df, npartitions=15) def chunk(x, y, constant=0): return x.sum() + y + constant def combine(x, constant=0): return x.sum() + constant + 1 def agg(x, constant=0): return x.sum() + constant + 2 f = lambda n: aca([ddf, 2.0], chunk=chunk, aggregate=agg, combine=combine, chunk_kwargs=dict(constant=1.0), combine_kwargs=dict(constant=2.0), aggregate_kwargs=dict(constant=3.0), split_every=n) assert_max_deps(f(3), 3) assert_max_deps(f(4), 4, False) assert_max_deps(f(5), 5) assert set(f(15).dask.keys()) == set(f(ddf.npartitions).dask.keys()) r3 = f(3) r4 = f(4) assert r3._name != r4._name # Only intersect on reading operations assert len(set(r3.dask.keys()) & set(r4.dask.keys())) == len(ddf.dask.keys()) # Keywords are different for each step assert f(3).compute() == 60 + 15 * (2 + 1) + 7 * (2 + 1) + (3 + 2) # Keywords are same for each step res = aca([ddf, 2.0], chunk=chunk, aggregate=agg, combine=combine, constant=3.0, split_every=3) assert res.compute() == 60 + 15 * (2 + 3) + 7 * (3 + 1) + (3 + 2) # No combine provided, combine is agg res = aca([ddf, 2.0], chunk=chunk, aggregate=agg, constant=3, split_every=3) assert res.compute() == 60 + 15 * (2 + 3) + 8 * (3 + 2) # split_every must be >= 2 with pytest.raises(ValueError): f(1) # combine_kwargs with no combine provided with pytest.raises(ValueError): aca([ddf, 2.0], chunk=chunk, aggregate=agg, split_every=3, chunk_kwargs=dict(constant=1.0), combine_kwargs=dict(constant=2.0), aggregate_kwargs=dict(constant=3.0)) def test_reduction_method(): df = pd.DataFrame({'x': range(50), 'y': range(50, 100)}) ddf = dd.from_pandas(df, npartitions=4) chunk = lambda x, val=0: (x >= val).sum() agg = lambda x: x.sum() # Output of chunk is a scalar res = ddf.x.reduction(chunk, aggregate=agg) assert_eq(res, df.x.count()) # Output of chunk is a series res = ddf.reduction(chunk, aggregate=agg) assert res._name == ddf.reduction(chunk, aggregate=agg)._name assert_eq(res, df.count()) # Test with keywords res2 = ddf.reduction(chunk, aggregate=agg, chunk_kwargs={'val': 25}) res2._name == ddf.reduction(chunk, aggregate=agg, chunk_kwargs={'val': 25})._name assert res2._name != res._name assert_eq(res2, (df >= 25).sum()) # Output of chunk is a dataframe def sum_and_count(x): return pd.DataFrame({'sum': x.sum(), 'count': x.count()}) res = ddf.reduction(sum_and_count, aggregate=lambda x: x.groupby(level=0).sum()) assert_eq(res, pd.DataFrame({'sum': df.sum(), 'count': df.count()})) def test_reduction_method_split_every(): df = pd.Series([1] * 60) ddf = dd.from_pandas(df, npartitions=15) def chunk(x, constant=0): return x.sum() + constant def combine(x, constant=0): return x.sum() + constant + 1 def agg(x, constant=0): return x.sum() + constant + 2 f = lambda n: ddf.reduction(chunk, aggregate=agg, combine=combine, chunk_kwargs=dict(constant=1.0), 
combine_kwargs=dict(constant=2.0), aggregate_kwargs=dict(constant=3.0), split_every=n) assert_max_deps(f(3), 3) assert_max_deps(f(4), 4, False) assert_max_deps(f(5), 5) assert set(f(15).dask.keys()) == set(f(ddf.npartitions).dask.keys()) r3 = f(3) r4 = f(4) assert r3._name != r4._name # Only intersect on reading operations assert len(set(r3.dask.keys()) & set(r4.dask.keys())) == len(ddf.dask.keys()) # Keywords are different for each step assert f(3).compute() == 60 + 15 + 7 * (2 + 1) + (3 + 2) # Keywords are same for each step res = ddf.reduction(chunk, aggregate=agg, combine=combine, constant=3.0, split_every=3) assert res.compute() == 60 + 15 * 3 + 7 * (3 + 1) + (3 + 2) # No combine provided, combine is agg res = ddf.reduction(chunk, aggregate=agg, constant=3.0, split_every=3) assert res.compute() == 60 + 15 * 3 + 8 * (3 + 2) # split_every must be >= 2 with pytest.raises(ValueError): f(1) # combine_kwargs with no combine provided with pytest.raises(ValueError): ddf.reduction(chunk, aggregate=agg, split_every=3, chunk_kwargs=dict(constant=1.0), combine_kwargs=dict(constant=2.0), aggregate_kwargs=dict(constant=3.0)) def test_pipe(): df = pd.DataFrame({'x': range(50), 'y': range(50, 100)}) ddf = dd.from_pandas(df, npartitions=4) def f(x, y, z=0): return x + y + z assert_eq(ddf.pipe(f, 1, z=2), f(ddf, 1, z=2)) assert_eq(ddf.x.pipe(f, 1, z=2), f(ddf.x, 1, z=2)) def test_gh_517(): arr = np.random.randn(100, 2) df = pd.DataFrame(arr, columns=['a', 'b']) ddf = dd.from_pandas(df, 2) assert ddf.index.nunique().compute() == 100 ddf2 = dd.from_pandas(pd.concat([df, df]), 5) assert ddf2.index.nunique().compute() == 100 def test_drop_axis_1(): df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [5, 6, 7, 8], 'z': [9, 10, 11, 12]}) ddf = dd.from_pandas(df, npartitions=2) assert_eq(ddf.drop('y', axis=1), df.drop('y', axis=1)) assert_eq(ddf.drop(['y', 'z'], axis=1), df.drop(['y', 'z'], axis=1)) with pytest.raises(ValueError): ddf.drop(['a', 'x'], axis=1) assert_eq(ddf.drop(['a', 'x'], axis=1, errors='ignore'), df.drop(['a', 'x'], axis=1, errors='ignore')) def test_gh580(): df = pd.DataFrame({'x': np.arange(10, dtype=float)}) ddf = dd.from_pandas(df, 2) assert_eq(np.cos(df['x']), np.cos(ddf['x'])) assert_eq(np.cos(df['x']), np.cos(ddf['x'])) def test_rename_dict(): renamer = {'a': 'A', 'b': 'B'} assert_eq(d.rename(columns=renamer), full.rename(columns=renamer)) def test_rename_function(): renamer = lambda x: x.upper() assert_eq(d.rename(columns=renamer), full.rename(columns=renamer)) def test_rename_index(): renamer = {0: 1} pytest.raises(ValueError, lambda: d.rename(index=renamer)) def test_to_timestamp(): index = pd.PeriodIndex(freq='A', start='1/1/2001', end='12/1/2004') df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [10, 20, 30, 40]}, index=index) ddf = dd.from_pandas(df, npartitions=3) assert_eq(ddf.to_timestamp(), df.to_timestamp()) assert_eq(ddf.to_timestamp(freq='M', how='s').compute(), df.to_timestamp(freq='M', how='s')) assert_eq(ddf.x.to_timestamp(), df.x.to_timestamp()) assert_eq(ddf.x.to_timestamp(freq='M', how='s').compute(), df.x.to_timestamp(freq='M', how='s')) def test_to_frame(): s = pd.Series([1, 2, 3], name='foo') a = dd.from_pandas(s, npartitions=2) assert_eq(s.to_frame(), a.to_frame()) assert_eq(s.to_frame('bar'), a.to_frame('bar')) def test_apply(): df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [10, 20, 30, 40]}) ddf = dd.from_pandas(df, npartitions=2) func = lambda row: row['x'] + row['y'] assert_eq(ddf.x.apply(lambda x: x + 1, meta=("x", int)), df.x.apply(lambda x: x + 1)) # specify meta 
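    # ``meta=(None, int)`` declares the result of the row-wise apply up front:
    # an unnamed Series with integer values.  With an explicit ``meta``, dask
    # does not have to run the function on dummy data to guess the output
    # type, which is why the "inference" block further down is wrapped in
    # ``pytest.warns``.  A minimal sketch of the same idea on a toy frame
    # (names here are illustrative, not part of this test):
    #
    #     toy = dd.from_pandas(pd.DataFrame({'x': [1, 2], 'y': [10, 20]}),
    #                          npartitions=1)
    #     toy.apply(lambda row: row['x'] + row['y'], axis=1,
    #               meta=(None, int)).compute()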
assert_eq(ddf.apply(lambda xy: xy[0] + xy[1], axis=1, meta=(None, int)), df.apply(lambda xy: xy[0] + xy[1], axis=1)) assert_eq(ddf.apply(lambda xy: xy[0] + xy[1], axis='columns', meta=(None, int)), df.apply(lambda xy: xy[0] + xy[1], axis='columns')) # inference with pytest.warns(None): assert_eq(ddf.apply(lambda xy: xy[0] + xy[1], axis=1), df.apply(lambda xy: xy[0] + xy[1], axis=1)) with pytest.warns(None): assert_eq(ddf.apply(lambda xy: xy, axis=1), df.apply(lambda xy: xy, axis=1)) # specify meta func = lambda x: pd.Series([x, x]) assert_eq(ddf.x.apply(func, meta=[(0, int), (1, int)]), df.x.apply(func)) # inference with pytest.warns(None): assert_eq(ddf.x.apply(func), df.x.apply(func)) # axis=0 with pytest.raises(NotImplementedError): ddf.apply(lambda xy: xy, axis=0) with pytest.raises(NotImplementedError): ddf.apply(lambda xy: xy, axis='index') @pytest.mark.skipif(sys.version_info <= (3, 0), reason="Global filter is applied by another library, and " "not reset properly.") def test_apply_warns(): df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [10, 20, 30, 40]}) ddf = dd.from_pandas(df, npartitions=2) func = lambda row: row['x'] + row['y'] with pytest.warns(UserWarning) as w: ddf.apply(func, axis=1) assert len(w) == 1 with pytest.warns(None) as w: ddf.apply(func, axis=1, meta=(None, int)) assert len(w) == 0 def test_applymap(): df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [10, 20, 30, 40]}) ddf = dd.from_pandas(df, npartitions=2) assert_eq(ddf.applymap(lambda x: x + 1), df.applymap(lambda x: x + 1)) assert_eq(ddf.applymap(lambda x: (x, x)), df.applymap(lambda x: (x, x))) def test_abs(): df = pd.DataFrame({'A': [1, -2, 3, -4, 5], 'B': [-6., -7, -8, -9, 10], 'C': ['a', 'b', 'c', 'd', 'e']}) ddf = dd.from_pandas(df, npartitions=2) assert_eq(ddf.A.abs(), df.A.abs()) assert_eq(ddf[['A', 'B']].abs(), df[['A', 'B']].abs()) pytest.raises(ValueError, lambda: ddf.C.abs()) pytest.raises(TypeError, lambda: ddf.abs()) def test_round(): df = pd.DataFrame({'col1': [1.123, 2.123, 3.123], 'col2': [1.234, 2.234, 3.234]}) ddf = dd.from_pandas(df, npartitions=2) assert_eq(ddf.round(), df.round()) assert_eq(ddf.round(2), df.round(2)) def test_cov(): # DataFrame df = pd.util.testing.makeMissingDataframe(0.3, 42) ddf = dd.from_pandas(df, npartitions=6) res = ddf.cov() res2 = ddf.cov(split_every=2) res3 = ddf.cov(10) res4 = ddf.cov(10, split_every=2) sol = df.cov() sol2 = df.cov(10) assert_eq(res, sol) assert_eq(res2, sol) assert_eq(res3, sol2) assert_eq(res4, sol2) assert res._name == ddf.cov()._name assert res._name != res2._name assert res3._name != res4._name assert res._name != res3._name # Series a = df.A b = df.B da = dd.from_pandas(a, npartitions=6) db = dd.from_pandas(b, npartitions=7) res = da.cov(db) res2 = da.cov(db, split_every=2) res3 = da.cov(db, 10) res4 = da.cov(db, 10, split_every=2) sol = a.cov(b) sol2 = a.cov(b, 10) assert_eq(res, sol) assert_eq(res2, sol) assert_eq(res3, sol2) assert_eq(res4, sol2) assert res._name == da.cov(db)._name assert res._name != res2._name assert res3._name != res4._name assert res._name != res3._name def test_corr(): # DataFrame df = pd.util.testing.makeMissingDataframe(0.3, 42) ddf = dd.from_pandas(df, npartitions=6) res = ddf.corr() res2 = ddf.corr(split_every=2) res3 = ddf.corr(min_periods=10) res4 = ddf.corr(min_periods=10, split_every=2) sol = df.corr() sol2 = df.corr(min_periods=10) assert_eq(res, sol) assert_eq(res2, sol) assert_eq(res3, sol2) assert_eq(res4, sol2) assert res._name == ddf.corr()._name assert res._name != res2._name assert res3._name != res4._name 
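    # ``_name`` comes from tokenizing the operation together with its
    # arguments, so calls that differ only in ``min_periods`` or
    # ``split_every`` must get distinct graph keys, while repeating the same
    # call reuses the existing key.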
assert res._name != res3._name pytest.raises(NotImplementedError, lambda: ddf.corr(method='spearman')) # Series a = df.A b = df.B da = dd.from_pandas(a, npartitions=6) db = dd.from_pandas(b, npartitions=7) res = da.corr(db) res2 = da.corr(db, split_every=2) res3 = da.corr(db, min_periods=10) res4 = da.corr(db, min_periods=10, split_every=2) sol = da.corr(db) sol2 = da.corr(db, min_periods=10) assert_eq(res, sol) assert_eq(res2, sol) assert_eq(res3, sol2) assert_eq(res4, sol2) assert res._name == da.corr(db)._name assert res._name != res2._name assert res3._name != res4._name assert res._name != res3._name pytest.raises(NotImplementedError, lambda: da.corr(db, method='spearman')) pytest.raises(TypeError, lambda: da.corr(ddf)) def test_cov_corr_meta(): df = pd.DataFrame({'a': np.array([1, 2, 3]), 'b': np.array([1.0, 2.0, 3.0], dtype='f4'), 'c': np.array([1.0, 2.0, 3.0])}, index=pd.Index([1, 2, 3], name='myindex')) ddf = dd.from_pandas(df, npartitions=2) assert_eq(ddf.corr(), df.corr()) assert_eq(ddf.cov(), df.cov()) assert ddf.a.cov(ddf.b)._meta.dtype == 'f8' assert ddf.a.corr(ddf.b)._meta.dtype == 'f8' @pytest.mark.slow def test_cov_corr_stable(): df = pd.DataFrame(np.random.uniform(-1, 1, (20000000, 2)), columns=['a', 'b']) ddf = dd.from_pandas(df, npartitions=50) assert_eq(ddf.cov(split_every=8), df.cov()) assert_eq(ddf.corr(split_every=8), df.corr()) def test_autocorr(): x = pd.Series(np.random.random(100)) dx = dd.from_pandas(x, npartitions=10) assert_eq(dx.autocorr(2), x.autocorr(2)) assert_eq(dx.autocorr(0), x.autocorr(0)) assert_eq(dx.autocorr(-2), x.autocorr(-2)) assert_eq(dx.autocorr(2, split_every=3), x.autocorr(2)) pytest.raises(TypeError, lambda: dx.autocorr(1.5)) def test_apply_infer_columns(): df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [10, 20, 30, 40]}) ddf = dd.from_pandas(df, npartitions=2) def return_df(x): # will create new DataFrame which columns is ['sum', 'mean'] return pd.Series([x.sum(), x.mean()], index=['sum', 'mean']) # DataFrame to completely different DataFrame with pytest.warns(None): result = ddf.apply(return_df, axis=1) assert isinstance(result, dd.DataFrame) tm.assert_index_equal(result.columns, pd.Index(['sum', 'mean'])) assert_eq(result, df.apply(return_df, axis=1)) # DataFrame to Series with pytest.warns(None): result = ddf.apply(lambda x: 1, axis=1) assert isinstance(result, dd.Series) assert result.name is None assert_eq(result, df.apply(lambda x: 1, axis=1)) def return_df2(x): return pd.Series([x * 2, x * 3], index=['x2', 'x3']) # Series to completely different DataFrame with pytest.warns(None): result = ddf.x.apply(return_df2) assert isinstance(result, dd.DataFrame) tm.assert_index_equal(result.columns, pd.Index(['x2', 'x3'])) assert_eq(result, df.x.apply(return_df2)) # Series to Series with pytest.warns(None): result = ddf.x.apply(lambda x: 1) assert isinstance(result, dd.Series) assert result.name == 'x' assert_eq(result, df.x.apply(lambda x: 1)) def test_index_time_properties(): i = tm.makeTimeSeries() a = dd.from_pandas(i, npartitions=3) assert 'day' in dir(a.index) # returns a numpy array in pandas, but a Index in dask assert_eq(a.index.day, pd.Index(i.index.day)) assert_eq(a.index.month, pd.Index(i.index.month)) def test_nlargest_nsmallest(): from string import ascii_lowercase df = pd.DataFrame({'a': np.random.permutation(20), 'b': list(ascii_lowercase[:20]), 'c': np.random.permutation(20).astype('float64')}) ddf = dd.from_pandas(df, npartitions=3) for m in ['nlargest', 'nsmallest']: f = lambda df, *args, **kwargs: getattr(df, m)(*args, 
**kwargs) res = f(ddf, 5, 'a') res2 = f(ddf, 5, 'a', split_every=2) sol = f(df, 5, 'a') assert_eq(res, sol) assert_eq(res2, sol) assert res._name != res2._name res = f(ddf, 5, ['a', 'c']) res2 = f(ddf, 5, ['a', 'c'], split_every=2) sol = f(df, 5, ['a', 'c']) assert_eq(res, sol) assert_eq(res2, sol) assert res._name != res2._name res = f(ddf.a, 5) res2 = f(ddf.a, 5, split_every=2) sol = f(df.a, 5) assert_eq(res, sol) assert_eq(res2, sol) assert res._name != res2._name def test_reset_index(): df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [10, 20, 30, 40]}) ddf = dd.from_pandas(df, npartitions=2) sol = df.reset_index() res = ddf.reset_index() assert all(d is None for d in res.divisions) assert_eq(res, sol, check_index=False) sol = df.reset_index(drop=True) res = ddf.reset_index(drop=True) assert all(d is None for d in res.divisions) assert_eq(res, sol, check_index=False) sol = df.x.reset_index() res = ddf.x.reset_index() assert all(d is None for d in res.divisions) assert_eq(res, sol, check_index=False) sol = df.x.reset_index(drop=True) res = ddf.x.reset_index(drop=True) assert all(d is None for d in res.divisions) assert_eq(res, sol, check_index=False) def test_dataframe_compute_forward_kwargs(): x = dd.from_pandas(pd.DataFrame({'a': range(10)}), npartitions=2).a.sum() x.compute(bogus_keyword=10) def test_series_iteritems(): df = pd.DataFrame({'x': [1, 2, 3, 4]}) ddf = dd.from_pandas(df, npartitions=2) for (a, b) in zip(df['x'].iteritems(), ddf['x'].iteritems()): assert a == b def test_dataframe_iterrows(): df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [10, 20, 30, 40]}) ddf = dd.from_pandas(df, npartitions=2) for (a, b) in zip(df.iterrows(), ddf.iterrows()): tm.assert_series_equal(a[1], b[1]) def test_dataframe_itertuples(): df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [10, 20, 30, 40]}) ddf = dd.from_pandas(df, npartitions=2) for (a, b) in zip(df.itertuples(), ddf.itertuples()): assert a == b def test_astype(): df = pd.DataFrame({'x': [1, 2, 3, None], 'y': [10, 20, 30, 40]}, index=[10, 20, 30, 40]) a = dd.from_pandas(df, 2) assert_eq(a.astype(float), df.astype(float)) assert_eq(a.x.astype(float), df.x.astype(float)) def test_astype_categoricals(): df = pd.DataFrame({'x': ['a', 'b', 'c', 'b', 'c'], 'y': ['x', 'y', 'z', 'x', 'y'], 'z': [1, 2, 3, 4, 5]}) df = df.astype({'y': 'category'}) ddf = dd.from_pandas(df, 2) assert ddf.y.cat.known ddf2 = ddf.astype({'x': 'category'}) assert not ddf2.x.cat.known assert ddf2.y.cat.known assert ddf2.x.dtype == 'category' assert ddf2.compute().x.dtype == 'category' dx = ddf.x.astype('category') assert not dx.cat.known assert dx.dtype == 'category' assert dx.compute().dtype == 'category' @pytest.mark.skipif(PANDAS_VERSION < '0.21.0', reason="No CategoricalDtype with categories") def test_astype_categoricals_known(): df = pd.DataFrame({'x': ['a', 'b', 'c', 'b', 'c'], 'y': ['x', 'y', 'z', 'y', 'z'], 'z': ['b', 'b', 'b', 'c', 'b'], 'other': [1, 2, 3, 4, 5]}) ddf = dd.from_pandas(df, 2) abc = pd.api.types.CategoricalDtype(['a', 'b', 'c']) category = pd.api.types.CategoricalDtype() # DataFrame ddf2 = ddf.astype({'x': abc, 'y': category, 'z': 'category', 'other': 'f8'}) for col, known in [('x', True), ('y', False), ('z', False)]: x = getattr(ddf2, col) assert pd.api.types.is_categorical_dtype(x.dtype) assert x.cat.known == known # Series for dtype, known in [('category', False), (category, False), (abc, True)]: dx2 = ddf.x.astype(dtype) assert pd.api.types.is_categorical_dtype(dx2.dtype) assert dx2.cat.known == known def test_groupby_callable(): a = pd.DataFrame({'x': 
[1, 2, 3, None], 'y': [10, 20, 30, 40]}, index=[1, 2, 3, 4]) b = dd.from_pandas(a, 2) def iseven(x): return x % 2 == 0 assert_eq(a.groupby(iseven).y.sum(), b.groupby(iseven).y.sum()) assert_eq(a.y.groupby(iseven).sum(), b.y.groupby(iseven).sum()) def test_methods_tokenize_differently(): df = pd.DataFrame({'x': [1, 2, 3, 4]}) df = dd.from_pandas(df, npartitions=1) assert (df.x.map_partitions(lambda x: pd.Series(x.min()))._name != df.x.map_partitions(lambda x: pd.Series(x.max()))._name) def _assert_info(df, ddf, memory_usage=True): from io import StringIO assert isinstance(df, pd.DataFrame) assert isinstance(ddf, dd.DataFrame) buf_pd, buf_da = StringIO(), StringIO() df.info(buf=buf_pd, memory_usage=memory_usage) ddf.info(buf=buf_da, verbose=True, memory_usage=memory_usage) stdout_pd = buf_pd.getvalue() stdout_da = buf_da.getvalue() stdout_da = stdout_da.replace(str(type(ddf)), str(type(df))) assert stdout_pd == stdout_da def test_info(): from io import StringIO from dask.compatibility import unicode pandas_format._put_lines = put_lines test_frames = [ pd.DataFrame({'x': [1, 2, 3, 4], 'y': [1, 0, 1, 0]}, index=pd.Int64Index(range(4))), # No RangeIndex in dask pd.DataFrame() ] for df in test_frames: ddf = dd.from_pandas(df, npartitions=4) _assert_info(df, ddf) buf = StringIO() ddf = dd.from_pandas(pd.DataFrame({'x': [1, 2, 3, 4], 'y': [1, 0, 1, 0]}, index=range(4)), npartitions=4) # Verbose=False ddf.info(buf=buf, verbose=False) assert buf.getvalue() == unicode("\n" "Columns: 2 entries, x to y\n" "dtypes: int64(2)") # buf=None assert ddf.info(buf=None) is None def test_groupby_multilevel_info(): # GH 1844 from io import StringIO from dask.compatibility import unicode pandas_format._put_lines = put_lines df = pd.DataFrame({'A': [1, 1, 2, 2], 'B': [1, 2, 3, 4], 'C': [1, 2, 3, 4]}) ddf = dd.from_pandas(df, npartitions=2) g = ddf.groupby(['A', 'B']).sum() # slight difference between memory repr (single additional space) _assert_info(g.compute(), g, memory_usage=False) buf = StringIO() g.info(buf, verbose=False) assert buf.getvalue() == unicode(""" Columns: 1 entries, C to C dtypes: int64(1)""") # multilevel g = ddf.groupby(['A', 'B']).agg(['count', 'sum']) _assert_info(g.compute(), g, memory_usage=False) buf = StringIO() g.info(buf, verbose=False) assert buf.getvalue() == unicode(""" Columns: 2 entries, (C, count) to (C, sum) dtypes: int64(2)""") def test_categorize_info(): # assert that we can call info after categorize # workaround for: https://github.com/pydata/pandas/issues/14368 from io import StringIO from dask.compatibility import unicode pandas_format._put_lines = put_lines df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': pd.Series(list('aabc')), 'z': pd.Series(list('aabc'))}, index=pd.Int64Index(range(4))) # No RangeIndex in dask ddf = dd.from_pandas(df, npartitions=4).categorize(['y']) # Verbose=False buf = StringIO() ddf.info(buf=buf, verbose=True) expected = unicode("\n" "Int64Index: 4 entries, 0 to 3\n" "Data columns (total 3 columns):\n" "x 4 non-null int64\n" "y 4 non-null category\n" "z 4 non-null object\n" "dtypes: category(1), object(1), int64(1)") assert buf.getvalue() == expected def test_gh_1301(): df = pd.DataFrame([['1', '2'], ['3', '4']]) ddf = dd.from_pandas(df, npartitions=2) ddf2 = ddf.assign(y=ddf[1].astype(int)) assert_eq(ddf2, df.assign(y=df[1].astype(int))) assert ddf2.dtypes['y'] == np.dtype(int) def test_timeseries_sorted(): df = tm.makeTimeDataFrame() ddf = dd.from_pandas(df.reset_index(), npartitions=2) df.index.name = 'index' assert_eq(ddf.set_index('index', 
sorted=True, drop=True), df) def test_column_assignment(): df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [1, 0, 1, 0]}) ddf = dd.from_pandas(df, npartitions=2) orig = ddf.copy() ddf['z'] = ddf.x + ddf.y df['z'] = df.x + df.y assert_eq(df, ddf) assert 'z' not in orig.columns def test_columns_assignment(): df = pd.DataFrame({'x': [1, 2, 3, 4]}) ddf = dd.from_pandas(df, npartitions=2) df2 = df.assign(y=df.x + 1, z=df.x - 1) df[['a', 'b']] = df2[['y', 'z']] ddf2 = ddf.assign(y=ddf.x + 1, z=ddf.x - 1) ddf[['a', 'b']] = ddf2[['y', 'z']] assert_eq(df, ddf) def test_attribute_assignment(): df = pd.DataFrame({'x': [1, 2, 3, 4, 5], 'y': [1., 2., 3., 4., 5.]}) ddf = dd.from_pandas(df, npartitions=2) ddf.y = ddf.x + ddf.y assert_eq(ddf, df.assign(y=df.x + df.y)) def test_setitem_triggering_realign(): a = dd.from_pandas(pd.DataFrame({"A": range(12)}), npartitions=3) b = dd.from_pandas(pd.Series(range(12), name='B'), npartitions=4) a['C'] = b assert len(a) == 12 def test_inplace_operators(): df = pd.DataFrame({'x': [1, 2, 3, 4, 5], 'y': [1., 2., 3., 4., 5.]}) ddf = dd.from_pandas(df, npartitions=2) ddf.y **= 0.5 assert_eq(ddf.y, df.y ** 0.5) assert_eq(ddf, df.assign(y=df.y ** 0.5)) @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("idx", [ np.arange(100), sorted(np.random.random(size=100)), pd.date_range('20150101', periods=100) ]) def test_idxmaxmin(idx, skipna): df = pd.DataFrame(np.random.randn(100, 5), columns=list('abcde'), index=idx) df.b.iloc[31] = np.nan df.d.iloc[78] = np.nan ddf = dd.from_pandas(df, npartitions=3) assert_eq(df.idxmax(axis=1, skipna=skipna), ddf.idxmax(axis=1, skipna=skipna)) assert_eq(df.idxmin(axis=1, skipna=skipna), ddf.idxmin(axis=1, skipna=skipna)) assert_eq(df.idxmax(skipna=skipna), ddf.idxmax(skipna=skipna)) assert_eq(df.idxmax(skipna=skipna), ddf.idxmax(skipna=skipna, split_every=2)) assert (ddf.idxmax(skipna=skipna)._name != ddf.idxmax(skipna=skipna, split_every=2)._name) assert_eq(df.idxmin(skipna=skipna), ddf.idxmin(skipna=skipna)) assert_eq(df.idxmin(skipna=skipna), ddf.idxmin(skipna=skipna, split_every=2)) assert (ddf.idxmin(skipna=skipna)._name != ddf.idxmin(skipna=skipna, split_every=2)._name) assert_eq(df.a.idxmax(skipna=skipna), ddf.a.idxmax(skipna=skipna)) assert_eq(df.a.idxmax(skipna=skipna), ddf.a.idxmax(skipna=skipna, split_every=2)) assert (ddf.a.idxmax(skipna=skipna)._name != ddf.a.idxmax(skipna=skipna, split_every=2)._name) assert_eq(df.a.idxmin(skipna=skipna), ddf.a.idxmin(skipna=skipna)) assert_eq(df.a.idxmin(skipna=skipna), ddf.a.idxmin(skipna=skipna, split_every=2)) assert (ddf.a.idxmin(skipna=skipna)._name != ddf.a.idxmin(skipna=skipna, split_every=2)._name) def test_idxmaxmin_empty_partitions(): df = pd.DataFrame({'a': [1, 2, 3], 'b': [1.5, 2, 3], 'c': [np.NaN] * 3, 'd': [1, 2, np.NaN]}) empty = df.iloc[:0] ddf = dd.concat([dd.from_pandas(df, npartitions=1)] + [dd.from_pandas(empty, npartitions=1)] * 10) for skipna in [True, False]: assert_eq(ddf.idxmin(skipna=skipna, split_every=3), df.idxmin(skipna=skipna)) assert_eq(ddf[['a', 'b', 'd']].idxmin(skipna=skipna, split_every=3), df[['a', 'b', 'd']].idxmin(skipna=skipna)) assert_eq(ddf.b.idxmax(split_every=3), df.b.idxmax()) # Completely empty raises ddf = dd.concat([dd.from_pandas(empty, npartitions=1)] * 10) with pytest.raises(ValueError): ddf.idxmax().compute() with pytest.raises(ValueError): ddf.b.idxmax().compute() def test_getitem_meta(): data = {'col1': ['a', 'a', 'b'], 'col2': [0, 1, 0]} df = pd.DataFrame(data=data, columns=['col1', 'col2']) ddf = dd.from_pandas(df, 
npartitions=1) assert_eq(df.col2[df.col1 == 'a'], ddf.col2[ddf.col1 == 'a']) def test_getitem_multilevel(): pdf = pd.DataFrame({('A', '0') : [1,2,2], ('B', '1') : [1,2,3]}) ddf = dd.from_pandas(pdf, npartitions=3) assert_eq(pdf['A', '0'], ddf['A', '0']) assert_eq(pdf[[('A', '0'), ('B', '1')]], ddf[[('A', '0'), ('B', '1')]]) def test_diff(): df = pd.DataFrame(np.random.randn(100, 5), columns=list('abcde')) ddf = dd.from_pandas(df, 5) assert_eq(ddf.diff(), df.diff()) assert_eq(ddf.diff(0), df.diff(0)) assert_eq(ddf.diff(2), df.diff(2)) assert_eq(ddf.diff(-2), df.diff(-2)) assert_eq(ddf.diff(2, axis=1), df.diff(2, axis=1)) assert_eq(ddf.a.diff(), df.a.diff()) assert_eq(ddf.a.diff(0), df.a.diff(0)) assert_eq(ddf.a.diff(2), df.a.diff(2)) assert_eq(ddf.a.diff(-2), df.a.diff(-2)) assert ddf.diff(2)._name == ddf.diff(2)._name assert ddf.diff(2)._name != ddf.diff(3)._name pytest.raises(TypeError, lambda: ddf.diff(1.5)) def test_shift(): df = tm.makeTimeDataFrame() ddf = dd.from_pandas(df, npartitions=4) # DataFrame assert_eq(ddf.shift(), df.shift()) assert_eq(ddf.shift(0), df.shift(0)) assert_eq(ddf.shift(2), df.shift(2)) assert_eq(ddf.shift(-2), df.shift(-2)) assert_eq(ddf.shift(2, axis=1), df.shift(2, axis=1)) # Series assert_eq(ddf.A.shift(), df.A.shift()) assert_eq(ddf.A.shift(0), df.A.shift(0)) assert_eq(ddf.A.shift(2), df.A.shift(2)) assert_eq(ddf.A.shift(-2), df.A.shift(-2)) with pytest.raises(TypeError): ddf.shift(1.5) def test_shift_with_freq(): df = tm.makeTimeDataFrame(30) # DatetimeIndex for data_freq, divs1 in [('B', False), ('D', True), ('H', True)]: df = df.set_index(tm.makeDateIndex(30, freq=data_freq)) ddf = dd.from_pandas(df, npartitions=4) for freq, divs2 in [('S', True), ('W', False), (pd.Timedelta(10, unit='h'), True)]: for d, p in [(ddf, df), (ddf.A, df.A), (ddf.index, df.index)]: res = d.shift(2, freq=freq) assert_eq(res, p.shift(2, freq=freq)) assert res.known_divisions == divs2 # Index shifts also work with freq=None res = ddf.index.shift(2) assert_eq(res, df.index.shift(2)) assert res.known_divisions == divs1 # PeriodIndex for data_freq, divs in [('B', False), ('D', True), ('H', True)]: df = df.set_index(pd.period_range('2000-01-01', periods=30, freq=data_freq)) ddf = dd.from_pandas(df, npartitions=4) for d, p in [(ddf, df), (ddf.A, df.A)]: res = d.shift(2, freq=data_freq) assert_eq(res, p.shift(2, freq=data_freq)) assert res.known_divisions == divs # PeriodIndex.shift doesn't have `freq` parameter res = ddf.index.shift(2) assert_eq(res, df.index.shift(2)) assert res.known_divisions == divs with pytest.raises(ValueError): ddf.index.shift(2, freq='D') # freq keyword not supported # TimedeltaIndex for data_freq in ['T', 'D', 'H']: df = df.set_index(tm.makeTimedeltaIndex(30, freq=data_freq)) ddf = dd.from_pandas(df, npartitions=4) for freq in ['S', pd.Timedelta(10, unit='h')]: for d, p in [(ddf, df), (ddf.A, df.A), (ddf.index, df.index)]: res = d.shift(2, freq=freq) assert_eq(res, p.shift(2, freq=freq)) assert res.known_divisions # Index shifts also work with freq=None res = ddf.index.shift(2) assert_eq(res, df.index.shift(2)) assert res.known_divisions # Other index types error df = tm.makeDataFrame() ddf = dd.from_pandas(df, npartitions=4) pytest.raises(NotImplementedError, lambda: ddf.shift(2, freq='S')) pytest.raises(NotImplementedError, lambda: ddf.A.shift(2, freq='S')) pytest.raises(NotImplementedError, lambda: ddf.index.shift(2)) @pytest.mark.parametrize('method', ['first', 'last']) def test_first_and_last(method): f = lambda x, offset: getattr(x, method)(offset) freqs 
= ['12h', 'D'] offsets = ['0d', '100h', '20d', '20B', '3W', '3M', '400d', '13M'] for freq in freqs: index = pd.date_range('1/1/2000', '1/1/2001', freq=freq)[::4] df = pd.DataFrame(np.random.random((len(index), 4)), index=index, columns=['A', 'B', 'C', 'D']) ddf = dd.from_pandas(df, npartitions=10) for offset in offsets: assert_eq(f(ddf, offset), f(df, offset)) assert_eq(f(ddf.A, offset), f(df.A, offset)) @pytest.mark.parametrize('npartitions', [1, 4, 20]) @pytest.mark.parametrize('split_every', [2, 5]) @pytest.mark.parametrize('split_out', [None, 1, 5, 20]) def test_hash_split_unique(npartitions, split_every, split_out): from string import ascii_lowercase s = pd.Series(np.random.choice(list(ascii_lowercase), 1000, replace=True)) ds = dd.from_pandas(s, npartitions=npartitions) dropped = ds.unique(split_every=split_every, split_out=split_out) dsk = dropped.__dask_optimize__(dropped.dask, dropped.__dask_keys__()) from dask.core import get_deps dependencies, dependents = get_deps(dsk) assert len([k for k, v in dependencies.items() if not v]) == npartitions assert dropped.npartitions == (split_out or 1) assert sorted(dropped.compute(get=dask.get)) == sorted(s.unique()) @pytest.mark.parametrize('split_every', [None, 2]) def test_split_out_drop_duplicates(split_every): x = np.concatenate([np.arange(10)] * 100)[:, None] y = x.copy() z = np.concatenate([np.arange(20)] * 50)[:, None] rs = np.random.RandomState(1) rs.shuffle(x) rs.shuffle(y) rs.shuffle(z) df = pd.DataFrame(np.concatenate([x, y, z], axis=1), columns=['x', 'y', 'z']) ddf = dd.from_pandas(df, npartitions=20) for subset, keep in product([None, ['x', 'z']], ['first', 'last']): sol = df.drop_duplicates(subset=subset, keep=keep) res = ddf.drop_duplicates(subset=subset, keep=keep, split_every=split_every, split_out=10) assert res.npartitions == 10 assert_eq(sol, res) @pytest.mark.parametrize('split_every', [None, 2]) def test_split_out_value_counts(split_every): df = pd.DataFrame({'x': [1, 2, 3] * 100}) ddf = dd.from_pandas(df, npartitions=5) assert ddf.x.value_counts(split_out=10, split_every=split_every).npartitions == 10 assert_eq(ddf.x.value_counts(split_out=10, split_every=split_every), df.x.value_counts()) def test_values(): from dask.array.utils import assert_eq df = pd.DataFrame({'x': ['a', 'b', 'c', 'd'], 'y': [2, 3, 4, 5]}, index=pd.Index([1., 2., 3., 4.], name='ind')) ddf = dd.from_pandas(df, 2) assert_eq(df.values, ddf.values) assert_eq(df.x.values, ddf.x.values) assert_eq(df.y.values, ddf.y.values) assert_eq(df.index.values, ddf.index.values) def test_copy(): df = pd.DataFrame({'x': [1, 2, 3]}) a = dd.from_pandas(df, npartitions=2) b = a.copy() a['y'] = a.x * 2 assert_eq(b, df) df['y'] = df.x * 2 def test_del(): df = pd.DataFrame({'x': ['a', 'b', 'c', 'd'], 'y': [2, 3, 4, 5]}, index=pd.Index([1., 2., 3., 4.], name='ind')) a = dd.from_pandas(df, 2) b = a.copy() del a['x'] assert_eq(b, df) del df['x'] assert_eq(a, df) @pytest.mark.parametrize('index', [True, False]) @pytest.mark.parametrize('deep', [True, False]) def test_memory_usage(index, deep): df = pd.DataFrame({'x': [1, 2, 3], 'y': [1.0, 2.0, 3.0], 'z': ['a', 'b', 'c']}) ddf = dd.from_pandas(df, npartitions=2) assert_eq(df.memory_usage(index=index, deep=deep), ddf.memory_usage(index=index, deep=deep)) assert (df.x.memory_usage(index=index, deep=deep) == ddf.x.memory_usage(index=index, deep=deep).compute()) @pytest.mark.parametrize('reduction', ['sum', 'mean', 'std', 'var', 'count', 'min', 'max', 'idxmin', 'idxmax', 'prod', 'all', 'sem']) def 
test_dataframe_reductions_arithmetic(reduction): df = pd.DataFrame({'x': [1, 2, 3, 4, 5], 'y': [1.1, 2.2, 3.3, 4.4, 5.5]}) ddf = dd.from_pandas(df, npartitions=3) assert_eq(ddf - (getattr(ddf, reduction)() + 1), df - (getattr(df, reduction)() + 1)) def test_datetime_loc_open_slicing(): dtRange = pd.date_range('01.01.2015','05.05.2015') df = pd.DataFrame(np.random.random((len(dtRange), 2)), index=dtRange) ddf = dd.from_pandas(df, npartitions=5) assert_eq(df.loc[:'02.02.2015'], ddf.loc[:'02.02.2015']) assert_eq(df.loc['02.02.2015':], ddf.loc['02.02.2015':]) assert_eq(df[0].loc[:'02.02.2015'], ddf[0].loc[:'02.02.2015']) assert_eq(df[0].loc['02.02.2015':], ddf[0].loc['02.02.2015':]) def test_to_datetime(): df = pd.DataFrame({'year': [2015, 2016], 'month': [2, 3], 'day': [4, 5]}) ddf = dd.from_pandas(df, npartitions=2) assert_eq(pd.to_datetime(df), dd.to_datetime(ddf)) s = pd.Series(['3/11/2000', '3/12/2000', '3/13/2000'] * 100) ds = dd.from_pandas(s, npartitions=10) assert_eq(pd.to_datetime(s, infer_datetime_format=True), dd.to_datetime(ds, infer_datetime_format=True)) def test_to_timedelta(): s = pd.Series(range(10)) ds = dd.from_pandas(s, npartitions=2) assert_eq(pd.to_timedelta(s), dd.to_timedelta(ds)) assert_eq(pd.to_timedelta(s, unit='h'), dd.to_timedelta(ds, unit='h')) s = pd.Series([1, 2, 'this will error']) ds = dd.from_pandas(s, npartitions=2) assert_eq(pd.to_timedelta(s, errors='coerce'), dd.to_timedelta(ds, errors='coerce')) @pytest.mark.parametrize('drop', [0, 9]) def test_slice_on_filtered_boundary(drop): # https://github.com/dask/dask/issues/2211 x = np.arange(10) x[[5, 6]] -= 2 df = pd.DataFrame({"A": x, "B": np.arange(len(x))}) pdf = df.set_index("A").query("B != {}".format(drop)) ddf = dd.from_pandas(df, 1).set_index("A").query("B != {}".format(drop)) result = dd.concat([ddf, ddf.rename(columns={"B": "C"})], axis=1) expected = pd.concat([pdf, pdf.rename(columns={"B": "C"})], axis=1) assert_eq(result, expected) def test_boundary_slice_nonmonotonic(): x = np.array([-1, -2, 2, 4, 3]) df = pd.DataFrame({"B": range(len(x))}, index=x) result = methods.boundary_slice(df, 0, 4) expected = df.iloc[2:] tm.assert_frame_equal(result, expected) result = methods.boundary_slice(df, -1, 4) expected = df.drop(-2) tm.assert_frame_equal(result, expected) result = methods.boundary_slice(df, -2, 3) expected = df.drop(4) tm.assert_frame_equal(result, expected) result = methods.boundary_slice(df, -2, 3.5) expected = df.drop(4) tm.assert_frame_equal(result, expected) result = methods.boundary_slice(df, -2, 4) expected = df tm.assert_frame_equal(result, expected) @pytest.mark.parametrize('start, stop, right_boundary, left_boundary, drop', [ (-1, None, False, False, [-1, -2]), (-1, None, False, True, [-2]), (None, 3, False, False, [3, 4]), (None, 3, True, False, [4]), # Missing keys (-.5, None, False, False, [-1, -2]), (-.5, None, False, True, [-1, -2]), (-1.5, None, False, True, [-2]), (None, 3.5, False, False, [4]), (None, 3.5, True, False, [4]), (None, 2.5, False, False, [3, 4]), ]) def test_with_boundary(start, stop, right_boundary, left_boundary, drop): x = np.array([-1, -2, 2, 4, 3]) df = pd.DataFrame({"B": range(len(x))}, index=x) result = methods.boundary_slice(df, start, stop, right_boundary, left_boundary) expected = df.drop(drop) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize('index, left, right', [ (range(10), 0, 9), (range(10), -1, None), (range(10), None, 10), ([-1, 0, 2, 1], None, None), ([-1, 0, 2, 1], -1, None), ([-1, 0, 2, 1], None, 2), ([-1, 0, 2, 1], -2, 3), 
(pd.date_range("2017", periods=10), None, None), (pd.date_range("2017", periods=10), pd.Timestamp("2017"), None), (pd.date_range("2017", periods=10), None, pd.Timestamp("2017-01-10")), (pd.date_range("2017", periods=10), pd.Timestamp("2016"), None), (pd.date_range("2017", periods=10), None, pd.Timestamp("2018")), ]) def test_boundary_slice_same(index, left, right): df = pd.DataFrame({"A": range(len(index))}, index=index) result = methods.boundary_slice(df, left, right) tm.assert_frame_equal(result, df) def test_better_errors_object_reductions(): # GH2452 s = pd.Series(['a', 'b', 'c', 'd']) ds = dd.from_pandas(s, npartitions=2) with pytest.raises(ValueError) as err: ds.mean() assert str(err.value) == "`mean` not supported with object series" def test_sample_empty_partitions(): @dask.delayed def make_df(n): return pd.DataFrame(np.zeros((n, 4)), columns=list('abcd')) ddf = dd.from_delayed([make_df(0), make_df(100), make_df(0)]) ddf2 = ddf.sample(frac=0.2) # smoke test sample on empty partitions res = ddf2.compute() assert res.dtypes.equals(ddf2.dtypes) dask-0.16.0/dask/dataframe/tests/test_format.py000066400000000000000000000316551320364734500214520ustar00rootroot00000000000000# coding: utf-8 import pandas as pd from textwrap import dedent import dask.dataframe as dd from dask.dataframe.utils import PANDAS_VERSION if PANDAS_VERSION >= '0.21.0': style = """ """ elif PANDAS_VERSION >= '0.20.0': style = """ """ else: style = "" def test_repr(): df = pd.DataFrame({'x': list(range(100))}) ddf = dd.from_pandas(df, 3) for x in [ddf, ddf.index, ddf.x]: assert type(x).__name__ in repr(x) assert str(x.npartitions) in repr(x) def test_repr_meta_mutation(): # Check that the repr changes when meta changes df = pd.DataFrame({'a': range(5), 'b': ['a', 'b', 'c', 'd', 'e']}) ddf = dd.from_pandas(df, npartitions=2) s1 = repr(ddf) assert repr(ddf) == s1 ddf.b = ddf.b.astype('category') assert repr(ddf) != s1 def test_dataframe_format(): df = pd.DataFrame({'A': [1, 2, 3, 4, 5, 6, 7, 8], 'B': list('ABCDEFGH'), 'C': pd.Categorical(list('AAABBBCC'))}) ddf = dd.from_pandas(df, 3) exp = ("Dask DataFrame Structure:\n" " A B C\n" "npartitions=3 \n" "0 int64 object category[known]\n" "3 ... ... ...\n" "6 ... ... ...\n" "7 ... ... ...\n" "Dask Name: from_pandas, 3 tasks") assert repr(ddf) == exp assert str(ddf) == exp exp = (" A B C\n" "npartitions=3 \n" "0 int64 object category[known]\n" "3 ... ... ...\n" "6 ... ... ...\n" "7 ... ... ...") assert ddf.to_string() == exp exp_table = """
A B C
npartitions=3
0 int64 object category[known]
3 ... ... ...
6 ... ... ...
7 ... ... ...
""" exp = """
Dask DataFrame Structure:
{exp_table}
Dask Name: from_pandas, 3 tasks
""".format(exp_table=exp_table) assert ddf.to_html() == exp # table is boxed with div and has style exp = """
Dask DataFrame Structure:
{style}{exp_table}
Dask Name: from_pandas, 3 tasks
""".format(style=style, exp_table=exp_table) assert ddf._repr_html_() == exp def test_dataframe_format_with_index(): df = pd.DataFrame({'A': [1, 2, 3, 4, 5, 6, 7, 8], 'B': list('ABCDEFGH'), 'C': pd.Categorical(list('AAABBBCC'))}, index=list('ABCDEFGH')) ddf = dd.from_pandas(df, 3) exp = ("Dask DataFrame Structure:\n" " A B C\n" "npartitions=3 \n" "A int64 object category[known]\n" "D ... ... ...\n" "G ... ... ...\n" "H ... ... ...\n" "Dask Name: from_pandas, 3 tasks") assert repr(ddf) == exp assert str(ddf) == exp exp_table = """
A B C
npartitions=3
A int64 object category[known]
D ... ... ...
G ... ... ...
H ... ... ...
""" exp = """
Dask DataFrame Structure:
{exp_table}
Dask Name: from_pandas, 3 tasks
""".format(exp_table=exp_table) assert ddf.to_html() == exp # table is boxed with div and has style exp = """
Dask DataFrame Structure:
{style}{exp_table}
Dask Name: from_pandas, 3 tasks
""".format(style=style, exp_table=exp_table) assert ddf._repr_html_() == exp def test_dataframe_format_unknown_divisions(): df = pd.DataFrame({'A': [1, 2, 3, 4, 5, 6, 7, 8], 'B': list('ABCDEFGH'), 'C': pd.Categorical(list('AAABBBCC'))}) ddf = dd.from_pandas(df, 3) ddf = ddf.clear_divisions() assert not ddf.known_divisions exp = ("Dask DataFrame Structure:\n" " A B C\n" "npartitions=3 \n" " int64 object category[known]\n" " ... ... ...\n" " ... ... ...\n" " ... ... ...\n" "Dask Name: from_pandas, 3 tasks") assert repr(ddf) == exp assert str(ddf) == exp exp = (" A B C\n" "npartitions=3 \n" " int64 object category[known]\n" " ... ... ...\n" " ... ... ...\n" " ... ... ...") assert ddf.to_string() == exp exp_table = """
A B C
npartitions=3
int64 object category[known]
... ... ...
... ... ...
... ... ...
""" exp = """
Dask DataFrame Structure:
{exp_table}
Dask Name: from_pandas, 3 tasks
""".format(exp_table=exp_table) assert ddf.to_html() == exp # table is boxed with div and has style exp = """
Dask DataFrame Structure:
{style}{exp_table}
Dask Name: from_pandas, 3 tasks
""".format(style=style, exp_table=exp_table) assert ddf._repr_html_() == exp def test_dataframe_format_long(): df = pd.DataFrame({'A': [1, 2, 3, 4, 5, 6, 7, 8] * 10, 'B': list('ABCDEFGH') * 10, 'C': pd.Categorical(list('AAABBBCC') * 10)}) ddf = dd.from_pandas(df, 10) exp = ('Dask DataFrame Structure:\n' ' A B C\n' 'npartitions=10 \n' '0 int64 object category[known]\n' '8 ... ... ...\n' '... ... ... ...\n' '72 ... ... ...\n' '79 ... ... ...\n' 'Dask Name: from_pandas, 10 tasks') assert repr(ddf) == exp assert str(ddf) == exp exp = (" A B C\n" "npartitions=10 \n" "0 int64 object category[known]\n" "8 ... ... ...\n" "... ... ... ...\n" "72 ... ... ...\n" "79 ... ... ...") assert ddf.to_string() == exp exp_table = """
A B C
npartitions=10
0 int64 object category[known]
8 ... ... ...
... ... ... ...
72 ... ... ...
79 ... ... ...
""" exp = """
Dask DataFrame Structure:
{exp_table}
Dask Name: from_pandas, 10 tasks
""".format(exp_table=exp_table) assert ddf.to_html() == exp # table is boxed with div exp = u"""
Dask DataFrame Structure:
{style}{exp_table}
Dask Name: from_pandas, 10 tasks
""".format(style=style, exp_table=exp_table) assert ddf._repr_html_() == exp def test_series_format(): s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=list('ABCDEFGH')) ds = dd.from_pandas(s, 3) exp = """Dask Series Structure: npartitions=3 A int64 D ... G ... H ... dtype: int64 Dask Name: from_pandas, 3 tasks""" assert repr(ds) == exp assert str(ds) == exp exp = """npartitions=3 A int64 D ... G ... H ...""" assert ds.to_string() == exp s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=list('ABCDEFGH'), name='XXX') ds = dd.from_pandas(s, 3) exp = """Dask Series Structure: npartitions=3 A int64 D ... G ... H ... Name: XXX, dtype: int64 Dask Name: from_pandas, 3 tasks""" assert repr(ds) == exp assert str(ds) == exp def test_series_format_long(): s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10] * 10, index=list('ABCDEFGHIJ') * 10) ds = dd.from_pandas(s, 10) exp = ("Dask Series Structure:\nnpartitions=10\nA int64\nB ...\n" " ... \nJ ...\nJ ...\ndtype: int64\n" "Dask Name: from_pandas, 10 tasks") assert repr(ds) == exp assert str(ds) == exp exp = "npartitions=10\nA int64\nB ...\n ... \nJ ...\nJ ..." assert ds.to_string() == exp def test_index_format(): s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=list('ABCDEFGH')) ds = dd.from_pandas(s, 3) exp = """Dask Index Structure: npartitions=3 A object D ... G ... H ... dtype: object Dask Name: from_pandas, 6 tasks""" assert repr(ds.index) == exp assert str(ds.index) == exp s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=pd.CategoricalIndex([1, 2, 3, 4, 5, 6, 7, 8], name='YYY')) ds = dd.from_pandas(s, 3) exp = dedent("""\ Dask Index Structure: npartitions=3 1 category[known] 4 ... 7 ... 8 ... Name: YYY, dtype: category Dask Name: from_pandas, 6 tasks""") assert repr(ds.index) == exp assert str(ds.index) == exp def test_categorical_format(): s = pd.Series(['a', 'b', 'c']).astype('category') known = dd.from_pandas(s, npartitions=1) unknown = known.cat.as_unknown() exp = ("Dask Series Structure:\n" "npartitions=1\n" "0 category[known]\n" "2 ...\n" "dtype: category\n" "Dask Name: from_pandas, 1 tasks") assert repr(known) == exp exp = ("Dask Series Structure:\n" "npartitions=1\n" "0 category[unknown]\n" "2 ...\n" "dtype: category\n" "Dask Name: from_pandas, 1 tasks") assert repr(unknown) == exp dask-0.16.0/dask/dataframe/tests/test_groupby.py000066400000000000000000001435041320364734500216460ustar00rootroot00000000000000import collections import numpy as np import pandas as pd import pandas.util.testing as tm import pytest import dask import dask.dataframe as dd from dask.dataframe.utils import assert_eq, assert_dask_graph, assert_max_deps, PANDAS_VERSION def groupby_internal_repr(): pdf = pd.DataFrame({'x': [1, 2, 3, 4, 6, 7, 8, 9, 10], 'y': list('abcbabbcda')}) ddf = dd.from_pandas(pdf, 3) gp = pdf.groupby('y') dp = ddf.groupby('y') assert isinstance(dp, dd.groupby.DataFrameGroupBy) assert isinstance(dp._meta, pd.core.groupby.DataFrameGroupBy) assert isinstance(dp.obj, dd.DataFrame) assert_eq(dp.obj, gp.obj) gp = pdf.groupby('y')['x'] dp = ddf.groupby('y')['x'] assert isinstance(dp, dd.groupby.SeriesGroupBy) assert isinstance(dp._meta, pd.core.groupby.SeriesGroupBy) # slicing should not affect to internal assert isinstance(dp.obj, dd.Series) assert_eq(dp.obj, gp.obj) gp = pdf.groupby('y')[['x']] dp = ddf.groupby('y')[['x']] assert isinstance(dp, dd.groupby.DataFrameGroupBy) assert isinstance(dp._meta, pd.core.groupby.DataFrameGroupBy) # slicing should not affect to internal assert isinstance(dp.obj, dd.DataFrame) assert_eq(dp.obj, gp.obj) gp = pdf.groupby(pdf.y)['x'] 
    dp = ddf.groupby(ddf.y)['x']
    assert isinstance(dp, dd.groupby.SeriesGroupBy)
    assert isinstance(dp._meta, pd.core.groupby.SeriesGroupBy)
    # slicing should not affect the internal representation
    assert isinstance(dp.obj, dd.Series)
    assert_eq(dp.obj, gp.obj)

    gp = pdf.groupby(pdf.y)[['x']]
    dp = ddf.groupby(ddf.y)[['x']]
    assert isinstance(dp, dd.groupby.DataFrameGroupBy)
    assert isinstance(dp._meta, pd.core.groupby.DataFrameGroupBy)
    # slicing should not affect the internal representation
    assert isinstance(dp.obj, dd.DataFrame)
    assert_eq(dp.obj, gp.obj)


def groupby_error():
    pdf = pd.DataFrame({'x': [1, 2, 3, 4, 6, 7, 8, 9, 10],
                        'y': list('abcbabbcda')})
    ddf = dd.from_pandas(pdf, 3)

    with pytest.raises(KeyError):
        ddf.groupby('A')
    with pytest.raises(KeyError):
        ddf.groupby(['x', 'A'])

    dp = ddf.groupby('y')

    msg = 'Column not found: '
    with pytest.raises(KeyError) as err:
        dp['A']
    assert msg in str(err.value)

    with pytest.raises(KeyError) as err:
        dp[['x', 'A']]
    assert msg in str(err.value)


def groupby_internal_head():
    pdf = pd.DataFrame({'A': [1, 2] * 10,
                        'B': np.random.randn(20),
                        'C': np.random.randn(20)})
    ddf = dd.from_pandas(pdf, 3)

    assert_eq(ddf.groupby('A')._head().sum(),
              pdf.head().groupby('A').sum())

    assert_eq(ddf.groupby(ddf['A'])._head().sum(),
              pdf.head().groupby(pdf['A']).sum())

    assert_eq(ddf.groupby(ddf['A'] + 1)._head().sum(),
              pdf.head().groupby(pdf['A'] + 1).sum())


def test_full_groupby():
    df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7, 8, 9],
                       'b': [4, 5, 6, 3, 2, 1, 0, 0, 0]},
                      index=[0, 1, 3, 5, 6, 8, 9, 9, 9])
    ddf = dd.from_pandas(df, npartitions=3)

    pytest.raises(Exception, lambda: df.groupby('does_not_exist'))
    pytest.raises(Exception, lambda: df.groupby('a').does_not_exist)
    assert 'b' in dir(df.groupby('a'))

    def func(df):
        df['b'] = df.b - df.b.mean()
        return df

    assert_eq(df.groupby('a').apply(func),
              ddf.groupby('a').apply(func, meta={"a": int, "b": float}))


@pytest.mark.parametrize('grouper', [
    lambda df: ['a'],
    lambda df: ['a', 'b'],
    lambda df: df['a'],
    lambda df: [df['a'], df['b']],
    pytest.mark.xfail(reason="not yet supported")(
        lambda df: [df['a'] > 2, df['b'] > 1])
])
def test_full_groupby_multilevel(grouper):
    df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7, 8, 9],
                       'd': [1, 2, 3, 4, 5, 6, 7, 8, 9],
                       'b': [4, 5, 6, 3, 2, 1, 0, 0, 0]},
                      index=[0, 1, 3, 5, 6, 8, 9, 9, 9])
    ddf = dd.from_pandas(df, npartitions=3)

    def func(df):
        df['b'] = df.b - df.b.mean()
        return df

    # last one causes a DeprecationWarning from pandas.
# See https://github.com/pandas-dev/pandas/issues/16481 assert_eq(df.groupby(grouper(df)).apply(func), ddf.groupby(grouper(ddf)).apply(func, meta={"a": int, "d": int, "b": float})) def test_groupby_dir(): df = pd.DataFrame({'a': range(10), 'b c d e': range(10)}) ddf = dd.from_pandas(df, npartitions=2) g = ddf.groupby('a') assert 'a' in dir(g) assert 'b c d e' not in dir(g) @pytest.mark.parametrize('get', [dask.get, dask.threaded.get]) def test_groupby_on_index(get): pdf = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7, 8, 9], 'b': [4, 5, 6, 3, 2, 1, 0, 0, 0]}, index=[0, 1, 3, 5, 6, 8, 9, 9, 9]) ddf = dd.from_pandas(pdf, npartitions=3) ddf2 = ddf.set_index('a') pdf2 = pdf.set_index('a') assert_eq(ddf.groupby('a').b.mean(), ddf2.groupby(ddf2.index).b.mean()) def func(df): return df.assign(b=df.b - df.b.mean()) with dask.set_options(get=get): with pytest.warns(None): assert_eq(ddf.groupby('a').apply(func), pdf.groupby('a').apply(func)) assert_eq(ddf.groupby('a').apply(func).set_index('a'), pdf.groupby('a').apply(func).set_index('a')) assert_eq(pdf2.groupby(pdf2.index).apply(func), ddf2.groupby(ddf2.index).apply(func)) def test_groupby_multilevel_getitem(): df = pd.DataFrame({'a': [1, 2, 3, 1, 2, 3], 'b': [1, 2, 1, 4, 2, 1], 'c': [1, 3, 2, 1, 1, 2], 'd': [1, 2, 1, 1, 2, 2]}) ddf = dd.from_pandas(df, 2) cases = [(ddf.groupby('a')['b'], df.groupby('a')['b']), (ddf.groupby(['a', 'b']), df.groupby(['a', 'b'])), (ddf.groupby(['a', 'b'])['c'], df.groupby(['a', 'b'])['c']), (ddf.groupby(ddf['a'])[['b', 'c']], df.groupby(df['a'])[['b', 'c']]), (ddf.groupby('a')[['b', 'c']], df.groupby('a')[['b', 'c']]), (ddf.groupby('a')[['b']], df.groupby('a')[['b']]), (ddf.groupby(['a', 'b', 'c']), df.groupby(['a', 'b', 'c']))] for d, p in cases: assert isinstance(d, dd.groupby._GroupBy) assert isinstance(p, pd.core.groupby.GroupBy) assert_eq(d.sum(), p.sum()) assert_eq(d.min(), p.min()) assert_eq(d.max(), p.max()) assert_eq(d.count(), p.count()) assert_eq(d.mean(), p.mean().astype(float)) def test_groupby_multilevel_agg(): df = pd.DataFrame({'a': [1, 2, 3, 1, 2, 3], 'b': [1, 2, 1, 4, 2, 1], 'c': [1, 3, 2, 1, 1, 2], 'd': [1, 2, 1, 1, 2, 2]}) ddf = dd.from_pandas(df, 2) sol = df.groupby(['a']).mean() res = ddf.groupby(['a']).mean() assert_eq(res, sol) sol = df.groupby(['a', 'c']).mean() res = ddf.groupby(['a', 'c']).mean() assert_eq(res, sol) sol = df.groupby([df['a'], df['c']]).mean() res = ddf.groupby([ddf['a'], ddf['c']]).mean() assert_eq(res, sol) def test_groupby_get_group(): dsk = {('x', 0): pd.DataFrame({'a': [1, 2, 6], 'b': [4, 2, 7]}, index=[0, 1, 3]), ('x', 1): pd.DataFrame({'a': [4, 2, 6], 'b': [3, 3, 1]}, index=[5, 6, 8]), ('x', 2): pd.DataFrame({'a': [4, 3, 7], 'b': [1, 1, 3]}, index=[9, 9, 9])} meta = dsk[('x', 0)] d = dd.DataFrame(dsk, 'x', meta, [0, 4, 9, 9]) full = d.compute() for ddkey, pdkey in [('b', 'b'), (d.b, full.b), (d.b + 1, full.b + 1)]: ddgrouped = d.groupby(ddkey) pdgrouped = full.groupby(pdkey) # DataFrame assert_eq(ddgrouped.get_group(2), pdgrouped.get_group(2)) assert_eq(ddgrouped.get_group(3), pdgrouped.get_group(3)) # Series assert_eq(ddgrouped.a.get_group(3), pdgrouped.a.get_group(3)) assert_eq(ddgrouped.a.get_group(2), pdgrouped.a.get_group(2)) def test_dataframe_groupby_nunique(): strings = list('aaabbccccdddeee') data = np.random.randn(len(strings)) ps = pd.DataFrame(dict(strings=strings, data=data)) s = dd.from_pandas(ps, npartitions=3) expected = ps.groupby('strings')['data'].nunique() assert_eq(s.groupby('strings')['data'].nunique(), expected) def 
test_dataframe_groupby_nunique_across_group_same_value(): strings = list('aaabbccccdddeee') data = list(map(int, '123111223323412')) ps = pd.DataFrame(dict(strings=strings, data=data)) s = dd.from_pandas(ps, npartitions=3) expected = ps.groupby('strings')['data'].nunique() assert_eq(s.groupby('strings')['data'].nunique(), expected) def test_series_groupby_propagates_names(): df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) ddf = dd.from_pandas(df, 2) func = lambda df: df['y'].sum() with pytest.warns(UserWarning): # meta inference result = ddf.groupby('x').apply(func) expected = df.groupby('x').apply(func) assert_eq(result, expected) def test_series_groupby(): s = pd.Series([1, 2, 2, 1, 1]) pd_group = s.groupby(s) ss = dd.from_pandas(s, npartitions=2) dask_group = ss.groupby(ss) pd_group2 = s.groupby(s + 1) dask_group2 = ss.groupby(ss + 1) for dg, pdg in [(dask_group, pd_group), (pd_group2, dask_group2)]: assert_eq(dg.count(), pdg.count()) assert_eq(dg.sum(), pdg.sum()) assert_eq(dg.min(), pdg.min()) assert_eq(dg.max(), pdg.max()) assert_eq(dg.size(), pdg.size()) def test_series_groupby_errors(): s = pd.Series([1, 2, 2, 1, 1]) ss = dd.from_pandas(s, npartitions=2) msg = "No group keys passed!" with pytest.raises(ValueError) as err: s.groupby([]) # pandas assert msg in str(err.value) with pytest.raises(ValueError) as err: ss.groupby([]) # dask should raise the same error assert msg in str(err.value) sss = dd.from_pandas(s, npartitions=3) pytest.raises(NotImplementedError, lambda: ss.groupby(sss)) with pytest.raises(KeyError): s.groupby('x') # pandas with pytest.raises(KeyError): ss.groupby('x') # dask should raise the same error def test_groupby_index_array(): df = tm.makeTimeDataFrame() ddf = dd.from_pandas(df, npartitions=2) # first select column, then group assert_eq(df.A.groupby(df.index.month).nunique(), ddf.A.groupby(ddf.index.month).nunique(), check_names=False) # first group, then select column assert_eq(df.groupby(df.index.month).A.nunique(), ddf.groupby(ddf.index.month).A.nunique(), check_names=False) def test_groupby_set_index(): df = tm.makeTimeDataFrame() ddf = dd.from_pandas(df, npartitions=2) pytest.raises(TypeError, lambda: ddf.groupby(df.index.month, as_index=False)) def test_split_apply_combine_on_series(): pdf = pd.DataFrame({'a': [1, 2, 6, 4, 4, 6, 4, 3, 7], 'b': [4, 2, 7, 3, 3, 1, 1, 1, 2]}, index=[0, 1, 3, 5, 6, 8, 9, 9, 9]) ddf = dd.from_pandas(pdf, npartitions=3) for ddkey, pdkey in [('b', 'b'), (ddf.b, pdf.b), (ddf.b + 1, pdf.b + 1)]: assert_eq(ddf.groupby(ddkey).a.min(), pdf.groupby(pdkey).a.min()) assert_eq(ddf.groupby(ddkey).a.max(), pdf.groupby(pdkey).a.max()) assert_eq(ddf.groupby(ddkey).a.count(), pdf.groupby(pdkey).a.count()) assert_eq(ddf.groupby(ddkey).a.mean(), pdf.groupby(pdkey).a.mean()) assert_eq(ddf.groupby(ddkey).a.nunique(), pdf.groupby(pdkey).a.nunique()) assert_eq(ddf.groupby(ddkey).a.size(), pdf.groupby(pdkey).a.size()) for ddof in [0, 1, 2]: assert_eq(ddf.groupby(ddkey).a.var(ddof), pdf.groupby(pdkey).a.var(ddof)) assert_eq(ddf.groupby(ddkey).a.std(ddof), pdf.groupby(pdkey).a.std(ddof)) assert_eq(ddf.groupby(ddkey).sum(), pdf.groupby(pdkey).sum()) assert_eq(ddf.groupby(ddkey).min(), pdf.groupby(pdkey).min()) assert_eq(ddf.groupby(ddkey).max(), pdf.groupby(pdkey).max()) assert_eq(ddf.groupby(ddkey).count(), pdf.groupby(pdkey).count()) assert_eq(ddf.groupby(ddkey).mean(), pdf.groupby(pdkey).mean()) assert_eq(ddf.groupby(ddkey).size(), pdf.groupby(pdkey).size()) for ddof in [0, 1, 2]: assert_eq(ddf.groupby(ddkey).var(ddof), 
pdf.groupby(pdkey).var(ddof), check_dtype=False) assert_eq(ddf.groupby(ddkey).std(ddof), pdf.groupby(pdkey).std(ddof), check_dtype=False) for ddkey, pdkey in [(ddf.b, pdf.b), (ddf.b + 1, pdf.b + 1)]: assert_eq(ddf.a.groupby(ddkey).sum(), pdf.a.groupby(pdkey).sum(), check_names=False) assert_eq(ddf.a.groupby(ddkey).max(), pdf.a.groupby(pdkey).max(), check_names=False) assert_eq(ddf.a.groupby(ddkey).count(), pdf.a.groupby(pdkey).count(), check_names=False) assert_eq(ddf.a.groupby(ddkey).mean(), pdf.a.groupby(pdkey).mean(), check_names=False) assert_eq(ddf.a.groupby(ddkey).nunique(), pdf.a.groupby(pdkey).nunique(), check_names=False) for ddof in [0, 1, 2]: assert_eq(ddf.a.groupby(ddkey).var(ddof), pdf.a.groupby(pdkey).var(ddof)) assert_eq(ddf.a.groupby(ddkey).std(ddof), pdf.a.groupby(pdkey).std(ddof)) for i in [0, 4, 7]: assert_eq(ddf.groupby(ddf.b > i).a.sum(), pdf.groupby(pdf.b > i).a.sum()) assert_eq(ddf.groupby(ddf.b > i).a.min(), pdf.groupby(pdf.b > i).a.min()) assert_eq(ddf.groupby(ddf.b > i).a.max(), pdf.groupby(pdf.b > i).a.max()) assert_eq(ddf.groupby(ddf.b > i).a.count(), pdf.groupby(pdf.b > i).a.count()) assert_eq(ddf.groupby(ddf.b > i).a.mean(), pdf.groupby(pdf.b > i).a.mean()) assert_eq(ddf.groupby(ddf.b > i).a.nunique(), pdf.groupby(pdf.b > i).a.nunique()) assert_eq(ddf.groupby(ddf.b > i).a.size(), pdf.groupby(pdf.b > i).a.size()) assert_eq(ddf.groupby(ddf.a > i).b.sum(), pdf.groupby(pdf.a > i).b.sum()) assert_eq(ddf.groupby(ddf.a > i).b.min(), pdf.groupby(pdf.a > i).b.min()) assert_eq(ddf.groupby(ddf.a > i).b.max(), pdf.groupby(pdf.a > i).b.max()) assert_eq(ddf.groupby(ddf.a > i).b.count(), pdf.groupby(pdf.a > i).b.count()) assert_eq(ddf.groupby(ddf.a > i).b.mean(), pdf.groupby(pdf.a > i).b.mean()) assert_eq(ddf.groupby(ddf.a > i).b.nunique(), pdf.groupby(pdf.a > i).b.nunique()) assert_eq(ddf.groupby(ddf.b > i).b.size(), pdf.groupby(pdf.b > i).b.size()) assert_eq(ddf.groupby(ddf.b > i).sum(), pdf.groupby(pdf.b > i).sum()) assert_eq(ddf.groupby(ddf.b > i).min(), pdf.groupby(pdf.b > i).min()) assert_eq(ddf.groupby(ddf.b > i).max(), pdf.groupby(pdf.b > i).max()) assert_eq(ddf.groupby(ddf.b > i).count(), pdf.groupby(pdf.b > i).count()) assert_eq(ddf.groupby(ddf.b > i).mean(), pdf.groupby(pdf.b > i).mean()) assert_eq(ddf.groupby(ddf.b > i).size(), pdf.groupby(pdf.b > i).size()) assert_eq(ddf.groupby(ddf.a > i).sum(), pdf.groupby(pdf.a > i).sum()) assert_eq(ddf.groupby(ddf.a > i).min(), pdf.groupby(pdf.a > i).min()) assert_eq(ddf.groupby(ddf.a > i).max(), pdf.groupby(pdf.a > i).max()) assert_eq(ddf.groupby(ddf.a > i).count(), pdf.groupby(pdf.a > i).count()) assert_eq(ddf.groupby(ddf.a > i).mean(), pdf.groupby(pdf.a > i).mean()) assert_eq(ddf.groupby(ddf.a > i).size(), pdf.groupby(pdf.a > i).size()) for ddof in [0, 1, 2]: assert_eq(ddf.groupby(ddf.b > i).std(ddof), pdf.groupby(pdf.b > i).std(ddof)) for ddkey, pdkey in [('a', 'a'), (ddf.a, pdf.a), (ddf.a + 1, pdf.a + 1), (ddf.a > 3, pdf.a > 3)]: assert_eq(ddf.groupby(ddkey).b.sum(), pdf.groupby(pdkey).b.sum()) assert_eq(ddf.groupby(ddkey).b.min(), pdf.groupby(pdkey).b.min()) assert_eq(ddf.groupby(ddkey).b.max(), pdf.groupby(pdkey).b.max()) assert_eq(ddf.groupby(ddkey).b.count(), pdf.groupby(pdkey).b.count()) assert_eq(ddf.groupby(ddkey).b.mean(), pdf.groupby(pdkey).b.mean()) assert_eq(ddf.groupby(ddkey).b.nunique(), pdf.groupby(pdkey).b.nunique()) assert_eq(ddf.groupby(ddkey).b.size(), pdf.groupby(pdkey).b.size()) assert_eq(ddf.groupby(ddkey).sum(), pdf.groupby(pdkey).sum()) assert_eq(ddf.groupby(ddkey).min(), pdf.groupby(pdkey).min()) 
assert_eq(ddf.groupby(ddkey).max(), pdf.groupby(pdkey).max()) assert_eq(ddf.groupby(ddkey).count(), pdf.groupby(pdkey).count()) assert_eq(ddf.groupby(ddkey).mean(), pdf.groupby(pdkey).mean().astype(float)) assert_eq(ddf.groupby(ddkey).size(), pdf.groupby(pdkey).size()) for ddof in [0, 1, 2]: assert_eq(ddf.groupby(ddkey).b.std(ddof), pdf.groupby(pdkey).b.std(ddof)) assert (sorted(ddf.groupby('b').a.sum().dask) == sorted(ddf.groupby('b').a.sum().dask)) assert (sorted(ddf.groupby(ddf.a > 3).b.mean().dask) == sorted(ddf.groupby(ddf.a > 3).b.mean().dask)) # test raises with incorrect key pytest.raises(KeyError, lambda: ddf.groupby('x')) pytest.raises(KeyError, lambda: ddf.groupby(['a', 'x'])) pytest.raises(KeyError, lambda: ddf.groupby('a')['x']) pytest.raises(KeyError, lambda: ddf.groupby('a')['b', 'x']) pytest.raises(KeyError, lambda: ddf.groupby('a')[['b', 'x']]) # test graph node labels assert_dask_graph(ddf.groupby('b').a.sum(), 'series-groupby-sum') assert_dask_graph(ddf.groupby('b').a.min(), 'series-groupby-min') assert_dask_graph(ddf.groupby('b').a.max(), 'series-groupby-max') assert_dask_graph(ddf.groupby('b').a.count(), 'series-groupby-count') assert_dask_graph(ddf.groupby('b').a.var(), 'series-groupby-var') # mean consists from sum and count operations assert_dask_graph(ddf.groupby('b').a.mean(), 'series-groupby-sum') assert_dask_graph(ddf.groupby('b').a.mean(), 'series-groupby-count') assert_dask_graph(ddf.groupby('b').a.nunique(), 'series-groupby-nunique') assert_dask_graph(ddf.groupby('b').a.size(), 'series-groupby-size') assert_dask_graph(ddf.groupby('b').sum(), 'dataframe-groupby-sum') assert_dask_graph(ddf.groupby('b').min(), 'dataframe-groupby-min') assert_dask_graph(ddf.groupby('b').max(), 'dataframe-groupby-max') assert_dask_graph(ddf.groupby('b').count(), 'dataframe-groupby-count') # mean consists from sum and count operations assert_dask_graph(ddf.groupby('b').mean(), 'dataframe-groupby-sum') assert_dask_graph(ddf.groupby('b').mean(), 'dataframe-groupby-count') assert_dask_graph(ddf.groupby('b').size(), 'dataframe-groupby-size') @pytest.mark.parametrize('keyword', ['split_every', 'split_out']) def test_groupby_reduction_split(keyword): pdf = pd.DataFrame({'a': [1, 2, 6, 4, 4, 6, 4, 3, 7] * 100, 'b': [4, 2, 7, 3, 3, 1, 1, 1, 2] * 100}) ddf = dd.from_pandas(pdf, npartitions=15) def call(g, m, **kwargs): return getattr(g, m)(**kwargs) # DataFrame for m in ['sum', 'min', 'max', 'count', 'mean', 'size', 'var', 'std']: res = call(ddf.groupby('b'), m, **{keyword: 2}) sol = call(pdf.groupby('b'), m) assert_eq(res, sol) assert call(ddf.groupby('b'), m)._name != res._name res = call(ddf.groupby('b'), 'var', ddof=2, **{keyword: 2}) sol = call(pdf.groupby('b'), 'var', ddof=2) assert_eq(res, sol) assert call(ddf.groupby('b'), 'var', ddof=2)._name != res._name # Series, post select for m in ['sum', 'min', 'max', 'count', 'mean', 'nunique', 'size', 'var', 'std']: res = call(ddf.groupby('b').a, m, **{keyword: 2}) sol = call(pdf.groupby('b').a, m) assert_eq(res, sol) assert call(ddf.groupby('b').a, m)._name != res._name res = call(ddf.groupby('b').a, 'var', ddof=2, **{keyword: 2}) sol = call(pdf.groupby('b').a, 'var', ddof=2) assert_eq(res, sol) assert call(ddf.groupby('b').a, 'var', ddof=2)._name != res._name # Series, pre select for m in ['sum', 'min', 'max', 'count', 'mean', 'nunique', 'size', 'var', 'std']: res = call(ddf.a.groupby(ddf.b), m, **{keyword: 2}) sol = call(pdf.a.groupby(pdf.b), m) # There's a bug in pandas 0.18.0 with `pdf.a.groupby(pdf.b).count()` # not forwarding the 
series name. Skip name checks here for now. assert_eq(res, sol, check_names=False) assert call(ddf.a.groupby(ddf.b), m)._name != res._name res = call(ddf.a.groupby(ddf.b), 'var', ddof=2, **{keyword: 2}) sol = call(pdf.a.groupby(pdf.b), 'var', ddof=2) assert_eq(res, sol) assert call(ddf.a.groupby(ddf.b), 'var', ddof=2)._name != res._name def test_apply_shuffle(): pdf = pd.DataFrame({'A': [1, 2, 3, 4] * 5, 'B': np.random.randn(20), 'C': np.random.randn(20), 'D': np.random.randn(20)}) ddf = dd.from_pandas(pdf, 3) with pytest.warns(UserWarning): # meta inference assert_eq(ddf.groupby('A').apply(lambda x: x.sum()), pdf.groupby('A').apply(lambda x: x.sum())) assert_eq(ddf.groupby(ddf['A']).apply(lambda x: x.sum()), pdf.groupby(pdf['A']).apply(lambda x: x.sum())) assert_eq(ddf.groupby(ddf['A'] + 1).apply(lambda x: x.sum()), pdf.groupby(pdf['A'] + 1).apply(lambda x: x.sum())) # SeriesGroupBy assert_eq(ddf.groupby('A')['B'].apply(lambda x: x.sum()), pdf.groupby('A')['B'].apply(lambda x: x.sum())) assert_eq(ddf.groupby(ddf['A'])['B'].apply(lambda x: x.sum()), pdf.groupby(pdf['A'])['B'].apply(lambda x: x.sum())) assert_eq(ddf.groupby(ddf['A'] + 1)['B'].apply(lambda x: x.sum()), pdf.groupby(pdf['A'] + 1)['B'].apply(lambda x: x.sum())) # DataFrameGroupBy with column slice assert_eq(ddf.groupby('A')[['B', 'C']].apply(lambda x: x.sum()), pdf.groupby('A')[['B', 'C']].apply(lambda x: x.sum())) assert_eq(ddf.groupby(ddf['A'])[['B', 'C']].apply(lambda x: x.sum()), pdf.groupby(pdf['A'])[['B', 'C']].apply(lambda x: x.sum())) assert_eq(ddf.groupby(ddf['A'] + 1)[['B', 'C']].apply(lambda x: x.sum()), pdf.groupby(pdf['A'] + 1)[['B', 'C']].apply(lambda x: x.sum())) @pytest.mark.parametrize('grouper', [ lambda df: 'AA', lambda df: ['AA', 'AB'], lambda df: df['AA'], lambda df: [df['AA'], df['AB']], lambda df: df['AA'] + 1, pytest.mark.xfail("NotImplemented")(lambda df: [df['AA'] + 1, df['AB'] + 1]), ]) def test_apply_shuffle_multilevel(grouper): pdf = pd.DataFrame({'AB': [1, 2, 3, 4] * 5, 'AA': [1, 2, 3, 4] * 5, 'B': np.random.randn(20), 'C': np.random.randn(20), 'D': np.random.randn(20)}) ddf = dd.from_pandas(pdf, 3) with pytest.warns(UserWarning): # DataFrameGroupBy assert_eq(ddf.groupby(grouper(ddf)).apply(lambda x: x.sum()), pdf.groupby(grouper(pdf)).apply(lambda x: x.sum())) # SeriesGroupBy assert_eq(ddf.groupby(grouper(ddf))['B'].apply(lambda x: x.sum()), pdf.groupby(grouper(pdf))['B'].apply(lambda x: x.sum())) # DataFrameGroupBy with column slice assert_eq(ddf.groupby(grouper(ddf))[['B', 'C']].apply(lambda x: x.sum()), pdf.groupby(grouper(pdf))[['B', 'C']].apply(lambda x: x.sum())) def test_numeric_column_names(): # df.groupby(0)[df.columns] fails if all columns are numbers (pandas bug) # This ensures that we cast all column iterables to list beforehand. 
df = pd.DataFrame({0: [0, 1, 0, 1], 1: [1, 2, 3, 4], 2: [0, 1, 0, 1],}) ddf = dd.from_pandas(df, npartitions=2) assert_eq(ddf.groupby(0).sum(), df.groupby(0).sum()) assert_eq(ddf.groupby([0, 2]).sum(), df.groupby([0, 2]).sum()) assert_eq(ddf.groupby(0).apply(lambda x: x, meta={0: int, 1: int, 2: int}), df.groupby(0).apply(lambda x: x)) def test_groupby_apply_tasks(): df = pd.util.testing.makeTimeDataFrame() df['A'] = df.A // 0.1 df['B'] = df.B // 0.1 ddf = dd.from_pandas(df, npartitions=10) with dask.set_options(shuffle='tasks'): for ind in [lambda x: 'A', lambda x: x.A]: a = df.groupby(ind(df)).apply(len) with pytest.warns(UserWarning): b = ddf.groupby(ind(ddf)).apply(len) assert_eq(a, b.compute()) assert not any('partd' in k[0] for k in b.dask) a = df.groupby(ind(df)).B.apply(len) with pytest.warns(UserWarning): b = ddf.groupby(ind(ddf)).B.apply(len) assert_eq(a, b.compute()) assert not any('partd' in k[0] for k in b.dask) def test_groupby_multiprocessing(): from dask.multiprocessing import get df = pd.DataFrame({'A': [1, 2, 3, 4, 5], 'B': ['1','1','a','a','a']}) ddf = dd.from_pandas(df, npartitions=3) with dask.set_options(get=get): assert_eq(ddf.groupby('B').apply(lambda x: x, meta={"A": int, "B": object}), df.groupby('B').apply(lambda x: x)) def test_groupby_normalize_index(): full = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7, 8, 9], 'b': [4, 5, 6, 3, 2, 1, 0, 0, 0]}, index=[0, 1, 3, 5, 6, 8, 9, 9, 9]) d = dd.from_pandas(full, npartitions=3) assert d.groupby('a').index == 'a' assert d.groupby(d['a']).index == 'a' assert d.groupby(d['a'] > 2).index._name == (d['a'] > 2)._name assert d.groupby(['a', 'b']).index == ['a', 'b'] assert d.groupby([d['a'], d['b']]).index == ['a', 'b'] assert d.groupby([d['a'], 'b']).index == ['a', 'b'] @pytest.mark.parametrize('spec', [ {'b': {'c': 'mean'}, 'c': {'a': 'max', 'b': 'min'}}, {'b': 'mean', 'c': ['min', 'max']}, {'b': np.sum, 'c': ['min', np.max, np.std, np.var]}, ['sum', 'mean', 'min', 'max', 'count', 'size', 'std', 'var'], 'var', ]) @pytest.mark.parametrize('split_every', [False, None]) @pytest.mark.parametrize('grouper', [ lambda df: 'a', lambda df: ['a', 'd'], lambda df: [df['a'], df['d']], lambda df: df['a'], lambda df: df['a'] > 2, ]) def test_aggregate__examples(spec, split_every, grouper): pdf = pd.DataFrame({'a': [1, 2, 3, 1, 1, 2, 4, 3, 7] * 10, 'b': [4, 2, 7, 3, 3, 1, 1, 1, 2] * 10, 'c': [0, 1, 2, 3, 4, 5, 6, 7, 8] * 10, 'd': [3, 2, 1, 3, 2, 1, 2, 6, 4] * 10}, columns=['c', 'b', 'a', 'd']) ddf = dd.from_pandas(pdf, npartitions=10) # Warning from pandas deprecation .agg(dict[dict]) # it's from pandas, so no reason to assert the deprecation warning, # but we should still test it for now with pytest.warns(None): assert_eq(pdf.groupby(grouper(pdf)).agg(spec), ddf.groupby(grouper(ddf)).agg(spec, split_every=split_every)) @pytest.mark.parametrize('spec', [ {'b': 'sum', 'c': 'min', 'd': 'max'}, ['sum'], ['sum', 'mean', 'min', 'max', 'count', 'size', 'std', 'var'], 'sum', 'size', ]) @pytest.mark.parametrize('split_every', [False, None]) @pytest.mark.parametrize('grouper', [ lambda df: [df['a'], df['d']], lambda df: df['a'], lambda df: df['a'] > 2, ]) def test_series_aggregate__examples(spec, split_every, grouper): pdf = pd.DataFrame({'a': [1, 2, 3, 1, 1, 2, 4, 3, 7] * 10, 'b': [4, 2, 7, 3, 3, 1, 1, 1, 2] * 10, 'c': [0, 1, 2, 3, 4, 5, 6, 7, 8] * 10, 'd': [3, 2, 1, 3, 2, 1, 2, 6, 4] * 10}, columns=['c', 'b', 'a', 'd']) ps = pdf['c'] ddf = dd.from_pandas(pdf, npartitions=10) ds = ddf['c'] # Warning from pandas deprecation .agg(dict[dict]) # it's from 
pandas, so no reason to assert the deprecation warning, # but we should still test it for now with pytest.warns(None): assert_eq(ps.groupby(grouper(pdf)).agg(spec), ds.groupby(grouper(ddf)).agg(spec, split_every=split_every), # pandas < 0.20.0 does not propagate the name for size check_names=(spec != 'size')) @pytest.mark.parametrize('spec', [ 'sum', 'min', 'max', 'count', 'size', 'std', 'var', 'mean', ]) def test_aggregate__single_element_groups(spec): pdf = pd.DataFrame({'a': [1, 1, 3, 3], 'b': [4, 4, 16, 16], 'c': [1, 1, 4, 4], 'd': [1, 1, 3, 3]}, columns=['c', 'b', 'a', 'd']) ddf = dd.from_pandas(pdf, npartitions=3) expected = pdf.groupby(['a', 'd']).agg(spec) # NOTE: for std the result is not recast ot the original dtype if spec in {'mean', 'var'}: expected = expected.astype(float) assert_eq(expected, ddf.groupby(['a', 'd']).agg(spec)) def test_aggregate_build_agg_args__reuse_of_intermediates(): """Aggregate reuses intermediates. For example, with sum, count, and mean the sums and counts are only calculated once accross the graph and reused to compute the mean. """ from dask.dataframe.groupby import _build_agg_args no_mean_spec = [ ('foo', 'sum', 'input'), ('bar', 'count', 'input'), ] with_mean_spec = [ ('foo', 'sum', 'input'), ('bar', 'count', 'input'), ('baz', 'mean', 'input'), ] no_mean_chunks, no_mean_aggs, no_mean_finalizers = _build_agg_args(no_mean_spec) with_mean_chunks, with_mean_aggs, with_mean_finalizers = _build_agg_args(with_mean_spec) assert len(no_mean_chunks) == len(with_mean_chunks) assert len(no_mean_aggs) == len(with_mean_aggs) assert len(no_mean_finalizers) == len(no_mean_spec) assert len(with_mean_finalizers) == len(with_mean_spec) def test_aggregate__dask(): dask_holder = collections.namedtuple('dask_holder', ['dask']) get_agg_dask = lambda obj: dask_holder({ k: v for (k, v) in obj.dask.items() if k[0].startswith('aggregate') }) specs = [ {'b': {'c': 'mean'}, 'c': {'a': 'max', 'b': 'min'}}, {'b': 'mean', 'c': ['min', 'max']}, ['sum', 'mean', 'min', 'max', 'count', 'size', 'std', 'var'], 'sum', 'mean', 'min', 'max', 'count', 'std', 'var', # NOTE: the 'size' spec is special since it bypasses aggregate # 'size' ] pdf = pd.DataFrame({'a': [1, 2, 3, 1, 1, 2, 4, 3, 7] * 100, 'b': [4, 2, 7, 3, 3, 1, 1, 1, 2] * 100, 'c': [0, 1, 2, 3, 4, 5, 6, 7, 8] * 100, 'd': [3, 2, 1, 3, 2, 1, 2, 6, 4] * 100}, columns=['c', 'b', 'a', 'd']) ddf = dd.from_pandas(pdf, npartitions=100) for spec in specs: result1 = ddf.groupby(['a', 'b']).agg(spec, split_every=2) result2 = ddf.groupby(['a', 'b']).agg(spec, split_every=2) agg_dask1 = get_agg_dask(result1) agg_dask2 = get_agg_dask(result2) # check that the number of partitions used is fixed by split_every assert_max_deps(agg_dask1, 2) assert_max_deps(agg_dask2, 2) # check for deterministic key names and values assert agg_dask1 == agg_dask2 # the length of the dask does not depend on the passed spec for other_spec in specs: other = ddf.groupby(['a', 'b']).agg(other_spec, split_every=2) assert len(other.dask) == len(result1.dask) assert len(other.dask) == len(result2.dask) @pytest.mark.parametrize('agg_func', [ 'sum', 'var', 'mean', 'count', 'size', 'std', 'nunique', 'min', 'max' ]) @pytest.mark.parametrize('grouper', [ lambda df: ['a'], lambda df: ['a', 'b'], lambda df: df['a'], lambda df: [df['a'], df['b']], lambda df: [df['a'] > 2, df['b'] > 1] ]) def test_dataframe_aggregations_multilevel(grouper, agg_func): def call(g, m, **kwargs): return getattr(g, m)(**kwargs) pdf = pd.DataFrame({'a': [1, 2, 6, 4, 4, 6, 4, 3, 7] * 10, 'b': [4, 2, 7, 3, 
3, 1, 1, 1, 2] * 10, 'd': [0, 1, 2, 3, 4, 5, 6, 7, 8] * 10, 'c': [0, 1, 2, 3, 4, 5, 6, 7, 8] * 10}, columns=['c', 'b', 'a', 'd']) ddf = dd.from_pandas(pdf, npartitions=10) assert_eq(call(pdf.groupby(grouper(pdf))['c'], agg_func), call(ddf.groupby(grouper(ddf))['c'], agg_func, split_every=2)) # not supported by pandas if agg_func != 'nunique': assert_eq(call(pdf.groupby(grouper(pdf))[['c', 'd']], agg_func), call(ddf.groupby(grouper(ddf))[['c', 'd']], agg_func, split_every=2)) assert_eq(call(pdf.groupby(grouper(pdf)), agg_func), call(ddf.groupby(grouper(ddf)), agg_func, split_every=2)) @pytest.mark.parametrize('agg_func', [ 'sum', 'var', 'mean', 'count', 'size', 'std', 'min', 'max', 'nunique', ]) @pytest.mark.parametrize('grouper', [ lambda df: df['a'], lambda df: [df['a'], df['b']], lambda df: [df['a'] > 2, df['b'] > 1] ]) def test_series_aggregations_multilevel(grouper, agg_func): """ similar to ``test_dataframe_aggregations_multilevel``, but series do not support all groupby args. """ def call(g, m, **kwargs): return getattr(g, m)(**kwargs) pdf = pd.DataFrame({'a': [1, 2, 6, 4, 4, 6, 4, 3, 7] * 10, 'b': [4, 2, 7, 3, 3, 1, 1, 1, 2] * 10, 'c': [0, 1, 2, 3, 4, 5, 6, 7, 8] * 10}, columns=['c', 'b', 'a']) ddf = dd.from_pandas(pdf, npartitions=10) assert_eq(call(pdf['c'].groupby(grouper(pdf)), agg_func), call(ddf['c'].groupby(grouper(ddf)), agg_func, split_every=2), # for pandas ~ 0.18, the name is not not properly propagated for # the mean aggregation check_names=(agg_func not in {'mean', 'nunique'})) @pytest.mark.parametrize('grouper', [ lambda df: df['a'], lambda df: df['a'] > 2, lambda df: [df['a'], df['b']], lambda df: [df['a'] > 2], pytest.mark.xfail(reason="index dtype does not coincide: boolean != empty")(lambda df: [df['a'] > 2, df['b'] > 1]) ]) @pytest.mark.parametrize('group_and_slice', [ lambda df, grouper: df.groupby(grouper(df)), lambda df, grouper: df['c'].groupby(grouper(df)), lambda df, grouper: df.groupby(grouper(df))['c'], ]) def test_groupby_meta_content(group_and_slice, grouper): pdf = pd.DataFrame({'a': [1, 2, 6, 4, 4, 6, 4, 3, 7] * 10, 'b': [4, 2, 7, 3, 3, 1, 1, 1, 2] * 10, 'c': [0, 1, 2, 3, 4, 5, 6, 7, 8] * 10}, columns=['c', 'b', 'a']) ddf = dd.from_pandas(pdf, npartitions=10) expected = group_and_slice(pdf, grouper).first().head(0) meta = group_and_slice(ddf, grouper)._meta.first() meta_nonempty = group_and_slice(ddf, grouper)._meta_nonempty.first().head(0) assert_eq(expected, meta) assert_eq(expected, meta_nonempty) def test_groupy_non_aligned_index(): pdf = pd.DataFrame({'a': [1, 2, 6, 4, 4, 6, 4, 3, 7] * 10, 'b': [4, 2, 7, 3, 3, 1, 1, 1, 2] * 10, 'c': [0, 1, 2, 3, 4, 5, 6, 7, 8] * 10}, columns=['c', 'b', 'a']) ddf3 = dd.from_pandas(pdf, npartitions=3) ddf7 = dd.from_pandas(pdf, npartitions=7) # working examples ddf3.groupby(['a', 'b']) ddf3.groupby([ddf3['a'], ddf3['b']]) # misaligned divisions with pytest.raises(NotImplementedError): ddf3.groupby(ddf7['a']) with pytest.raises(NotImplementedError): ddf3.groupby([ddf7['a'], ddf7['b']]) with pytest.raises(NotImplementedError): ddf3.groupby([ddf7['a'], ddf3['b']]) with pytest.raises(NotImplementedError): ddf3.groupby([ddf3['a'], ddf7['b']]) with pytest.raises(NotImplementedError): ddf3.groupby([ddf7['a'], 'b']) def test_groupy_series_wrong_grouper(): df = pd.DataFrame({'a': [1, 2, 6, 4, 4, 6, 4, 3, 7] * 10, 'b': [4, 2, 7, 3, 3, 1, 1, 1, 2] * 10, 'c': [0, 1, 2, 3, 4, 5, 6, 7, 8] * 10}, columns=['c', 'b', 'a']) df = dd.from_pandas(df, npartitions=3) s = df['a'] # working index values s.groupby(s) s.groupby([s, s]) # non 
working index values with pytest.raises(KeyError): s.groupby('foo') with pytest.raises(KeyError): s.groupby([s, 'foo']) with pytest.raises(ValueError): s.groupby(df) with pytest.raises(ValueError): s.groupby([s, df]) @pytest.mark.parametrize('npartitions', [1, 4, 20]) @pytest.mark.parametrize('split_every', [2, 5]) @pytest.mark.parametrize('split_out', [None, 1, 5, 20]) def test_hash_groupby_aggregate(npartitions, split_every, split_out): df = pd.DataFrame({'x': np.arange(100) % 10, 'y': np.ones(100)}) ddf = dd.from_pandas(df, npartitions) result = ddf.groupby('x').y.var(split_every=split_every, split_out=split_out) dsk = result.__dask_optimize__(result.dask, result.__dask_keys__()) from dask.core import get_deps dependencies, dependents = get_deps(dsk) assert result.npartitions == (split_out or 1) assert len([k for k, v in dependencies.items() if not v]) == npartitions assert_eq(result, df.groupby('x').y.var()) def test_split_out_multi_column_groupby(): df = pd.DataFrame({'x': np.arange(100) % 10, 'y': np.ones(100), 'z': [1, 2, 3, 4, 5] * 20}) ddf = dd.from_pandas(df, npartitions=10) result = ddf.groupby(['x', 'y']).z.mean(split_out=4) expected = df.groupby(['x', 'y']).z.mean() assert_eq(result, expected, check_dtype=False) def test_groupby_split_out_num(): # GH 1841 ddf = dd.from_pandas(pd.DataFrame({'A': [1, 1, 2, 2], 'B': [1, 2, 3, 4]}), npartitions=2) assert ddf.groupby('A').sum().npartitions == 1 assert ddf.groupby('A').sum(split_out=2).npartitions == 2 assert ddf.groupby('A').sum(split_out=3).npartitions == 3 with pytest.raises(TypeError): # groupby doesn't adcept split_out ddf.groupby('A', split_out=2) def test_groupby_not_supported(): ddf = dd.from_pandas(pd.DataFrame({'A': [1, 1, 2, 2], 'B': [1, 2, 3, 4]}), npartitions=2) with pytest.raises(TypeError): ddf.groupby('A', axis=1) with pytest.raises(TypeError): ddf.groupby('A', level=1) with pytest.raises(TypeError): ddf.groupby('A', as_index=False) with pytest.raises(TypeError): ddf.groupby('A', sort=False) with pytest.raises(TypeError): ddf.groupby('A', group_keys=False) with pytest.raises(TypeError): ddf.groupby('A', squeeze=True) def test_groupby_numeric_column(): df = pd.DataFrame({'A' : ['foo', 'foo', 'bar'], 0: [1,2,3]}) ddf = dd.from_pandas(df, npartitions=3) assert_eq(ddf.groupby(ddf.A)[0].sum(), df.groupby(df.A)[0].sum()) @pytest.mark.parametrize('sel', ['c', 'd', ['c', 'd']]) @pytest.mark.parametrize('key', ['a', ['a', 'b']]) @pytest.mark.parametrize('func', ['cumsum', 'cumprod', 'cumcount']) def test_cumulative(func, key, sel): df = pd.DataFrame({'a': [1, 2, 6, 4, 4, 6, 4, 3, 7] * 6, 'b': [4, 2, 7, 3, 3, 1, 1, 1, 2] * 6, 'c': np.random.randn(54), 'd': np.random.randn(54)}, columns=['a', 'b', 'c', 'd']) df.iloc[[-18, -12, -6], -1] = np.nan ddf = dd.from_pandas(df, npartitions=10) g, dg = [d.groupby(key)[sel] for d in (df, ddf)] assert_eq(getattr(g, func)(), getattr(dg, func)()) @pytest.mark.parametrize('func', ['cumsum', 'cumprod']) def test_cumulative_axis1(func): df = pd.DataFrame({'a': [1, 2, 6, 4, 4, 6, 4, 3, 7] * 2, 'b': np.random.randn(18), 'c': np.random.randn(18)}) df.iloc[-6, -1] = np.nan ddf = dd.from_pandas(df, npartitions=4) assert_eq(getattr(df.groupby('a'), func)(axis=1), getattr(ddf.groupby('a'), func)(axis=1)) def test_groupby_unaligned_index(): df = pd.DataFrame({'a': np.random.randint(0, 10, 50), 'b': np.random.randn(50), 'c': np.random.randn(50)}) ddf = dd.from_pandas(df, npartitions=5) filtered = df[df.b < 0.5] dfiltered = ddf[ddf.b < 0.5] ddf_group = dfiltered.groupby(ddf.a) ds_group = 
dfiltered.b.groupby(ddf.a) bad = [ddf_group.mean(), ddf_group.var(), ddf_group.b.nunique(), ddf_group.get_group(0), ds_group.mean(), ds_group.var(), ds_group.nunique(), ds_group.get_group(0)] for obj in bad: with pytest.raises(ValueError): obj.compute() def add1(x): return x + 1 df_group = filtered.groupby(df.a) good = [(ddf_group.apply(add1, meta=ddf), df_group.apply(add1)), (ddf_group.b.apply(add1, meta=ddf.b), df_group.b.apply(add1))] for (res, sol) in good: assert_eq(res, sol) def test_groupby_slice_agg_reduces(): d = pd.DataFrame({"a": [1, 2, 3, 4], "b": [2, 3, 4, 5]}) a = dd.from_pandas(d, npartitions=2) result = a.groupby("a")["b"].agg(['min', 'max']) expected = d.groupby("a")['b'].agg(['min', 'max']) assert_eq(result, expected) def test_groupby_agg_grouper_single(): # https://github.com/dask/dask/issues/2255 d = pd.DataFrame({'a': [1, 2, 3, 4]}) a = dd.from_pandas(d, npartitions=2) result = a.groupby('a')['a'].agg(['min', 'max']) expected = d.groupby('a')['a'].agg(['min', 'max']) assert_eq(result, expected) @pytest.mark.parametrize('slice_', [ 'a', ['a'], ['a', 'b'], ['b'], ]) def test_groupby_agg_grouper_multiple(slice_): # https://github.com/dask/dask/issues/2255 d = pd.DataFrame({'a': [1, 2, 3, 4], 'b': [1, 2, 3, 4]}) a = dd.from_pandas(d, npartitions=2) result = a.groupby('a')[slice_].agg(['min', 'max']) expected = d.groupby('a')[slice_].agg(['min', 'max']) assert_eq(result, expected) @pytest.mark.skipif(PANDAS_VERSION < '0.21.0', reason="Need pandas groupby bug fix " "(pandas-dev/pandas#16859)") @pytest.mark.parametrize('agg_func', [ 'cumprod', 'cumcount', 'cumsum', 'var', 'sum', 'mean', 'count', 'size', 'std', 'min', 'max', ]) def test_groupby_column_and_index_agg_funcs(agg_func): def call(g, m, **kwargs): return getattr(g, m)(**kwargs) df = pd.DataFrame({'idx': [1, 1, 1, 2, 2, 2], 'a': [1, 2, 1, 2, 1, 2], 'b': np.arange(6), 'c': [1, 1, 1, 2, 2, 2]} ).set_index('idx') ddf = dd.from_pandas(df, npartitions=df.index.nunique()) ddf_no_divs = dd.from_pandas(df, npartitions=df.index.nunique(), sort=False) # Index and then column # Compute expected result expected = call(df.groupby(['idx', 'a']), agg_func) if agg_func in {'mean', 'var'}: expected = expected.astype(float) result = call(ddf.groupby(['idx', 'a']), agg_func) assert_eq(expected, result) result = call(ddf_no_divs.groupby(['idx', 'a']), agg_func) assert_eq(expected, result) # Test aggregate strings if agg_func in {'sum', 'mean', 'var', 'size', 'std', 'count'}: result = ddf_no_divs.groupby(['idx', 'a']).agg(agg_func) assert_eq(expected, result) # Column and then index # Compute expected result expected = call(df.groupby(['a', 'idx']), agg_func) if agg_func in {'mean', 'var'}: expected = expected.astype(float) result = call(ddf.groupby(['a', 'idx']), agg_func) assert_eq(expected, result) result = call(ddf_no_divs.groupby(['a', 'idx']), agg_func) assert_eq(expected, result) # Test aggregate strings if agg_func in {'sum', 'mean', 'var', 'size', 'std', 'count'}: result = ddf_no_divs.groupby(['a', 'idx']).agg(agg_func) assert_eq(expected, result) # Index only # Compute expected result expected = call(df.groupby('idx'), agg_func) if agg_func in {'mean', 'var'}: expected = expected.astype(float) result = call(ddf.groupby('idx'), agg_func) assert_eq(expected, result) result = call(ddf_no_divs.groupby('idx'), agg_func) assert_eq(expected, result) # Test aggregate strings if agg_func in {'sum', 'mean', 'var', 'size', 'std', 'count'}: result = ddf_no_divs.groupby('idx').agg(agg_func) assert_eq(expected, result) 
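# Illustrative sketch, not part of the original test suite: the tests above
# exercise grouping by a mix of the index name ('idx') and ordinary columns.
# The helper below is a minimal usage example under the same assumption as
# those tests (pandas >= 0.21 for mixed column/index grouping); it is
# deliberately not prefixed with ``test_`` so pytest does not collect it.
def _example_group_by_column_and_index():
    pdf = pd.DataFrame({'idx': [1, 1, 2, 2],
                        'a': [1, 2, 1, 2],
                        'b': [10.0, 20.0, 30.0, 40.0]}).set_index('idx')
    ddf = dd.from_pandas(pdf, npartitions=2)

    # Group by the index name together with a regular column, then reduce.
    result = ddf.groupby(['idx', 'a']).sum()
    expected = pdf.groupby(['idx', 'a']).sum()
    assert_eq(result, expected)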
@pytest.mark.skipif(PANDAS_VERSION < '0.21.0', reason="Need 0.21.0 for mixed column/index grouping") @pytest.mark.parametrize( 'group_args', [['idx', 'a'], ['a', 'idx'], ['idx'], 'idx']) @pytest.mark.parametrize( 'apply_func', [np.min, np.mean, lambda s: np.max(s) - np.mean(s)]) def test_groupby_column_and_index_apply(group_args, apply_func): df = pd.DataFrame({'idx': [1, 1, 1, 2, 2, 2], 'a': [1, 2, 1, 2, 1, 2], 'b': np.arange(6)} ).set_index('idx') ddf = dd.from_pandas(df, npartitions=df.index.nunique()) ddf_no_divs = dd.from_pandas(df, npartitions=df.index.nunique(), sort=False) # Expected result expected = df.groupby(group_args).apply(apply_func) # Compute on dask DataFrame with divisions (no shuffling) result = ddf.groupby(group_args).apply(apply_func) assert_eq(expected, result, check_divisions=False) # Check that partitioning is preserved assert ddf.divisions == result.divisions # Check that no shuffling occurred. # The groupby operation should add only 1 task per partition assert len(result.dask) == (len(ddf.dask) + ddf.npartitions) # Compute on dask DataFrame without divisions (requires shuffling) result = ddf_no_divs.groupby(group_args).apply(apply_func) assert_eq(expected, result, check_divisions=False) # Check that divisions were preserved (all None in this case) assert ddf_no_divs.divisions == result.divisions # Crude check to see if shuffling was performed. # The groupby operation should add only more than 1 task per partition assert len(result.dask) > (len(ddf_no_divs.dask) + ddf_no_divs.npartitions) custom_mean = dd.Aggregation( 'mean', lambda s: (s.count(), s.sum()), lambda s0, s1: (s0.sum(), s1.sum()), lambda s0, s1: s1 / s0, ) custom_sum = dd.Aggregation('sum', lambda s: s.sum(), lambda s0: s0.sum()) @pytest.mark.parametrize('pandas_spec, dask_spec, check_dtype', [ ({'b': 'mean'}, {'b': custom_mean}, False), ({'b': 'sum'}, {'b': custom_sum}, True), (['mean', 'sum'], [custom_mean, custom_sum], False), ({'b': ['mean', 'sum']}, {'b': [custom_mean, custom_sum]}, False), ]) def test_dataframe_groupby_agg_custom_sum(pandas_spec, dask_spec, check_dtype): df = pd.DataFrame({'g': [0, 0, 1] * 3, 'b': [1, 2, 3] * 3}) ddf = dd.from_pandas(df, npartitions=2) expected = df.groupby('g').aggregate(pandas_spec) result = ddf.groupby('g').aggregate(dask_spec) assert_eq(result, expected, check_dtype=check_dtype) @pytest.mark.parametrize('pandas_spec, dask_spec', [ ('mean', custom_mean), (['mean'], [custom_mean]), (['mean', 'sum'], [custom_mean, custom_sum]), ]) def test_series_groupby_agg_custom_mean(pandas_spec, dask_spec): d = pd.DataFrame({'g': [0, 0, 1] * 3, 'b': [1, 2, 3] * 3}) a = dd.from_pandas(d, npartitions=2) expected = d['b'].groupby(d['g']).aggregate(pandas_spec) result = a['b'].groupby(a['g']).aggregate(dask_spec) assert_eq(result, expected, check_dtype=False) def test_groupby_agg_custom__name_clash_with_internal_same_column(): """for a single input column only unique names are allowed""" d = pd.DataFrame({'g': [0, 0, 1] * 3, 'b': [1, 2, 3] * 3}) a = dd.from_pandas(d, npartitions=2) agg_func = dd.Aggregation('sum', lambda s: s.sum(), lambda s0: s0.sum()) with pytest.raises(ValueError): a.groupby('g').aggregate({'b': [agg_func, 'sum']}) def test_groupby_agg_custom__name_clash_with_internal_different_column(): """custom aggregation functions can share the name of a builtin function""" d = pd.DataFrame({'g': [0, 0, 1] * 3, 'b': [1, 2, 3] * 3, 'c': [4, 5, 6] * 3}) a = dd.from_pandas(d, npartitions=2) # NOTE: this function is purposefully misnamed agg_func = dd.Aggregation( 'sum', 
lambda s: (s.count(), s.sum()), lambda s0, s1: (s0.sum(), s1.sum()), lambda s0, s1: s1 / s0, ) # NOTE: the name of agg-func is suppressed in the output, # since only a single agg func per column was specified result = a.groupby('g').aggregate({'b': agg_func, 'c': 'sum'}) expected = d.groupby('g').aggregate({'b': 'mean', 'c': 'sum'}) assert_eq(result, expected, check_dtype=False) def test_groupby_agg_custom__mode(): # mode function passing intermediates as pure python objects around. to protect # results from pandas in apply use return results as single-item lists def agg_mode(s): def impl(s): res, = s.iloc[0] for i, in s.iloc[1:]: res = res.add(i, fill_value=0) return [res] return s.apply(impl) agg_func = dd.Aggregation( 'custom_mode', lambda s: s.apply(lambda s: [s.value_counts()]), agg_mode, lambda s: s.map(lambda i: i[0].argmax()), ) d = pd.DataFrame({ 'g0': [0, 0, 0, 1, 1] * 3, 'g1': [0, 0, 0, 1, 1] * 3, 'cc': [4, 5, 4, 6, 6] * 3, }) a = dd.from_pandas(d, npartitions=5) actual = a['cc'].groupby([a['g0'], a['g1']]).agg(agg_func) # cheat to get the correct index expected = pd.DataFrame({'g0': [0, 1], 'g1': [0, 1], 'cc': [4, 6]}) expected = expected['cc'].groupby([expected['g0'], expected['g1']]).agg('sum') assert_eq(actual, expected) dask-0.16.0/dask/dataframe/tests/test_hashing.py000066400000000000000000000037441320364734500216010ustar00rootroot00000000000000import numpy as np import pandas as pd import pandas.util.testing as tm import pytest from dask.dataframe.hashing import hash_pandas_object from dask.dataframe.utils import assert_eq @pytest.mark.parametrize('obj', [ pd.Series([1, 2, 3]), pd.Series([1.0, 1.5, 3.2]), pd.Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]), pd.Series(['a', 'b', 'c']), pd.Series([True, False, True]), pd.Index([1, 2, 3]), pd.Index([True, False, True]), pd.DataFrame({'x': ['a', 'b', 'c'], 'y': [1, 2, 3]}), pd.util.testing.makeMissingDataframe(), pd.util.testing.makeMixedDataFrame(), pd.util.testing.makeTimeDataFrame(), pd.util.testing.makeTimeSeries(), pd.util.testing.makeTimedeltaIndex()]) def test_hash_pandas_object(obj): a = hash_pandas_object(obj) b = hash_pandas_object(obj) if isinstance(a, np.ndarray): np.testing.assert_equal(a, b) else: assert_eq(a, b) def test_categorical_consistency(): # Check that categoricals hash consistent with their values, not codes # This should work for categoricals of any dtype for s1 in [pd.Series(['a', 'b', 'c', 'd']), pd.Series([1000, 2000, 3000, 4000]), pd.Series(pd.date_range(0, periods=4))]: s2 = s1.astype('category').cat.set_categories(s1) s3 = s2.cat.set_categories(list(reversed(s1))) for categorize in [True, False]: # These should all hash identically h1 = hash_pandas_object(s1, categorize=categorize) h2 = hash_pandas_object(s2, categorize=categorize) h3 = hash_pandas_object(s3, categorize=categorize) tm.assert_series_equal(h1, h2) tm.assert_series_equal(h1, h3) def test_object_missing_values(): # Check that the presence of missing values doesn't change how object dtype # is hashed. 
s = pd.Series(['a', 'b', 'c', None]) h1 = hash_pandas_object(s).iloc[:3] h2 = hash_pandas_object(s.iloc[:3]) tm.assert_series_equal(h1, h2) dask-0.16.0/dask/dataframe/tests/test_hyperloglog.py000066400000000000000000000046461320364734500225150ustar00rootroot00000000000000 import dask import dask.dataframe as dd import pandas as pd import numpy as np import pytest rs = np.random.RandomState(96) @pytest.mark.parametrize("df", [ pd.DataFrame({ 'x': [1, 2, 3] * 3, 'y': [1.2, 3.4, 5.6] * 3, 'z': -np.arange(9, dtype=np.int8)}), pd.DataFrame({ 'x': rs.randint(0, 1000000, (10000,)), 'y': rs.randn(10000), 'z': rs.uniform(0, 9999999, (10000,))}), pd.DataFrame({ 'x': np.repeat(rs.randint(0, 1000000, (1000,)), 3), 'y': np.repeat(rs.randn(1000), 3), 'z': np.repeat(rs.uniform(0, 9999999, (1000,)), 3)}), pd.DataFrame({ 'x': rs.randint(0, 1000000, (10000,))}), pd.DataFrame({ 'x': rs.randint(0, 1000000, (7,)), 'y': ['a', 'bet', 'is', 'a', 'tax', 'on', 'bs']}), pd.DataFrame({ 'w': np.zeros((20000,)), 'x': np.zeros((20000,)), 'y': np.zeros((20000,)) + 4803592, 'z': np.zeros((20000,))}), pd.DataFrame({'x': [1, 2, 3] * 1000}), pd.DataFrame({'x': np.random.random(1000)}), pd.DataFrame({ 'a': [1, 2, 3] * 3, 'b': [1.2, 3.4, 5.6] * 3, 'c': [1 + 2j, 3 + 4j, 5 + 6j] * 3, 'd': -np.arange(9, dtype=np.int8)}), pd.Series([1, 2, 3] * 1000), pd.Series(np.random.random(1000)), pd.Series(np.random.random(1000), index=np.ones(1000)), pd.Series(np.random.random(1000), index=np.random.random(1000)), ]) @pytest.mark.parametrize('npartitions', [2, 20]) def test_basic(df, npartitions): ddf = dd.from_pandas(df, npartitions=npartitions) approx = ddf.nunique_approx().compute(get=dask.local.get_sync) exact = len(df.drop_duplicates()) assert abs(approx - exact) <= 2 or abs(approx - exact) / exact < 0.05 @pytest.mark.parametrize('split_every', [None, 2, 10]) @pytest.mark.parametrize('npartitions', [2, 20]) def test_split_every(split_every, npartitions): df = pd.Series([1, 2, 3] * 1000) ddf = dd.from_pandas(df, npartitions=npartitions) approx = ddf.nunique_approx(split_every=split_every).compute(get=dask.local.get_sync) exact = len(df.drop_duplicates()) assert abs(approx - exact) <= 2 or abs(approx - exact) / exact < 0.05 def test_larger_data(): df = dd.demo.make_timeseries('2000-01-01', '2000-04-01', {'value': float, 'id': int}, freq='10s', partition_freq='1D', seed=1) assert df.nunique_approx().compute() > 1000 dask-0.16.0/dask/dataframe/tests/test_indexing.py000066400000000000000000000322571320364734500217660ustar00rootroot00000000000000import pandas as pd import pandas.util.testing as tm import numpy as np import pytest import dask import dask.dataframe as dd from dask.dataframe.indexing import _coerce_loc_index from dask.dataframe.utils import assert_eq, make_meta dsk = {('x', 0): pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=[0, 1, 3]), ('x', 1): pd.DataFrame({'a': [4, 5, 6], 'b': [3, 2, 1]}, index=[5, 6, 8]), ('x', 2): pd.DataFrame({'a': [7, 8, 9], 'b': [0, 0, 0]}, index=[9, 9, 9])} meta = make_meta({'a': 'i8', 'b': 'i8'}, index=pd.Index([], 'i8')) d = dd.DataFrame(dsk, 'x', meta, [0, 5, 9, 9]) full = d.compute() def test_loc(): assert d.loc[3:8].divisions[0] == 3 assert d.loc[3:8].divisions[-1] == 8 assert d.loc[5].divisions == (5, 5) assert_eq(d.loc[5], full.loc[5:5]) assert_eq(d.loc[3:8], full.loc[3:8]) assert_eq(d.loc[:8], full.loc[:8]) assert_eq(d.loc[3:], full.loc[3:]) assert_eq(d.loc[[5]], full.loc[[5]]) assert_eq(d.loc[[3, 4, 1, 8]], full.loc[[3, 4, 1, 8]]) assert_eq(d.loc[[3, 4, 1, 9]], full.loc[[3, 4, 1, 9]]) 
assert_eq(d.loc[np.array([3, 4, 1, 9])], full.loc[np.array([3, 4, 1, 9])]) assert_eq(d.a.loc[5], full.a.loc[5:5]) assert_eq(d.a.loc[3:8], full.a.loc[3:8]) assert_eq(d.a.loc[:8], full.a.loc[:8]) assert_eq(d.a.loc[3:], full.a.loc[3:]) assert_eq(d.a.loc[[5]], full.a.loc[[5]]) assert_eq(d.a.loc[[3, 4, 1, 8]], full.a.loc[[3, 4, 1, 8]]) assert_eq(d.a.loc[[3, 4, 1, 9]], full.a.loc[[3, 4, 1, 9]]) assert_eq(d.a.loc[np.array([3, 4, 1, 9])], full.a.loc[np.array([3, 4, 1, 9])]) assert_eq(d.a.loc[[]], full.a.loc[[]]) assert_eq(d.a.loc[np.array([])], full.a.loc[np.array([])]) pytest.raises(KeyError, lambda: d.loc[1000]) assert_eq(d.loc[1000:], full.loc[1000:]) assert_eq(d.loc[-2000:-1000], full.loc[-2000:-1000]) assert sorted(d.loc[5].dask) == sorted(d.loc[5].dask) assert sorted(d.loc[5].dask) != sorted(d.loc[6].dask) def test_loc_non_informative_index(): df = pd.DataFrame({'x': [1, 2, 3, 4]}, index=[10, 20, 30, 40]) ddf = dd.from_pandas(df, npartitions=2, sort=True) ddf.divisions = (None,) * 3 assert not ddf.known_divisions ddf.loc[20:30].compute(get=dask.get) assert_eq(ddf.loc[20:30], df.loc[20:30]) df = pd.DataFrame({'x': [1, 2, 3, 4]}, index=[10, 20, 20, 40]) ddf = dd.from_pandas(df, npartitions=2, sort=True) assert_eq(ddf.loc[20], df.loc[20:20]) def test_loc_with_text_dates(): A = tm.makeTimeSeries(10).iloc[:5] B = tm.makeTimeSeries(10).iloc[5:] s = dd.Series({('df', 0): A, ('df', 1): B}, 'df', A, [A.index.min(), B.index.min(), B.index.max()]) assert s.loc['2000': '2010'].divisions == s.divisions assert_eq(s.loc['2000': '2010'], s) assert len(s.loc['2000-01-03': '2000-01-05'].compute()) == 3 def test_loc_with_series(): assert_eq(d.loc[d.a % 2 == 0], full.loc[full.a % 2 == 0]) assert sorted(d.loc[d.a % 2].dask) == sorted(d.loc[d.a % 2].dask) assert sorted(d.loc[d.a % 2].dask) != sorted(d.loc[d.a % 3].dask) def test_loc_with_series_different_partition(): df = pd.DataFrame(np.random.randn(20, 5), index=list('abcdefghijklmnopqrst'), columns=list('ABCDE')) ddf = dd.from_pandas(df, 3) assert_eq(ddf.loc[ddf.A > 0], df.loc[df.A > 0]) assert_eq(ddf.loc[(ddf.A > 0).repartition(['a', 'g', 'k', 'o', 't'])], df.loc[df.A > 0]) def test_loc2d(): # index indexer is always regarded as slice for duplicated values assert_eq(d.loc[5, 'a'], full.loc[5:5, 'a']) # assert_eq(d.loc[[5], 'a'], full.loc[[5], 'a']) assert_eq(d.loc[5, ['a']], full.loc[5:5, ['a']]) # assert_eq(d.loc[[5], ['a']], full.loc[[5], ['a']]) assert_eq(d.loc[3:8, 'a'], full.loc[3:8, 'a']) assert_eq(d.loc[:8, 'a'], full.loc[:8, 'a']) assert_eq(d.loc[3:, 'a'], full.loc[3:, 'a']) assert_eq(d.loc[[8], 'a'], full.loc[[8], 'a']) assert_eq(d.loc[3:8, ['a']], full.loc[3:8, ['a']]) assert_eq(d.loc[:8, ['a']], full.loc[:8, ['a']]) assert_eq(d.loc[3:, ['a']], full.loc[3:, ['a']]) assert_eq(d.loc[[3, 4, 3], ['a']], full.loc[[3, 4, 3], ['a']]) # 3d with pytest.raises(pd.core.indexing.IndexingError): d.loc[3, 3, 3] # Series should raise with pytest.raises(pd.core.indexing.IndexingError): d.a.loc[3, 3] with pytest.raises(pd.core.indexing.IndexingError): d.a.loc[3:, 3] with pytest.raises(pd.core.indexing.IndexingError): d.a.loc[d.a % 2 == 0, 3] def test_loc2d_with_known_divisions(): df = pd.DataFrame(np.random.randn(20, 5), index=list('abcdefghijklmnopqrst'), columns=list('ABCDE')) ddf = dd.from_pandas(df, 3) assert_eq(ddf.loc['a', 'A'], df.loc[['a'], 'A']) assert_eq(ddf.loc['a', ['A']], df.loc[['a'], ['A']]) assert_eq(ddf.loc['a':'o', 'A'], df.loc['a':'o', 'A']) assert_eq(ddf.loc['a':'o', ['A']], df.loc['a':'o', ['A']]) assert_eq(ddf.loc[['n'], ['A']], df.loc[['n'], 
['A']]) assert_eq(ddf.loc[['a', 'c', 'n'], ['A']], df.loc[['a', 'c', 'n'], ['A']]) assert_eq(ddf.loc[['t', 'b'], ['A']], df.loc[['t', 'b'], ['A']]) assert_eq(ddf.loc[['r', 'r', 'c', 'g', 'h'], ['A']], df.loc[['r', 'r', 'c', 'g', 'h'], ['A']]) def test_loc2d_with_unknown_divisions(): df = pd.DataFrame(np.random.randn(20, 5), index=list('abcdefghijklmnopqrst'), columns=list('ABCDE')) ddf = dd.from_pandas(df, 3) ddf.divisions = (None, ) * len(ddf.divisions) assert ddf.known_divisions is False assert_eq(ddf.loc['a', 'A'], df.loc[['a'], 'A']) assert_eq(ddf.loc['a', ['A']], df.loc[['a'], ['A']]) assert_eq(ddf.loc['a':'o', 'A'], df.loc['a':'o', 'A']) assert_eq(ddf.loc['a':'o', ['A']], df.loc['a':'o', ['A']]) def test_loc2d_duplicated_columns(): df = pd.DataFrame(np.random.randn(20, 5), index=list('abcdefghijklmnopqrst'), columns=list('AABCD')) ddf = dd.from_pandas(df, 3) assert_eq(ddf.loc['a', 'A'], df.loc[['a'], 'A']) assert_eq(ddf.loc['a', ['A']], df.loc[['a'], ['A']]) assert_eq(ddf.loc['j', 'B'], df.loc[['j'], 'B']) assert_eq(ddf.loc['j', ['B']], df.loc[['j'], ['B']]) assert_eq(ddf.loc['a':'o', 'A'], df.loc['a':'o', 'A']) assert_eq(ddf.loc['a':'o', ['A']], df.loc['a':'o', ['A']]) assert_eq(ddf.loc['j':'q', 'B'], df.loc['j':'q', 'B']) assert_eq(ddf.loc['j':'q', ['B']], df.loc['j':'q', ['B']]) assert_eq(ddf.loc['a':'o', 'B':'D'], df.loc['a':'o', 'B':'D']) assert_eq(ddf.loc['a':'o', 'B':'D'], df.loc['a':'o', 'B':'D']) assert_eq(ddf.loc['j':'q', 'B':'A'], df.loc['j':'q', 'B':'A']) assert_eq(ddf.loc['j':'q', 'B':'A'], df.loc['j':'q', 'B':'A']) assert_eq(ddf.loc[ddf.B > 0, 'B'], df.loc[df.B > 0, 'B']) assert_eq(ddf.loc[ddf.B > 0, ['A', 'C']], df.loc[df.B > 0, ['A', 'C']]) def test_getitem(): df = pd.DataFrame({'A': [1, 2, 3, 4, 5, 6, 7, 8, 9], 'B': [9, 8, 7, 6, 5, 4, 3, 2, 1], 'C': [True, False, True] * 3}, columns=list('ABC')) ddf = dd.from_pandas(df, 2) assert_eq(ddf['A'], df['A']) # check cache consistency tm.assert_series_equal(ddf['A']._meta, ddf._meta['A']) assert_eq(ddf[['A', 'B']], df[['A', 'B']]) tm.assert_frame_equal(ddf[['A', 'B']]._meta, ddf._meta[['A', 'B']]) assert_eq(ddf[ddf.C], df[df.C]) tm.assert_series_equal(ddf.C._meta, ddf._meta.C) assert_eq(ddf[ddf.C.repartition([0, 2, 5, 8])], df[df.C]) pytest.raises(KeyError, lambda: df['X']) pytest.raises(KeyError, lambda: df[['A', 'X']]) pytest.raises(AttributeError, lambda: df.X) # not str/unicode df = pd.DataFrame(np.random.randn(10, 5)) ddf = dd.from_pandas(df, 2) assert_eq(ddf[0], df[0]) assert_eq(ddf[[1, 2]], df[[1, 2]]) pytest.raises(KeyError, lambda: df[8]) pytest.raises(KeyError, lambda: df[[1, 8]]) def test_getitem_slice(): df = pd.DataFrame({'A': [1, 2, 3, 4, 5, 6, 7, 8, 9], 'B': [9, 8, 7, 6, 5, 4, 3, 2, 1], 'C': [True, False, True] * 3}, index=list('abcdefghi')) ddf = dd.from_pandas(df, 3) assert_eq(ddf['a':'e'], df['a':'e']) assert_eq(ddf['a':'b'], df['a':'b']) assert_eq(ddf['f':], df['f':]) def test_loc_on_numpy_datetimes(): df = pd.DataFrame({'x': [1, 2, 3]}, index=list(map(np.datetime64, ['2014', '2015', '2016']))) a = dd.from_pandas(df, 2) a.divisions = list(map(np.datetime64, a.divisions)) assert_eq(a.loc['2014': '2015'], a.loc['2014': '2015']) def test_loc_on_pandas_datetimes(): df = pd.DataFrame({'x': [1, 2, 3]}, index=list(map(pd.Timestamp, ['2014', '2015', '2016']))) a = dd.from_pandas(df, 2) a.divisions = list(map(pd.Timestamp, a.divisions)) assert_eq(a.loc['2014': '2015'], a.loc['2014': '2015']) def test_loc_datetime_no_freq(): # https://github.com/dask/dask/issues/2389 datetime_index = pd.date_range('2016-01-01', 
'2016-01-31', freq='12h') datetime_index.freq = None # FORGET FREQUENCY df = pd.DataFrame({'num': range(len(datetime_index))}, index=datetime_index) ddf = dd.from_pandas(df, npartitions=1) slice_ = slice('2016-01-03', '2016-01-05') result = ddf.loc[slice_, :] expected = df.loc[slice_, :] assert_eq(result, expected) def test_coerce_loc_index(): for t in [pd.Timestamp, np.datetime64]: assert isinstance(_coerce_loc_index([t('2014')], '2014'), t) def test_loc_timestamp_str(): df = pd.DataFrame({'A': np.random.randn(100), 'B': np.random.randn(100)}, index=pd.date_range('2011-01-01', freq='H', periods=100)) ddf = dd.from_pandas(df, 10) # partial string slice assert_eq(df.loc['2011-01-02'], ddf.loc['2011-01-02']) assert_eq(df.loc['2011-01-02':'2011-01-10'], ddf.loc['2011-01-02':'2011-01-10']) # same reso, dask result is always DataFrame assert_eq(df.loc['2011-01-02 10:00'].to_frame().T, ddf.loc['2011-01-02 10:00']) # series assert_eq(df.A.loc['2011-01-02'], ddf.A.loc['2011-01-02']) assert_eq(df.A.loc['2011-01-02':'2011-01-10'], ddf.A.loc['2011-01-02':'2011-01-10']) # slice with timestamp (dask result must be DataFrame) assert_eq(df.loc[pd.Timestamp('2011-01-02')].to_frame().T, ddf.loc[pd.Timestamp('2011-01-02')]) assert_eq(df.loc[pd.Timestamp('2011-01-02'):pd.Timestamp('2011-01-10')], ddf.loc[pd.Timestamp('2011-01-02'):pd.Timestamp('2011-01-10')]) assert_eq(df.loc[pd.Timestamp('2011-01-02 10:00')].to_frame().T, ddf.loc[pd.Timestamp('2011-01-02 10:00')]) df = pd.DataFrame({'A': np.random.randn(100), 'B': np.random.randn(100)}, index=pd.date_range('2011-01-01', freq='M', periods=100)) ddf = dd.from_pandas(df, 50) assert_eq(df.loc['2011-01'], ddf.loc['2011-01']) assert_eq(df.loc['2011'], ddf.loc['2011']) assert_eq(df.loc['2011-01':'2012-05'], ddf.loc['2011-01':'2012-05']) assert_eq(df.loc['2011':'2015'], ddf.loc['2011':'2015']) # series assert_eq(df.B.loc['2011-01'], ddf.B.loc['2011-01']) assert_eq(df.B.loc['2011'], ddf.B.loc['2011']) assert_eq(df.B.loc['2011-01':'2012-05'], ddf.B.loc['2011-01':'2012-05']) assert_eq(df.B.loc['2011':'2015'], ddf.B.loc['2011':'2015']) def test_getitem_timestamp_str(): df = pd.DataFrame({'A': np.random.randn(100), 'B': np.random.randn(100)}, index=pd.date_range('2011-01-01', freq='H', periods=100)) ddf = dd.from_pandas(df, 10) # partial string slice assert_eq(df['2011-01-02'], ddf['2011-01-02']) assert_eq(df['2011-01-02':'2011-01-10'], df['2011-01-02':'2011-01-10']) df = pd.DataFrame({'A': np.random.randn(100), 'B': np.random.randn(100)}, index=pd.date_range('2011-01-01', freq='D', periods=100)) ddf = dd.from_pandas(df, 50) assert_eq(df['2011-01'], ddf['2011-01']) assert_eq(df['2011'], ddf['2011']) assert_eq(df['2011-01':'2012-05'], ddf['2011-01':'2012-05']) assert_eq(df['2011':'2015'], ddf['2011':'2015']) def test_loc_period_str(): # .loc with PeriodIndex doesn't support partial string indexing # https://github.com/pydata/pandas/issues/13429 pass def test_getitem_period_str(): df = pd.DataFrame({'A': np.random.randn(100), 'B': np.random.randn(100)}, index=pd.period_range('2011-01-01', freq='H', periods=100)) ddf = dd.from_pandas(df, 10) # partial string slice assert_eq(df['2011-01-02'], ddf['2011-01-02']) assert_eq(df['2011-01-02':'2011-01-10'], df['2011-01-02':'2011-01-10']) # same reso, dask result is always DataFrame df = pd.DataFrame({'A': np.random.randn(100), 'B': np.random.randn(100)}, index=pd.period_range('2011-01-01', freq='D', periods=100)) ddf = dd.from_pandas(df, 50) assert_eq(df['2011-01'], ddf['2011-01']) assert_eq(df['2011'], ddf['2011']) 
assert_eq(df['2011-01':'2012-05'], ddf['2011-01':'2012-05']) assert_eq(df['2011':'2015'], ddf['2011':'2015']) dask-0.16.0/dask/dataframe/tests/test_multi.py000066400000000000000000001405211320364734500213050ustar00rootroot00000000000000import dask.dataframe as dd import numpy as np import pandas as pd import pandas.util.testing as tm from dask.local import get_sync from dask.base import compute_as_if_collection from dask.dataframe.core import _Frame from dask.dataframe.methods import concat from dask.dataframe.multi import (align_partitions, merge_indexed_dataframes, hash_join, concat_indexed_dataframes, _maybe_align_partitions) from dask.dataframe.utils import (assert_eq, assert_divisions, make_meta, has_known_categories, clear_known_categories) import pytest def test_align_partitions(): A = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6], 'y': list('abdabd')}, index=[10, 20, 30, 40, 50, 60]) a = dd.repartition(A, [10, 40, 60]) B = pd.DataFrame({'x': [1, 2, 3, 4], 'y': list('abda')}, index=[30, 70, 80, 100]) b = dd.repartition(B, [30, 80, 100]) s = dd.core.Scalar({('s', 0): 10}, 's', 'i8') (aa, bb), divisions, L = align_partitions(a, b) def _check(a, b, aa, bb): assert isinstance(a, dd.DataFrame) assert isinstance(b, dd.DataFrame) assert isinstance(aa, dd.DataFrame) assert isinstance(bb, dd.DataFrame) assert_eq(a, aa) assert_eq(b, bb) assert divisions == (10, 30, 40, 60, 80, 100) assert isinstance(L, list) assert len(divisions) == 1 + len(L) _check(a, b, aa, bb) assert L == [[(aa._name, 0), (bb._name, 0)], [(aa._name, 1), (bb._name, 1)], [(aa._name, 2), (bb._name, 2)], [(aa._name, 3), (bb._name, 3)], [(aa._name, 4), (bb._name, 4)]] (aa, ss, bb), divisions, L = align_partitions(a, s, b) _check(a, b, aa, bb) assert L == [[(aa._name, 0), None, (bb._name, 0)], [(aa._name, 1), None, (bb._name, 1)], [(aa._name, 2), None, (bb._name, 2)], [(aa._name, 3), None, (bb._name, 3)], [(aa._name, 4), None, (bb._name, 4)]] assert_eq(ss, 10) ldf = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7], 'b': [7, 6, 5, 4, 3, 2, 1]}) rdf = pd.DataFrame({'c': [1, 2, 3, 4, 5, 6, 7], 'd': [7, 6, 5, 4, 3, 2, 1]}) for lhs, rhs in [(dd.from_pandas(ldf, 1), dd.from_pandas(rdf, 1)), (dd.from_pandas(ldf, 2), dd.from_pandas(rdf, 2)), (dd.from_pandas(ldf, 2), dd.from_pandas(rdf, 3)), (dd.from_pandas(ldf, 3), dd.from_pandas(rdf, 2))]: (lresult, rresult), div, parts = align_partitions(lhs, rhs) assert_eq(lresult, ldf) assert_eq(rresult, rdf) # different index ldf = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7], 'b': [7, 6, 5, 4, 3, 2, 1]}, index=list('abcdefg')) rdf = pd.DataFrame({'c': [1, 2, 3, 4, 5, 6, 7], 'd': [7, 6, 5, 4, 3, 2, 1]}, index=list('fghijkl')) for lhs, rhs in [(dd.from_pandas(ldf, 1), dd.from_pandas(rdf, 1)), (dd.from_pandas(ldf, 2), dd.from_pandas(rdf, 2)), (dd.from_pandas(ldf, 2), dd.from_pandas(rdf, 3)), (dd.from_pandas(ldf, 3), dd.from_pandas(rdf, 2))]: (lresult, rresult), div, parts = align_partitions(lhs, rhs) assert_eq(lresult, ldf) assert_eq(rresult, rdf) def test_align_partitions_unknown_divisions(): df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7], 'b': [7, 6, 5, 4, 3, 2, 1]}) # One known, one unknown ddf = dd.from_pandas(df, npartitions=2) ddf2 = dd.from_pandas(df, npartitions=2, sort=False) assert not ddf2.known_divisions with pytest.raises(ValueError): align_partitions(ddf, ddf2) # Both unknown ddf = dd.from_pandas(df + 1, npartitions=2, sort=False) ddf2 = dd.from_pandas(df, npartitions=2, sort=False) assert not ddf.known_divisions assert not ddf2.known_divisions with pytest.raises(ValueError): align_partitions(ddf, ddf2) 
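# Illustrative sketch, not part of the original test suite: align_partitions
# returns the realigned frames, the combined divisions, and one list of keys
# per resulting division, as the tests above check. A minimal, hypothetical
# helper showing the shape of that return value (not collected by pytest):
def _example_align_partitions():
    a = dd.from_pandas(pd.DataFrame({'x': list(range(6))},
                                    index=[0, 1, 2, 3, 4, 5]), npartitions=2)
    b = dd.from_pandas(pd.DataFrame({'y': list(range(6))},
                                    index=[0, 1, 2, 3, 4, 5]), npartitions=3)

    (aa, bb), divisions, parts = align_partitions(a, b)

    # The data is unchanged by the realignment, and there is one entry in
    # ``parts`` per resulting partition (i.e. per pair of adjacent divisions).
    assert_eq(a, aa)
    assert_eq(b, bb)
    assert len(divisions) == 1 + len(parts)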
def test__maybe_align_partitions(): df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7], 'b': [7, 6, 5, 4, 3, 2, 1]}) # Both known, same divisions ddf = dd.from_pandas(df + 1, npartitions=2) ddf2 = dd.from_pandas(df, npartitions=2) a, b = _maybe_align_partitions([ddf, ddf2]) assert a is ddf assert b is ddf2 # Both unknown, same divisions ddf = dd.from_pandas(df + 1, npartitions=2, sort=False) ddf2 = dd.from_pandas(df, npartitions=2, sort=False) assert not ddf.known_divisions assert not ddf2.known_divisions a, b = _maybe_align_partitions([ddf, ddf2]) assert a is ddf assert b is ddf2 # Both known, different divisions ddf = dd.from_pandas(df + 1, npartitions=2) ddf2 = dd.from_pandas(df, npartitions=3) a, b = _maybe_align_partitions([ddf, ddf2]) assert a.divisions == b.divisions # Both unknown, different divisions ddf = dd.from_pandas(df + 1, npartitions=2, sort=False) ddf2 = dd.from_pandas(df, npartitions=3, sort=False) assert not ddf.known_divisions assert not ddf2.known_divisions with pytest.raises(ValueError): _maybe_align_partitions([ddf, ddf2]) # One known, one unknown ddf = dd.from_pandas(df, npartitions=2) ddf2 = dd.from_pandas(df, npartitions=2, sort=False) assert not ddf2.known_divisions with pytest.raises(ValueError): _maybe_align_partitions([ddf, ddf2]) def test_merge_indexed_dataframe_to_indexed_dataframe(): A = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6]}, index=[1, 2, 3, 4, 6, 7]) a = dd.repartition(A, [1, 4, 7]) B = pd.DataFrame({'y': list('abcdef')}, index=[1, 2, 4, 5, 6, 8]) b = dd.repartition(B, [1, 2, 5, 8]) c = merge_indexed_dataframes(a, b, how='left') assert c.divisions[0] == a.divisions[0] assert c.divisions[-1] == max(a.divisions + b.divisions) assert_eq(c, A.join(B)) c = merge_indexed_dataframes(a, b, how='right') assert c.divisions[0] == b.divisions[0] assert c.divisions[-1] == b.divisions[-1] assert_eq(c, A.join(B, how='right')) c = merge_indexed_dataframes(a, b, how='inner') assert c.divisions[0] == 1 assert c.divisions[-1] == max(a.divisions + b.divisions) assert_eq(c.compute(), A.join(B, how='inner')) c = merge_indexed_dataframes(a, b, how='outer') assert c.divisions[0] == 1 assert c.divisions[-1] == 8 assert_eq(c.compute(), A.join(B, how='outer')) assert (sorted(merge_indexed_dataframes(a, b, how='inner').dask) == sorted(merge_indexed_dataframes(a, b, how='inner').dask)) assert (sorted(merge_indexed_dataframes(a, b, how='inner').dask) != sorted(merge_indexed_dataframes(a, b, how='outer').dask)) def list_eq(aa, bb): if isinstance(aa, dd.DataFrame): a = aa.compute(get=get_sync) else: a = aa if isinstance(bb, dd.DataFrame): b = bb.compute(get=get_sync) else: b = bb tm.assert_index_equal(a.columns, b.columns) if isinstance(a, pd.DataFrame): av = a.sort_values(list(a.columns)).values bv = b.sort_values(list(b.columns)).values else: av = a.sort_values().values bv = b.sort_values().values tm.assert_numpy_array_equal(av, bv) @pytest.mark.parametrize('how', ['inner', 'left', 'right', 'outer']) @pytest.mark.parametrize('shuffle', ['disk', 'tasks']) def test_hash_join(how, shuffle): A = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6], 'y': [1, 1, 2, 2, 3, 4]}) a = dd.repartition(A, [0, 4, 5]) B = pd.DataFrame({'y': [1, 3, 4, 4, 5, 6], 'z': [6, 5, 4, 3, 2, 1]}) b = dd.repartition(B, [0, 2, 5]) c = hash_join(a, 'y', b, 'y', how) result = c.compute() expected = pd.merge(A, B, how, 'y') list_eq(result, expected) # Different columns and npartitions c = hash_join(a, 'x', b, 'z', 'outer', npartitions=3, shuffle=shuffle) assert c.npartitions == 3 result = c.compute() expected = pd.merge(A, B, 'outer', 
None, 'x', 'z') list_eq(result, expected) assert (hash_join(a, 'y', b, 'y', 'inner', shuffle=shuffle)._name == hash_join(a, 'y', b, 'y', 'inner', shuffle=shuffle)._name) assert (hash_join(a, 'y', b, 'y', 'inner', shuffle=shuffle)._name != hash_join(a, 'y', b, 'y', 'outer', shuffle=shuffle)._name) @pytest.mark.parametrize('join', ['inner', 'outer']) def test_indexed_concat(join): A = pd.DataFrame({'x': [1, 2, 3, 4, 6, 7], 'y': list('abcdef')}, index=[1, 2, 3, 4, 6, 7]) a = dd.repartition(A, [1, 4, 7]) B = pd.DataFrame({'x': [10, 20, 40, 50, 60, 80]}, index=[1, 2, 4, 5, 6, 8]) b = dd.repartition(B, [1, 2, 5, 8]) result = concat_indexed_dataframes([a, b], join=join) expected = pd.concat([A, B], axis=0, join=join) assert_eq(result, expected) assert (sorted(concat_indexed_dataframes([a, b], join=join).dask) == sorted(concat_indexed_dataframes([a, b], join=join).dask)) assert (sorted(concat_indexed_dataframes([a, b], join='inner').dask) != sorted(concat_indexed_dataframes([a, b], join='outer').dask)) @pytest.mark.parametrize('join', ['inner', 'outer']) def test_concat(join): pdf1 = pd.DataFrame({'x': [1, 2, 3, 4, 6, 7], 'y': list('abcdef')}, index=[1, 2, 3, 4, 6, 7]) ddf1 = dd.from_pandas(pdf1, 2) pdf2 = pd.DataFrame({'x': [1, 2, 3, 4, 6, 7], 'y': list('abcdef')}, index=[8, 9, 10, 11, 12, 13]) ddf2 = dd.from_pandas(pdf2, 2) # different columns pdf3 = pd.DataFrame({'x': [1, 2, 3, 4, 6, 7], 'z': list('abcdef')}, index=[8, 9, 10, 11, 12, 13]) ddf3 = dd.from_pandas(pdf3, 2) for (dd1, dd2, pd1, pd2) in [(ddf1, ddf2, pdf1, pdf2), (ddf1, ddf3, pdf1, pdf3)]: result = dd.concat([dd1, dd2], join=join) expected = pd.concat([pd1, pd2], join=join) assert_eq(result, expected) # test outer only, inner has a problem on pandas side for (dd1, dd2, pd1, pd2) in [(ddf1, ddf2, pdf1, pdf2), (ddf1, ddf3, pdf1, pdf3), (ddf1.x, ddf2.x, pdf1.x, pdf2.x), (ddf1.x, ddf3.z, pdf1.x, pdf3.z), (ddf1.x, ddf2.x, pdf1.x, pdf2.x), (ddf1.x, ddf3.z, pdf1.x, pdf3.z)]: result = dd.concat([dd1, dd2]) expected = pd.concat([pd1, pd2]) assert_eq(result, expected) @pytest.mark.parametrize('how', ['inner', 'outer', 'left', 'right']) @pytest.mark.parametrize('shuffle', ['disk', 'tasks']) def test_merge(how, shuffle): A = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6], 'y': [1, 1, 2, 2, 3, 4]}) a = dd.repartition(A, [0, 4, 5]) B = pd.DataFrame({'y': [1, 3, 4, 4, 5, 6], 'z': [6, 5, 4, 3, 2, 1]}) b = dd.repartition(B, [0, 2, 5]) assert_eq(dd.merge(a, b, left_index=True, right_index=True, how=how, shuffle=shuffle), pd.merge(A, B, left_index=True, right_index=True, how=how)) result = dd.merge(a, b, on='y', how=how) list_eq(result, pd.merge(A, B, on='y', how=how)) assert all(d is None for d in result.divisions) list_eq(dd.merge(a, b, left_on='x', right_on='z', how=how, shuffle=shuffle), pd.merge(A, B, left_on='x', right_on='z', how=how)) list_eq(dd.merge(a, b, left_on='x', right_on='z', how=how, suffixes=('1', '2'), shuffle=shuffle), pd.merge(A, B, left_on='x', right_on='z', how=how, suffixes=('1', '2'))) list_eq(dd.merge(a, b, how=how, shuffle=shuffle), pd.merge(A, B, how=how)) list_eq(dd.merge(a, B, how=how, shuffle=shuffle), pd.merge(A, B, how=how)) list_eq(dd.merge(A, b, how=how, shuffle=shuffle), pd.merge(A, B, how=how)) list_eq(dd.merge(A, B, how=how, shuffle=shuffle), pd.merge(A, B, how=how)) list_eq(dd.merge(a, b, left_index=True, right_index=True, how=how, shuffle=shuffle), pd.merge(A, B, left_index=True, right_index=True, how=how)) list_eq(dd.merge(a, b, left_index=True, right_index=True, how=how, suffixes=('1', '2'), shuffle=shuffle), pd.merge(A, 
B, left_index=True, right_index=True, how=how, suffixes=('1', '2'))) list_eq(dd.merge(a, b, left_on='x', right_index=True, how=how, shuffle=shuffle), pd.merge(A, B, left_on='x', right_index=True, how=how)) list_eq(dd.merge(a, b, left_on='x', right_index=True, how=how, suffixes=('1', '2'), shuffle=shuffle), pd.merge(A, B, left_on='x', right_index=True, how=how, suffixes=('1', '2'))) # pandas result looks buggy # list_eq(dd.merge(a, B, left_index=True, right_on='y'), # pd.merge(A, B, left_index=True, right_on='y')) def test_merge_tasks_passes_through(): a = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7], 'b': [7, 6, 5, 4, 3, 2, 1]}) b = pd.DataFrame({'c': [1, 2, 3, 4, 5, 6, 7], 'd': [7, 6, 5, 4, 3, 2, 1]}) aa = dd.from_pandas(a, npartitions=3) bb = dd.from_pandas(b, npartitions=2) cc = aa.merge(bb, left_on='a', right_on='d', shuffle='tasks') assert not any('partd' in k[0] for k in cc.dask) @pytest.mark.parametrize('shuffle', ['disk', 'tasks']) @pytest.mark.parametrize('how', ['inner', 'outer', 'left', 'right']) def test_merge_by_index_patterns(how, shuffle): pdf1l = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7], 'b': [7, 6, 5, 4, 3, 2, 1]}) pdf1r = pd.DataFrame({'c': [1, 2, 3, 4, 5, 6, 7], 'd': [7, 6, 5, 4, 3, 2, 1]}) pdf2l = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7], 'b': [7, 6, 5, 4, 3, 2, 1]}, index=list('abcdefg')) pdf2r = pd.DataFrame({'c': [7, 6, 5, 4, 3, 2, 1], 'd': [7, 6, 5, 4, 3, 2, 1]}, index=list('abcdefg')) pdf3l = pdf2l pdf3r = pd.DataFrame({'c': [6, 7, 8, 9], 'd': [5, 4, 3, 2]}, index=list('abdg')) pdf4l = pdf2l pdf4r = pd.DataFrame({'c': [9, 10, 11, 12], 'd': [5, 4, 3, 2]}, index=list('abdg')) # completely different index pdf5l = pd.DataFrame({'a': [1, 1, 2, 2, 3, 3, 4], 'b': [7, 6, 5, 4, 3, 2, 1]}, index=list('lmnopqr')) pdf5r = pd.DataFrame({'c': [1, 1, 1, 1], 'd': [5, 4, 3, 2]}, index=list('abcd')) pdf6l = pd.DataFrame({'a': [1, 1, 2, 2, 3, 3, 4], 'b': [7, 6, 5, 4, 3, 2, 1]}, index=list('cdefghi')) pdf6r = pd.DataFrame({'c': [1, 2, 1, 2], 'd': [5, 4, 3, 2]}, index=list('abcd')) pdf7l = pd.DataFrame({'a': [1, 1, 2, 2, 3, 3, 4], 'b': [7, 6, 5, 4, 3, 2, 1]}, index=list('abcdefg')) pdf7r = pd.DataFrame({'c': [5, 6, 7, 8], 'd': [5, 4, 3, 2]}, index=list('fghi')) for pdl, pdr in [(pdf1l, pdf1r), (pdf2l, pdf2r), (pdf3l, pdf3r), (pdf4l, pdf4r), (pdf5l, pdf5r), (pdf6l, pdf6r), (pdf7l, pdf7r)]: for lpart, rpart in [(2, 2), # same partition (3, 2), # left npartition > right npartition (2, 3)]: # left npartition < right npartition ddl = dd.from_pandas(pdl, lpart) ddr = dd.from_pandas(pdr, rpart) assert_eq(dd.merge(ddl, ddr, how=how, left_index=True, right_index=True, shuffle=shuffle), pd.merge(pdl, pdr, how=how, left_index=True, right_index=True)) assert_eq(dd.merge(ddr, ddl, how=how, left_index=True, right_index=True, shuffle=shuffle), pd.merge(pdr, pdl, how=how, left_index=True, right_index=True)) assert_eq(dd.merge(ddl, ddr, how=how, left_index=True, right_index=True, shuffle=shuffle, indicator=True), pd.merge(pdl, pdr, how=how, left_index=True, right_index=True, indicator=True)) assert_eq(dd.merge(ddr, ddl, how=how, left_index=True, right_index=True, shuffle=shuffle, indicator=True), pd.merge(pdr, pdl, how=how, left_index=True, right_index=True, indicator=True)) assert_eq(ddr.merge(ddl, how=how, left_index=True, right_index=True, shuffle=shuffle), pdr.merge(pdl, how=how, left_index=True, right_index=True)) assert_eq(ddl.merge(ddr, how=how, left_index=True, right_index=True, shuffle=shuffle), pdl.merge(pdr, how=how, left_index=True, right_index=True)) # hash join list_eq(dd.merge(ddl, ddr, 
how=how, left_on='a', right_on='c', shuffle=shuffle), pd.merge(pdl, pdr, how=how, left_on='a', right_on='c')) list_eq(dd.merge(ddl, ddr, how=how, left_on='b', right_on='d', shuffle=shuffle), pd.merge(pdl, pdr, how=how, left_on='b', right_on='d')) list_eq(dd.merge(ddr, ddl, how=how, left_on='c', right_on='a', shuffle=shuffle, indicator=True), pd.merge(pdr, pdl, how=how, left_on='c', right_on='a', indicator=True)) list_eq(dd.merge(ddr, ddl, how=how, left_on='d', right_on='b', shuffle=shuffle, indicator=True), pd.merge(pdr, pdl, how=how, left_on='d', right_on='b', indicator=True)) list_eq(dd.merge(ddr, ddl, how=how, left_on='c', right_on='a', shuffle=shuffle), pd.merge(pdr, pdl, how=how, left_on='c', right_on='a')) list_eq(dd.merge(ddr, ddl, how=how, left_on='d', right_on='b', shuffle=shuffle), pd.merge(pdr, pdl, how=how, left_on='d', right_on='b')) list_eq(ddl.merge(ddr, how=how, left_on='a', right_on='c', shuffle=shuffle), pdl.merge(pdr, how=how, left_on='a', right_on='c')) list_eq(ddl.merge(ddr, how=how, left_on='b', right_on='d', shuffle=shuffle), pdl.merge(pdr, how=how, left_on='b', right_on='d')) list_eq(ddr.merge(ddl, how=how, left_on='c', right_on='a', shuffle=shuffle), pdr.merge(pdl, how=how, left_on='c', right_on='a')) list_eq(ddr.merge(ddl, how=how, left_on='d', right_on='b', shuffle=shuffle), pdr.merge(pdl, how=how, left_on='d', right_on='b')) @pytest.mark.parametrize('how', ['inner', 'outer', 'left', 'right']) @pytest.mark.parametrize('shuffle', ['disk', 'tasks']) def test_join_by_index_patterns(how, shuffle): # Similar test cases as test_merge_by_index_patterns, # but columns / index for join have same dtype pdf1l = pd.DataFrame({'a': list('abcdefg'), 'b': [7, 6, 5, 4, 3, 2, 1]}, index=list('abcdefg')) pdf1r = pd.DataFrame({'c': list('abcdefg'), 'd': [7, 6, 5, 4, 3, 2, 1]}, index=list('abcdefg')) pdf2l = pdf1l pdf2r = pd.DataFrame({'c': list('gfedcba'), 'd': [7, 6, 5, 4, 3, 2, 1]}, index=list('abcdefg')) pdf3l = pdf1l pdf3r = pd.DataFrame({'c': list('abdg'), 'd': [5, 4, 3, 2]}, index=list('abdg')) pdf4l = pd.DataFrame({'a': list('abcabce'), 'b': [7, 6, 5, 4, 3, 2, 1]}, index=list('abcdefg')) pdf4r = pd.DataFrame({'c': list('abda'), 'd': [5, 4, 3, 2]}, index=list('abdg')) # completely different index pdf5l = pd.DataFrame({'a': list('lmnopqr'), 'b': [7, 6, 5, 4, 3, 2, 1]}, index=list('lmnopqr')) pdf5r = pd.DataFrame({'c': list('abcd'), 'd': [5, 4, 3, 2]}, index=list('abcd')) pdf6l = pd.DataFrame({'a': list('cdefghi'), 'b': [7, 6, 5, 4, 3, 2, 1]}, index=list('cdefghi')) pdf6r = pd.DataFrame({'c': list('abab'), 'd': [5, 4, 3, 2]}, index=list('abcd')) pdf7l = pd.DataFrame({'a': list('aabbccd'), 'b': [7, 6, 5, 4, 3, 2, 1]}, index=list('abcdefg')) pdf7r = pd.DataFrame({'c': list('aabb'), 'd': [5, 4, 3, 2]}, index=list('fghi')) for pdl, pdr in [(pdf1l, pdf1r), (pdf2l, pdf2r), (pdf3l, pdf3r), (pdf4l, pdf4r), (pdf5l, pdf5r), (pdf6l, pdf6r), (pdf7l, pdf7r)]: for lpart, rpart in [(2, 2), (3, 2), (2, 3)]: ddl = dd.from_pandas(pdl, lpart) ddr = dd.from_pandas(pdr, rpart) assert_eq(ddl.join(ddr, how=how, shuffle=shuffle), pdl.join(pdr, how=how)) assert_eq(ddr.join(ddl, how=how, shuffle=shuffle), pdr.join(pdl, how=how)) assert_eq(ddl.join(ddr, how=how, lsuffix='l', rsuffix='r', shuffle=shuffle), pdl.join(pdr, how=how, lsuffix='l', rsuffix='r')) assert_eq(ddr.join(ddl, how=how, lsuffix='l', rsuffix='r', shuffle=shuffle), pdr.join(pdl, how=how, lsuffix='l', rsuffix='r')) """ # temporary disabled bacause pandas may incorrectly raise # IndexError for empty DataFrame # 
https://github.com/pydata/pandas/pull/10826 list_assert_eq(ddl.join(ddr, how=how, on='a', lsuffix='l', rsuffix='r'), pdl.join(pdr, how=how, on='a', lsuffix='l', rsuffix='r')) list_eq(ddr.join(ddl, how=how, on='c', lsuffix='l', rsuffix='r'), pdr.join(pdl, how=how, on='c', lsuffix='l', rsuffix='r')) # merge with index and columns list_eq(ddl.merge(ddr, how=how, left_on='a', right_index=True), pdl.merge(pdr, how=how, left_on='a', right_index=True)) list_eq(ddr.merge(ddl, how=how, left_on='c', right_index=True), pdr.merge(pdl, how=how, left_on='c', right_index=True)) list_eq(ddl.merge(ddr, how=how, left_index=True, right_on='c'), pdl.merge(pdr, how=how, left_index=True, right_on='c')) list_eq(ddr.merge(ddl, how=how, left_index=True, right_on='a'), pdr.merge(pdl, how=how, left_index=True, right_on='a')) """ @pytest.mark.parametrize('how', ['inner', 'outer', 'left', 'right']) @pytest.mark.parametrize('shuffle', ['disk', 'tasks']) def test_merge_by_multiple_columns(how, shuffle): # warnings here from pandas pdf1l = pd.DataFrame({'a': list('abcdefghij'), 'b': list('abcdefghij'), 'c': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, index=list('abcdefghij')) pdf1r = pd.DataFrame({'d': list('abcdefghij'), 'e': list('abcdefghij'), 'f': [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]}, index=list('abcdefghij')) pdf2l = pd.DataFrame({'a': list('abcdeabcde'), 'b': list('abcabcabca'), 'c': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, index=list('abcdefghij')) pdf2r = pd.DataFrame({'d': list('edcbaedcba'), 'e': list('aaabbbcccd'), 'f': [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]}, index=list('fghijklmno')) pdf3l = pd.DataFrame({'a': list('aaaaaaaaaa'), 'b': list('aaaaaaaaaa'), 'c': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, index=list('abcdefghij')) pdf3r = pd.DataFrame({'d': list('aaabbbccaa'), 'e': list('abbbbbbbbb'), 'f': [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]}, index=list('ABCDEFGHIJ')) for pdl, pdr in [(pdf1l, pdf1r), (pdf2l, pdf2r), (pdf3l, pdf3r)]: for lpart, rpart in [(2, 2), (3, 2), (2, 3)]: ddl = dd.from_pandas(pdl, lpart) ddr = dd.from_pandas(pdr, rpart) assert_eq(ddl.join(ddr, how=how, shuffle=shuffle), pdl.join(pdr, how=how)) assert_eq(ddr.join(ddl, how=how, shuffle=shuffle), pdr.join(pdl, how=how)) assert_eq(dd.merge(ddl, ddr, how=how, left_index=True, right_index=True, shuffle=shuffle), pd.merge(pdl, pdr, how=how, left_index=True, right_index=True)) assert_eq(dd.merge(ddr, ddl, how=how, left_index=True, right_index=True, shuffle=shuffle), pd.merge(pdr, pdl, how=how, left_index=True, right_index=True)) # hash join list_eq(dd.merge(ddl, ddr, how=how, left_on='a', right_on='d', shuffle=shuffle), pd.merge(pdl, pdr, how=how, left_on='a', right_on='d')) list_eq(dd.merge(ddl, ddr, how=how, left_on='b', right_on='e', shuffle=shuffle), pd.merge(pdl, pdr, how=how, left_on='b', right_on='e')) list_eq(dd.merge(ddr, ddl, how=how, left_on='d', right_on='a', shuffle=shuffle), pd.merge(pdr, pdl, how=how, left_on='d', right_on='a')) list_eq(dd.merge(ddr, ddl, how=how, left_on='e', right_on='b', shuffle=shuffle), pd.merge(pdr, pdl, how=how, left_on='e', right_on='b')) list_eq(dd.merge(ddl, ddr, how=how, left_on=['a', 'b'], right_on=['d', 'e'], shuffle=shuffle), pd.merge(pdl, pdr, how=how, left_on=['a', 'b'], right_on=['d', 'e'])) def test_melt(): pdf = pd.DataFrame({'A': list('abcd') * 5, 'B': list('XY') * 10, 'C': np.random.randn(20)}) ddf = dd.from_pandas(pdf, 4) list_eq(dd.melt(ddf), pd.melt(pdf)) list_eq(dd.melt(ddf, id_vars='C'), pd.melt(pdf, id_vars='C')) list_eq(dd.melt(ddf, value_vars='C'), pd.melt(pdf, value_vars='C')) list_eq(dd.melt(ddf, value_vars=['A', 'C'], 
var_name='myvar'), pd.melt(pdf, value_vars=['A', 'C'], var_name='myvar')) list_eq(dd.melt(ddf, id_vars='B', value_vars=['A', 'C'], value_name='myval'), pd.melt(pdf, id_vars='B', value_vars=['A', 'C'], value_name='myval')) def test_cheap_inner_merge_with_pandas_object(): a = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6], 'y': list('abdabd')}, index=[10, 20, 30, 40, 50, 60]) da = dd.from_pandas(a, npartitions=3) b = pd.DataFrame({'x': [1, 2, 3, 4], 'z': list('abda')}) dc = da.merge(b, on='x', how='inner') assert all('shuffle' not in k[0] for k in dc.dask) list_eq(da.merge(b, on='x', how='inner'), a.merge(b, on='x', how='inner')) def test_cheap_single_partition_merge(): a = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6], 'y': list('abdabd')}, index=[10, 20, 30, 40, 50, 60]) aa = dd.from_pandas(a, npartitions=3) b = pd.DataFrame({'x': [1, 2, 3, 4], 'z': list('abda')}) bb = dd.from_pandas(b, npartitions=1, sort=False) cc = aa.merge(bb, on='x', how='inner') assert all('shuffle' not in k[0] for k in cc.dask) assert len(cc.dask) == len(aa.dask) * 2 + len(bb.dask) list_eq(aa.merge(bb, on='x', how='inner'), a.merge(b, on='x', how='inner')) def test_cheap_single_partition_merge_divisions(): a = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6], 'y': list('abdabd')}, index=[10, 20, 30, 40, 50, 60]) aa = dd.from_pandas(a, npartitions=3) b = pd.DataFrame({'x': [1, 2, 3, 4], 'z': list('abda')}) bb = dd.from_pandas(b, npartitions=1, sort=False) actual = aa.merge(bb, on='x', how='inner') assert not actual.known_divisions assert_divisions(actual) actual = bb.merge(aa, on='x', how='inner') assert not actual.known_divisions assert_divisions(actual) def test_cheap_single_partition_merge_on_index(): a = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6], 'y': list('abdabd')}, index=[10, 20, 30, 40, 50, 60]) aa = dd.from_pandas(a, npartitions=3) b = pd.DataFrame({'x': [1, 2, 3, 4], 'z': list('abda')}) bb = dd.from_pandas(b, npartitions=1, sort=False) actual = aa.merge(bb, left_index=True, right_on='x', how='inner') expected = a.merge(b, left_index=True, right_on='x', how='inner') assert actual.known_divisions assert_eq(actual, expected) actual = bb.merge(aa, right_index=True, left_on='x', how='inner') expected = b.merge(a, right_index=True, left_on='x', how='inner') assert actual.known_divisions assert_eq(actual, expected) def test_merge_maintains_columns(): lhs = pd.DataFrame({'A': [1, 2, 3], 'B': list('abc'), 'C': 'foo', 'D': 1.0}, columns=list('DCBA')) rhs = pd.DataFrame({'G': [4, 5], 'H': 6.0, 'I': 'bar', 'B': list('ab')}, columns=list('GHIB')) ddf = dd.from_pandas(lhs, npartitions=1) merged = dd.merge(ddf, rhs, on='B').compute() assert tuple(merged.columns) == ('D', 'C', 'B', 'A', 'G', 'H', 'I') @pytest.mark.parametrize('shuffle', ['disk', 'tasks']) def test_merge_index_without_divisions(shuffle): a = pd.DataFrame({'x': [1, 2, 3, 4, 5]}, index=[1, 2, 3, 4, 5]) b = pd.DataFrame({'y': [1, 2, 3, 4, 5]}, index=[5, 4, 3, 2, 1]) aa = dd.from_pandas(a, npartitions=3, sort=False) bb = dd.from_pandas(b, npartitions=2) result = aa.join(bb, how='inner', shuffle=shuffle) expected = a.join(b, how='inner') assert_eq(result, expected) def test_half_indexed_dataframe_avoids_shuffle(): a = pd.DataFrame({'x': np.random.randint(100, size=1000)}) b = pd.DataFrame({'y': np.random.randint(100, size=100)}, index=np.random.randint(100, size=100)) aa = dd.from_pandas(a, npartitions=100) bb = dd.from_pandas(b, npartitions=2) c = pd.merge(a, b, left_index=True, right_on='y') cc = dd.merge(aa, bb, left_index=True, right_on='y', shuffle='tasks') list_eq(c, cc) assert 
len(cc.dask) < 500 def test_errors_for_merge_on_frame_columns(): a = pd.DataFrame({'x': [1, 2, 3, 4, 5]}, index=[1, 2, 3, 4, 5]) b = pd.DataFrame({'y': [1, 2, 3, 4, 5]}, index=[5, 4, 3, 2, 1]) aa = dd.from_pandas(a, npartitions=3, sort=False) bb = dd.from_pandas(b, npartitions=2) with pytest.raises(NotImplementedError): dd.merge(aa, bb, left_on='x', right_on=bb.y) with pytest.raises(NotImplementedError): dd.merge(aa, bb, left_on=aa.x, right_on=bb.y) def test_concat_one_series(): a = pd.Series([1, 2, 3, 4]) aa = dd.from_pandas(a, npartitions=2, sort=False) c = dd.concat([aa], axis=0) assert isinstance(c, dd.Series) c = dd.concat([aa], axis=1) assert isinstance(c, dd.DataFrame) def test_concat_unknown_divisions(): a = pd.Series([1, 2, 3, 4]) b = pd.Series([4, 3, 2, 1]) aa = dd.from_pandas(a, npartitions=2, sort=False) bb = dd.from_pandas(b, npartitions=2, sort=False) assert not aa.known_divisions with pytest.warns(UserWarning): assert_eq(pd.concat([a, b], axis=1), dd.concat([aa, bb], axis=1)) cc = dd.from_pandas(b, npartitions=1, sort=False) with pytest.raises(ValueError): dd.concat([aa, cc], axis=1) def test_concat_unknown_divisions_errors(): a = pd.Series([1, 2, 3, 4, 5, 6]) b = pd.Series([4, 3, 2, 1]) aa = dd.from_pandas(a, npartitions=2, sort=False) bb = dd.from_pandas(b, npartitions=2, sort=False) with pytest.raises(ValueError): with pytest.warns(UserWarning): # Concat with unknown divisions dd.concat([aa, bb], axis=1).compute() def test_concat2(): dsk = {('x', 0): pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}), ('x', 1): pd.DataFrame({'a': [4, 5, 6], 'b': [3, 2, 1]}), ('x', 2): pd.DataFrame({'a': [7, 8, 9], 'b': [0, 0, 0]})} meta = make_meta({'a': 'i8', 'b': 'i8'}) a = dd.DataFrame(dsk, 'x', meta, [None, None]) dsk = {('y', 0): pd.DataFrame({'a': [10, 20, 30], 'b': [40, 50, 60]}), ('y', 1): pd.DataFrame({'a': [40, 50, 60], 'b': [30, 20, 10]}), ('y', 2): pd.DataFrame({'a': [70, 80, 90], 'b': [0, 0, 0]})} b = dd.DataFrame(dsk, 'y', meta, [None, None]) dsk = {('y', 0): pd.DataFrame({'b': [10, 20, 30], 'c': [40, 50, 60]}), ('y', 1): pd.DataFrame({'b': [40, 50, 60], 'c': [30, 20, 10]})} meta = make_meta({'b': 'i8', 'c': 'i8'}) c = dd.DataFrame(dsk, 'y', meta, [None, None]) dsk = {('y', 0): pd.DataFrame({'b': [10, 20, 30], 'c': [40, 50, 60], 'd': [70, 80, 90]}), ('y', 1): pd.DataFrame({'b': [40, 50, 60], 'c': [30, 20, 10], 'd': [90, 80, 70]}, index=[3, 4, 5])} meta = make_meta({'b': 'i8', 'c': 'i8', 'd': 'i8'}, index=pd.Index([], 'i8')) d = dd.DataFrame(dsk, 'y', meta, [0, 3, 5]) cases = [[a, b], [a, c], [a, d]] assert dd.concat([a]) is a for case in cases: result = dd.concat(case) pdcase = [_c.compute() for _c in case] assert result.npartitions == case[0].npartitions + case[1].npartitions assert result.divisions == (None, ) * (result.npartitions + 1) assert_eq(pd.concat(pdcase), result) assert set(result.dask) == set(dd.concat(case).dask) result = dd.concat(case, join='inner') assert result.npartitions == case[0].npartitions + case[1].npartitions assert result.divisions == (None, ) * (result.npartitions + 1) assert_eq(pd.concat(pdcase, join='inner'), result) assert set(result.dask) == set(dd.concat(case, join='inner').dask) def test_concat3(): pdf1 = pd.DataFrame(np.random.randn(6, 5), columns=list('ABCDE'), index=list('abcdef')) pdf2 = pd.DataFrame(np.random.randn(6, 5), columns=list('ABCFG'), index=list('ghijkl')) pdf3 = pd.DataFrame(np.random.randn(6, 5), columns=list('ABCHI'), index=list('mnopqr')) ddf1 = dd.from_pandas(pdf1, 2) ddf2 = dd.from_pandas(pdf2, 3) ddf3 = dd.from_pandas(pdf3, 
2) result = dd.concat([ddf1, ddf2]) assert result.divisions == ddf1.divisions[:-1] + ddf2.divisions assert result.npartitions == ddf1.npartitions + ddf2.npartitions assert_eq(result, pd.concat([pdf1, pdf2])) assert_eq(dd.concat([ddf1, ddf2], interleave_partitions=True), pd.concat([pdf1, pdf2])) result = dd.concat([ddf1, ddf2, ddf3]) assert result.divisions == (ddf1.divisions[:-1] + ddf2.divisions[:-1] + ddf3.divisions) assert result.npartitions == (ddf1.npartitions + ddf2.npartitions + ddf3.npartitions) assert_eq(result, pd.concat([pdf1, pdf2, pdf3])) assert_eq(dd.concat([ddf1, ddf2, ddf3], interleave_partitions=True), pd.concat([pdf1, pdf2, pdf3])) def test_concat4_interleave_partitions(): pdf1 = pd.DataFrame(np.random.randn(10, 5), columns=list('ABCDE'), index=list('abcdefghij')) pdf2 = pd.DataFrame(np.random.randn(13, 5), columns=list('ABCDE'), index=list('fghijklmnopqr')) pdf3 = pd.DataFrame(np.random.randn(13, 6), columns=list('CDEXYZ'), index=list('fghijklmnopqr')) ddf1 = dd.from_pandas(pdf1, 2) ddf2 = dd.from_pandas(pdf2, 3) ddf3 = dd.from_pandas(pdf3, 2) msg = ('All inputs have known divisions which cannot be ' 'concatenated in order. Specify ' 'interleave_partitions=True to ignore order') cases = [[ddf1, ddf1], [ddf1, ddf2], [ddf1, ddf3], [ddf2, ddf1], [ddf2, ddf3], [ddf3, ddf1], [ddf3, ddf2]] for case in cases: pdcase = [c.compute() for c in case] with pytest.raises(ValueError) as err: dd.concat(case) assert msg in str(err.value) assert_eq(dd.concat(case, interleave_partitions=True), pd.concat(pdcase)) assert_eq(dd.concat(case, join='inner', interleave_partitions=True), pd.concat(pdcase, join='inner')) msg = "'join' must be 'inner' or 'outer'" with pytest.raises(ValueError) as err: dd.concat([ddf1, ddf1], join='invalid', interleave_partitions=True) assert msg in str(err.value) def test_concat5(): pdf1 = pd.DataFrame(np.random.randn(7, 5), columns=list('ABCDE'), index=list('abcdefg')) pdf2 = pd.DataFrame(np.random.randn(7, 6), columns=list('FGHIJK'), index=list('abcdefg')) pdf3 = pd.DataFrame(np.random.randn(7, 6), columns=list('FGHIJK'), index=list('cdefghi')) pdf4 = pd.DataFrame(np.random.randn(7, 5), columns=list('FGHAB'), index=list('cdefghi')) pdf5 = pd.DataFrame(np.random.randn(7, 5), columns=list('FGHAB'), index=list('fklmnop')) ddf1 = dd.from_pandas(pdf1, 2) ddf2 = dd.from_pandas(pdf2, 3) ddf3 = dd.from_pandas(pdf3, 2) ddf4 = dd.from_pandas(pdf4, 2) ddf5 = dd.from_pandas(pdf5, 3) cases = [[ddf1, ddf2], [ddf1, ddf3], [ddf1, ddf4], [ddf1, ddf5], [ddf3, ddf4], [ddf3, ddf5], [ddf5, ddf1, ddf4], [ddf5, ddf3], [ddf1.A, ddf4.A], [ddf2.F, ddf3.F], [ddf4.A, ddf5.A], [ddf1.A, ddf4.F], [ddf2.F, ddf3.H], [ddf4.A, ddf5.B], [ddf1, ddf4.A], [ddf3.F, ddf2], [ddf5, ddf1.A, ddf2]] for case in cases: pdcase = [c.compute() for c in case] with pytest.warns(None): # some cases will raise warning directly from pandas assert_eq(dd.concat(case, interleave_partitions=True), pd.concat(pdcase)) assert_eq(dd.concat(case, join='inner', interleave_partitions=True), pd.concat(pdcase, join='inner')) assert_eq(dd.concat(case, axis=1), pd.concat(pdcase, axis=1)) assert_eq(dd.concat(case, axis=1, join='inner'), pd.concat(pdcase, axis=1, join='inner')) # Dask + pandas cases = [[ddf1, pdf2], [ddf1, pdf3], [pdf1, ddf4], [pdf1.A, ddf4.A], [ddf2.F, pdf3.F], [ddf1, pdf4.A], [ddf3.F, pdf2], [ddf2, pdf1, ddf3.F]] for case in cases: pdcase = [c.compute() if isinstance(c, _Frame) else c for c in case] assert_eq(dd.concat(case, interleave_partitions=True), pd.concat(pdcase)) assert_eq(dd.concat(case, join='inner', 
interleave_partitions=True), pd.concat(pdcase, join='inner')) assert_eq(dd.concat(case, axis=1), pd.concat(pdcase, axis=1)) assert_eq(dd.concat(case, axis=1, join='inner'), pd.concat(pdcase, axis=1, join='inner')) @pytest.mark.parametrize('known, cat_index, divisions', [(True, True, False), (True, False, True), (True, False, False), (False, True, False), (False, False, True), (False, False, False)]) def test_concat_categorical(known, cat_index, divisions): frames = [pd.DataFrame({'w': list('xxxxx'), 'x': np.arange(5), 'y': list('abcbc'), 'z': np.arange(5, dtype='f8')}), pd.DataFrame({'w': list('yyyyy'), 'x': np.arange(5, 10), 'y': list('abbba'), 'z': np.arange(5, 10, dtype='f8')}), pd.DataFrame({'w': list('zzzzz'), 'x': np.arange(10, 15), 'y': list('bcbcc'), 'z': np.arange(10, 15, dtype='f8')})] for df in frames: df.w = df.w.astype('category') df.y = df.y.astype('category') if cat_index: frames = [df.set_index(df.y) for df in frames] dframes = [dd.from_pandas(p, npartitions=2, sort=divisions) for p in frames] if not known: dframes[0]._meta = clear_known_categories(dframes[0]._meta, ['y'], index=True) def check_and_return(ddfs, dfs, join): sol = concat(dfs, join=join) res = dd.concat(ddfs, join=join, interleave_partitions=divisions) assert_eq(res, sol) if known: parts = compute_as_if_collection(dd.DataFrame, res.dask, res.__dask_keys__()) for p in [i.iloc[:0] for i in parts]: res._meta == p # will error if schemas don't align assert not cat_index or has_known_categories(res.index) == known return res for join in ['inner', 'outer']: # Frame res = check_and_return(dframes, frames, join) assert has_known_categories(res.w) assert has_known_categories(res.y) == known # Series res = check_and_return([i.y for i in dframes], [i.y for i in frames], join) assert has_known_categories(res) == known # Non-cat series with cat index if cat_index: res = check_and_return([i.x for i in dframes], [i.x for i in frames], join) # Partition missing columns res = check_and_return([dframes[0][['x', 'y']]] + dframes[1:], [frames[0][['x', 'y']]] + frames[1:], join) assert not hasattr(res, 'w') or has_known_categories(res.w) assert has_known_categories(res.y) == known def test_append(): df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6], 'b': [1, 2, 3, 4, 5, 6]}) df2 = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6], 'b': [1, 2, 3, 4, 5, 6]}, index=[6, 7, 8, 9, 10, 11]) df3 = pd.DataFrame({'b': [1, 2, 3, 4, 5, 6], 'c': [1, 2, 3, 4, 5, 6]}, index=[6, 7, 8, 9, 10, 11]) ddf = dd.from_pandas(df, 2) ddf2 = dd.from_pandas(df2, 2) ddf3 = dd.from_pandas(df3, 2) s = pd.Series([7, 8], name=6, index=['a', 'b']) assert_eq(ddf.append(s), df.append(s)) assert_eq(ddf.append(ddf2), df.append(df2)) assert_eq(ddf.a.append(ddf2.a), df.a.append(df2.a)) # different columns assert_eq(ddf.append(ddf3), df.append(df3)) assert_eq(ddf.a.append(ddf3.b), df.a.append(df3.b)) # dask + pandas assert_eq(ddf.append(df2), df.append(df2)) assert_eq(ddf.a.append(df2.a), df.a.append(df2.a)) assert_eq(ddf.append(df3), df.append(df3)) assert_eq(ddf.a.append(df3.b), df.a.append(df3.b)) df4 = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6], 'b': [1, 2, 3, 4, 5, 6]}, index=[4, 5, 6, 7, 8, 9]) ddf4 = dd.from_pandas(df4, 2) with pytest.raises(ValueError): ddf.append(ddf4) def test_append2(): dsk = {('x', 0): pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}), ('x', 1): pd.DataFrame({'a': [4, 5, 6], 'b': [3, 2, 1]}), ('x', 2): pd.DataFrame({'a': [7, 8, 9], 'b': [0, 0, 0]})} meta = make_meta({'a': 'i8', 'b': 'i8'}) ddf1 = dd.DataFrame(dsk, 'x', meta, [None, None]) dsk = {('y', 0): 
pd.DataFrame({'a': [10, 20, 30], 'b': [40, 50, 60]}), ('y', 1): pd.DataFrame({'a': [40, 50, 60], 'b': [30, 20, 10]}), ('y', 2): pd.DataFrame({'a': [70, 80, 90], 'b': [0, 0, 0]})} ddf2 = dd.DataFrame(dsk, 'y', meta, [None, None]) dsk = {('y', 0): pd.DataFrame({'b': [10, 20, 30], 'c': [40, 50, 60]}), ('y', 1): pd.DataFrame({'b': [40, 50, 60], 'c': [30, 20, 10]})} meta = make_meta({'b': 'i8', 'c': 'i8'}) ddf3 = dd.DataFrame(dsk, 'y', meta, [None, None]) assert_eq(ddf1.append(ddf2), ddf1.compute().append(ddf2.compute())) assert_eq(ddf2.append(ddf1), ddf2.compute().append(ddf1.compute())) # Series + DataFrame with pytest.warns(None): # RuntimeWarning from pandas on comparing int and str assert_eq(ddf1.a.append(ddf2), ddf1.a.compute().append(ddf2.compute())) assert_eq(ddf2.a.append(ddf1), ddf2.a.compute().append(ddf1.compute())) # different columns assert_eq(ddf1.append(ddf3), ddf1.compute().append(ddf3.compute())) assert_eq(ddf3.append(ddf1), ddf3.compute().append(ddf1.compute())) # Series + DataFrame with pytest.warns(None): # RuntimeWarning from pandas on comparing int and str assert_eq(ddf1.a.append(ddf3), ddf1.a.compute().append(ddf3.compute())) assert_eq(ddf3.b.append(ddf1), ddf3.b.compute().append(ddf1.compute())) # Dask + pandas assert_eq(ddf1.append(ddf2.compute()), ddf1.compute().append(ddf2.compute())) assert_eq(ddf2.append(ddf1.compute()), ddf2.compute().append(ddf1.compute())) # Series + DataFrame with pytest.warns(None): # RuntimeWarning from pandas on comparing int and str assert_eq(ddf1.a.append(ddf2.compute()), ddf1.a.compute().append(ddf2.compute())) assert_eq(ddf2.a.append(ddf1.compute()), ddf2.a.compute().append(ddf1.compute())) # different columns assert_eq(ddf1.append(ddf3.compute()), ddf1.compute().append(ddf3.compute())) assert_eq(ddf3.append(ddf1.compute()), ddf3.compute().append(ddf1.compute())) # Series + DataFrame with pytest.warns(None): # RuntimeWarning from pandas on comparing int and str assert_eq(ddf1.a.append(ddf3.compute()), ddf1.a.compute().append(ddf3.compute())) assert_eq(ddf3.b.append(ddf1.compute()), ddf3.b.compute().append(ddf1.compute())) def test_append_categorical(): frames = [pd.DataFrame({'x': np.arange(5, 10), 'y': list('abbba'), 'z': np.arange(5, 10, dtype='f8')}), pd.DataFrame({'x': np.arange(10, 15), 'y': list('bcbcc'), 'z': np.arange(10, 15, dtype='f8')})] frames2 = [] for df in frames: df.y = df.y.astype('category') df2 = df.copy() df2.y = df2.y.cat.set_categories(list('abc')) df.index = df.y frames2.append(df2.set_index(df2.y)) df1, df2 = frames2 for known in [True, False]: dframes = [dd.from_pandas(p, npartitions=2, sort=False) for p in frames] if not known: dframes[0]._meta = clear_known_categories(dframes[0]._meta, ['y'], index=True) ddf1, ddf2 = dframes res = ddf1.append(ddf2) assert_eq(res, df1.append(df2)) assert has_known_categories(res.index) == known assert has_known_categories(res.y) == known res = ddf1.y.append(ddf2.y) assert_eq(res, df1.y.append(df2.y)) assert has_known_categories(res.index) == known assert has_known_categories(res) == known res = ddf1.index.append(ddf2.index) assert_eq(res, df1.index.append(df2.index)) assert has_known_categories(res) == known def test_singleton_divisions(): df = pd.DataFrame({'x': [1, 1, 1]}, index=[1, 2, 3]) ddf = dd.from_pandas(df, npartitions=2) ddf2 = ddf.set_index('x') joined = ddf2.join(ddf2, rsuffix='r') assert joined.divisions == (1, 1) joined.compute() 
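# Illustrative sketch (not one of the tests above): when both operands have
# known, sorted divisions, ``join`` aligns partitions on the index instead of
# hashing and shuffling rows.  The small frames and partition counts below are
# arbitrary examples; ``pd`` and ``dd`` are the module-level imports already
# used throughout this file.
def _sketch_index_aligned_join():
    left = dd.from_pandas(pd.DataFrame({'x': range(10)}), npartitions=2)
    right = dd.from_pandas(pd.DataFrame({'y': range(10, 20)}), npartitions=3)

    joined = left.join(right, how='inner')
    # divisions remain known after an indexed join
    assert joined.known_divisions
    return joined.compute()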
dask-0.16.0/dask/dataframe/tests/test_optimize_dataframe.py000066400000000000000000000032311320364734500240130ustar00rootroot00000000000000
import pytest
from operator import getitem
from toolz import merge

import dask
from dask.dataframe.io import dataframe_from_ctable
import dask.dataframe as dd
import pandas as pd

dsk = {('x', 0): pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]},
                              index=[0, 1, 3]),
       ('x', 1): pd.DataFrame({'a': [4, 5, 6], 'b': [3, 2, 1]},
                              index=[5, 6, 8]),
       ('x', 2): pd.DataFrame({'a': [7, 8, 9], 'b': [0, 0, 0]},
                              index=[9, 9, 9])}
dfs = list(dsk.values())


def test_column_optimizations_with_bcolz_and_rewrite():
    bcolz = pytest.importorskip('bcolz')

    bc = bcolz.ctable([[1, 2, 3], [10, 20, 30]], names=['a', 'b'])
    for cols in [None, 'abc', ['abc']]:
        dsk2 = merge(dict((('x', i),
                           (dataframe_from_ctable, bc, slice(0, 2), cols, {}))
                          for i in [1, 2, 3]),
                     dict((('y', i),
                           (getitem, ('x', i), ['a', 'b']))
                          for i in [1, 2, 3]))

        expected = dict((('y', i),
                         (dataframe_from_ctable, bc, slice(0, 2), ['a', 'b'], {}))
                        for i in [1, 2, 3])
        result = dd.optimize(dsk2, [('y', i) for i in [1, 2, 3]])
        assert result == expected


def test_fuse_ave_width():
    df = pd.DataFrame({'x': range(10)})
    df = dd.from_pandas(df, npartitions=5)

    s = ((df.x + 1) + (df.x + 2))

    with dask.set_options(fuse_ave_width=4):
        a = s.__dask_optimize__(s.dask, s.__dask_keys__())

    b = s.__dask_optimize__(s.dask, s.__dask_keys__())

    assert len(a) < len(b)
    assert len(a) <= 15
dask-0.16.0/dask/dataframe/tests/test_reshape.py000066400000000000000000000153311320364734500216020ustar00rootroot00000000000000
import numpy as np
import pandas as pd
import pandas.util.testing as tm
import pytest

import dask.dataframe as dd
from dask.dataframe.utils import assert_eq, make_meta


@pytest.mark.parametrize('data', [
    pd.Series([1, 1, 1, 2, 2, 1, 3, 4], dtype='category'),
    pd.Series(pd.Categorical([1, 1, 1, 2, 2, 1, 3, 4],
                             categories=[4, 3, 2, 1])),
    pd.DataFrame({'a': [1, 2, 3, 4, 4, 3, 2, 1],
                  'b': pd.Categorical(list('abcdabcd'))})]
)
def test_get_dummies(data):
    exp = pd.get_dummies(data)

    ddata = dd.from_pandas(data, 2)
    res = dd.get_dummies(ddata)
    assert_eq(res, exp)
    tm.assert_index_equal(res.columns, exp.columns)


def test_get_dummies_object():
    df = pd.DataFrame({'a': pd.Categorical([1, 2, 3, 4, 4, 3, 2, 1]),
                       'b': list('abcdabcd'),
                       'c': pd.Categorical(list('abcdabcd'))})
    ddf = dd.from_pandas(df, 2)

    # Explicitly exclude object columns
    exp = pd.get_dummies(df, columns=['a', 'c'])
    res = dd.get_dummies(ddf, columns=['a', 'c'])
    assert_eq(res, exp)
    tm.assert_index_equal(res.columns, exp.columns)

    with pytest.raises(NotImplementedError):
        dd.get_dummies(ddf)

    with pytest.raises(NotImplementedError):
        dd.get_dummies(ddf.b)

    with pytest.raises(NotImplementedError):
        dd.get_dummies(ddf, columns=['b'])


def test_get_dummies_kwargs():
    s = pd.Series([1, 1, 1, 2, 2, 1, 3, 4], dtype='category')
    exp = pd.get_dummies(s, prefix='X', prefix_sep='-')

    ds = dd.from_pandas(s, 2)
    res = dd.get_dummies(ds, prefix='X', prefix_sep='-')
    assert_eq(res, exp)
    tm.assert_index_equal(res.columns, pd.Index(['X-1', 'X-2', 'X-3', 'X-4']))

    exp = pd.get_dummies(s, drop_first=True)
    ds = dd.from_pandas(s, 2)
    res = dd.get_dummies(ds, drop_first=True)
    assert_eq(res, exp)
    tm.assert_index_equal(res.columns, exp.columns)

    # nan
    s = pd.Series([1, 1, 1, 2, np.nan, 3, np.nan, 5], dtype='category')
    exp = pd.get_dummies(s)
    ds = dd.from_pandas(s, 2)
    res = dd.get_dummies(ds)
    assert_eq(res, exp)
    tm.assert_index_equal(res.columns, exp.columns)

    # dummy_na
    exp = pd.get_dummies(s, dummy_na=True)
    ds = dd.from_pandas(s, 2)
    res =
dd.get_dummies(ds, dummy_na=True) assert_eq(res, exp) tm.assert_index_equal(res.columns, pd.Index([1, 2, 3, 5, np.nan])) msg = 'sparse=True is not supported' with pytest.raises(NotImplementedError) as err: dd.get_dummies(ds, sparse=True) assert msg in str(err.value) def test_get_dummies_errors(): with pytest.raises(NotImplementedError): # not Categorical s = pd.Series([1, 1, 1, 2, 2, 1, 3, 4]) ds = dd.from_pandas(s, 2) dd.get_dummies(ds) # unknown categories df = pd.DataFrame({'x': list('abcbc'), 'y': list('bcbcb')}) ddf = dd.from_pandas(df, npartitions=2) ddf._meta = make_meta({'x': 'category', 'y': 'category'}) with pytest.raises(NotImplementedError): dd.get_dummies(ddf) with pytest.raises(NotImplementedError): dd.get_dummies(ddf, columns=['x', 'y']) with pytest.raises(NotImplementedError): dd.get_dummies(ddf.x) @pytest.mark.parametrize('aggfunc', ['mean', 'sum', 'count']) def test_pivot_table(aggfunc): df = pd.DataFrame({'A': np.random.choice(list('XYZ'), size=100), 'B': np.random.randn(100), 'C': pd.Categorical(np.random.choice(list('abc'), size=100))}) ddf = dd.from_pandas(df, 5) res = dd.pivot_table(ddf, index='A', columns='C', values='B', aggfunc=aggfunc) exp = pd.pivot_table(df, index='A', columns='C', values='B', aggfunc=aggfunc) if aggfunc == 'count': # dask result cannot be int64 dtype depending on divisions because of NaN exp = exp.astype(np.float64) assert_eq(res, exp) # method res = ddf.pivot_table(index='A', columns='C', values='B', aggfunc=aggfunc) exp = df.pivot_table(index='A', columns='C', values='B', aggfunc=aggfunc) if aggfunc == 'count': # dask result cannot be int64 dtype depending on divisions because of NaN exp = exp.astype(np.float64) assert_eq(res, exp) def test_pivot_table_dtype(): df = pd.DataFrame({'A': list('AABB'), 'B': pd.Categorical(list('ABAB')), 'C': [1, 2, 3, 4]}) ddf = dd.from_pandas(df, 2) res = dd.pivot_table(ddf, index='A', columns='B', values='C', aggfunc='count') exp_index = pd.CategoricalIndex(['A', 'B'], name='B') exp = pd.Series([np.float64] * 2, index=exp_index) tm.assert_series_equal(res.dtypes, exp) exp = pd.pivot_table(df, index='A', columns='B', values='C', aggfunc='count').astype(np.float64) assert_eq(res, exp) def test_pivot_table_errors(): df = pd.DataFrame({'A': np.random.choice(list('abc'), size=10), 'B': np.random.randn(10), 'C': pd.Categorical(np.random.choice(list('abc'), size=10))}) ddf = dd.from_pandas(df, 2) msg = "'index' must be the name of an existing column" with pytest.raises(ValueError) as err: dd.pivot_table(ddf, index=['A'], columns='C', values='B') assert msg in str(err.value) msg = "'columns' must be the name of an existing column" with pytest.raises(ValueError) as err: dd.pivot_table(ddf, index='A', columns=['C'], values='B') assert msg in str(err.value) msg = "'values' must be the name of an existing column" with pytest.raises(ValueError) as err: dd.pivot_table(ddf, index='A', columns='C', values=['B']) assert msg in str(err.value) msg = "aggfunc must be either 'mean', 'sum' or 'count'" with pytest.raises(ValueError) as err: dd.pivot_table(ddf, index='A', columns='C', values='B', aggfunc=['sum']) assert msg in str(err.value) with pytest.raises(ValueError) as err: dd.pivot_table(ddf, index='A', columns='C', values='B', aggfunc='xx') assert msg in str(err.value) # unknown categories ddf._meta = make_meta({'A': object, 'B': float, 'C': 'category'}) msg = "'columns' must have known categories" with pytest.raises(ValueError) as err: dd.pivot_table(ddf, index='A', columns='C', values=['B']) assert msg in str(err.value) df 
= pd.DataFrame({'A': np.random.choice(list('abc'), size=10), 'B': np.random.randn(10), 'C': np.random.choice(list('abc'), size=10)}) ddf = dd.from_pandas(df, 2) msg = "'columns' must be category dtype" with pytest.raises(ValueError) as err: dd.pivot_table(ddf, index='A', columns='C', values='B') assert msg in str(err.value) dask-0.16.0/dask/dataframe/tests/test_rolling.py000066400000000000000000000244401320364734500216220ustar00rootroot00000000000000import pandas as pd import pytest import numpy as np import dask.dataframe as dd from dask.dataframe.utils import assert_eq N = 40 df = pd.DataFrame({'a': np.random.randn(N).cumsum(), 'b': np.random.randint(100, size=(N,)), 'c': np.random.randint(100, size=(N,)), 'd': np.random.randint(100, size=(N,)), 'e': np.random.randint(100, size=(N,))}) ddf = dd.from_pandas(df, 3) idx = (pd.date_range('2016-01-01', freq='3s', periods=100) | pd.date_range('2016-01-01', freq='5s', periods=100))[:N] ts = pd.DataFrame({'a': np.random.randn(N).cumsum(), 'b': np.random.randint(100, size=(N,)), 'c': np.random.randint(100, size=(N,)), 'd': np.random.randint(100, size=(N,)), 'e': np.random.randint(100, size=(N,))}, index=idx) dts = dd.from_pandas(ts, 3) def shifted_sum(df, before, after, c=0): a = df.shift(before) b = df.shift(-after) return df + a + b + c def ts_shifted_sum(df, before, after, c=0): a = df.shift(before.seconds) b = df.shift(-after.seconds) return df + a + b + c @pytest.mark.parametrize('npartitions', [1, 4]) def test_map_overlap(npartitions): ddf = dd.from_pandas(df, npartitions) for before, after in [(0, 3), (3, 0), (3, 3), (0, 0)]: # DataFrame res = ddf.map_overlap(shifted_sum, before, after, before, after, c=2) sol = shifted_sum(df, before, after, c=2) assert_eq(res, sol) # Series res = ddf.b.map_overlap(shifted_sum, before, after, before, after, c=2) sol = shifted_sum(df.b, before, after, c=2) assert_eq(res, sol) def test_map_partitions_names(): npartitions = 3 ddf = dd.from_pandas(df, npartitions) res = ddf.map_overlap(shifted_sum, 0, 3, 0, 3, c=2) res2 = ddf.map_overlap(shifted_sum, 0, 3, 0, 3, c=2) assert set(res.dask) == set(res2.dask) res3 = ddf.map_overlap(shifted_sum, 0, 3, 0, 3, c=3) assert res3._name != res._name # Difference is just the final map diff = set(res3.dask).difference(res.dask) assert len(diff) == npartitions res4 = ddf.map_overlap(shifted_sum, 3, 0, 0, 3, c=2) assert res4._name != res._name def test_map_partitions_errors(): # Non-integer with pytest.raises(ValueError): ddf.map_overlap(shifted_sum, 0.5, 3, 0, 2, c=2) # Negative with pytest.raises(ValueError): ddf.map_overlap(shifted_sum, 0, -5, 0, 2, c=2) # Partition size < window size with pytest.raises(NotImplementedError): ddf.map_overlap(shifted_sum, 0, 100, 0, 100, c=2).compute() # Offset with non-datetime with pytest.raises(TypeError): ddf.map_overlap(shifted_sum, pd.Timedelta('1s'), pd.Timedelta('1s'), 0, 2, c=2) def mad(x): return np.fabs(x - x.mean()).mean() def rolling_functions_tests(p, d): # Old-fashioned rolling API with pytest.warns(FutureWarning): assert_eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3)) assert_eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3)) assert_eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3)) assert_eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3)) assert_eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3)) assert_eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3)) assert_eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3)) assert_eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3)) # see note around test_rolling_dataframe for logic concerning precision 
assert_eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3), check_less_precise=True) assert_eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3), check_less_precise=True) assert_eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5)) assert_eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad)) # Test with edge-case window sizes assert_eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0)) assert_eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1)) # Test with kwargs assert_eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3)) pytest.importorskip("scipy") assert_eq(pd.rolling_window(p, 3, win_type='boxcar'), dd.rolling_window(d, 3, win_type='boxcar')) def test_rolling_functions_series(): ts = pd.Series(np.random.randn(25).cumsum()) dts = dd.from_pandas(ts, 3) rolling_functions_tests(ts, dts) def test_rolling_functions_dataframe(): df = pd.DataFrame({'a': np.random.randn(25).cumsum(), 'b': np.random.randint(100, size=(25,))}) ddf = dd.from_pandas(df, 3) rolling_functions_tests(df, ddf) rolling_method_args_check_less_precise = [ ('count', (), False), ('sum', (), False), ('mean', (), False), ('median', (), False), ('min', (), False), ('max', (), False), ('std', (), False), ('var', (), False), ('skew', (), True), # here and elsewhere, results for kurt and skew are ('kurt', (), True), # checked with check_less_precise=True so that we are # only looking at 3ish decimal places for the equality check # rather than 5ish. I have encountered a case where a test # seems to have failed due to numerical problems with kurt. # So far, I am only weakening the check for kurt and skew, # as they involve third degree powers and higher ('quantile', (.38,), False), ('apply', (mad,), False), ] @pytest.mark.parametrize('method,args,check_less_precise', rolling_method_args_check_less_precise) @pytest.mark.parametrize('window', [1, 2, 4, 5]) @pytest.mark.parametrize('center', [True, False]) def test_rolling_methods(method, args, window, center, check_less_precise): # DataFrame prolling = df.rolling(window, center=center) drolling = ddf.rolling(window, center=center) assert_eq(getattr(prolling, method)(*args), getattr(drolling, method)(*args), check_less_precise=check_less_precise) # Series prolling = df.a.rolling(window, center=center) drolling = ddf.a.rolling(window, center=center) assert_eq(getattr(prolling, method)(*args), getattr(drolling, method)(*args), check_less_precise=check_less_precise) def test_rolling_raises(): df = pd.DataFrame({'a': np.random.randn(25).cumsum(), 'b': np.random.randint(100, size=(25,))}) ddf = dd.from_pandas(df, 3) pytest.raises(ValueError, lambda: ddf.rolling(1.5)) pytest.raises(ValueError, lambda: ddf.rolling(-1)) pytest.raises(ValueError, lambda: ddf.rolling(3, min_periods=1.2)) pytest.raises(ValueError, lambda: ddf.rolling(3, min_periods=-2)) pytest.raises(ValueError, lambda: ddf.rolling(3, axis=10)) pytest.raises(ValueError, lambda: ddf.rolling(3, axis='coulombs')) pytest.raises(NotImplementedError, lambda: ddf.rolling(100).mean().compute()) def test_rolling_names(): df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) a = dd.from_pandas(df, npartitions=2) assert sorted(a.rolling(2).sum().dask) == sorted(a.rolling(2).sum().dask) def test_rolling_axis(): df = pd.DataFrame(np.random.randn(20, 16)) ddf = dd.from_pandas(df, npartitions=3) assert_eq(df.rolling(3, axis=0).mean(), ddf.rolling(3, axis=0).mean()) assert_eq(df.rolling(3, axis=1).mean(), ddf.rolling(3, axis=1).mean()) assert_eq(df.rolling(3, min_periods=1, axis=1).mean(), ddf.rolling(3, min_periods=1, 
axis=1).mean()) assert_eq(df.rolling(3, axis='columns').mean(), ddf.rolling(3, axis='columns').mean()) assert_eq(df.rolling(3, axis='rows').mean(), ddf.rolling(3, axis='rows').mean()) s = df[3] ds = ddf[3] assert_eq(s.rolling(5, axis=0).std(), ds.rolling(5, axis=0).std()) def test_rolling_partition_size(): df = pd.DataFrame(np.random.randn(50, 2)) ddf = dd.from_pandas(df, npartitions=5) for obj, dobj in [(df, ddf), (df[0], ddf[0])]: assert_eq(obj.rolling(10).mean(), dobj.rolling(10).mean()) assert_eq(obj.rolling(11).mean(), dobj.rolling(11).mean()) with pytest.raises(NotImplementedError): dobj.rolling(12).mean().compute() def test_rolling_repr(): ddf = dd.from_pandas(pd.DataFrame([10] * 30), npartitions=3) assert repr(ddf.rolling(4)) == 'Rolling [window=4,center=False,axis=0]' def test_time_rolling_repr(): assert repr(dts.rolling('4s')) == ( 'Rolling [window=4000000000,center=False,win_type=freq,axis=0]') def test_time_rolling_constructor(): result = dts.rolling('4s') assert result.window == '4s' assert result.min_periods is None assert result.win_type is None assert result._win_type == 'freq' assert result._window == 4000000000 # ns assert result._min_periods == 1 @pytest.mark.parametrize('method,args,check_less_precise', rolling_method_args_check_less_precise) @pytest.mark.parametrize('window', ['1S', '2S', '3S', pd.offsets.Second(5)]) def test_time_rolling_methods(method, args, window, check_less_precise): # DataFrame prolling = ts.rolling(window) drolling = dts.rolling(window) assert_eq(getattr(prolling, method)(*args), getattr(drolling, method)(*args), check_less_precise=check_less_precise) # Series prolling = ts.a.rolling(window) drolling = dts.a.rolling(window) assert_eq(getattr(prolling, method)(*args), getattr(drolling, method)(*args), check_less_precise=check_less_precise) @pytest.mark.parametrize('window', [pd.Timedelta('31s'), pd.Timedelta('1M')]) def test_time_rolling_window_too_large(window): with pytest.raises(ValueError): dts.map_overlap(ts_shifted_sum, window, window, window, window, c=2) @pytest.mark.parametrize('before, after', [ ('6s', '6s'), ('2s', '2s'), ('6s', '2s'), ]) def test_time_rolling(before, after): window = before before = pd.Timedelta(before) after = pd.Timedelta(after) result = dts.map_overlap(lambda x: x.rolling(window).count(), before, after) expected = dts.compute().rolling(window).count() assert_eq(result, expected) dask-0.16.0/dask/dataframe/tests/test_shuffle.py000066400000000000000000000607171320364734500216170ustar00rootroot00000000000000import os import pandas as pd import pytest import pickle import numpy as np import string from copy import copy import dask import dask.dataframe as dd from dask import delayed from dask.base import compute_as_if_collection from dask.threaded import get as threaded_get from dask.multiprocessing import get as mp_get from dask.dataframe.shuffle import (shuffle, partitioning_index, rearrange_by_column, rearrange_by_divisions, maybe_buffered_partd, remove_nans) from dask.dataframe.utils import assert_eq, make_meta dsk = {('x', 0): pd.DataFrame({'a': [1, 2, 3], 'b': [1, 4, 7]}, index=[0, 1, 3]), ('x', 1): pd.DataFrame({'a': [4, 5, 6], 'b': [2, 5, 8]}, index=[5, 6, 8]), ('x', 2): pd.DataFrame({'a': [7, 8, 9], 'b': [3, 6, 9]}, index=[9, 9, 9])} meta = make_meta({'a': 'i8', 'b': 'i8'}, index=pd.Index([], 'i8')) d = dd.DataFrame(dsk, 'x', meta, [0, 4, 9, 9]) full = d.compute() shuffle_func = shuffle # conflicts with keyword argument @pytest.mark.parametrize('shuffle', ['disk', 'tasks']) def test_shuffle(shuffle): s = 
shuffle_func(d, d.b, shuffle=shuffle) assert isinstance(s, dd.DataFrame) assert s.npartitions == d.npartitions x = dask.get(s.dask, (s._name, 0)) y = dask.get(s.dask, (s._name, 1)) assert not (set(x.b) & set(y.b)) # disjoint assert set(s.dask).issuperset(d.dask) assert shuffle_func(d, d.b)._name == shuffle_func(d, d.b)._name def test_default_partitions(): assert shuffle(d, d.b).npartitions == d.npartitions def test_shuffle_npartitions_task(): df = pd.DataFrame({'x': np.random.random(100)}) ddf = dd.from_pandas(df, npartitions=10) s = shuffle(ddf, ddf.x, shuffle='tasks', npartitions=17, max_branch=4) sc = s.compute(get=dask.get) assert s.npartitions == 17 assert set(s.dask).issuperset(set(ddf.dask)) assert len(sc) == len(df) assert list(s.columns) == list(df.columns) assert (set(map(tuple, sc.values.tolist())) == set(map(tuple, df.values.tolist()))) @pytest.mark.parametrize('method', ['disk', 'tasks']) def test_index_with_non_series(method): from dask.dataframe.tests.test_multi import list_eq list_eq(shuffle(d, d.b, shuffle=method), shuffle(d, 'b', shuffle=method)) @pytest.mark.parametrize('method', ['disk', 'tasks']) def test_index_with_dataframe(method): res1 = shuffle(d, d[['b']], shuffle=method).compute() res2 = shuffle(d, ['b'], shuffle=method).compute() res3 = shuffle(d, 'b', shuffle=method).compute() assert sorted(res1.values.tolist()) == sorted(res2.values.tolist()) assert sorted(res1.values.tolist()) == sorted(res3.values.tolist()) @pytest.mark.parametrize('method', ['disk', 'tasks']) def test_shuffle_from_one_partition_to_one_other(method): df = pd.DataFrame({'x': [1, 2, 3]}) a = dd.from_pandas(df, 1) for i in [1, 2]: b = shuffle(a, 'x', npartitions=i, shuffle=method) assert len(a.compute(get=dask.get)) == len(b.compute(get=dask.get)) @pytest.mark.parametrize('method', ['disk', 'tasks']) def test_shuffle_empty_partitions(method): df = pd.DataFrame({'x': [1, 2, 3] * 10}) ddf = dd.from_pandas(df, npartitions=3) s = shuffle(ddf, ddf.x, npartitions=6, shuffle=method) parts = compute_as_if_collection(dd.DataFrame, s.dask, s.__dask_keys__()) for p in parts: assert s.columns == p.columns df2 = pd.DataFrame({'i32': np.array([1, 2, 3] * 3, dtype='int32'), 'f32': np.array([None, 2.5, 3.5] * 3, dtype='float32'), 'cat': pd.Series(['a', 'b', 'c'] * 3).astype('category'), 'obj': pd.Series(['d', 'e', 'f'] * 3), 'bool': np.array([True, False, True] * 3), 'dt': pd.Series(pd.date_range('20130101', periods=9)), 'dt_tz': pd.Series(pd.date_range('20130101', periods=9, tz='US/Eastern')), 'td': pd.Series(pd.timedelta_range('2000', periods=9))}) def test_partitioning_index(): res = partitioning_index(df2.i32, 3) assert ((res < 3) & (res >= 0)).all() assert len(np.unique(res)) > 1 assert (partitioning_index(df2.i32, 3) == partitioning_index(df2.i32, 3)).all() res = partitioning_index(df2[['i32']], 3) assert ((res < 3) & (res >= 0)).all() assert len(np.unique(res)) > 1 res = partitioning_index(df2[['cat', 'bool', 'f32']], 2) assert ((0 <= res) & (res < 2)).all() res = partitioning_index(df2.index, 4) assert ((res < 4) & (res >= 0)).all() assert len(np.unique(res)) > 1 def test_partitioning_index_categorical_on_values(): df = pd.DataFrame({'a': list(string.ascii_letters), 'b': [1, 2, 3, 4] * 13}) df.a = df.a.astype('category') df2 = df.copy() df2.a = df2.a.cat.set_categories(list(reversed(df2.a.cat.categories))) res = partitioning_index(df.a, 5) res2 = partitioning_index(df2.a, 5) assert (res == res2).all() res = partitioning_index(df, 5) res2 = partitioning_index(df2, 5) assert (res == res2).all() 
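# Minimal sketch of the bucketing step behind a hash shuffle (illustrative
# only, not one of the tests above): ``partitioning_index`` hashes values and
# maps every row to one of ``npartitions`` buckets, so equal values always
# land in the same output partition.  The input Series is an arbitrary
# example; ``pd``, ``np`` and ``partitioning_index`` are the imports already
# used in this file.
def _sketch_partitioning_index():
    s = pd.Series([1, 2, 3, 1, 2, 3])
    buckets = np.asarray(partitioning_index(s, 3))

    # every bucket is a valid partition number
    assert ((buckets >= 0) & (buckets < 3)).all()
    # equal input values hash to the same bucket
    assert buckets[0] == buckets[3] and buckets[1] == buckets[4]
    return buckets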
@pytest.mark.parametrize('npartitions', [1, 4, 7, pytest.mark.slow(23)]) def test_set_index_tasks(npartitions): df = pd.DataFrame({'x': np.random.random(100), 'y': np.random.random(100) // 0.2}, index=np.random.random(100)) ddf = dd.from_pandas(df, npartitions=npartitions) assert_eq(df.set_index('x'), ddf.set_index('x', shuffle='tasks')) assert_eq(df.set_index('y'), ddf.set_index('y', shuffle='tasks')) assert_eq(df.set_index(df.x), ddf.set_index(ddf.x, shuffle='tasks')) assert_eq(df.set_index(df.x + df.y), ddf.set_index(ddf.x + ddf.y, shuffle='tasks')) assert_eq(df.set_index(df.x + 1), ddf.set_index(ddf.x + 1, shuffle='tasks')) assert_eq(df.set_index(df.index), ddf.set_index(ddf.index, shuffle='tasks')) @pytest.mark.parametrize('shuffle', ['disk', 'tasks']) def test_set_index_self_index(shuffle): df = pd.DataFrame({'x': np.random.random(100), 'y': np.random.random(100) // 0.2}, index=np.random.random(100)) a = dd.from_pandas(df, npartitions=4) b = a.set_index(a.index, shuffle=shuffle) assert a is b assert_eq(b, df.set_index(df.index)) @pytest.mark.parametrize('shuffle', ['tasks']) def test_set_index_names(shuffle): df = pd.DataFrame({'x': np.random.random(100), 'y': np.random.random(100) // 0.2}, index=np.random.random(100)) ddf = dd.from_pandas(df, npartitions=4) assert (set(ddf.set_index('x', shuffle=shuffle).dask) == set(ddf.set_index('x', shuffle=shuffle).dask)) assert (set(ddf.set_index('x', shuffle=shuffle).dask) != set(ddf.set_index('y', shuffle=shuffle).dask)) assert (set(ddf.set_index('x', max_branch=4, shuffle=shuffle).dask) != set(ddf.set_index('x', max_branch=3, shuffle=shuffle).dask)) assert (set(ddf.set_index('x', drop=True, shuffle=shuffle).dask) != set(ddf.set_index('x', drop=False, shuffle=shuffle).dask)) @pytest.mark.parametrize('shuffle', ['disk', 'tasks']) def test_set_index_tasks_2(shuffle): df = dd.demo.make_timeseries( '2000', '2004', {'value': float, 'name': str, 'id': int}, freq='2H', partition_freq='1M', seed=1) df2 = df.set_index('name', shuffle=shuffle) df2.value.sum().compute(get=dask.get) @pytest.mark.parametrize('shuffle', ['disk', 'tasks']) def test_set_index_tasks_3(shuffle): df = pd.DataFrame(np.random.random((10, 2)), columns=['x', 'y']) ddf = dd.from_pandas(df, npartitions=5) ddf2 = ddf.set_index('x', shuffle=shuffle, max_branch=2, npartitions=ddf.npartitions) df2 = df.set_index('x') assert_eq(df2, ddf2) assert ddf2.npartitions == ddf.npartitions @pytest.mark.parametrize('shuffle', ['tasks', 'disk']) def test_shuffle_sort(shuffle): df = pd.DataFrame({'x': [1, 2, 3, 2, 1], 'y': [9, 8, 7, 1, 5]}) ddf = dd.from_pandas(df, npartitions=3) df2 = df.set_index('x').sort_index() ddf2 = ddf.set_index('x', shuffle=shuffle) assert_eq(ddf2.loc[2:3], df2.loc[2:3]) @pytest.mark.parametrize('shuffle', ['tasks', 'disk']) @pytest.mark.parametrize('get', [threaded_get, mp_get]) def test_rearrange(shuffle, get): df = pd.DataFrame({'x': np.random.random(10)}) ddf = dd.from_pandas(df, npartitions=4) ddf2 = ddf.assign(y=ddf.x % 4) result = rearrange_by_column(ddf2, 'y', max_branch=32, shuffle=shuffle) assert result.npartitions == ddf.npartitions assert set(ddf.dask).issubset(result.dask) # Every value in exactly one partition a = result.compute(get=get) parts = get(result.dask, result.__dask_keys__()) for i in a.y.drop_duplicates(): assert sum(i in part.y for part in parts) == 1 def test_rearrange_by_column_with_narrow_divisions(): from dask.dataframe.tests.test_multi import list_eq A = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6], 'y': [1, 1, 2, 2, 3, 4]}) a = dd.repartition(A, [0, 
4, 5]) df = rearrange_by_divisions(a, 'x', (0, 2, 5)) list_eq(df, a) def test_maybe_buffered_partd(): import partd f = maybe_buffered_partd() p1 = f() assert isinstance(p1.partd, partd.Buffer) f2 = pickle.loads(pickle.dumps(f)) assert not f2.buffer p2 = f2() assert isinstance(p2.partd, partd.File) def test_set_index_with_explicit_divisions(): df = pd.DataFrame({'x': [4, 1, 2, 5]}, index=[10, 20, 30, 40]) ddf = dd.from_pandas(df, npartitions=2) def throw(*args, **kwargs): raise Exception() with dask.set_options(get=throw): ddf2 = ddf.set_index('x', divisions=[1, 3, 5]) assert ddf2.divisions == (1, 3, 5) df2 = df.set_index('x') assert_eq(ddf2, df2) # Divisions must be sorted with pytest.raises(ValueError): ddf.set_index('x', divisions=[3, 1, 5]) def test_set_index_divisions_2(): df = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6], 'y': list('abdabd')}) ddf = dd.from_pandas(df, 2) result = ddf.set_index('y', divisions=['a', 'c', 'd']) assert result.divisions == ('a', 'c', 'd') assert list(result.compute(get=dask.get).index[-2:]) == ['d', 'd'] def test_set_index_divisions_compute(): d2 = d.set_index('b', divisions=[0, 2, 9], compute=False) d3 = d.set_index('b', divisions=[0, 2, 9], compute=True) assert_eq(d2, d3) assert_eq(d2, full.set_index('b')) assert_eq(d3, full.set_index('b')) assert len(d2.dask) > len(d3.dask) d4 = d.set_index(d.b, divisions=[0, 2, 9], compute=False) d5 = d.set_index(d.b, divisions=[0, 2, 9], compute=True) exp = full.copy() exp.index = exp.b assert_eq(d4, d5) assert_eq(d4, exp) assert_eq(d5, exp) assert len(d4.dask) > len(d5.dask) def test_set_index_divisions_sorted(): p1 = pd.DataFrame({'x': [10, 11, 12], 'y': ['a', 'a', 'a']}) p2 = pd.DataFrame({'x': [13, 14, 15], 'y': ['b', 'b', 'c']}) p3 = pd.DataFrame({'x': [16, 17, 18], 'y': ['d', 'e', 'e']}) ddf = dd.DataFrame({('x', 0): p1, ('x', 1): p2, ('x', 2): p3}, 'x', p1, [None, None, None, None]) df = ddf.compute() def throw(*args, **kwargs): raise Exception("Shouldn't have computed") with dask.set_options(get=throw): res = ddf.set_index('x', divisions=[10, 13, 16, 18], sorted=True) assert_eq(res, df.set_index('x')) with dask.set_options(get=throw): res = ddf.set_index('y', divisions=['a', 'b', 'd', 'e'], sorted=True) assert_eq(res, df.set_index('y')) # with sorted=True, divisions must be same length as df.divisions with pytest.raises(ValueError): ddf.set_index('y', divisions=['a', 'b', 'c', 'd', 'e'], sorted=True) # Divisions must be sorted with pytest.raises(ValueError): ddf.set_index('y', divisions=['a', 'b', 'd', 'c'], sorted=True) @pytest.mark.parametrize('shuffle', ['disk', 'tasks']) def test_set_index_reduces_partitions_small(shuffle): df = pd.DataFrame({'x': np.random.random(100)}) ddf = dd.from_pandas(df, npartitions=50) ddf2 = ddf.set_index('x', shuffle=shuffle, npartitions='auto') assert ddf2.npartitions < 10 def make_part(n): return pd.DataFrame({'x': np.random.random(n), 'y': np.random.random(n)}) @pytest.mark.parametrize('shuffle', ['disk', 'tasks']) def test_set_index_reduces_partitions_large(shuffle): nbytes = 1e6 nparts = 50 n = int(nbytes / (nparts * 8)) ddf = dd.DataFrame({('x', i): (make_part, n) for i in range(nparts)}, 'x', make_part(1), [None] * (nparts + 1)) ddf2 = ddf.set_index('x', shuffle=shuffle, npartitions='auto', partition_size=nbytes) assert 1 < ddf2.npartitions < 20 @pytest.mark.parametrize('shuffle', ['disk', 'tasks']) def test_set_index_doesnt_increase_partitions(shuffle): nparts = 2 nbytes = 1e6 n = int(nbytes / (nparts * 8)) ddf = dd.DataFrame({('x', i): (make_part, n) for i in range(nparts)}, 
'x', make_part(1), [None] * (nparts + 1)) ddf2 = ddf.set_index('x', shuffle=shuffle, npartitions='auto', partition_size=nbytes) assert ddf2.npartitions <= ddf.npartitions @pytest.mark.parametrize('shuffle', ['disk', 'tasks']) def test_set_index_detects_sorted_data(shuffle): df = pd.DataFrame({'x': range(100), 'y': range(100)}) ddf = dd.from_pandas(df, npartitions=10, name='x', sort=False) ddf2 = ddf.set_index('x', shuffle=shuffle) assert len(ddf2.dask) < ddf.npartitions * 4 def test_set_index_sorts(): # https://github.com/dask/dask/issues/2288 vals = np.array([1348550149000000000, 1348550149000000000, 1348558142000000000, 1348558142000000000, 1348585928000000000, 1348585928000000000, 1348600739000000000, 1348601706000000000, 1348600739000000000, 1348601706000000000, 1348614789000000000, 1348614789000000000, 1348621037000000000, 1348621038000000000, 1348621040000000000, 1348621037000000000, 1348621038000000000, 1348621040000000000, 1348637628000000000, 1348638159000000000, 1348638160000000000, 1348638159000000000, 1348638160000000000, 1348637628000000000, 1348646354000000000, 1348646354000000000, 1348659107000000000, 1348657111000000000, 1348659107000000000, 1348657111000000000, 1348672876000000000, 1348672876000000000, 1348682787000000000, 1348681985000000000, 1348682787000000000, 1348681985000000000, 1348728167000000000, 1348728167000000000, 1348730745000000000, 1348730745000000000, 1348750198000000000, 1348750198000000000, 1348750198000000000, 1348753539000000000, 1348753539000000000, 1348753539000000000, 1348754449000000000, 1348754449000000000, 1348761333000000000, 1348761554000000000, 1348761610000000000, 1348761333000000000, 1348761554000000000, 1348761610000000000, 1348782624000000000, 1348782624000000000, 1348782624000000000, 1348782624000000000]) vals = pd.to_datetime(vals, unit='ns') breaks = [10, 36, 58] dfs = [] for i in range(len(breaks)): lo = sum(breaks[:i]) hi = sum(breaks[i:i + 1]) dfs.append(pd.DataFrame({"timestamp": vals[lo:hi]}, index=range(lo, hi))) ddf = dd.concat(dfs).clear_divisions() assert ddf.set_index("timestamp").index.compute().is_monotonic is True def test_set_index(): dsk = {('x', 0): pd.DataFrame({'a': [1, 2, 3], 'b': [4, 2, 6]}, index=[0, 1, 3]), ('x', 1): pd.DataFrame({'a': [4, 5, 6], 'b': [3, 5, 8]}, index=[5, 6, 8]), ('x', 2): pd.DataFrame({'a': [7, 8, 9], 'b': [9, 1, 8]}, index=[9, 9, 9])} d = dd.DataFrame(dsk, 'x', meta, [0, 4, 9, 9]) full = d.compute() d2 = d.set_index('b', npartitions=3) assert d2.npartitions == 3 assert d2.index.name == 'b' assert_eq(d2, full.set_index('b')) d3 = d.set_index(d.b, npartitions=3) assert d3.npartitions == 3 assert d3.index.name == 'b' assert_eq(d3, full.set_index(full.b)) d4 = d.set_index('b') assert d4.index.name == 'b' assert_eq(d4, full.set_index('b')) def test_set_index_interpolate(): df = pd.DataFrame({'x': [4, 1, 1, 3, 3], 'y': [1., 1, 1, 1, 2]}) d = dd.from_pandas(df, 2) d1 = d.set_index('x', npartitions=3) assert d1.npartitions == 3 assert set(d1.divisions) == set([1, 2, 3, 4]) d2 = d.set_index('y', npartitions=3) assert d2.divisions[0] == 1. assert 1. < d2.divisions[1] < d2.divisions[2] < 2. assert d2.divisions[3] == 2. 
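# --- Illustrative sketch (not part of the upstream test suite) --------------
# The interpolation tests adjacent to this point rely on ``set_index`` choosing
# new divisions from approximate quantiles of the index column when an explicit
# ``npartitions`` is requested. The helper below shows that behaviour in
# isolation; it assumes ``pd`` and ``dd`` are already imported at the top of
# this module, as in the surrounding tests.
def _example_interpolated_divisions():
    df = pd.DataFrame({'x': [4, 1, 1, 3, 3], 'y': [1., 1, 1, 1, 2]})
    ddf = dd.from_pandas(df, npartitions=2)
    # Requesting 3 partitions yields 4 division boundaries drawn from 'x'.
    # With only the values {1, 3, 4} present, a boundary (2) is interpolated
    # between existing values rather than repeating one of them, matching the
    # assertion ``set(d1.divisions) == set([1, 2, 3, 4])`` in the test above.
    return ddf.set_index('x', npartitions=3).divisions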
def test_set_index_interpolate_int(): L = sorted(list(range(0, 200, 10)) * 2) df = pd.DataFrame({'x': 2 * L}) d = dd.from_pandas(df, 2) d1 = d.set_index('x', npartitions=10) assert all(np.issubdtype(type(x), np.integer) for x in d1.divisions) def test_set_index_timezone(): s_naive = pd.Series(pd.date_range('20130101', periods=3)) s_aware = pd.Series(pd.date_range('20130101', periods=3, tz='US/Eastern')) df = pd.DataFrame({'tz': s_aware, 'notz': s_naive}) d = dd.from_pandas(df, 2) d1 = d.set_index('notz', npartitions=2) s1 = pd.DatetimeIndex(s_naive.values, dtype=s_naive.dtype) assert d1.divisions[0] == s_naive[0] == s1[0] assert d1.divisions[-1] == s_naive[2] == s1[2] # We currently lose "freq". Converting data with pandas-defined dtypes # to numpy or pure Python can be lossy like this. d2 = d.set_index('tz', npartitions=2) s2 = pd.DatetimeIndex(s_aware, dtype=s_aware.dtype) assert d2.divisions[0] == s2[0] assert d2.divisions[-1] == s2[2] assert d2.divisions[0].tz == s2[0].tz assert d2.divisions[0].tz is not None s2badtype = pd.DatetimeIndex(s_aware.values, dtype=s_naive.dtype) with pytest.raises(TypeError): d2.divisions[0] == s2badtype[0] @pytest.mark.parametrize('drop', [True, False]) def test_set_index_drop(drop): pdf = pd.DataFrame({'A': list('ABAABBABAA'), 'B': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 'C': [1, 2, 3, 2, 1, 3, 2, 4, 2, 3]}) ddf = dd.from_pandas(pdf, 3) assert_eq(ddf.set_index('A', drop=drop), pdf.set_index('A', drop=drop)) assert_eq(ddf.set_index('B', drop=drop), pdf.set_index('B', drop=drop)) assert_eq(ddf.set_index('C', drop=drop), pdf.set_index('C', drop=drop)) assert_eq(ddf.set_index(ddf.A, drop=drop), pdf.set_index(pdf.A, drop=drop)) assert_eq(ddf.set_index(ddf.B, drop=drop), pdf.set_index(pdf.B, drop=drop)) assert_eq(ddf.set_index(ddf.C, drop=drop), pdf.set_index(pdf.C, drop=drop)) # numeric columns pdf = pd.DataFrame({0: list('ABAABBABAA'), 1: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 2: [1, 2, 3, 2, 1, 3, 2, 4, 2, 3]}) ddf = dd.from_pandas(pdf, 3) assert_eq(ddf.set_index(0, drop=drop), pdf.set_index(0, drop=drop)) assert_eq(ddf.set_index(2, drop=drop), pdf.set_index(2, drop=drop)) def test_set_index_raises_error_on_bad_input(): df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7], 'b': [7, 6, 5, 4, 3, 2, 1]}) ddf = dd.from_pandas(df, 2) msg = r"Dask dataframe does not yet support multi-indexes" with pytest.raises(NotImplementedError) as err: ddf.set_index(['a', 'b']) assert msg in str(err.value) def test_set_index_sorted_true(): df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [10, 20, 30, 40], 'z': [4, 3, 2, 1]}) a = dd.from_pandas(df, 2, sort=False) assert not a.known_divisions b = a.set_index('x', sorted=True) assert b.known_divisions assert set(a.dask).issubset(set(b.dask)) for drop in [True, False]: assert_eq(a.set_index('x', drop=drop), df.set_index('x', drop=drop)) assert_eq(a.set_index(a.x, sorted=True, drop=drop), df.set_index(df.x, drop=drop)) assert_eq(a.set_index(a.x + 1, sorted=True, drop=drop), df.set_index(df.x + 1, drop=drop)) with pytest.raises(ValueError): a.set_index(a.z, sorted=True) def test_set_index_sorted_single_partition(): df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [1, 0, 1, 0]}) ddf = dd.from_pandas(df, npartitions=1) assert_eq(ddf.set_index('x', sorted=True), df.set_index('x')) def test_set_index_sorted_min_max_same(): a = pd.DataFrame({'x': [1, 2, 3], 'y': [0, 0, 0]}) b = pd.DataFrame({'x': [1, 2, 3], 'y': [1, 1, 1]}) aa = delayed(a) bb = delayed(b) df = dd.from_delayed([aa, bb], meta=a) assert not df.known_divisions df2 = df.set_index('y', sorted=True) assert 
df2.divisions == (0, 1, 1) def test_set_index_empty_partition(): test_vals = [1, 2, 3] converters = [ int, float, str, lambda x: pd.to_datetime(x, unit='ns'), ] for conv in converters: df = pd.DataFrame([{'x': conv(i), 'y': i} for i in test_vals], columns=['x', 'y']) ddf = dd.concat([ dd.from_pandas(df, npartitions=1), dd.from_pandas(df[df.y > df.y.max()], npartitions=1), ]) assert any(ddf.get_partition(p).compute().empty for p in range(ddf.npartitions)) assert assert_eq(ddf.set_index('x'), df.set_index('x')) def test_set_index_on_empty(): test_vals = [1, 2, 3, 4] converters = [ int, float, str, lambda x: pd.to_datetime(x, unit='ns'), ] for converter in converters: df = pd.DataFrame([{'x': converter(x), 'y': x} for x in test_vals]) ddf = dd.from_pandas(df, npartitions=4) assert ddf.npartitions > 1 ddf = ddf[ddf.y > df.y.max()].set_index('x') expected_df = df[df.y > df.y.max()].set_index('x') assert assert_eq(ddf, expected_df) assert ddf.npartitions == 1 def test_compute_divisions(): from dask.dataframe.shuffle import compute_divisions df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [10, 20, 30, 40], 'z': [4, 3, 2, 1]}, index=[1, 3, 10, 20]) a = dd.from_pandas(df, 2, sort=False) assert not a.known_divisions divisions = compute_divisions(a) b = copy(a) b.divisions = divisions assert_eq(a, b, check_divisions=False) assert b.known_divisions def test_temporary_directory(tmpdir): df = pd.DataFrame({'x': np.random.random(100), 'y': np.random.random(100), 'z': np.random.random(100)}) ddf = dd.from_pandas(df, npartitions=10, name='x', sort=False) with dask.set_options(temporary_directory=str(tmpdir), get=dask.multiprocessing.get): ddf2 = ddf.set_index('x', shuffle='disk') ddf2.compute() assert any(fn.endswith('.partd') for fn in os.listdir(str(tmpdir))) def test_empty_partitions(): # See https://github.com/dask/dask/issues/2408 df = pd.DataFrame({'a': list(range(10))}) df['b'] = df['a'] % 3 df['c'] = df['b'].astype(str) ddf = dd.from_pandas(df, npartitions=3) ddf = ddf.set_index('b') ddf = ddf.repartition(npartitions=3) ddf.get_partition(0).compute() assert_eq(ddf, df.set_index('b')) ddf = ddf.set_index('c') assert_eq(ddf, df.set_index('b').set_index('c')) def test_remove_nans(): tests = [ ((1, 1, 2), (1, 1, 2)), ((None, 1, 2), (1, 1, 2)), ((1, None, 2), (1, 2, 2)), ((1, 2, None), (1, 2, 2)), ((1, 2, None, None), (1, 2, 2, 2)), ((None, None, 1, 2), (1, 1, 1, 2)), ((1, None, None, 2), (1, 2, 2, 2)), ((None, 1, None, 2, None, 3, None), (1, 1, 2, 2, 3, 3, 3)), ] converters = [ (int, np.nan), (float, np.nan), (str, np.nan), (lambda x: pd.to_datetime(x, unit='ns'), np.datetime64('NaT')), ] for conv, none_val in converters: for inputs, expected in tests: params = [none_val if x is None else conv(x) for x in inputs] expected = [conv(x) for x in expected] assert remove_nans(params) == expected dask-0.16.0/dask/dataframe/tests/test_ufunc.py000066400000000000000000000243031320364734500212720ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import pytest pd = pytest.importorskip('pandas') import pandas.util.testing as tm import numpy as np import dask.array as da import dask.dataframe as dd from dask.dataframe.utils import assert_eq _BASE_UFUNCS = ['conj', 'exp', 'log', 'log2', 'log10', 'log1p', 'expm1', 'sqrt', 'square', 'sin', 'cos', 'tan', 'arcsin','arccos', 'arctan', 'sinh', 'cosh', 'tanh', 'arcsinh', 'arccosh', 'arctanh', 'deg2rad', 'rad2deg', 'isfinite', 'isinf', 'isnan', 'signbit', 'degrees', 'radians', 'rint', 'fabs', 'sign', 'absolute', 'floor', 'ceil', 'trunc', 
'logical_not'] @pytest.mark.parametrize('ufunc', _BASE_UFUNCS) def test_ufunc(ufunc): dafunc = getattr(da, ufunc) npfunc = getattr(np, ufunc) s = pd.Series(np.random.randint(1, 100, size=20)) ds = dd.from_pandas(s, 3) # applying Dask ufunc doesn't trigger computation with pytest.warns(None): # Some cause warnings (arcsine) assert isinstance(dafunc(ds), dd.Series) assert_eq(dafunc(ds), npfunc(s)) # applying NumPy ufunc triggers computation assert isinstance(npfunc(ds), pd.Series) assert_eq(npfunc(ds), npfunc(s)) # applying Dask ufunc to normal Series triggers computation assert isinstance(dafunc(s), pd.Series) assert_eq(dafunc(s), npfunc(s)) s = pd.Series(np.abs(np.random.randn(100))) ds = dd.from_pandas(s, 3) with pytest.warns(None): # applying Dask ufunc doesn't trigger computation assert isinstance(dafunc(ds), dd.Series) assert_eq(dafunc(ds), npfunc(s)) # applying NumPy ufunc triggers computation assert isinstance(npfunc(ds), pd.Series) assert_eq(npfunc(ds), npfunc(s)) # applying Dask ufunc to normal Series triggers computation assert isinstance(dafunc(s), pd.Series) assert_eq(dafunc(s), npfunc(s)) # DataFrame df = pd.DataFrame({'A': np.random.randint(1, 100, size=20), 'B': np.random.randint(1, 100, size=20), 'C': np.abs(np.random.randn(20))}) ddf = dd.from_pandas(df, 3) with pytest.warns(None): # applying Dask ufunc doesn't trigger computation assert isinstance(dafunc(ddf), dd.DataFrame) assert_eq(dafunc(ddf), npfunc(df)) # applying NumPy ufunc triggers computation assert isinstance(npfunc(ddf), pd.DataFrame) assert_eq(npfunc(ddf), npfunc(df)) # applying Dask ufunc to normal Dataframe triggers computation assert isinstance(dafunc(df), pd.DataFrame) assert_eq(dafunc(df), npfunc(df)) # Index if ufunc in ('logical_not', 'signbit', 'isnan', 'isinf', 'isfinite'): return with pytest.warns(None): assert isinstance(dafunc(ddf.index), dd.Index) assert_eq(dafunc(ddf.index), npfunc(df.index)) # applying NumPy ufunc triggers computation assert isinstance(npfunc(ddf.index), pd.Index) assert_eq(npfunc(ddf.index), npfunc(df.index)) # applying Dask ufunc to normal Series triggers computation with pytest.warns(None): # some (da.log) cause warnings assert isinstance(dafunc(df.index), pd.Index) assert_eq(dafunc(df), npfunc(df)) @pytest.mark.parametrize('ufunc', _BASE_UFUNCS) def test_ufunc_with_index(ufunc): dafunc = getattr(da, ufunc) npfunc = getattr(np, ufunc) s = pd.Series(np.random.randint(1, 100, size=20), index=list('abcdefghijklmnopqrst')) ds = dd.from_pandas(s, 3) # applying Dask ufunc doesn't trigger computation with pytest.warns(None): assert isinstance(dafunc(ds), dd.Series) assert_eq(dafunc(ds), npfunc(s)) # applying NumPy ufunc triggers computation assert isinstance(npfunc(ds), pd.Series) assert_eq(npfunc(ds), npfunc(s)) # applying Dask ufunc to normal Series triggers computation assert isinstance(dafunc(s), pd.Series) assert_eq(dafunc(s), npfunc(s)) s = pd.Series(np.abs(np.random.randn(20)), index=list('abcdefghijklmnopqrst')) ds = dd.from_pandas(s, 3) with pytest.warns(None): # applying Dask ufunc doesn't trigger computation assert isinstance(dafunc(ds), dd.Series) assert_eq(dafunc(ds), npfunc(s)) # applying NumPy ufunc triggers computation assert isinstance(npfunc(ds), pd.Series) assert_eq(npfunc(ds), npfunc(s)) # applying Dask ufunc to normal Series triggers computation assert isinstance(dafunc(s), pd.Series) assert_eq(dafunc(s), npfunc(s)) df = pd.DataFrame({'A': np.random.randint(1, 100, size=20), 'B': np.random.randint(1, 100, size=20), 'C': np.abs(np.random.randn(20))}, 
index=list('abcdefghijklmnopqrst')) ddf = dd.from_pandas(df, 3) with pytest.warns(None): # applying Dask ufunc doesn't trigger computation assert isinstance(dafunc(ddf), dd.DataFrame) assert_eq(dafunc(ddf), npfunc(df)) # applying NumPy ufunc triggers computation assert isinstance(npfunc(ddf), pd.DataFrame) assert_eq(npfunc(ddf), npfunc(df)) # applying Dask ufunc to normal DataFrame triggers computation assert isinstance(dafunc(df), pd.DataFrame) assert_eq(dafunc(df), npfunc(df)) @pytest.mark.parametrize('ufunc', ['isreal', 'iscomplex', 'real', 'imag', 'angle', 'fix']) def test_ufunc_array_wrap(ufunc): """ some np.ufuncs doesn't call __array_wrap__, it should work as below - da.ufunc(dd.Series) => dd.Series - da.ufunc(pd.Series) => np.ndarray - np.ufunc(dd.Series) => np.ndarray - np.ufunc(pd.Series) => np.ndarray """ dafunc = getattr(da, ufunc) npfunc = getattr(np, ufunc) s = pd.Series(np.random.randint(1, 100, size=20), index=list('abcdefghijklmnopqrst')) ds = dd.from_pandas(s, 3) # applying Dask ufunc doesn't trigger computation assert isinstance(dafunc(ds), dd.Series) assert_eq(dafunc(ds), pd.Series(npfunc(s), index=s.index)) assert isinstance(npfunc(ds), np.ndarray) tm.assert_numpy_array_equal(npfunc(ds), npfunc(s)) assert isinstance(dafunc(s), np.ndarray) tm.assert_numpy_array_equal(dafunc(s), npfunc(s)) df = pd.DataFrame({'A': np.random.randint(1, 100, size=20), 'B': np.random.randint(1, 100, size=20), 'C': np.abs(np.random.randn(20))}, index=list('abcdefghijklmnopqrst')) ddf = dd.from_pandas(df, 3) # applying Dask ufunc doesn't trigger computation assert isinstance(dafunc(ddf), dd.DataFrame) # result may be read-only ndarray exp = pd.DataFrame(npfunc(df).copy(), columns=df.columns, index=df.index) assert_eq(dafunc(ddf), exp) assert isinstance(npfunc(ddf), np.ndarray) tm.assert_numpy_array_equal(npfunc(ddf), npfunc(df)) assert isinstance(dafunc(df), np.ndarray) tm.assert_numpy_array_equal(dafunc(df), npfunc(df)) @pytest.mark.parametrize('ufunc', ['logaddexp', 'logaddexp2', 'arctan2', 'hypot', 'copysign', 'nextafter', 'ldexp', 'fmod', 'logical_and', 'logical_or', 'logical_xor', 'maximum', 'minimum', 'fmax', 'fmin']) def test_ufunc_with_2args(ufunc): dafunc = getattr(da, ufunc) npfunc = getattr(np, ufunc) s1 = pd.Series(np.random.randint(1, 100, size=20)) ds1 = dd.from_pandas(s1, 3) s2 = pd.Series(np.random.randint(1, 100, size=20)) ds2 = dd.from_pandas(s2, 4) # applying Dask ufunc doesn't trigger computation assert isinstance(dafunc(ds1, ds2), dd.Series) assert_eq(dafunc(ds1, ds2), npfunc(s1, s2)) # applying NumPy ufunc triggers computation assert isinstance(npfunc(ds1, ds2), pd.Series) assert_eq(npfunc(ds1, ds2), npfunc(s1, s2)) # applying Dask ufunc to normal Series triggers computation assert isinstance(dafunc(s1, s2), pd.Series) assert_eq(dafunc(s1, s2), npfunc(s1, s2)) df1 = pd.DataFrame(np.random.randint(1, 100, size=(20, 2)), columns=['A', 'B']) ddf1 = dd.from_pandas(df1, 3) df2 = pd.DataFrame(np.random.randint(1, 100, size=(20, 2)), columns=['A', 'B']) ddf2 = dd.from_pandas(df2, 4) # applying Dask ufunc doesn't trigger computation assert isinstance(dafunc(ddf1, ddf2), dd.DataFrame) assert_eq(dafunc(ddf1, ddf2), npfunc(df1, df2)) # applying NumPy ufunc triggers computation assert isinstance(npfunc(ddf1, ddf2), pd.DataFrame) assert_eq(npfunc(ddf1, ddf2), npfunc(df1, df2)) # applying Dask ufunc to normal DataFrame triggers computation assert isinstance(dafunc(df1, df2), pd.DataFrame) assert_eq(dafunc(df1, df2), npfunc(df1, df2)) def test_clip(): # clip internally calls 
dd.Series.clip s = pd.Series(np.random.randint(1, 100, size=20)) ds = dd.from_pandas(s, 3) # applying Dask ufunc doesn't trigger computation assert isinstance(da.clip(ds, 5, 50), dd.Series) assert_eq(da.clip(ds, 5, 50), np.clip(s, 5, 50)) # applying Dask ufunc doesn't trigger computation assert isinstance(np.clip(ds, 5, 50), dd.Series) assert_eq(np.clip(ds, 5, 50), np.clip(s, 5, 50)) # applying Dask ufunc to normal Series triggers computation assert isinstance(da.clip(s, 5, 50), pd.Series) assert_eq(da.clip(s, 5, 50), np.clip(s, 5, 50)) df = pd.DataFrame(np.random.randint(1, 100, size=(20, 2)), columns=['A', 'B']) ddf = dd.from_pandas(df, 3) # applying Dask ufunc doesn't trigger computation assert isinstance(da.clip(ddf, 5.5, 40.5), dd.DataFrame) assert_eq(da.clip(ddf, 5.5, 40.5), np.clip(df, 5.5, 40.5)) # applying Dask ufunc doesn't trigger computation assert isinstance(np.clip(ddf, 5.5, 40.5), dd.DataFrame) assert_eq(np.clip(ddf, 5.5, 40.5), np.clip(df, 5.5, 40.5)) # applying Dask ufunc to normal DataFrame triggers computation assert isinstance(da.clip(df, 5.5, 40.5), pd.DataFrame) assert_eq(da.clip(df, 5.5, 40.5), np.clip(df, 5.5, 40.5)) dask-0.16.0/dask/dataframe/tests/test_utils_dataframe.py000066400000000000000000000251511320364734500233200ustar00rootroot00000000000000import numpy as np import pandas as pd import pandas.util.testing as tm import dask.dataframe as dd from dask.dataframe.utils import (shard_df_on_index, meta_nonempty, make_meta, raise_on_meta_error, check_meta, UNKNOWN_CATEGORIES, PANDAS_VERSION) import pytest def test_shard_df_on_index(): df = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6], 'y': list('abdabd')}, index=[10, 20, 30, 40, 50, 60]) result = list(shard_df_on_index(df, [20, 50])) assert list(result[0].index) == [10] assert list(result[1].index) == [20, 30, 40] assert list(result[2].index) == [50, 60] def test_make_meta(): df = pd.DataFrame({'a': [1, 2, 3], 'b': list('abc'), 'c': [1., 2., 3.]}, index=[10, 20, 30]) # Pandas dataframe meta = make_meta(df) assert len(meta) == 0 assert (meta.dtypes == df.dtypes).all() assert isinstance(meta.index, type(df.index)) # Pandas series meta = make_meta(df.a) assert len(meta) == 0 assert meta.dtype == df.a.dtype assert isinstance(meta.index, type(df.index)) # Pandas index meta = make_meta(df.index) assert isinstance(meta, type(df.index)) assert len(meta) == 0 # Dask object ddf = dd.from_pandas(df, npartitions=2) assert make_meta(ddf) is ddf._meta # Dict meta = make_meta({'a': 'i8', 'b': 'O', 'c': 'f8'}) assert isinstance(meta, pd.DataFrame) assert len(meta) == 0 assert (meta.dtypes == df.dtypes).all() assert isinstance(meta.index, pd.RangeIndex) # Iterable meta = make_meta([('a', 'i8'), ('c', 'f8'), ('b', 'O')]) assert (meta.columns == ['a', 'c', 'b']).all() assert len(meta) == 0 assert (meta.dtypes == df.dtypes[meta.dtypes.index]).all() assert isinstance(meta.index, pd.RangeIndex) # Tuple meta = make_meta(('a', 'i8')) assert isinstance(meta, pd.Series) assert len(meta) == 0 assert meta.dtype == 'i8' assert meta.name == 'a' # With index meta = make_meta({'a': 'i8', 'b': 'i4'}, pd.Int64Index([1, 2], name='foo')) assert isinstance(meta.index, pd.Int64Index) assert len(meta.index) == 0 meta = make_meta(('a', 'i8'), pd.Int64Index([1, 2], name='foo')) assert isinstance(meta.index, pd.Int64Index) assert len(meta.index) == 0 # Categoricals meta = make_meta({'a': 'category'}) assert len(meta.a.cat.categories) == 1 assert meta.a.cat.categories[0] == UNKNOWN_CATEGORIES meta = make_meta(('a', 'category')) assert len(meta.cat.categories) == 
1 assert meta.cat.categories[0] == UNKNOWN_CATEGORIES # Numpy scalar meta = make_meta(np.float64(1.0)) assert isinstance(meta, np.float64) # Python scalar meta = make_meta(1.0) assert isinstance(meta, np.float64) # Timestamp x = pd.Timestamp(2000, 1, 1) meta = make_meta(x) assert meta is x # Dtype expressions meta = make_meta('i8') assert isinstance(meta, np.int64) meta = make_meta(float) assert isinstance(meta, np.dtype(float).type) meta = make_meta(np.dtype('bool')) assert isinstance(meta, np.bool_) assert pytest.raises(TypeError, lambda: make_meta(None)) def test_meta_nonempty(): df1 = pd.DataFrame({'A': pd.Categorical(['Alice', 'Bob', 'Carol']), 'B': list('abc'), 'C': 'bar', 'D': np.float32(1), 'E': np.int32(1), 'F': pd.Timestamp('2016-01-01'), 'G': pd.date_range('2016-01-01', periods=3, tz='America/New_York'), 'H': pd.Timedelta('1 hours', 'ms'), 'I': np.void(b' '), 'J': pd.Categorical([UNKNOWN_CATEGORIES] * 3)}, columns=list('DCBAHGFEIJ')) df2 = df1.iloc[0:0] df3 = meta_nonempty(df2) assert (df3.dtypes == df2.dtypes).all() assert df3['A'][0] == 'Alice' assert df3['B'][0] == 'foo' assert df3['C'][0] == 'foo' assert df3['D'][0] == np.float32(1) assert df3['D'][0].dtype == 'f4' assert df3['E'][0] == np.int32(1) assert df3['E'][0].dtype == 'i4' assert df3['F'][0] == pd.Timestamp('1970-01-01 00:00:00') assert df3['G'][0] == pd.Timestamp('1970-01-01 00:00:00', tz='America/New_York') assert df3['H'][0] == pd.Timedelta('1', 'ms') assert df3['I'][0] == 'foo' assert df3['J'][0] == UNKNOWN_CATEGORIES s = meta_nonempty(df2['A']) assert s.dtype == df2['A'].dtype assert (df3['A'] == s).all() def test_meta_duplicated(): df = pd.DataFrame(columns=['A', 'A', 'B']) res = meta_nonempty(df) exp = pd.DataFrame([['foo', 'foo', 'foo'], ['foo', 'foo', 'foo']], index=['a', 'b'], columns=['A', 'A', 'B']) tm.assert_frame_equal(res, exp) def test_meta_nonempty_empty_categories(): for dtype in ['O', 'f8', 'M8']: # Index idx = pd.CategoricalIndex([], pd.Index([], dtype=dtype), ordered=True, name='foo') res = meta_nonempty(idx) assert type(res) is pd.CategoricalIndex assert type(res.categories) is type(idx.categories) assert res.ordered == idx.ordered assert res.name == idx.name # Series s = idx.to_series() res = meta_nonempty(s) assert res.dtype == 'category' assert s.dtype == 'category' assert type(res.cat.categories) is type(s.cat.categories) assert res.cat.ordered == s.cat.ordered assert res.name == s.name def test_meta_nonempty_index(): idx = pd.RangeIndex(1, name='foo') res = meta_nonempty(idx) assert type(res) is pd.RangeIndex assert res.name == idx.name idx = pd.Int64Index([1], name='foo') res = meta_nonempty(idx) assert type(res) is pd.Int64Index assert res.name == idx.name idx = pd.Index(['a'], name='foo') res = meta_nonempty(idx) assert type(res) is pd.Index assert res.name == idx.name idx = pd.DatetimeIndex(['1970-01-01'], freq='d', tz='America/New_York', name='foo') res = meta_nonempty(idx) assert type(res) is pd.DatetimeIndex assert res.tz == idx.tz assert res.freq == idx.freq assert res.name == idx.name idx = pd.PeriodIndex(['1970-01-01'], freq='d', name='foo') res = meta_nonempty(idx) assert type(res) is pd.PeriodIndex assert res.freq == idx.freq assert res.name == idx.name idx = pd.TimedeltaIndex([np.timedelta64(1, 'D')], freq='d', name='foo') res = meta_nonempty(idx) assert type(res) is pd.TimedeltaIndex assert res.freq == idx.freq assert res.name == idx.name idx = pd.CategoricalIndex(['a'], ['a', 'b'], ordered=True, name='foo') res = meta_nonempty(idx) assert type(res) is pd.CategoricalIndex 
assert (res.categories == idx.categories).all() assert res.ordered == idx.ordered assert res.name == idx.name idx = pd.CategoricalIndex([], [UNKNOWN_CATEGORIES], ordered=True, name='foo') res = meta_nonempty(idx) assert type(res) is pd.CategoricalIndex assert res.ordered == idx.ordered assert res.name == idx.name levels = [pd.Int64Index([1], name='a'), pd.Float64Index([1.0], name='b')] idx = pd.MultiIndex(levels=levels, labels=[[0], [0]], names=['a', 'b']) res = meta_nonempty(idx) assert type(res) is pd.MultiIndex for idx1, idx2 in zip(idx.levels, res.levels): assert type(idx1) is type(idx2) assert idx1.name == idx2.name assert res.names == idx.names @pytest.mark.skipif(PANDAS_VERSION < '0.20.0', reason="Pandas < 0.20.0 doesn't support UInt64Index") def test_meta_nonempty_uint64index(): idx = pd.UInt64Index([1], name='foo') res = meta_nonempty(idx) assert type(res) is pd.UInt64Index assert res.name == idx.name def test_meta_nonempty_scalar(): meta = meta_nonempty(np.float64(1.0)) assert isinstance(meta, np.float64) x = pd.Timestamp(2000, 1, 1) meta = meta_nonempty(x) assert meta is x def test_raise_on_meta_error(): try: with raise_on_meta_error(): raise RuntimeError("Bad stuff") except Exception as e: assert e.args[0].startswith("Metadata inference failed.\n") assert 'RuntimeError' in e.args[0] else: assert False, "should have errored" try: with raise_on_meta_error("myfunc"): raise RuntimeError("Bad stuff") except Exception as e: assert e.args[0].startswith("Metadata inference failed in `myfunc`.\n") assert 'RuntimeError' in e.args[0] else: assert False, "should have errored" def test_check_meta(): df = pd.DataFrame({'a': ['x', 'y', 'z'], 'b': [True, False, True], 'c': [1, 2.5, 3.5], 'd': [1, 2, 3], 'e': pd.Categorical(['x', 'y', 'z'])}) meta = df.iloc[:0] # DataFrame metadata passthrough if correct assert check_meta(df, meta) is df # Series metadata passthrough if correct e = df.e assert check_meta(e, meta.e) is e # numeric_equal means floats and ints are equivalent d = df.d assert check_meta(d, meta.d.astype('f8'), numeric_equal=True) is d # Series metadata error with pytest.raises(ValueError) as err: check_meta(d, meta.d.astype('f8'), numeric_equal=False) assert str(err.value) == ('Metadata mismatch found.\n' '\n' 'Partition type: `Series`\n' '+----------+---------+\n' '| | dtype |\n' '+----------+---------+\n' '| Found | int64 |\n' '| Expected | float64 |\n' '+----------+---------+') # DataFrame metadata error meta2 = meta.astype({'a': 'category', 'd': 'f8'})[['a', 'b', 'c', 'd']] df2 = df[['a', 'b', 'd', 'e']] with pytest.raises(ValueError) as err: check_meta(df2, meta2, funcname='from_delayed') exp = ( 'Metadata mismatch found in `from_delayed`.\n' '\n' 'Partition type: `DataFrame`\n' '+--------+----------+----------+\n' '| Column | Found | Expected |\n' '+--------+----------+----------+\n' '| a | object | category |\n' '| c | - | float64 |\n' '| e | category | - |\n' '+--------+----------+----------+') assert str(err.value) == exp dask-0.16.0/dask/dataframe/tseries/000077500000000000000000000000001320364734500170535ustar00rootroot00000000000000dask-0.16.0/dask/dataframe/tseries/__init__.py000066400000000000000000000000001320364734500211520ustar00rootroot00000000000000dask-0.16.0/dask/dataframe/tseries/resample.py000066400000000000000000000131721320364734500212410ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import warnings import pandas as pd import numpy as np from ..core import DataFrame, Series from ..utils import PANDAS_VERSION 
from ...base import tokenize from ...utils import derived_from if PANDAS_VERSION >= '0.20.0': from pandas.core.resample import Resampler as pd_Resampler else: from pandas.tseries.resample import Resampler as pd_Resampler def getnanos(rule): try: return getattr(rule, 'nanos', None) except ValueError: return None def _resample(obj, rule, how, **kwargs): resampler = Resampler(obj, rule, **kwargs) if how is not None: w = FutureWarning(("how in .resample() is deprecated " "the new syntax is .resample(...)" ".{0}()").format(how)) warnings.warn(w) return getattr(resampler, how)() return resampler def _resample_series(series, start, end, reindex_closed, rule, resample_kwargs, how, fill_value): out = getattr(series.resample(rule, **resample_kwargs), how)() return out.reindex(pd.date_range(start, end, freq=rule, closed=reindex_closed), fill_value=fill_value) def _resample_bin_and_out_divs(divisions, rule, closed='left', label='left'): rule = pd.tseries.frequencies.to_offset(rule) g = pd.Grouper(freq=rule, how='count', closed=closed, label=label) # Determine bins to apply `how` to. Disregard labeling scheme. divs = pd.Series(range(len(divisions)), index=divisions) temp = divs.resample(rule, closed=closed, label='left').count() tempdivs = temp.loc[temp > 0].index # Cleanup closed == 'right' and label == 'right' res = pd.offsets.Nano() if hasattr(rule, 'delta') else pd.offsets.Day() if g.closed == 'right': newdivs = tempdivs + res else: newdivs = tempdivs if g.label == 'right': outdivs = tempdivs + rule else: outdivs = tempdivs newdivs = newdivs.tolist() outdivs = outdivs.tolist() # Adjust ends if newdivs[0] < divisions[0]: newdivs[0] = divisions[0] if newdivs[-1] < divisions[-1]: if len(newdivs) < len(divs): setter = lambda a, val: a.append(val) else: setter = lambda a, val: a.__setitem__(-1, val) setter(newdivs, divisions[-1]) if outdivs[-1] > divisions[-1]: setter(outdivs, outdivs[-1]) elif outdivs[-1] < divisions[-1]: setter(outdivs, temp.index[-1]) return tuple(map(pd.Timestamp, newdivs)), tuple(map(pd.Timestamp, outdivs)) class Resampler(object): def __init__(self, obj, rule, **kwargs): if not obj.known_divisions: msg = ("Can only resample dataframes with known divisions\n" "See dask.pydata.org/en/latest/dataframe-design.html#partitions\n" "for more information.") raise ValueError(msg) self.obj = obj rule = pd.tseries.frequencies.to_offset(rule) day_nanos = pd.tseries.frequencies.Day().nanos if getnanos(rule) and day_nanos % rule.nanos: raise NotImplementedError('Resampling frequency %s that does' ' not evenly divide a day is not ' 'implemented' % rule) self._rule = rule self._kwargs = kwargs def _agg(self, how, meta=None, fill_value=np.nan): rule = self._rule kwargs = self._kwargs name = 'resample-' + tokenize(self.obj, rule, kwargs, how) # Create a grouper to determine closed and label conventions newdivs, outdivs = _resample_bin_and_out_divs(self.obj.divisions, rule, **kwargs) # Repartition divs into bins. 
These won't match labels after mapping partitioned = self.obj.repartition(newdivs, force=True) keys = partitioned.__dask_keys__() dsk = partitioned.dask args = zip(keys, outdivs, outdivs[1:], ['left'] * (len(keys) - 1) + [None]) for i, (k, s, e, c) in enumerate(args): dsk[(name, i)] = (_resample_series, k, s, e, c, rule, kwargs, how, fill_value) # Infer output metadata meta_r = self.obj._meta_nonempty.resample(self._rule, **self._kwargs) meta = getattr(meta_r, how)() if isinstance(meta, pd.DataFrame): return DataFrame(dsk, name, meta, outdivs) return Series(dsk, name, meta, outdivs) @derived_from(pd_Resampler) def count(self): return self._agg('count', fill_value=0) @derived_from(pd_Resampler) def first(self): return self._agg('first') @derived_from(pd_Resampler) def last(self): return self._agg('last') @derived_from(pd_Resampler) def mean(self): return self._agg('mean') @derived_from(pd_Resampler) def min(self): return self._agg('min') @derived_from(pd_Resampler) def median(self): return self._agg('median') @derived_from(pd_Resampler) def max(self): return self._agg('max') @derived_from(pd_Resampler) def ohlc(self): return self._agg('ohlc') @derived_from(pd_Resampler) def prod(self): return self._agg('prod') @derived_from(pd_Resampler) def sem(self): return self._agg('sem') @derived_from(pd_Resampler) def std(self): return self._agg('std') @derived_from(pd_Resampler) def sum(self): return self._agg('sum') @derived_from(pd_Resampler) def var(self): return self._agg('var') dask-0.16.0/dask/dataframe/tseries/tests/000077500000000000000000000000001320364734500202155ustar00rootroot00000000000000dask-0.16.0/dask/dataframe/tseries/tests/__init__.py000066400000000000000000000000001320364734500223140ustar00rootroot00000000000000dask-0.16.0/dask/dataframe/tseries/tests/test_resample.py000066400000000000000000000037641320364734500234500ustar00rootroot00000000000000from itertools import product import pandas as pd import pytest from dask.dataframe.utils import assert_eq import dask.dataframe as dd def resample(df, freq, how='mean', **kwargs): return getattr(df.resample(freq, **kwargs), how)() @pytest.mark.parametrize(['obj', 'method', 'npartitions', 'freq', 'closed', 'label'], list(product(['series', 'frame'], ['count', 'mean', 'ohlc'], [2, 5], ['30T', 'h', 'd', 'w', 'M'], ['right', 'left'], ['right', 'left']))) def test_series_resample(obj, method, npartitions, freq, closed, label): index = pd.date_range('1-1-2000', '2-15-2000', freq='h') index = index.union(pd.date_range('4-15-2000', '5-15-2000', freq='h')) if obj == 'series': ps = pd.Series(range(len(index)), index=index) elif obj == 'frame': ps = pd.DataFrame({'a':range(len(index))}, index=index) ds = dd.from_pandas(ps, npartitions=npartitions) # Series output result = resample(ds, freq, how=method, closed=closed, label=label) expected = resample(ps, freq, how=method, closed=closed, label=label) assert_eq(result, expected, check_dtype=False) divisions = result.divisions assert expected.index[0] == divisions[0] assert expected.index[-1] == divisions[-1] def test_series_resample_not_implemented(): index = pd.date_range(start='20120102', periods=100, freq='T') s = pd.Series(range(len(index)), index=index) ds = dd.from_pandas(s, npartitions=5) # Frequency doesn't evenly divide day pytest.raises(NotImplementedError, lambda: resample(ds, '57T')) def test_unknown_divisions_error(): df = pd.DataFrame({'x': [1, 2, 3]}) ddf = dd.from_pandas(df, npartitions=2, sort=False) try: ddf.x.resample('1m').mean() assert False except ValueError as e: assert 
'divisions' in str(e) dask-0.16.0/dask/dataframe/utils.py000066400000000000000000000576171320364734500171270ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import re import textwrap from distutils.version import LooseVersion from collections import Iterator import sys import traceback from contextlib import contextmanager import numpy as np import pandas as pd import pandas.util.testing as tm from pandas.api.types import is_categorical_dtype, is_scalar try: from pandas.api.types import is_datetime64tz_dtype except ImportError: # pandas < 0.19.2 from pandas.core.common import is_datetime64tz_dtype from ..core import get_deps from ..local import get_sync from ..utils import asciitable PANDAS_VERSION = LooseVersion(pd.__version__) def shard_df_on_index(df, divisions): """ Shard a DataFrame by ranges on its index Examples -------- >>> df = pd.DataFrame({'a': [0, 10, 20, 30, 40], 'b': [5, 4 ,3, 2, 1]}) >>> df a b 0 0 5 1 10 4 2 20 3 3 30 2 4 40 1 >>> shards = list(shard_df_on_index(df, [2, 4])) >>> shards[0] a b 0 0 5 1 10 4 >>> shards[1] a b 2 20 3 3 30 2 >>> shards[2] a b 4 40 1 >>> list(shard_df_on_index(df, []))[0] # empty case a b 0 0 5 1 10 4 2 20 3 3 30 2 4 40 1 """ if isinstance(divisions, Iterator): divisions = list(divisions) if not len(divisions): yield df else: divisions = np.array(divisions) df = df.sort_index() index = df.index if is_categorical_dtype(index): index = index.as_ordered() indices = index.searchsorted(divisions) yield df.iloc[:indices[0]] for i in range(len(indices) - 1): yield df.iloc[indices[i]: indices[i + 1]] yield df.iloc[indices[-1]:] _META_TYPES = "meta : pd.DataFrame, pd.Series, dict, iterable, tuple, optional" _META_DESCRIPTION = """\ An empty ``pd.DataFrame`` or ``pd.Series`` that matches the dtypes and column names of the output. This metadata is necessary for many algorithms in dask dataframe to work. For ease of use, some alternative inputs are also available. Instead of a ``DataFrame``, a ``dict`` of ``{name: dtype}`` or iterable of ``(name, dtype)`` can be provided. Instead of a series, a tuple of ``(name, dtype)`` can be used. If not provided, dask will try to infer the metadata. This may lead to unexpected results, so providing ``meta`` is recommended. For more information, see ``dask.dataframe.utils.make_meta``. """ def insert_meta_param_description(*args, **kwargs): """Replace `$META` in docstring with param description. If pad keyword is provided, will pad description by that number of spaces (default is 8).""" if not args: return lambda f: insert_meta_param_description(f, **kwargs) f = args[0] indent = " " * kwargs.get('pad', 8) body = textwrap.wrap(_META_DESCRIPTION, initial_indent=indent, subsequent_indent=indent, width=78) descr = '{0}\n{1}'.format(_META_TYPES, '\n'.join(body)) if f.__doc__: if '$META' in f.__doc__: f.__doc__ = f.__doc__.replace('$META', descr) else: # Put it at the end of the parameters section parameter_header = 'Parameters\n%s----------' % indent[4:] first, last = re.split('Parameters\\n[ ]*----------', f.__doc__) parameters, rest = last.split('\n\n', 1) f.__doc__ = '{0}{1}{2}\n{3}{4}\n\n{5}'.format(first, parameter_header, parameters, indent[4:], descr, rest) return f @contextmanager def raise_on_meta_error(funcname=None): """Reraise errors in this block to show metadata inference failure. Parameters ---------- funcname : str, optional If provided, will be added to the error message to indicate the name of the method that failed. 
""" try: yield except Exception as e: exc_type, exc_value, exc_traceback = sys.exc_info() tb = ''.join(traceback.format_tb(exc_traceback)) msg = ("Metadata inference failed{0}.\n\n" "Original error is below:\n" "------------------------\n" "{1}\n\n" "Traceback:\n" "---------\n" "{2}" ).format(" in `{0}`".format(funcname) if funcname else "", repr(e), tb) raise ValueError(msg) UNKNOWN_CATEGORIES = '__UNKNOWN_CATEGORIES__' def has_known_categories(x): """Returns whether the categories in `x` are known. Parameters ---------- x : Series or CategoricalIndex """ x = getattr(x, '_meta', x) if isinstance(x, pd.Series): return UNKNOWN_CATEGORIES not in x.cat.categories elif isinstance(x, pd.CategoricalIndex): return UNKNOWN_CATEGORIES not in x.categories raise TypeError("Expected Series or CategoricalIndex") def strip_unknown_categories(x): """Replace any unknown categoricals with empty categoricals. Useful for preventing ``UNKNOWN_CATEGORIES`` from leaking into results. """ if isinstance(x, (pd.Series, pd.DataFrame)): x = x.copy() if isinstance(x, pd.DataFrame): cat_mask = x.dtypes == 'category' if cat_mask.any(): cats = cat_mask[cat_mask].index for c in cats: if not has_known_categories(x[c]): x[c].cat.set_categories([], inplace=True) elif isinstance(x, pd.Series): if is_categorical_dtype(x.dtype) and not has_known_categories(x): x.cat.set_categories([], inplace=True) if (isinstance(x.index, pd.CategoricalIndex) and not has_known_categories(x.index)): x.index = x.index.set_categories([]) elif isinstance(x, pd.CategoricalIndex) and not has_known_categories(x): x = x.set_categories([]) return x def clear_known_categories(x, cols=None, index=True): """Set categories to be unknown. Parameters ---------- x : DataFrame, Series, Index cols : iterable, optional If x is a DataFrame, set only categoricals in these columns to unknown. By default, all categorical columns are set to unknown categoricals index : bool, optional If True and x is a Series or DataFrame, set the clear known categories in the index as well. """ if isinstance(x, (pd.Series, pd.DataFrame)): x = x.copy() if isinstance(x, pd.DataFrame): mask = x.dtypes == 'category' if cols is None: cols = mask[mask].index elif not mask.loc[cols].all(): raise ValueError("Not all columns are categoricals") for c in cols: x[c].cat.set_categories([UNKNOWN_CATEGORIES], inplace=True) elif isinstance(x, pd.Series): if is_categorical_dtype(x.dtype): x.cat.set_categories([UNKNOWN_CATEGORIES], inplace=True) if index and isinstance(x.index, pd.CategoricalIndex): x.index = x.index.set_categories([UNKNOWN_CATEGORIES]) elif isinstance(x, pd.CategoricalIndex): x = x.set_categories([UNKNOWN_CATEGORIES]) return x def _empty_series(name, dtype, index=None): if isinstance(dtype, str) and dtype == 'category': return pd.Series(pd.Categorical([UNKNOWN_CATEGORIES]), name=name, index=index).iloc[:0] return pd.Series([], dtype=dtype, name=name, index=index) def make_meta(x, index=None): """Create an empty pandas object containing the desired metadata. Parameters ---------- x : dict, tuple, list, pd.Series, pd.DataFrame, pd.Index, dtype, scalar To create a DataFrame, provide a `dict` mapping of `{name: dtype}`, or an iterable of `(name, dtype)` tuples. To create a `Series`, provide a tuple of `(name, dtype)`. If a pandas object, names, dtypes, and index should match the desired output. If a dtype or scalar, a scalar of the same dtype is returned. index : pd.Index, optional Any pandas index to use in the metadata. If none provided, a `RangeIndex` will be used. 
Examples -------- >>> make_meta([('a', 'i8'), ('b', 'O')]) Empty DataFrame Columns: [a, b] Index: [] >>> make_meta(('a', 'f8')) Series([], Name: a, dtype: float64) >>> make_meta('i8') 1 """ if hasattr(x, '_meta'): return x._meta if isinstance(x, (pd.Series, pd.DataFrame)): return x.iloc[0:0] elif isinstance(x, pd.Index): return x[0:0] index = index if index is None else index[0:0] if isinstance(x, dict): return pd.DataFrame({c: _empty_series(c, d, index=index) for (c, d) in x.items()}, index=index) if isinstance(x, tuple) and len(x) == 2: return _empty_series(x[0], x[1], index=index) elif isinstance(x, (list, tuple)): if not all(isinstance(i, tuple) and len(i) == 2 for i in x): raise ValueError("Expected iterable of tuples of (name, dtype), " "got {0}".format(x)) return pd.DataFrame({c: _empty_series(c, d, index=index) for (c, d) in x}, columns=[c for c, d in x], index=index) elif not hasattr(x, 'dtype') and x is not None: # could be a string, a dtype object, or a python type. Skip `None`, # because it is implictly converted to `dtype('f8')`, which we don't # want here. try: dtype = np.dtype(x) return _scalar_from_dtype(dtype) except Exception: # Continue on to next check pass if is_scalar(x): return _nonempty_scalar(x) raise TypeError("Don't know how to create metadata from {0}".format(x)) if PANDAS_VERSION >= "0.20.0": _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index) else: _numeric_index_types = (pd.Int64Index, pd.Float64Index) def _nonempty_index(idx): typ = type(idx) if typ is pd.RangeIndex: return pd.RangeIndex(2, name=idx.name) elif typ in _numeric_index_types: return typ([1, 2], name=idx.name) elif typ is pd.Index: return pd.Index(['a', 'b'], name=idx.name) elif typ is pd.DatetimeIndex: start = '1970-01-01' # Need a non-monotonic decreasing index to avoid issues with # partial string indexing see https://github.com/dask/dask/issues/2389 # and https://github.com/pandas-dev/pandas/issues/16515 # This doesn't mean `_meta_nonempty` should ever rely on # `self.monotonic_increasing` or `self.monotonic_decreasing` data = [start, '1970-01-02'] if idx.freq is None else None return pd.DatetimeIndex(data, start=start, periods=2, freq=idx.freq, tz=idx.tz, name=idx.name) elif typ is pd.PeriodIndex: return pd.PeriodIndex(start='1970-01-01', periods=2, freq=idx.freq, name=idx.name) elif typ is pd.TimedeltaIndex: start = np.timedelta64(1, 'D') data = [start, start] if idx.freq is None else None return pd.TimedeltaIndex(data, start=start, periods=2, freq=idx.freq, name=idx.name) elif typ is pd.CategoricalIndex: if len(idx.categories): data = [idx.categories[0]] * 2 cats = idx.categories else: data = _nonempty_index(idx.categories) cats = None return pd.CategoricalIndex(data, categories=cats, ordered=idx.ordered, name=idx.name) elif typ is pd.MultiIndex: levels = [_nonempty_index(i) for i in idx.levels] labels = [[0, 0] for i in idx.levels] return pd.MultiIndex(levels=levels, labels=labels, names=idx.names) raise TypeError("Don't know how to handle index of " "type {0}".format(type(idx).__name__)) _simple_fake_mapping = { 'b': np.bool_(True), 'V': np.void(b' '), 'M': np.datetime64('1970-01-01'), 'm': np.timedelta64(1), 'S': np.str_('foo'), 'a': np.str_('foo'), 'U': np.unicode_('foo'), 'O': 'foo' } def _scalar_from_dtype(dtype): if dtype.kind in ('i', 'f', 'u'): return dtype.type(1) elif dtype.kind == 'c': return dtype.type(complex(1, 0)) elif dtype.kind in _simple_fake_mapping: o = _simple_fake_mapping[dtype.kind] return o.astype(dtype) if dtype.kind in ('m', 'M') else o else: 
raise TypeError("Can't handle dtype: {0}".format(dtype)) def _nonempty_scalar(x): if isinstance(x, (pd.Timestamp, pd.Timedelta, pd.Period)): return x elif np.isscalar(x): dtype = x.dtype if hasattr(x, 'dtype') else np.dtype(type(x)) return _scalar_from_dtype(dtype) else: raise TypeError("Can't handle meta of type " "'{0}'".format(type(x).__name__)) def _nonempty_series(s, idx): dtype = s.dtype if is_datetime64tz_dtype(dtype): entry = pd.Timestamp('1970-01-01', tz=dtype.tz) data = [entry, entry] elif is_categorical_dtype(dtype): if len(s.cat.categories): data = [s.cat.categories[0]] * 2 cats = s.cat.categories else: data = _nonempty_index(s.cat.categories) cats = None data = pd.Categorical(data, categories=cats, ordered=s.cat.ordered) else: entry = _scalar_from_dtype(dtype) data = np.array([entry, entry], dtype=dtype) return pd.Series(data, name=s.name, index=idx) def meta_nonempty(x): """Create a nonempty pandas object from the given metadata. Returns a pandas DataFrame, Series, or Index that contains two rows of fake data. """ if isinstance(x, pd.Index): return _nonempty_index(x) elif isinstance(x, pd.Series): idx = _nonempty_index(x.index) return _nonempty_series(x, idx) elif isinstance(x, pd.DataFrame): idx = _nonempty_index(x.index) data = {i: _nonempty_series(x.iloc[:, i], idx) for i, c in enumerate(x.columns)} res = pd.DataFrame(data, index=idx, columns=np.arange(len(x.columns))) res.columns = x.columns return res elif is_scalar(x): return _nonempty_scalar(x) else: raise TypeError("Expected Index, Series, DataFrame, or scalar, " "got {0}".format(type(x).__name__)) def check_meta(x, meta, funcname=None, numeric_equal=True): """Check that the dask metadata matches the result. If metadata matches, ``x`` is passed through unchanged. A nice error is raised if metadata doesn't match. Parameters ---------- x : DataFrame, Series, or Index meta : DataFrame, Series, or Index The expected metadata that ``x`` should match funcname : str, optional The name of the function in which the metadata was specified. If provided, the function name will be included in the error message to be more helpful to users. numeric_equal : bool, optionl If True, integer and floating dtypes compare equal. This is useful due to panda's implicit conversion of integer to floating upon encountering missingness, which is hard to infer statically. 
""" eq_types = {'i', 'f'} if numeric_equal else {} def equal_dtypes(a, b): if is_categorical_dtype(a) != is_categorical_dtype(b): return False if (a is '-' or b is '-'): return False if is_categorical_dtype(a) and is_categorical_dtype(b): # Pandas 0.21 CategoricalDtype compat if (PANDAS_VERSION >= '0.21.0' and (UNKNOWN_CATEGORIES in a.categories or UNKNOWN_CATEGORIES in b.categories)): return True return a == b return (a.kind in eq_types and b.kind in eq_types) or (a == b) if not isinstance(meta, (pd.Series, pd.Index, pd.DataFrame)): raise TypeError("Expected partition to be DataFrame, Series, or " "Index, got `%s`" % type(meta).__name__) if type(x) != type(meta): errmsg = ("Expected partition of type `%s` but got " "`%s`" % (type(meta).__name__, type(x).__name__)) elif isinstance(meta, pd.DataFrame): dtypes = pd.concat([x.dtypes, meta.dtypes], axis=1) bad = [(col, a, b) for col, a, b in dtypes.fillna('-').itertuples() if not equal_dtypes(a, b)] if not bad: return x errmsg = ("Partition type: `%s`\n%s" % (type(meta).__name__, asciitable(['Column', 'Found', 'Expected'], bad))) else: if equal_dtypes(x.dtype, meta.dtype): return x errmsg = ("Partition type: `%s`\n%s" % (type(meta).__name__, asciitable(['', 'dtype'], [('Found', x.dtype), ('Expected', meta.dtype)]))) raise ValueError("Metadata mismatch found%s.\n\n" "%s" % ((" in `%s`" % funcname if funcname else ""), errmsg)) ############################################################### # Testing ############################################################### def _check_dask(dsk, check_names=True, check_dtypes=True, result=None): import dask.dataframe as dd if hasattr(dsk, 'dask'): if result is None: result = dsk.compute(get=get_sync) if isinstance(dsk, dd.Index): assert isinstance(result, pd.Index), type(result) assert isinstance(dsk._meta, pd.Index), type(dsk._meta) if check_names: assert dsk.name == result.name assert dsk._meta.name == result.name if isinstance(result, pd.MultiIndex): assert result.names == dsk._meta.names if check_dtypes: assert_dask_dtypes(dsk, result) elif isinstance(dsk, dd.Series): assert isinstance(result, pd.Series), type(result) assert isinstance(dsk._meta, pd.Series), type(dsk._meta) if check_names: assert dsk.name == result.name, (dsk.name, result.name) assert dsk._meta.name == result.name if check_dtypes: assert_dask_dtypes(dsk, result) _check_dask(dsk.index, check_names=check_names, check_dtypes=check_dtypes, result=result.index) elif isinstance(dsk, dd.DataFrame): assert isinstance(result, pd.DataFrame), type(result) assert isinstance(dsk.columns, pd.Index), type(dsk.columns) assert isinstance(dsk._meta, pd.DataFrame), type(dsk._meta) if check_names: tm.assert_index_equal(dsk.columns, result.columns) tm.assert_index_equal(dsk._meta.columns, result.columns) if check_dtypes: assert_dask_dtypes(dsk, result) _check_dask(dsk.index, check_names=check_names, check_dtypes=check_dtypes, result=result.index) elif isinstance(dsk, dd.core.Scalar): assert (np.isscalar(result) or isinstance(result, (pd.Timestamp, pd.Timedelta))) if check_dtypes: assert_dask_dtypes(dsk, result) else: msg = 'Unsupported dask instance {0} found'.format(type(dsk)) raise AssertionError(msg) return result return dsk def _maybe_sort(a): # sort by value, then index try: if isinstance(a, pd.DataFrame): a = a.sort_values(by=a.columns.tolist()) else: a = a.sort_values() except (TypeError, IndexError, ValueError): pass return a.sort_index() def assert_eq(a, b, check_names=True, check_dtypes=True, check_divisions=True, check_index=True, **kwargs): if 
check_divisions: assert_divisions(a) assert_divisions(b) if hasattr(a, 'divisions') and hasattr(b, 'divisions'): at = type(np.asarray(a.divisions).tolist()[0]) # numpy to python bt = type(np.asarray(b.divisions).tolist()[0]) # scalar conversion assert at == bt, (at, bt) assert_sane_keynames(a) assert_sane_keynames(b) a = _check_dask(a, check_names=check_names, check_dtypes=check_dtypes) b = _check_dask(b, check_names=check_names, check_dtypes=check_dtypes) if not check_index: a = a.reset_index(drop=True) b = b.reset_index(drop=True) if isinstance(a, pd.DataFrame): a = _maybe_sort(a) b = _maybe_sort(b) tm.assert_frame_equal(a, b, **kwargs) elif isinstance(a, pd.Series): a = _maybe_sort(a) b = _maybe_sort(b) tm.assert_series_equal(a, b, check_names=check_names, **kwargs) elif isinstance(a, pd.Index): tm.assert_index_equal(a, b, **kwargs) else: if a == b: return True else: if np.isnan(a): assert np.isnan(b) else: assert np.allclose(a, b) return True def assert_dask_graph(dask, label): if hasattr(dask, 'dask'): dask = dask.dask assert isinstance(dask, dict) for k in dask: if isinstance(k, tuple): k = k[0] if k.startswith(label): return True raise AssertionError("given dask graph doesn't contain label: {label}" .format(label=label)) def assert_divisions(ddf): if not hasattr(ddf, 'divisions'): return if not hasattr(ddf, 'index'): return if not ddf.known_divisions: return def index(x): return (x if isinstance(x, pd.Index) else x.index.get_level_values(0)) results = get_sync(ddf.dask, ddf.__dask_keys__()) for i, df in enumerate(results[:-1]): if len(df): assert index(df).min() >= ddf.divisions[i] assert index(df).max() < ddf.divisions[i + 1] if len(results[-1]): assert index(results[-1]).min() >= ddf.divisions[-2] assert index(results[-1]).max() <= ddf.divisions[-1] def assert_sane_keynames(ddf): if not hasattr(ddf, 'dask'): return for k in ddf.dask.keys(): while isinstance(k, tuple): k = k[0] assert isinstance(k, (str, bytes)) assert len(k) < 100 assert ' ' not in k if sys.version_info[0] >= 3: assert k.split('-')[0].isidentifier() def assert_dask_dtypes(ddf, res, numeric_equal=True): """Check that the dask metadata matches the result. If `numeric_equal`, integer and floating dtypes compare equal. 
This is useful due to the implicit conversion of integer to floating upon encountering missingness, which is hard to infer statically.""" eq_types = {'O', 'S', 'U', 'a'} # treat object and strings alike if numeric_equal: eq_types.update(('i', 'f')) if isinstance(res, pd.DataFrame): for col, a, b in pd.concat([ddf._meta.dtypes, res.dtypes], axis=1).itertuples(): assert (a.kind in eq_types and b.kind in eq_types) or (a == b) elif isinstance(res, (pd.Series, pd.Index)): a = ddf._meta.dtype b = res.dtype assert (a.kind in eq_types and b.kind in eq_types) or (a == b) else: if hasattr(ddf._meta, 'dtype'): a = ddf._meta.dtype if not hasattr(res, 'dtype'): assert np.isscalar(res) b = np.dtype(type(res)) else: b = res.dtype assert (a.kind in eq_types and b.kind in eq_types) or (a == b) else: assert type(ddf._meta) == type(res) def assert_max_deps(x, n, eq=True): dependencies, dependents = get_deps(x.dask) if eq: assert max(map(len, dependencies.values())) == n else: assert max(map(len, dependencies.values())) <= n dask-0.16.0/dask/delayed.py000066400000000000000000000377311320364734500154450ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from collections import Iterator import operator import uuid try: from cytoolz import curry, first, pluck except ImportError: from toolz import curry, first, pluck from . import threaded from .base import Base, is_dask_collection, dont_optimize from .base import tokenize as _tokenize from .compatibility import apply from .core import quote from .context import _globals, globalmethod from .utils import funcname, methodcaller, OperatorMethodMixin from . import sharedict __all__ = ['Delayed', 'delayed'] def unzip(ls, nout): """Unzip a list of lists into ``nout`` outputs.""" out = list(zip(*ls)) if not out: out = [()] * nout return out def to_task_dask(expr): """Normalize a python object and merge all sub-graphs. - Replace ``Delayed`` with their keys - Convert literals to things the schedulers can handle - Extract dask graphs from all enclosed values Parameters ---------- expr : object The object to be normalized. This function knows how to handle ``Delayed``s, as well as most builtin python types. 
Returns ------- task : normalized task to be run dask : a merged dask graph that forms the dag for this task Examples -------- >>> a = delayed(1, 'a') >>> b = delayed(2, 'b') >>> task, dask = to_task_dask([a, b, 3]) >>> task # doctest: +SKIP ['a', 'b', 3] >>> dict(dask) # doctest: +SKIP {'a': 1, 'b': 2} >>> task, dasks = to_task_dask({a: 1, b: 2}) >>> task # doctest: +SKIP (dict, [['a', 1], ['b', 2]]) >>> dict(dask) # doctest: +SKIP {'a': 1, 'b': 2} """ if isinstance(expr, Delayed): return expr.key, expr.dask if is_dask_collection(expr): name = 'finalize-' + tokenize(expr, pure=True) keys = expr.__dask_keys__() opt = getattr(expr, '__dask_optimize__', dont_optimize) finalize, args = expr.__dask_postcompute__() dsk = {name: (finalize, keys) + args} dsk.update(opt(expr.__dask_graph__(), keys)) return name, dsk if isinstance(expr, Iterator): expr = list(expr) typ = type(expr) if typ in (list, tuple, set): args, dasks = unzip((to_task_dask(e) for e in expr), 2) args = list(args) dsk = sharedict.merge(*dasks) # Ensure output type matches input type return (args, dsk) if typ is list else ((typ, args), dsk) if typ is dict: args, dsk = to_task_dask([[k, v] for k, v in expr.items()]) return (dict, args), dsk if typ is slice: args, dsk = to_task_dask([expr.start, expr.stop, expr.step]) return (slice,) + tuple(args), dsk return expr, {} def tokenize(*args, **kwargs): """Mapping function from task -> consistent name. Parameters ---------- args : object Python objects that summarize the task. pure : boolean, optional If True, a consistent hash function is tried on the input. If this fails, then a unique identifier is used. If False (default), then a unique identifier is always used. """ pure = kwargs.pop('pure', None) if pure is None: pure = _globals.get('delayed_pure', False) if pure: return _tokenize(*args, **kwargs) else: return str(uuid.uuid4()) @curry def delayed(obj, name=None, pure=None, nout=None, traverse=True): """Wraps a function or object to produce a ``Delayed``. ``Delayed`` objects act as proxies for the object they wrap, but all operations on them are done lazily by building up a dask graph internally. Parameters ---------- obj : object The function or object to wrap name : string or hashable, optional The key to use in the underlying graph for the wrapped object. Defaults to hashing content. pure : bool, optional Indicates whether calling the resulting ``Delayed`` object is a pure operation. If True, arguments to the call are hashed to produce deterministic keys. If not provided, the default is to check the global ``delayed_pure`` setting, and fallback to ``False`` if unset. nout : int, optional The number of outputs returned from calling the resulting ``Delayed`` object. If provided, the ``Delayed`` output of the call can be iterated into ``nout`` objects, allowing for unpacking of results. By default iteration over ``Delayed`` objects will error. Note, that ``nout=1`` expects ``obj``, to return a tuple of length 1, and consequently for `nout=0``, ``obj`` should return an empty tuple. traverse : bool, optional By default dask traverses builtin python collections looking for dask objects passed to ``delayed``. For large collections this can be expensive. If ``obj`` doesn't contain any dask objects, set ``traverse=False`` to avoid doing this traversal. Examples -------- Apply to functions to delay execution: >>> def inc(x): ... 
return x + 1 >>> inc(10) 11 >>> x = delayed(inc, pure=True)(10) >>> type(x) == Delayed True >>> x.compute() 11 Can be used as a decorator: >>> @delayed(pure=True) ... def add(a, b): ... return a + b >>> add(1, 2).compute() 3 ``delayed`` also accepts an optional keyword ``pure``. If False, then subsequent calls will always produce a different ``Delayed``. This is useful for non-pure functions (such as ``time`` or ``random``). >>> from random import random >>> out1 = delayed(random, pure=False)() >>> out2 = delayed(random, pure=False)() >>> out1.key == out2.key False If you know a function is pure (output only depends on the input, with no global state), then you can set ``pure=True``. This will attempt to apply a consistent name to the output, but will fallback on the same behavior of ``pure=False`` if this fails. >>> @delayed(pure=True) ... def add(a, b): ... return a + b >>> out1 = add(1, 2) >>> out2 = add(1, 2) >>> out1.key == out2.key True Instead of setting ``pure`` as a property of the callable, you can also set it contextually using the ``delayed_pure`` setting. Note that this influences the *call* and not the *creation* of the callable: >>> import dask >>> @delayed ... def mul(a, b): ... return a * b >>> with dask.set_options(delayed_pure=True): ... print(mul(1, 2).key == mul(1, 2).key) True >>> with dask.set_options(delayed_pure=False): ... print(mul(1, 2).key == mul(1, 2).key) False The key name of the result of calling a delayed object is determined by hashing the arguments by default. To explicitly set the name, you can use the ``dask_key_name`` keyword when calling the function: >>> add(1, 2) # doctest: +SKIP Delayed('add-3dce7c56edd1ac2614add714086e950f') >>> add(1, 2, dask_key_name='three') Delayed('three') Note that objects with the same key name are assumed to have the same result. If you set the names explicitly you should make sure your key names are different for different results. >>> add(1, 2, dask_key_name='three') # doctest: +SKIP >>> add(2, 1, dask_key_name='three') # doctest: +SKIP >>> add(2, 2, dask_key_name='four') # doctest: +SKIP ``delayed`` can also be applied to objects to make operations on them lazy: >>> a = delayed([1, 2, 3]) >>> isinstance(a, Delayed) True >>> a.compute() [1, 2, 3] The key name of a delayed object is hashed by default if ``pure=True`` or is generated randomly if ``pure=False`` (default). To explicitly set the name, you can use the ``name`` keyword: >>> a = delayed([1, 2, 3], name='mylist') >>> a Delayed('mylist') Delayed results act as a proxy to the underlying object. Many operators are supported: >>> (a + [1, 2]).compute() [1, 2, 3, 1, 2] >>> a[1].compute() 2 Method and attribute access also works: >>> a.count(2).compute() 1 Note that if a method doesn't exist, no error will be thrown until runtime: >>> res = a.not_a_real_method() >>> res.compute() # doctest: +SKIP AttributeError("'list' object has no attribute 'not_a_real_method'") "Magic" methods (e.g. operators and attribute access) are assumed to be pure, meaning that subsequent calls must return the same results. This is not overrideable. To invoke an impure attribute or operator, you'd need to use it in a delayed function with ``pure=False``. >>> class Incrementer(object): ... def __init__(self): ... self._n = 0 ... @property ... def n(self): ... self._n += 1 ... return self._n ... 
>>> x = delayed(Incrementer()) >>> x.n.key == x.n.key True >>> get_n = delayed(lambda x: x.n, pure=False) >>> get_n(x).key == get_n(x).key False In contrast, methods are assumed to be impure by default, meaning that subsequent calls may return different results. To assume purity, set `pure=True`. This allows sharing of any intermediate values. >>> a.count(2, pure=True).key == a.count(2, pure=True).key True As with function calls, method calls also respect the global ``delayed_pure`` setting and support the ``dask_key_name`` keyword: >>> a.count(2, dask_key_name="count_2") Delayed('count_2') >>> with dask.set_options(delayed_pure=True): ... print(a.count(2).key == a.count(2).key) True """ if isinstance(obj, Delayed): return obj if is_dask_collection(obj) or traverse: task, dsk = to_task_dask(obj) else: task = quote(obj) dsk = {} if task is obj: if not (nout is None or (type(nout) is int and nout >= 0)): raise ValueError("nout must be None or a positive integer," " got %s" % nout) if not name: try: prefix = obj.__name__ except AttributeError: prefix = type(obj).__name__ token = tokenize(obj, nout, pure=pure) name = '%s-%s' % (prefix, token) return DelayedLeaf(obj, name, pure=pure, nout=nout) else: if not name: name = '%s-%s' % (type(obj).__name__, tokenize(task, pure=pure)) dsk = sharedict.merge(dsk, (name, {name: task})) return Delayed(name, dsk) def right(method): """Wrapper to create 'right' version of operator given left version""" def _inner(self, other): return method(other, self) return _inner def rebuild(dsk, key, length): return Delayed(key, dsk, length) class Delayed(Base, OperatorMethodMixin): """Represents a value to be computed by dask. Equivalent to the output from a single key in a dask graph. """ __slots__ = ('_key', 'dask', '_length') def __init__(self, key, dsk, length=None): self._key = key if type(dsk) is list: # compatibility with older versions dsk = sharedict.merge(*dsk) self.dask = dsk self._length = length def __dask_graph__(self): return self.dask def __dask_keys__(self): return [self.key] def __dask_tokenize__(self): return self.key __dask_scheduler__ = staticmethod(threaded.get) __dask_optimize__ = globalmethod(dont_optimize, key='delayed_optimize') def __dask_postcompute__(self): return first, () def __dask_postpersist__(self): return rebuild, (self._key, getattr(self, '_length', None)) def __getstate__(self): return tuple(getattr(self, i) for i in self.__slots__) def __setstate__(self, state): for k, v in zip(self.__slots__, state): setattr(self, k, v) @property def key(self): return self._key def __repr__(self): return "Delayed({0})".format(repr(self.key)) def __hash__(self): return hash(self.key) def __dir__(self): return dir(type(self)) def __getattr__(self, attr): if attr.startswith('_'): raise AttributeError("Attribute {0} not found".format(attr)) return DelayedAttr(self, attr) def __setattr__(self, attr, val): if attr in self.__slots__: object.__setattr__(self, attr, val) else: raise TypeError("Delayed objects are immutable") def __setitem__(self, index, val): raise TypeError("Delayed objects are immutable") def __iter__(self): if getattr(self, '_length', None) is None: raise TypeError("Delayed objects of unspecified length are " "not iterable") for i in range(self._length): yield self[i] def __len__(self): if getattr(self, '_length', None) is None: raise TypeError("Delayed objects of unspecified length have " "no len()") return self._length def __call__(self, *args, **kwargs): pure = kwargs.pop('pure', None) name = kwargs.pop('dask_key_name', None) func = 
delayed(apply, pure=pure) if name is not None: return func(self, args, kwargs, dask_key_name=name) return func(self, args, kwargs) def __bool__(self): raise TypeError("Truth of Delayed objects is not supported") __nonzero__ = __bool__ @classmethod def _get_binary_operator(cls, op, inv=False): method = delayed(right(op) if inv else op, pure=True) return lambda *args, **kwargs: method(*args, **kwargs) _get_unary_operator = _get_binary_operator def call_function(func, func_token, args, kwargs, pure=None, nout=None): dask_key_name = kwargs.pop('dask_key_name', None) pure = kwargs.pop('pure', pure) if dask_key_name is None: name = '%s-%s' % (funcname(func), tokenize(func_token, *args, pure=pure, **kwargs)) else: name = dask_key_name dsk = sharedict.ShareDict() args_dasks = list(map(to_task_dask, args)) for arg, d in args_dasks: if isinstance(d, sharedict.ShareDict): dsk.update_with_key(d) elif isinstance(arg, (str, tuple)): dsk.update_with_key(d, key=arg) else: dsk.update(d) args = tuple(pluck(0, args_dasks)) if kwargs: dask_kwargs, dsk2 = to_task_dask(kwargs) dsk.update(dsk2) task = (apply, func, list(args), dask_kwargs) else: task = (func,) + args dsk.update_with_key({name: task}, key=name) nout = nout if nout is not None else None return Delayed(name, dsk, length=nout) class DelayedLeaf(Delayed): __slots__ = ('_obj', '_key', '_pure', '_nout') def __init__(self, obj, key, pure=None, nout=None): self._obj = obj self._key = key self._pure = pure self._nout = nout @property def dask(self): return {self._key: self._obj} def __call__(self, *args, **kwargs): return call_function(self._obj, self._key, args, kwargs, pure=self._pure, nout=self._nout) class DelayedAttr(Delayed): __slots__ = ('_obj', '_attr', '_key') def __init__(self, obj, attr): self._obj = obj self._attr = attr self._key = 'getattr-%s' % tokenize(obj, attr, pure=True) @property def dask(self): dsk = {self._key: (getattr, self._obj._key, self._attr)} return sharedict.merge(self._obj.dask, (self._key, dsk)) def __call__(self, *args, **kwargs): return call_function(methodcaller(self._attr), self._attr, (self._obj,) + args, kwargs) for op in [operator.abs, operator.neg, operator.pos, operator.invert, operator.add, operator.sub, operator.mul, operator.floordiv, operator.truediv, operator.mod, operator.pow, operator.and_, operator.or_, operator.xor, operator.lshift, operator.rshift, operator.eq, operator.ge, operator.gt, operator.ne, operator.le, operator.lt, operator.getitem]: Delayed._bind_operator(op) dask-0.16.0/dask/diagnostics/000077500000000000000000000000001320364734500157605ustar00rootroot00000000000000dask-0.16.0/dask/diagnostics/__init__.py000066400000000000000000000003141320364734500200670ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from .profile import Profiler, ResourceProfiler, CacheProfiler from .progress import ProgressBar from .profile_visualize import visualize dask-0.16.0/dask/diagnostics/profile.py000066400000000000000000000260621320364734500200000ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from collections import namedtuple from itertools import starmap from timeit import default_timer from time import sleep from multiprocessing import Process, Pipe, current_process from ..callbacks import Callback from ..utils import import_required # Stores execution data for each task TaskData = namedtuple('TaskData', ('key', 'task', 'start_time', 'end_time', 'worker_id')) class Profiler(Callback): """A profiler for dask execution at 
the task level. Records the following information for each task: 1. Key 2. Task 3. Start time in seconds since the epoch 4. Finish time in seconds since the epoch 5. Worker id Examples -------- >>> from operator import add, mul >>> from dask.threaded import get >>> dsk = {'x': 1, 'y': (add, 'x', 10), 'z': (mul, 'y', 2)} >>> with Profiler() as prof: ... get(dsk, 'z') 22 >>> prof.results # doctest: +SKIP [('y', (add, 'x', 10), 1435352238.48039, 1435352238.480655, 140285575100160), ('z', (mul, 'y', 2), 1435352238.480657, 1435352238.480803, 140285566707456)] These results can be visualized in a bokeh plot using the ``visualize`` method. Note that this requires bokeh to be installed. >>> prof.visualize() # doctest: +SKIP You can activate the profiler globally >>> prof.register() # doctest: +SKIP If you use the profiler globally you will need to clear out old results manually. >>> prof.clear() """ def __init__(self): self._results = {} self.results = [] self._dsk = {} def __enter__(self): self.clear() return super(Profiler, self).__enter__() def _start(self, dsk): self._dsk.update(dsk) def _pretask(self, key, dsk, state): start = default_timer() self._results[key] = (key, dsk[key], start) def _posttask(self, key, value, dsk, state, id): end = default_timer() self._results[key] += (end, id) def _finish(self, dsk, state, failed): results = dict((k, v) for k, v in self._results.items() if len(v) == 5) self.results += list(starmap(TaskData, results.values())) self._results.clear() def _plot(self, **kwargs): from .profile_visualize import plot_tasks return plot_tasks(self.results, self._dsk, **kwargs) def visualize(self, **kwargs): """Visualize the profiling run in a bokeh plot. See also -------- dask.diagnostics.profile_visualize.visualize """ from .profile_visualize import visualize return visualize(self, **kwargs) def clear(self): """Clear out old results from profiler""" self._results.clear() del self.results[:] self._dsk = {} ResourceData = namedtuple('ResourceData', ('time', 'mem', 'cpu')) class ResourceProfiler(Callback): """A profiler for resource use. Records the following each timestep 1. Time in seconds since the epoch 2. Memory usage in MB 3. % CPU usage Examples -------- >>> from operator import add, mul >>> from dask.threaded import get >>> dsk = {'x': 1, 'y': (add, 'x', 10), 'z': (mul, 'y', 2)} >>> with ResourceProfiler() as prof: # doctest: +SKIP ... get(dsk, 'z') 22 These results can be visualized in a bokeh plot using the ``visualize`` method. Note that this requires bokeh to be installed. >>> prof.visualize() # doctest: +SKIP You can activate the profiler globally >>> prof.register() # doctest: +SKIP If you use the profiler globally you will need to clear out old results manually. >>> prof.clear() # doctest: +SKIP Note that when used as a context manager data will be collected throughout the duration of the enclosed block. In contrast, when registered globally data will only be collected while a dask scheduler is active. 
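Each collected sample is a ``ResourceData`` namedtuple with ``time``, ``mem`` and ``cpu`` fields, so readings can be accessed by name. The numbers below are illustrative only:

>>> prof.results[0]  # doctest: +SKIP
ResourceData(time=1435352238.48, mem=100.2, cpu=25.0)
>>> max(r.mem for r in prof.results)  # doctest: +SKIP
102.5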
""" def __init__(self, dt=1): self._dt = dt self._entered = False self._tracker = None self.results = [] def _is_running(self): return self._tracker is not None and self._tracker.is_alive() def _start_collect(self): if not self._is_running(): self._tracker = _Tracker(self._dt) self._tracker.start() self._tracker.parent_conn.send('collect') def _stop_collect(self): if self._is_running(): self._tracker.parent_conn.send('send_data') self.results.extend(starmap(ResourceData, self._tracker.parent_conn.recv())) def __enter__(self): self._entered = True self.clear() self._start_collect() return super(ResourceProfiler, self).__enter__() def __exit__(self, *args): self._entered = False self._stop_collect() self.close() super(ResourceProfiler, self).__exit__(*args) def _start(self, dsk): self._start_collect() def _finish(self, dsk, state, failed): if not self._entered: self._stop_collect() def close(self): """Shutdown the resource tracker process""" if self._is_running(): self._tracker.shutdown() self._tracker = None __del__ = close def clear(self): self.results = [] def _plot(self, **kwargs): from .profile_visualize import plot_resources return plot_resources(self.results, **kwargs) def visualize(self, **kwargs): """Visualize the profiling run in a bokeh plot. See also -------- dask.diagnostics.profile_visualize.visualize """ from .profile_visualize import visualize return visualize(self, **kwargs) class _Tracker(Process): """Background process for tracking resource usage""" def __init__(self, dt=1): psutil = import_required("psutil", "Tracking resource usage requires " "`psutil` to be installed") Process.__init__(self) self.daemon = True self.dt = dt self.parent = psutil.Process(current_process().pid) self.parent_conn, self.child_conn = Pipe() def shutdown(self): if not self.parent_conn.closed: self.parent_conn.send('shutdown') self.parent_conn.close() self.join() def _update_pids(self, pid): return [self.parent] + [p for p in self.parent.children() if p.pid != pid and p.status() != 'zombie'] def run(self): pid = current_process() data = [] while True: try: msg = self.child_conn.recv() except KeyboardInterrupt: continue if msg == 'shutdown': break elif msg == 'collect': ps = self._update_pids(pid) while not data or not self.child_conn.poll(): tic = default_timer() mem = cpu = 0 for p in ps: try: mem2 = p.memory_info().rss cpu2 = p.cpu_percent() except Exception: # could be a few different exceptions pass else: # Only increment if both were successful mem += mem2 cpu += cpu2 data.append((tic, mem / 1e6, cpu)) sleep(self.dt) elif msg == 'send_data': self.child_conn.send(data) data = [] self.child_conn.close() CacheData = namedtuple('CacheData', ('key', 'task', 'metric', 'cache_time', 'free_time')) class CacheProfiler(Callback): """A profiler for dask execution at the scheduler cache level. Records the following information for each task: 1. Key 2. Task 3. Size metric 4. Cache entry time in seconds since the epoch 5. Cache exit time in seconds since the epoch Examples -------- >>> from operator import add, mul >>> from dask.threaded import get >>> dsk = {'x': 1, 'y': (add, 'x', 10), 'z': (mul, 'y', 2)} >>> with CacheProfiler() as prof: ... get(dsk, 'z') 22 >>> prof.results # doctest: +SKIP [CacheData('y', (add, 'x', 10), 1, 1435352238.48039, 1435352238.480655), CacheData('z', (mul, 'y', 2), 1, 1435352238.480657, 1435352238.480803)] The default is to count each task (``metric`` is 1 for all tasks). Other functions may used as a metric instead through the ``metric`` keyword. 
For example, the ``nbytes`` function found in ``cachey`` can be used to measure the number of bytes in the cache. >>> from cachey import nbytes # doctest: +SKIP >>> with CacheProfiler(metric=nbytes) as prof: # doctest: +SKIP ... get(dsk, 'z') The profiling results can be visualized in a bokeh plot using the ``visualize`` method. Note that this requires bokeh to be installed. >>> prof.visualize() # doctest: +SKIP You can activate the profiler globally >>> prof.register() # doctest: +SKIP If you use the profiler globally you will need to clear out old results manually. >>> prof.clear() """ def __init__(self, metric=None, metric_name=None): self.clear() self._metric = metric if metric else lambda value: 1 if metric_name: self._metric_name = metric_name elif metric: self._metric_name = metric.__name__ else: self._metric_name = 'count' def __enter__(self): self.clear() return super(CacheProfiler, self).__enter__() def _start(self, dsk): self._dsk.update(dsk) if not self._start_time: self._start_time = default_timer() def _posttask(self, key, value, dsk, state, id): t = default_timer() self._cache[key] = (self._metric(value), t) for k in state['released'].intersection(self._cache): metric, start = self._cache.pop(k) self.results.append(CacheData(k, dsk[k], metric, start, t)) def _finish(self, dsk, state, failed): t = default_timer() for k, (metric, start) in self._cache.items(): self.results.append(CacheData(k, dsk[k], metric, start, t)) self._cache.clear() def _plot(self, **kwargs): from .profile_visualize import plot_cache return plot_cache(self.results, self._dsk, self._start_time, self._metric_name, **kwargs) def visualize(self, **kwargs): """Visualize the profiling run in a bokeh plot. See also -------- dask.diagnostics.profile_visualize.visualize """ from .profile_visualize import visualize return visualize(self, **kwargs) def clear(self): """Clear out old results from profiler""" self.results = [] self._cache = {} self._dsk = {} self._start_time = None dask-0.16.0/dask/diagnostics/profile_visualize.py000066400000000000000000000344671320364734500221030ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import random from bisect import bisect_left from distutils.version import LooseVersion from itertools import cycle from operator import itemgetter, add from ..utils import funcname, import_required from ..core import istask from ..compatibility import apply _BOKEH_MISSING_MSG = "Diagnostics plots require `bokeh` to be installed" _TOOLZ_MISSING_MSG = "Diagnostics plots require `toolz` to be installed" def unquote(expr): if istask(expr): if expr[0] in (tuple, list, set): return expr[0](map(unquote, expr[1])) elif (expr[0] == dict and isinstance(expr[1], list) and isinstance(expr[1][0], list)): return dict(map(unquote, expr[1])) return expr def pprint_task(task, keys, label_size=60): """Return a nicely formatted string for a task. Parameters ---------- task: Value within dask graph to render as text keys: iterable List of keys within dask graph label_size: int (optional) Maximum size of output label, defaults to 60 Examples -------- >>> from operator import add, mul >>> dsk = {'a': 1, ... 'b': 2, ... 'c': (add, 'a', 'b'), ... 'd': (add, (mul, 'a', 'b'), 'c'), ... 'e': (sum, ['a', 'b', 5]), ... 'f': (add,), ... 
'g': []} >>> pprint_task(dsk['c'], dsk) 'add(_, _)' >>> pprint_task(dsk['d'], dsk) 'add(mul(_, _), _)' >>> pprint_task(dsk['e'], dsk) 'sum([_, _, *])' >>> pprint_task(dsk['f'], dsk) 'add()' >>> pprint_task(dsk['g'], dsk) '[]' """ if istask(task): func = task[0] if func is apply: head = funcname(task[1]) tail = ')' args = unquote(task[2]) if len(task) > 2 else () kwargs = unquote(task[3]) if len(task) > 3 else {} else: if hasattr(func, 'funcs'): head = '('.join(funcname(f) for f in func.funcs) tail = ')' * len(func.funcs) else: head = funcname(task[0]) tail = ')' args = task[1:] kwargs = {} if args or kwargs: label_size2 = int((label_size - len(head) - len(tail)) // (len(args) + len(kwargs))) pprint = lambda t: pprint_task(t, keys, label_size2) if args: if label_size2 > 5: args = ', '.join(pprint(t) for t in args) else: args = '...' else: args = '' if kwargs: if label_size2 > 5: kwargs = ', ' + ', '.join('{0}={1}'.format(k, pprint(v)) for k, v in sorted(kwargs.items())) else: kwargs = ', ...' else: kwargs = '' return '{0}({1}{2}{3}'.format(head, args, kwargs, tail) elif isinstance(task, list): if not task: return '[]' elif len(task) > 3: result = pprint_task(task[:3], keys, label_size) return result[:-1] + ', ...]' else: label_size2 = int((label_size - 2 - 2 * len(task)) // len(task)) args = ', '.join(pprint_task(t, keys, label_size2) for t in task) return '[{0}]'.format(args) else: try: if task in keys: return '_' else: return '*' except TypeError: return '*' def get_colors(palette, funcs): """Get a dict mapping funcs to colors from palette. Parameters ---------- palette : string Name of the bokeh palette to use, must be a member of bokeh.palettes.all_palettes. funcs : iterable Iterable of function names """ palettes = import_required('bokeh.palettes', _BOKEH_MISSING_MSG) tz = import_required('toolz', _TOOLZ_MISSING_MSG) unique_funcs = list(sorted(tz.unique(funcs))) n_funcs = len(unique_funcs) palette_lookup = palettes.all_palettes[palette] keys = list(sorted(palette_lookup.keys())) index = keys[min(bisect_left(keys, n_funcs), len(keys) - 1)] palette = palette_lookup[index] # Some bokeh palettes repeat colors, we want just the unique set palette = list(tz.unique(palette)) if len(palette) > n_funcs: # Consistently shuffle palette - prevents just using low-range random.Random(42).shuffle(palette) color_lookup = dict(zip(unique_funcs, cycle(palette))) return [color_lookup[n] for n in funcs] def visualize(profilers, file_path=None, show=True, save=True, **kwargs): """Visualize the results of profiling in a bokeh plot. If multiple profilers are passed in, the plots are stacked vertically. Parameters ---------- profilers : profiler or list Profiler or list of profilers. file_path : string, optional Name of the plot output file. show : boolean, optional If True (default), the plot is opened in a browser. save : boolean, optional If True (default), the plot is saved to disk. **kwargs Other keyword arguments, passed to bokeh.figure. These will override all defaults set by visualize. Returns ------- The completed bokeh plot object. 
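Examples
--------
A minimal sketch, assuming bokeh is installed and that ``prof`` and
``rprof`` are profilers which have already recorded results:

>>> from dask.diagnostics import visualize  # doctest: +SKIP
>>> visualize([prof, rprof], file_path='profile.html',
...           show=False, save=True)  # doctest: +SKIP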
""" bp = import_required('bokeh.plotting', _BOKEH_MISSING_MSG) import bokeh if LooseVersion(bokeh.__version__) >= "0.12.10": from bokeh.io import state in_notebook = state.curstate().notebook else: from bokeh.io import _state in_notebook = _state._notebook if not in_notebook: file_path = file_path or "profile.html" bp.output_file(file_path) if not isinstance(profilers, list): profilers = [profilers] figs = [prof._plot(**kwargs) for prof in profilers] # Stack the plots if len(figs) == 1: p = figs[0] else: top = figs[0] for f in figs[1:]: f.x_range = top.x_range f.title = None f.min_border_top = 20 f.plot_height -= 30 for f in figs[:-1]: f.xaxis.axis_label = None f.min_border_bottom = 20 f.plot_height -= 30 for f in figs: f.min_border_left = 75 f.min_border_right = 75 p = bp.gridplot([[f] for f in figs]) if show: bp.show(p) if file_path and save: bp.save(p) return p def _get_figure_keywords(): bp = import_required('bokeh.plotting', _BOKEH_MISSING_MSG) o = bp.Figure.properties() o.add('tools') return o def plot_tasks(results, dsk, palette='Viridis', label_size=60, **kwargs): """Visualize the results of profiling in a bokeh plot. Parameters ---------- results : sequence Output of Profiler.results dsk : dict The dask graph being profiled. palette : string, optional Name of the bokeh palette to use, must be a member of bokeh.palettes.all_palettes. label_size: int (optional) Maximum size of output labels in plot, defaults to 60 **kwargs Other keyword arguments, passed to bokeh.figure. These will override all defaults set by visualize. Returns ------- The completed bokeh plot object. """ bp = import_required('bokeh.plotting', _BOKEH_MISSING_MSG) from bokeh.models import HoverTool tz = import_required('toolz', _TOOLZ_MISSING_MSG) defaults = dict(title="Profile Results", tools="hover,save,reset,xwheel_zoom,xpan", toolbar_location='above', plot_width=800, plot_height=300) defaults.update((k, v) for (k, v) in kwargs.items() if k in _get_figure_keywords()) if results: keys, tasks, starts, ends, ids = zip(*results) id_group = tz.groupby(itemgetter(4), results) timings = dict((k, [i.end_time - i.start_time for i in v]) for (k, v) in id_group.items()) id_lk = dict((t[0], n) for (n, t) in enumerate(sorted(timings.items(), key=itemgetter(1), reverse=True))) left = min(starts) right = max(ends) p = bp.figure(y_range=[str(i) for i in range(len(id_lk))], x_range=[0, right - left], **defaults) data = {} data['width'] = width = [e - s for (s, e) in zip(starts, ends)] data['x'] = [w / 2 + s - left for (w, s) in zip(width, starts)] data['y'] = [id_lk[i] + 1 for i in ids] data['function'] = funcs = [pprint_task(i, dsk, label_size) for i in tasks] data['color'] = get_colors(palette, funcs) data['key'] = [str(i) for i in keys] source = bp.ColumnDataSource(data=data) p.rect(source=source, x='x', y='y', height=1, width='width', color='color', line_color='gray') else: p = bp.figure(y_range=[str(i) for i in range(8)], x_range=[0, 10], **defaults) p.grid.grid_line_color = None p.axis.axis_line_color = None p.axis.major_tick_line_color = None p.yaxis.axis_label = "Worker ID" p.xaxis.axis_label = "Time (s)" hover = p.select(HoverTool) hover.tooltips = """
Key:  @key
Task:  @function
""" hover.point_policy = 'follow_mouse' return p def plot_resources(results, palette='Viridis', **kwargs): """Plot resource usage in a bokeh plot. Parameters ---------- results : sequence Output of ResourceProfiler.results palette : string, optional Name of the bokeh palette to use, must be a member of bokeh.palettes.all_palettes. **kwargs Other keyword arguments, passed to bokeh.figure. These will override all defaults set by plot_resources. Returns ------- The completed bokeh plot object. """ bp = import_required('bokeh.plotting', _BOKEH_MISSING_MSG) from bokeh import palettes from bokeh.models import LinearAxis, Range1d defaults = dict(title="Profile Results", tools="save,reset,xwheel_zoom,xpan", toolbar_location='above', plot_width=800, plot_height=300) defaults.update((k, v) for (k, v) in kwargs.items() if k in _get_figure_keywords()) if results: t, mem, cpu = zip(*results) left, right = min(t), max(t) t = [i - left for i in t] p = bp.figure(y_range=fix_bounds(0, max(cpu), 100), x_range=fix_bounds(0, right - left, 1), **defaults) else: t = mem = cpu = [] p = bp.figure(y_range=(0, 100), x_range=(0, 1), **defaults) colors = palettes.all_palettes[palette][6] p.line(t, cpu, color=colors[0], line_width=4, legend='% CPU') p.yaxis.axis_label = "% CPU" p.extra_y_ranges = {'memory': Range1d(*fix_bounds(min(mem) if mem else 0, max(mem) if mem else 100, 100))} p.line(t, mem, color=colors[2], y_range_name='memory', line_width=4, legend='Memory') p.add_layout(LinearAxis(y_range_name='memory', axis_label='Memory (MB)'), 'right') p.xaxis.axis_label = "Time (s)" return p def fix_bounds(start, end, min_span): """Adjust end point to ensure span of at least `min_span`""" return start, max(end, start + min_span) def plot_cache(results, dsk, start_time, metric_name, palette='Viridis', label_size=60, **kwargs): """Visualize the results of profiling in a bokeh plot. Parameters ---------- results : sequence Output of CacheProfiler.results dsk : dict The dask graph being profiled. start_time : float Start time of the profile. metric_name : string Metric used to measure cache size palette : string, optional Name of the bokeh palette to use, must be a member of bokeh.palettes.all_palettes. label_size: int (optional) Maximum size of output labels in plot, defaults to 60 **kwargs Other keyword arguments, passed to bokeh.figure. These will override all defaults set by visualize. Returns ------- The completed bokeh plot object. 
""" bp = import_required('bokeh.plotting', _BOKEH_MISSING_MSG) from bokeh.models import HoverTool tz = import_required('toolz', _TOOLZ_MISSING_MSG) defaults = dict(title="Profile Results", tools="hover,save,reset,wheel_zoom,xpan", toolbar_location='above', plot_width=800, plot_height=300) defaults.update((k, v) for (k, v) in kwargs.items() if k in _get_figure_keywords()) if results: starts, ends = list(zip(*results))[3:] tics = list(sorted(tz.unique(starts + ends))) groups = tz.groupby(lambda d: pprint_task(d[1], dsk, label_size), results) data = {} for k, vals in groups.items(): cnts = dict.fromkeys(tics, 0) for v in vals: cnts[v.cache_time] += v.metric cnts[v.free_time] -= v.metric data[k] = [0] + list(tz.accumulate(add, tz.pluck(1, sorted(cnts.items())))) tics = [0] + [i - start_time for i in tics] p = bp.figure(x_range=[0, max(tics)], **defaults) for (key, val), color in zip(data.items(), get_colors(palette, data.keys())): p.line('x', 'y', line_color=color, line_width=3, source=bp.ColumnDataSource({'x': tics, 'y': val, 'label': [key for i in val]})) else: p = bp.figure(y_range=[0, 10], x_range=[0, 10], **defaults) p.yaxis.axis_label = "Cache Size ({0})".format(metric_name) p.xaxis.axis_label = "Time (s)" hover = p.select(HoverTool) hover.tooltips = """
Task:  @label
""" return p dask-0.16.0/dask/diagnostics/progress.py000066400000000000000000000101531320364734500201760ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import sys import threading import time from timeit import default_timer from ..callbacks import Callback from ..utils import ignoring def format_time(t): """Format seconds into a human readable form. >>> format_time(10.4) '10.4s' >>> format_time(1000.4) '16min 40.4s' """ m, s = divmod(t, 60) h, m = divmod(m, 60) if h: return '{0:2.0f}hr {1:2.0f}min {2:4.1f}s'.format(h, m, s) elif m: return '{0:2.0f}min {1:4.1f}s'.format(m, s) else: return '{0:4.1f}s'.format(s) class ProgressBar(Callback): """A progress bar for dask. Parameters ---------- minimum : int, optional Minimum time threshold in seconds before displaying a progress bar. Default is 0 (always display) width : int, optional Width of the bar dt : float, optional Update resolution in seconds, default is 0.1 seconds Examples -------- Below we create a progress bar with a minimum threshold of 1 second before displaying. For cheap computations nothing is shown: >>> with ProgressBar(minimum=1.0): # doctest: +SKIP ... out = some_fast_computation.compute() But for expensive computations a full progress bar is displayed: >>> with ProgressBar(minimum=1.0): # doctest: +SKIP ... out = some_slow_computation.compute() [########################################] | 100% Completed | 10.4 s The duration of the last computation is available as an attribute >>> pbar = ProgressBar() >>> with pbar: # doctest: +SKIP ... out = some_computation.compute() [########################################] | 100% Completed | 10.4 s >>> pbar.last_duration # doctest: +SKIP 10.4 You can also register a progress bar so that it displays for all computations: >>> pbar = ProgressBar() # doctest: +SKIP >>> pbar.register() # doctest: +SKIP >>> some_slow_computation.compute() # doctest: +SKIP [########################################] | 100% Completed | 10.4 s """ def __init__(self, minimum=0, width=40, dt=0.1): self._minimum = minimum self._width = width self._dt = dt self.last_duration = 0 def _start(self, dsk): self._state = None self._start_time = default_timer() # Start background thread self._running = True self._timer = threading.Thread(target=self._timer_func) self._timer.daemon = True self._timer.start() def _pretask(self, key, dsk, state): self._state = state sys.stdout.flush() def _finish(self, dsk, state, errored): self._running = False self._timer.join() elapsed = default_timer() - self._start_time self.last_duration = elapsed if elapsed < self._minimum: return if not errored: self._draw_bar(1, elapsed) else: self._update_bar(elapsed) sys.stdout.write('\n') sys.stdout.flush() def _timer_func(self): """Background thread for updating the progress bar""" while self._running: elapsed = default_timer() - self._start_time if elapsed > self._minimum: self._update_bar(elapsed) time.sleep(self._dt) def _update_bar(self, elapsed): s = self._state if not s: self._draw_bar(0, elapsed) return ndone = len(s['finished']) ntasks = sum(len(s[k]) for k in ['ready', 'waiting', 'running']) + ndone self._draw_bar(ndone / ntasks if ntasks else 0, elapsed) def _draw_bar(self, frac, elapsed): bar = '#' * int(self._width * frac) percent = int(100 * frac) elapsed = format_time(elapsed) msg = '\r[{0:<{1}}] | {2}% Completed | {3}'.format(bar, self._width, percent, elapsed) with ignoring(ValueError): sys.stdout.write(msg) sys.stdout.flush() 
dask-0.16.0/dask/diagnostics/tests/000077500000000000000000000000001320364734500171225ustar00rootroot00000000000000dask-0.16.0/dask/diagnostics/tests/__init__.py000066400000000000000000000000001320364734500212210ustar00rootroot00000000000000dask-0.16.0/dask/diagnostics/tests/test_profiler.py000066400000000000000000000241611320364734500223610ustar00rootroot00000000000000from operator import add, mul import os from time import sleep from distutils.version import LooseVersion from dask.diagnostics import Profiler, ResourceProfiler, CacheProfiler from dask.threaded import get from dask.utils import ignoring, tmpfile from dask.compatibility import apply import pytest try: import bokeh except ImportError: bokeh = None try: import psutil except ImportError: psutil = None prof = Profiler() dsk = {'a': 1, 'b': 2, 'c': (add, 'a', 'b'), 'd': (mul, 'a', 'b'), 'e': (mul, 'c', 'd')} dsk2 = {'a': 1, 'b': 2, 'c': (lambda a, b: sleep(0.1) or (a + b), 'a', 'b')} def test_profiler(): with prof: out = get(dsk, 'e') assert out == 6 prof_data = sorted(prof.results, key=lambda d: d.key) keys = [i.key for i in prof_data] assert keys == ['c', 'd', 'e'] tasks = [i.task for i in prof_data] assert tasks == [(add, 'a', 'b'), (mul, 'a', 'b'), (mul, 'c', 'd')] prof.clear() assert prof.results == [] def test_profiler_works_under_error(): div = lambda x, y: x / y dsk = {'x': (div, 1, 1), 'y': (div, 'x', 2), 'z': (div, 'y', 0)} with ignoring(ZeroDivisionError): with prof: get(dsk, 'z') assert all(len(v) == 5 for v in prof.results) assert len(prof.results) == 2 def test_two_gets(): with prof: get(dsk, 'e') n = len(prof.results) dsk2 = {'x': (add, 1, 2), 'y': (add, 'x', 'x')} with prof: get(dsk2, 'y') m = len(prof.results) with prof: get(dsk, 'e') get(dsk2, 'y') get(dsk, 'e') assert len(prof.results) == n + m + n @pytest.mark.skipif("not psutil") def test_resource_profiler(): with ResourceProfiler(dt=0.01) as rprof: get(dsk2, 'c') results = rprof.results assert len(results) > 0 assert all(isinstance(i, tuple) and len(i) == 3 for i in results) # Tracker stopped on exit assert not rprof._is_running() rprof.clear() assert rprof.results == [] # Close is idempotent rprof.close() assert not rprof._is_running() # Restarts tracker if already closed with rprof: get(dsk2, 'c') assert len(rprof.results) > 0 @pytest.mark.skipif("not psutil") def test_resource_profiler_multiple_gets(): with ResourceProfiler(dt=0.01) as rprof: get(dsk2, 'c') assert len(rprof.results) == 0 get(dsk2, 'c') results = rprof.results assert all(isinstance(i, tuple) and len(i) == 3 for i in results) rprof.clear() rprof.register() get(dsk2, 'c') assert len(rprof.results) > 0 get(dsk2, 'c') rprof.unregister() results = rprof.results assert all(isinstance(i, tuple) and len(i) == 3 for i in results) rprof.close() assert not rprof._is_running() def test_cache_profiler(): with CacheProfiler() as cprof: get(dsk2, 'c') results = cprof.results assert all(isinstance(i, tuple) and len(i) == 5 for i in results) cprof.clear() assert cprof.results == [] tics = [0] def nbytes(res): tics[0] += 1 return tics[0] with CacheProfiler(nbytes) as cprof: get(dsk2, 'c') results = cprof.results assert tics[-1] == len(results) assert tics[-1] == results[-1].metric assert cprof._metric_name == 'nbytes' assert CacheProfiler(metric=nbytes, metric_name='foo')._metric_name == 'foo' @pytest.mark.parametrize( 'profiler', [Profiler, pytest.param(lambda: ResourceProfiler(dt=0.01), marks=pytest.mark.skipif("not psutil")), CacheProfiler]) def test_register(profiler): prof = profiler() try: 
prof.register() get(dsk2, 'c') n = len(prof.results) assert n > 0 get(dsk2, 'c') assert len(prof.results) > n finally: prof.unregister() @pytest.mark.skipif("not bokeh") def test_unquote(): from dask.diagnostics.profile_visualize import unquote from dask.delayed import to_task_dask f = lambda x: to_task_dask(x)[0] t = {'a': 1, 'b': 2, 'c': 3} assert unquote(f(t)) == t t = {'a': [1, 2, 3], 'b': 2, 'c': 3} assert unquote(f(t)) == t t = [1, 2, 3] assert unquote(f(t)) == t @pytest.mark.skipif("not bokeh") def test_pprint_task(): from dask.diagnostics.profile_visualize import pprint_task keys = set(['a', 'b', 'c', 'd', 'e']) assert pprint_task((add, 'a', 1), keys) == 'add(_, *)' assert pprint_task((add, (add, 'a', 1)), keys) == 'add(add(_, *))' res = 'sum([*, _, add(_, *)])' assert pprint_task((sum, [1, 'b', (add, 'a', 1)]), keys) == res assert pprint_task((sum, (1, 2, 3, 4, 5, 6, 7)), keys) == 'sum(*)' assert len(pprint_task((sum, list(keys) * 100), keys)) < 100 assert pprint_task((sum, list(keys) * 100), keys) == 'sum([_, _, _, ...])' assert (pprint_task((sum, [1, 2, (sum, ['a', 4]), 5, 6] * 100), keys) == 'sum([*, *, sum([_, *]), ...])') assert pprint_task((sum, [1, 2, (sum, ['a', (sum, [1, 2, 3])]), 5, 6]), keys) == 'sum([*, *, sum([_, sum(...)]), ...])' # With kwargs def foo(w, x, y=(), z=3): return w + x + sum(y) + z task = (apply, foo, (tuple, ['a', 'b']), (dict, [['y', ['a', 'b']], ['z', 'c']])) assert pprint_task(task, keys) == 'foo(_, _, y=[_, _], z=_)' task = (apply, foo, (tuple, ['a', 'b']), (dict, [['y', ['a', 1]], ['z', 1]])) assert pprint_task(task, keys) == 'foo(_, _, y=[_, *], z=*)' def check_title(p, title): # bokeh 0.12 changed the title attribute to not a string return getattr(p.title, 'text', p.title) == title @pytest.mark.skipif("not bokeh") def test_profiler_plot(): with prof: get(dsk, 'e') p = prof.visualize(plot_width=500, plot_height=300, tools="hover", title="Not the default", show=False, save=False) assert p.plot_width == 500 assert p.plot_height == 300 assert len(p.tools) == 1 assert isinstance(p.tools[0], bokeh.models.HoverTool) assert check_title(p, "Not the default") # Test empty, checking for errors prof.clear() with pytest.warns(None) as record: prof.visualize(show=False, save=False) assert len(record) == 0 @pytest.mark.skipif("not bokeh") @pytest.mark.skipif("not psutil") def test_resource_profiler_plot(): with ResourceProfiler(dt=0.01) as rprof: get(dsk2, 'c') p = rprof.visualize(plot_width=500, plot_height=300, tools="hover", title="Not the default", show=False, save=False) assert p.plot_width == 500 assert p.plot_height == 300 assert len(p.tools) == 1 assert isinstance(p.tools[0], bokeh.models.HoverTool) assert check_title(p, "Not the default") # Test with empty and one point, checking for errors rprof.clear() for results in [[], [(1.0, 0, 0)]]: rprof.results = results with pytest.warns(None) as record: p = rprof.visualize(show=False, save=False) assert len(record) == 0 # Check bounds are valid assert p.x_range.start == 0 assert p.x_range.end == 1 assert p.y_range.start == 0 assert p.y_range.end == 100 assert p.extra_y_ranges['memory'].start == 0 assert p.extra_y_ranges['memory'].end == 100 @pytest.mark.skipif("not bokeh") def test_cache_profiler_plot(): with CacheProfiler(metric_name='non-standard') as cprof: get(dsk, 'e') p = cprof.visualize(plot_width=500, plot_height=300, tools="hover", title="Not the default", show=False, save=False) assert p.plot_width == 500 assert p.plot_height == 300 assert len(p.tools) == 1 assert isinstance(p.tools[0], 
bokeh.models.HoverTool) assert check_title(p, "Not the default") assert p.axis[1].axis_label == 'Cache Size (non-standard)' # Test empty, checking for errors cprof.clear() with pytest.warns(None) as record: cprof.visualize(show=False, save=False) assert len(record) == 0 @pytest.mark.skipif("not bokeh") @pytest.mark.skipif("not psutil") def test_plot_multiple(): from dask.diagnostics.profile_visualize import visualize with ResourceProfiler(dt=0.01) as rprof: with prof: get(dsk2, 'c') p = visualize([prof, rprof], label_size=50, title="Not the default", show=False, save=False) if LooseVersion(bokeh.__version__) >= '0.12.0': figures = [r.children[0] for r in p.children[1].children] else: figures = [r[0] for r in p.children] assert len(figures) == 2 assert check_title(figures[0], "Not the default") assert figures[0].xaxis[0].axis_label is None assert figures[1].title is None assert figures[1].xaxis[0].axis_label == 'Time (s)' # Test empty, checking for errors prof.clear() rprof.clear() visualize([prof, rprof], show=False, save=False) @pytest.mark.skipif("not bokeh") def test_saves_file(): with tmpfile('html') as fn: with prof: get(dsk, 'e') # Run just to see that it doesn't error prof.visualize(show=False, file_path=fn) assert os.path.exists(fn) with open(fn) as f: assert 'html' in f.read().lower() @pytest.mark.skipif("not bokeh") def test_get_colors(): from dask.diagnostics.profile_visualize import get_colors from bokeh.palettes import Blues9, Blues5, Viridis from itertools import cycle funcs = list(range(11)) cmap = get_colors('Blues', funcs) lk = dict(zip(funcs, cycle(Blues9))) assert cmap == [lk[i] for i in funcs] funcs = list(range(5)) cmap = get_colors('Blues', funcs) lk = dict(zip(funcs, Blues5)) assert cmap == [lk[i] for i in funcs] funcs = [0, 1, 0, 1, 0, 1] cmap = get_colors('BrBG', funcs) assert len(set(cmap)) == 2 funcs = list(range(100)) cmap = get_colors('Viridis', funcs) assert len(set(cmap)) == 100 funcs = list(range(300)) cmap = get_colors('Viridis', funcs) assert len(set(cmap)) == len(set(Viridis[256])) dask-0.16.0/dask/diagnostics/tests/test_progress.py000066400000000000000000000055301320364734500224020ustar00rootroot00000000000000from operator import add, mul import pytest from dask.local import get_sync from dask.diagnostics import ProgressBar from dask.diagnostics.progress import format_time from dask.threaded import get as get_threaded from dask.context import _globals dsk = {'a': 1, 'b': 2, 'c': (add, 'a', 'b'), 'd': (mul, 'a', 'b'), 'e': (mul, 'c', 'd')} def check_bar_completed(capsys, width=40): out, err = capsys.readouterr() bar, percent, time = [i.strip() for i in out.split('\r')[-1].split('|')] assert bar == '[' + '#' * width + ']' assert percent == '100% Completed' def test_progressbar(capsys): with ProgressBar(): out = get_threaded(dsk, 'e') assert out == 6 check_bar_completed(capsys) with ProgressBar(width=20): out = get_threaded(dsk, 'e') check_bar_completed(capsys, 20) def test_minimum_time(capsys): with ProgressBar(1.0): out = get_threaded(dsk, 'e') out, err = capsys.readouterr() assert out == '' and err == '' @pytest.mark.parametrize('get', [get_threaded, get_sync]) def test_clean_exit(get): dsk = {'a': (lambda: 1 / 0, )} try: with ProgressBar() as pbar: get_threaded(dsk, 'a') except ZeroDivisionError: pass assert not pbar._running assert not pbar._timer.is_alive() def test_format_time(): assert format_time(1.4) == ' 1.4s' assert format_time(10.4) == '10.4s' assert format_time(100.4) == ' 1min 40.4s' assert format_time(1000.4) == '16min 40.4s' assert 
format_time(10000.4) == ' 2hr 46min 40.4s' def test_register(capsys): try: p = ProgressBar() p.register() assert _globals['callbacks'] get_threaded(dsk, 'e') check_bar_completed(capsys) p.unregister() assert not _globals['callbacks'] finally: _globals['callbacks'].clear() def test_no_tasks(capsys): with ProgressBar(): get_threaded({'x': 1}, 'x') check_bar_completed(capsys) def test_with_cache(capsys): cachey = pytest.importorskip('cachey') from dask.cache import Cache c = cachey.Cache(10000) cc = Cache(c) with cc: with ProgressBar(): assert get_threaded({'x': (mul, 1, 2)}, 'x') == 2 check_bar_completed(capsys) assert c.data['x'] == 2 with cc: with ProgressBar(): assert get_threaded({'x': (mul, 1, 2), 'y': (mul, 'x', 3)}, 'y') == 6 check_bar_completed(capsys) def test_with_alias(capsys): dsk = {'a': 1, 'b': 2, 'c': (add, 'a', 'b'), 'd': (add, 1, 2), 'e': 'd', 'f': (mul, 'e', 'c')} with ProgressBar(): get_threaded(dsk, 'f') check_bar_completed(capsys) def test_store_time(): p = ProgressBar() with p: get_threaded({'x': 1}, 'x') assert isinstance(p.last_duration, float) dask-0.16.0/dask/distributed.py000066400000000000000000000007021320364734500163440ustar00rootroot00000000000000# flake8: noqa from __future__ import absolute_import, division, print_function try: from distributed import * except ImportError: msg = ("Dask's distributed scheduler is not installed.\n\n" "Please either conda or pip install dask distributed:\n\n" " conda install dask distributed # either conda install\n" " pip install dask distributed --upgrade # or pip install") raise ImportError(msg) dask-0.16.0/dask/dot.py000066400000000000000000000165471320364734500146260ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import re import os from functools import partial from .compatibility import apply from .core import istask, get_dependencies, ishashable from .utils import funcname, import_required graphviz = import_required("graphviz", "Drawing dask graphs requires the " "`graphviz` python library and the " "`graphviz` system library to be " "installed.") def task_label(task): """Label for a task on a dot graph. 
Examples -------- >>> from operator import add >>> task_label((add, 1, 2)) 'add' >>> task_label((add, (add, 1, 2), 3)) 'add(...)' """ func = task[0] if func is apply: func = task[1] if hasattr(func, 'funcs'): if len(func.funcs) > 1: return '{0}(...)'.format(funcname(func.funcs[0])) else: head = funcname(func.funcs[0]) else: head = funcname(func) if any(has_sub_tasks(i) for i in task[1:]): return '{0}(...)'.format(head) else: return head def has_sub_tasks(task): """Returns True if the task has sub tasks""" if istask(task): return True elif isinstance(task, list): return any(has_sub_tasks(i) for i in task) else: return False def name(x): try: return str(hash(x)) except TypeError: return str(hash(str(x))) _HASHPAT = re.compile('([0-9a-z]{32})') _UUIDPAT = re.compile('([0-9a-z]{8}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{12})') def label(x, cache=None): """ >>> label('x') 'x' >>> label(('x', 1)) "('x', 1)" >>> from hashlib import md5 >>> x = 'x-%s-hello' % md5(b'1234').hexdigest() >>> x 'x-81dc9bdb52d04dc20036dbd8313ed055-hello' >>> label(x) 'x-#-hello' >>> from uuid import uuid1 >>> x = 'x-%s-hello' % uuid1() >>> x # doctest: +SKIP 'x-4c1a3d7e-0b45-11e6-8334-54ee75105593-hello' >>> label(x) 'x-#-hello' """ s = str(x) for pattern in (_HASHPAT, _UUIDPAT): m = re.search(pattern, s) if m is not None: for h in m.groups(): if cache is not None: n = cache.get(h, len(cache)) label = '#{0}'.format(n) # cache will be overwritten destructively cache[h] = n else: label = '#' s = s.replace(h, label) return s def to_graphviz(dsk, data_attributes=None, function_attributes=None, rankdir='BT', graph_attr={}, node_attr=None, edge_attr=None, **kwargs): if data_attributes is None: data_attributes = {} if function_attributes is None: function_attributes = {} graph_attr = graph_attr or {} graph_attr['rankdir'] = rankdir graph_attr.update(kwargs) g = graphviz.Digraph(graph_attr=graph_attr, node_attr=node_attr, edge_attr=edge_attr) seen = set() cache = {} for k, v in dsk.items(): k_name = name(k) if k_name not in seen: seen.add(k_name) g.node(k_name, label=label(k, cache=cache), shape='box', **data_attributes.get(k, {})) if istask(v): func_name = name((k, 'function')) if func_name not in seen: seen.add(func_name) g.node(func_name, label=task_label(v), shape='circle', **function_attributes.get(k, {})) g.edge(func_name, k_name) for dep in get_dependencies(dsk, k): dep_name = name(dep) if dep_name not in seen: seen.add(dep_name) g.node(dep_name, label=label(dep, cache=cache), shape='box', **data_attributes.get(dep, {})) g.edge(dep_name, func_name) elif ishashable(v) and v in dsk: g.edge(name(v), k_name) return g IPYTHON_IMAGE_FORMATS = frozenset(['jpeg', 'png']) IPYTHON_NO_DISPLAY_FORMATS = frozenset(['dot', 'pdf']) def _get_display_cls(format): """ Get the appropriate IPython display class for `format`. Returns `IPython.display.SVG` if format=='svg', otherwise `IPython.display.Image`. If IPython is not importable, return dummy function that swallows its arguments and returns None. """ dummy = lambda *args, **kwargs: None try: import IPython.display as display except ImportError: # Can't return a display object if no IPython. return dummy if format in IPYTHON_NO_DISPLAY_FORMATS: # IPython can't display this format natively, so just return None. return dummy elif format in IPYTHON_IMAGE_FORMATS: # Partially apply `format` so that `Image` and `SVG` supply a uniform # interface to the caller. 
return partial(display.Image, format=format) elif format == 'svg': return display.SVG else: raise ValueError("Unknown format '%s' passed to `dot_graph`" % format) def dot_graph(dsk, filename='mydask', format=None, **kwargs): """ Render a task graph using dot. If `filename` is not None, write a file to disk with that name in the format specified by `format`. `filename` should not include an extension. Parameters ---------- dsk : dict The graph to display. filename : str or None, optional The name (without an extension) of the file to write to disk. If `filename` is None, no file will be written, and we communicate with dot using only pipes. Default is 'mydask'. format : {'png', 'pdf', 'dot', 'svg', 'jpeg', 'jpg'}, optional Format in which to write output file. Default is 'png'. **kwargs Additional keyword arguments to forward to `to_graphviz`. Returns ------- result : None or IPython.display.Image or IPython.display.SVG (See below.) Notes ----- If IPython is installed, we return an IPython.display object in the requested format. If IPython is not installed, we just return None. We always return None if format is 'pdf' or 'dot', because IPython can't display these formats natively. Passing these formats with filename=None will not produce any useful output. See Also -------- dask.dot.to_graphviz """ g = to_graphviz(dsk, **kwargs) fmts = ['.png', '.pdf', '.dot', '.svg', '.jpeg', '.jpg'] if format is None and any(filename.lower().endswith(fmt) for fmt in fmts): filename, format = os.path.splitext(filename) format = format[1:].lower() if format is None: format = 'png' data = g.pipe(format=format) if not data: raise RuntimeError("Graphviz failed to properly produce an image. " "This probably means your installation of graphviz " "is missing png support. See: " "https://github.com/ContinuumIO/anaconda-issues/" "issues/485 for more information.") display_cls = _get_display_cls(format) if not filename: return display_cls(data=data) full_filename = '.'.join([filename, format]) with open(full_filename, 'wb') as f: f.write(data) return display_cls(filename=full_filename) dask-0.16.0/dask/hashing.py000066400000000000000000000051411320364734500154450ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import binascii import hashlib import sys hashers = [] # In decreasing performance order # Timings on a largish array: # - CityHash is 2x faster than MurmurHash # - xxHash is slightly slower than CityHash # - MurmurHash is 8x faster than SHA1 # - SHA1 is significantly faster than all other hashlib algorithms try: import cityhash # `pip install cityhash` except ImportError: pass else: # CityHash disabled unless the reference leak in # https://github.com/escherba/python-cityhash/pull/16 # is fixed. if cityhash.__version__ >= '0.2.2': def _hash_cityhash(buf): """ Produce a 16-bytes hash of *buf* using CityHash. """ h = cityhash.CityHash128(buf) if sys.version_info >= (3,): return h.to_bytes(16, 'little') else: return binascii.a2b_hex('%032x' % h) hashers.append(_hash_cityhash) try: import xxhash # `pip install xxhash` except ImportError: pass else: def _hash_xxhash(buf): """ Produce a 8-bytes hash of *buf* using xxHash. """ return xxhash.xxh64(buf).digest() hashers.append(_hash_xxhash) try: import mmh3 # `pip install mmh3` except ImportError: pass else: def _hash_murmurhash(buf): """ Produce a 16-bytes hash of *buf* using MurmurHash. """ return mmh3.hash_bytes(buf) hashers.append(_hash_murmurhash) def _hash_sha1(buf): """ Produce a 20-bytes hash of *buf* using SHA1. 
""" return hashlib.sha1(buf).digest() hashers.append(_hash_sha1) def hash_buffer(buf, hasher=None): """ Hash a bytes-like (buffer-compatible) object. This function returns a good quality hash but is not cryptographically secure. The fastest available algorithm is selected. A fixed-length bytes object is returned. """ if hasher is not None: try: return hasher(buf) except (TypeError, OverflowError): # Some hash libraries may have overly-strict type checking, # not accepting all buffers pass for hasher in hashers: try: return hasher(buf) except (TypeError, OverflowError): pass raise TypeError("unsupported type for hashing: %s" % (type(buf),)) def hash_buffer_hex(buf, hasher=None): """ Same as hash_buffer, but returns its result in hex-encoded form. """ h = hash_buffer(buf, hasher) s = binascii.b2a_hex(h) return s.decode() if sys.version_info >= (3,) else s dask-0.16.0/dask/local.py000066400000000000000000000433231320364734500151220ustar00rootroot00000000000000""" Asynchronous Shared-Memory Scheduler for Dask Graphs. This scheduler coordinates several workers to execute tasks in a dask graph in parallel. It depends on an apply_async function as would be found in thread or process Pools and a corresponding Queue for worker-to-scheduler communication. It tries to execute tasks in an order which maintains a small memory footprint throughout execution. It does this by running tasks that allow us to release data resources. Task Selection Policy ===================== When we complete a task we add more data in to our set of available data; this new data makes new tasks available. We preferentially choose tasks that were just made available in a last-in-first-out fashion. We implement this as a simple stack. This results in more depth-first rather than breadth first behavior which encourages us to process batches of data to completion before starting in on new data when possible. When the addition of new data readies multiple tasks simultaneously we add tasks to the stack in sorted order so that tasks with greater keynames are run first. This can be handy to break ties in a predictable fashion. State ===== Many functions pass around a ``state`` variable that holds the current state of the computation. This variable consists of several other dictionaries and sets, explained below. Constant state -------------- 1. dependencies: {x: [a, b ,c]} a,b,c, must be run before x 2. dependents: {a: [x, y]} a must run before x or y Changing state -------------- ### Data 1. cache: available concrete data. {key: actual-data} 2. released: data that we've seen, used, and released because it is no longer needed ### Jobs 1. ready: A fifo stack of ready-to-run tasks 2. running: A set of tasks currently in execution 3. finished: A set of finished tasks 4. waiting: which tasks are still waiting on others :: {key: {keys}} Real-time equivalent of dependencies 5. 
waiting_data: available data to yet-to-be-run-tasks :: {key: {keys}} Real-time equivalent of dependents Examples -------- >>> import pprint >>> dsk = {'x': 1, 'y': 2, 'z': (inc, 'x'), 'w': (add, 'z', 'y')} >>> pprint.pprint(start_state_from_dask(dsk)) # doctest: +NORMALIZE_WHITESPACE {'cache': {'x': 1, 'y': 2}, 'dependencies': {'w': set(['y', 'z']), 'x': set([]), 'y': set([]), 'z': set(['x'])}, 'dependents': {'w': set([]), 'x': set(['z']), 'y': set(['w']), 'z': set(['w'])}, 'finished': set([]), 'ready': ['z'], 'released': set([]), 'running': set([]), 'waiting': {'w': set(['z'])}, 'waiting_data': {'x': set(['z']), 'y': set(['w']), 'z': set(['w'])}} Optimizations ============= We build this scheduler with out-of-core array operations in mind. To this end we have encoded some particular optimizations. Compute to release data ----------------------- When we choose a new task to execute we often have many options. Policies at this stage are cheap and can significantly impact performance. One could imagine policies that expose parallelism, drive towards a particular output, etc.. Our current policy is to run tasks that were most recently made available. Inlining computations --------------------- We hold on to intermediate computations either in memory or on disk. For very cheap computations that may emit new copies of the data, like ``np.transpose`` or possibly even ``x + 1`` we choose not to store these as separate pieces of data / tasks. Instead we combine them with the computations that require them. This may result in repeated computation but saves significantly on space and computation complexity. See the function ``inline_functions`` for more information. """ from __future__ import absolute_import, division, print_function import os import sys from .compatibility import Queue, Empty, reraise from .core import (istask, flatten, reverse_dict, get_dependencies, ishashable, has_tasks) from .context import _globals from .order import order from .callbacks import unpack_callbacks, local_callbacks from .optimize import cull from .utils_test import add, inc # noqa: F401 if sys.version_info.major < 3: # Due to a bug in python 2.7 Queue.get, if a timeout isn't specified then # `Queue.get` can't be interrupted. A workaround is to specify an extremely # long timeout, which then allows it to be interrupted. # For more information see: https://bugs.python.org/issue1360 def queue_get(q): return q.get(block=True, timeout=(365 * 24 * 60 * 60)) elif os.name == 'nt': # Python 3 windows Queue.get also doesn't handle interrupts properly. To # workaround this we poll at a sufficiently large interval that it # shouldn't affect performance, but small enough that users trying to kill # an application shouldn't care. 
def queue_get(q): while True: try: return q.get(block=True, timeout=0.1) except Empty: pass else: def queue_get(q): return q.get() DEBUG = False def start_state_from_dask(dsk, cache=None, sortkey=None): """ Start state from a dask Examples -------- >>> dsk = {'x': 1, 'y': 2, 'z': (inc, 'x'), 'w': (add, 'z', 'y')} >>> from pprint import pprint >>> pprint(start_state_from_dask(dsk)) # doctest: +NORMALIZE_WHITESPACE {'cache': {'x': 1, 'y': 2}, 'dependencies': {'w': set(['y', 'z']), 'x': set([]), 'y': set([]), 'z': set(['x'])}, 'dependents': {'w': set([]), 'x': set(['z']), 'y': set(['w']), 'z': set(['w'])}, 'finished': set([]), 'ready': ['z'], 'released': set([]), 'running': set([]), 'waiting': {'w': set(['z'])}, 'waiting_data': {'x': set(['z']), 'y': set(['w']), 'z': set(['w'])}} """ if sortkey is None: sortkey = order(dsk).get if cache is None: cache = _globals['cache'] if cache is None: cache = dict() data_keys = set() for k, v in dsk.items(): if not has_tasks(dsk, v): cache[k] = v data_keys.add(k) dsk2 = dsk.copy() dsk2.update(cache) dependencies = {k: get_dependencies(dsk2, k) for k in dsk} waiting = {k: v.copy() for k, v in dependencies.items() if k not in data_keys} dependents = reverse_dict(dependencies) for a in cache: for b in dependents.get(a, ()): waiting[b].remove(a) waiting_data = dict((k, v.copy()) for k, v in dependents.items() if v) ready_set = set([k for k, v in waiting.items() if not v]) ready = sorted(ready_set, key=sortkey, reverse=True) waiting = dict((k, v) for k, v in waiting.items() if v) state = {'dependencies': dependencies, 'dependents': dependents, 'waiting': waiting, 'waiting_data': waiting_data, 'cache': cache, 'ready': ready, 'running': set(), 'finished': set(), 'released': set()} return state ''' Running tasks ------------- When we execute tasks we both 1. Perform the actual work of collecting the appropriate data and calling the function 2. 
Manage administrative state to coordinate with the scheduler ''' def _execute_task(arg, cache, dsk=None): """ Do the actual work of collecting data and executing a function Examples -------- >>> cache = {'x': 1, 'y': 2} Compute tasks against a cache >>> _execute_task((add, 'x', 1), cache) # Compute task in naive manner 2 >>> _execute_task((add, (inc, 'x'), 1), cache) # Support nested computation 3 Also grab data from cache >>> _execute_task('x', cache) 1 Support nested lists >>> list(_execute_task(['x', 'y'], cache)) [1, 2] >>> list(map(list, _execute_task([['x', 'y'], ['y', 'x']], cache))) [[1, 2], [2, 1]] >>> _execute_task('foo', cache) # Passes through on non-keys 'foo' """ if isinstance(arg, list): return [_execute_task(a, cache) for a in arg] elif istask(arg): func, args = arg[0], arg[1:] args2 = [_execute_task(a, cache) for a in args] return func(*args2) elif not ishashable(arg): return arg elif arg in cache: return cache[arg] else: return arg def execute_task(key, task_info, dumps, loads, get_id, pack_exception): """ Compute task and handle all administration See Also -------- _execute_task - actually execute task """ try: task, data = loads(task_info) result = _execute_task(task, data) id = get_id() result = dumps((result, id)) failed = False except BaseException as e: result = pack_exception(e, dumps) failed = True return key, result, failed def release_data(key, state, delete=True): """ Remove data from temporary storage See Also finish_task """ if key in state['waiting_data']: assert not state['waiting_data'][key] del state['waiting_data'][key] state['released'].add(key) if delete: del state['cache'][key] def finish_task(dsk, key, state, results, sortkey, delete=True, release_data=release_data): """ Update execution state after a task finishes Mutates. This should run atomically (with a lock). """ for dep in sorted(state['dependents'][key], key=sortkey, reverse=True): s = state['waiting'][dep] s.remove(key) if not s: del state['waiting'][dep] state['ready'].append(dep) for dep in state['dependencies'][key]: if dep in state['waiting_data']: s = state['waiting_data'][dep] s.remove(key) if not s and dep not in results: if DEBUG: from chest.core import nbytes print("Key: %s\tDep: %s\t NBytes: %.2f\t Release" % (key, dep, sum(map(nbytes, state['cache'].values()) / 1e6))) release_data(dep, state, delete=delete) elif delete and dep not in results: release_data(dep, state, delete=delete) state['finished'].add(key) state['running'].remove(key) return state def nested_get(ind, coll): """ Get nested index from collection Examples -------- >>> nested_get(1, 'abc') 'b' >>> nested_get([1, 0], 'abc') ('b', 'a') >>> nested_get([[1, 0], [0, 1]], 'abc') (('b', 'a'), ('a', 'b')) """ if isinstance(ind, list): return tuple([nested_get(i, coll) for i in ind]) else: return coll[ind] def default_get_id(): """Default get_id""" return None def default_pack_exception(e, dumps): raise def identity(x): """ Identity function. Returns x. >>> identity(3) 3 """ return x ''' Task Selection -------------- We often have a choice among many tasks to run next. This choice is both cheap and can significantly impact performance. We currently select tasks that have recently been made ready. We hope that this first-in-first-out policy reduces memory footprint ''' ''' `get` ----- The main function of the scheduler. Get is the main entry point. 
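As a small, hand-checked illustration (using the ``inc`` and ``add`` helpers imported at the top of this module), the synchronous variant ``get_sync`` defined further below can run a tiny graph end to end:

    >>> dsk = {'x': 1, 'y': (inc, 'x'), 'z': (add, 'x', 'y')}
    >>> get_sync(dsk, 'z')
    3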
''' def get_async(apply_async, num_workers, dsk, result, cache=None, get_id=default_get_id, rerun_exceptions_locally=None, pack_exception=default_pack_exception, raise_exception=reraise, callbacks=None, dumps=identity, loads=identity, **kwargs): """ Asynchronous get function This is a general version of various asynchronous schedulers for dask. It takes a an apply_async function as found on Pool objects to form a more specific ``get`` method that walks through the dask array with parallel workers, avoiding repeat computation and minimizing memory use. Parameters ---------- apply_async : function Asynchronous apply function as found on Pool or ThreadPool num_workers : int The number of active tasks we should have at any one time dsk : dict A dask dictionary specifying a workflow result : key or list of keys Keys corresponding to desired data cache : dict-like, optional Temporary storage of results get_id : callable, optional Function to return the worker id, takes no arguments. Examples are `threading.current_thread` and `multiprocessing.current_process`. rerun_exceptions_locally : bool, optional Whether to rerun failing tasks in local process to enable debugging (False by default) pack_exception : callable, optional Function to take an exception and ``dumps`` method, and return a serialized tuple of ``(exception, traceback)`` to send back to the scheduler. Default is to just raise the exception. raise_exception : callable, optional Function that takes an exception and a traceback, and raises an error. dumps: callable, optional Function to serialize task data and results to communicate between worker and parent. Defaults to identity. loads: callable, optional Inverse function of `dumps`. Defaults to identity. callbacks : tuple or list of tuples, optional Callbacks are passed in as tuples of length 5. Multiple sets of callbacks may be passed in as a list of tuples. For more information, see the dask.diagnostics documentation. 
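        As a rough sketch of the callback shape (the five slots are unpacked
        below as ``start``, ``start_state``, ``pretask``, ``posttask`` and
        ``finish``; in practice these tuples usually come from the classes in
        ``dask.callbacks``), a callback that records finished keys could be
        passed as::

            finished_keys = []

            def posttask(key, result, dsk, state, worker_id):
                finished_keys.append(key)

            callbacks = [(None, None, None, posttask, None)]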
See Also -------- threaded.get """ queue = Queue() if isinstance(result, list): result_flat = set(flatten(result)) else: result_flat = set([result]) results = set(result_flat) dsk = dict(dsk) with local_callbacks(callbacks) as callbacks: _, _, pretask_cbs, posttask_cbs, _ = unpack_callbacks(callbacks) started_cbs = [] succeeded = False try: for cb in callbacks: if cb[0]: cb[0](dsk) started_cbs.append(cb) dsk, dependencies = cull(dsk, list(results)) keyorder = order(dsk) state = start_state_from_dask(dsk, cache=cache, sortkey=keyorder.get) for _, start_state, _, _, _ in callbacks: if start_state: start_state(dsk, state) if rerun_exceptions_locally is None: rerun_exceptions_locally = _globals.get('rerun_exceptions_locally', False) if state['waiting'] and not state['ready']: raise ValueError("Found no accessible jobs in dask") def fire_task(): """ Fire off a task to the thread pool """ # Choose a good task to compute key = state['ready'].pop() state['running'].add(key) for f in pretask_cbs: f(key, dsk, state) # Prep data to send data = dict((dep, state['cache'][dep]) for dep in get_dependencies(dsk, key)) # Submit apply_async(execute_task, args=(key, dumps((dsk[key], data)), dumps, loads, get_id, pack_exception), callback=queue.put) # Seed initial tasks into the thread pool while state['ready'] and len(state['running']) < num_workers: fire_task() # Main loop, wait on tasks to finish, insert new ones while state['waiting'] or state['ready'] or state['running']: key, res_info, failed = queue_get(queue) if failed: exc, tb = loads(res_info) if rerun_exceptions_locally: data = dict((dep, state['cache'][dep]) for dep in get_dependencies(dsk, key)) task = dsk[key] _execute_task(task, data) # Re-execute locally else: raise_exception(exc, tb) res, worker_id = loads(res_info) state['cache'][key] = res finish_task(dsk, key, state, results, keyorder.get) for f in posttask_cbs: f(key, res, dsk, state, worker_id) while state['ready'] and len(state['running']) < num_workers: fire_task() succeeded = True finally: for _, _, _, _, finish in started_cbs: if finish: finish(dsk, state, not succeeded) return nested_get(result, state['cache']) """ Synchronous concrete version of get_async Usually we supply a multi-core apply_async function. Here we provide a sequential one. This is useful for debugging and for code dominated by the GIL """ def apply_sync(func, args=(), kwds={}, callback=None): """ A naive synchronous version of apply_async """ res = func(*args, **kwds) if callback is not None: callback(res) def get_sync(dsk, keys, **kwargs): """A naive synchronous version of get_async Can be useful for debugging. """ kwargs.pop('num_workers', None) # if num_workers present, remove it return get_async(apply_sync, 1, dsk, keys, **kwargs) def sortkey(item): """ Sorting key function that is robust to different types Both strings and tuples are common key types in dask graphs. However In Python 3 one can not compare strings with tuples directly. 
This function maps many types to a form where they can be compared Examples -------- >>> sortkey('Hello') ('str', 'Hello') >>> sortkey(('x', 1)) ('tuple', ('x', 1)) """ return (type(item).__name__, item) dask-0.16.0/dask/multiprocessing.py000066400000000000000000000127031320364734500172550ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import multiprocessing import traceback import pickle import sys from .local import get_async # TODO: get better get from .context import _globals from .optimize import fuse, cull import cloudpickle if sys.version_info.major < 3: import copy_reg as copyreg else: import copyreg def _reduce_method_descriptor(m): return getattr, (m.__objclass__, m.__name__) # type(set.union) is used as a proxy to copyreg.pickle(type(set.union), _reduce_method_descriptor) def _dumps(x): return cloudpickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL) _loads = pickle.loads def _process_get_id(): return multiprocessing.current_process().ident # -- Remote Exception Handling -- # By default, tracebacks can't be serialized using pickle. However, the # `tblib` library can enable support for this. Since we don't mandate # that tblib is installed, we do the following: # # - If tblib is installed, use it to serialize the traceback and reraise # in the scheduler process # - Otherwise, use a ``RemoteException`` class to contain a serialized # version of the formatted traceback, which will then print in the # scheduler process. # # To enable testing of the ``RemoteException`` class even when tblib is # installed, we don't wrap the class in the try block below class RemoteException(Exception): """ Remote Exception Contains the exception and traceback from a remotely run task """ def __init__(self, exception, traceback): self.exception = exception self.traceback = traceback def __str__(self): return (str(self.exception) + "\n\n" "Traceback\n" "---------\n" + self.traceback) def __dir__(self): return sorted(set(dir(type(self)) + list(self.__dict__) + dir(self.exception))) def __getattr__(self, key): try: return object.__getattribute__(self, key) except AttributeError: return getattr(self.exception, key) exceptions = dict() def remote_exception(exc, tb): """ Metaclass that wraps exception type in RemoteException """ if type(exc) in exceptions: typ = exceptions[type(exc)] return typ(exc, tb) else: try: typ = type(exc.__class__.__name__, (RemoteException, type(exc)), {'exception_type': type(exc)}) exceptions[type(exc)] = typ return typ(exc, tb) except TypeError: return exc try: import tblib.pickling_support tblib.pickling_support.install() from dask.compatibility import reraise def _pack_traceback(tb): return tb except ImportError: def _pack_traceback(tb): return ''.join(traceback.format_tb(tb)) def reraise(exc, tb): exc = remote_exception(exc, tb) raise exc def pack_exception(e, dumps): exc_type, exc_value, exc_traceback = sys.exc_info() tb = _pack_traceback(exc_traceback) try: result = dumps((e, tb)) except BaseException as e: exc_type, exc_value, exc_traceback = sys.exc_info() tb = _pack_traceback(exc_traceback) result = dumps((e, tb)) return result def get(dsk, keys, num_workers=None, func_loads=None, func_dumps=None, optimize_graph=True, **kwargs): """ Multiprocessed get function appropriate for Bags Parameters ---------- dsk : dict dask graph keys : object or list Desired results from graph num_workers : int Number of worker processes (defaults to number of cores) func_dumps : function Function to use for function serialization (defaults to 
cloudpickle.dumps) func_loads : function Function to use for function deserialization (defaults to cloudpickle.loads) optimize_graph : bool If True [default], `fuse` is applied to the graph before computation. """ pool = _globals['pool'] if pool is None: pool = multiprocessing.Pool(num_workers, initializer=initialize_worker_process) cleanup = True else: cleanup = False # Optimize Dask dsk2, dependencies = cull(dsk, keys) if optimize_graph: dsk3, dependencies = fuse(dsk2, keys, dependencies) else: dsk3 = dsk2 # We specify marshalling functions in order to catch serialization # errors and report them to the user. loads = func_loads or _globals.get('func_loads') or _loads dumps = func_dumps or _globals.get('func_dumps') or _dumps # Note former versions used a multiprocessing Manager to share # a Queue between parent and workers, but this is fragile on Windows # (issue #1652). try: # Run result = get_async(pool.apply_async, len(pool._pool), dsk3, keys, get_id=_process_get_id, dumps=dumps, loads=loads, pack_exception=pack_exception, raise_exception=reraise, **kwargs) finally: if cleanup: pool.close() return result def initialize_worker_process(): """ Initialize a worker process before running any tasks in it. """ # If Numpy is already imported, presumably its random state was # inherited from the parent => re-seed it. np = sys.modules.get('numpy') if np is not None: np.random.seed() dask-0.16.0/dask/optimize.py000066400000000000000000000705471320364734500157000ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import math import re from operator import getitem from .compatibility import unicode from .context import _globals from .core import (istask, get_dependencies, subs, toposort, flatten, reverse_dict, ishashable) from .utils_test import add, inc # noqa: F401 def cull(dsk, keys): """ Return new dask with only the tasks required to calculate keys. In other words, remove unnecessary tasks from dask. ``keys`` may be a single key or list of keys. Examples -------- >>> d = {'x': 1, 'y': (inc, 'x'), 'out': (add, 'x', 10)} >>> dsk, dependencies = cull(d, 'out') # doctest: +SKIP >>> dsk # doctest: +SKIP {'x': 1, 'out': (add, 'x', 10)} >>> dependencies # doctest: +SKIP {'x': set(), 'out': set(['x'])} Returns ------- dsk: culled dask graph dependencies: Dict mapping {key: [deps]}. Useful side effect to accelerate other optimizations, notably fuse. """ if not isinstance(keys, (list, set)): keys = [keys] out_keys = [] seen = set() dependencies = dict() work = list(set(flatten(keys))) while work: new_work = [] out_keys += work deps = [(k, get_dependencies(dsk, k, as_list=True)) # fuse needs lists for k in work] dependencies.update(deps) for _, deplist in deps: for d in deplist: if d not in seen: seen.add(d) new_work.append(d) work = new_work out = {k: dsk[k] for k in out_keys} return out, dependencies def default_fused_linear_keys_renamer(keys): """Create new keys for fused tasks""" typ = type(keys[0]) if typ is str or typ is unicode: names = [key_split(x) for x in keys[:0:-1]] names.append(keys[0]) return '-'.join(names) elif (typ is tuple and len(keys[0]) > 0 and isinstance(keys[0][0], (str, unicode))): names = [key_split(x) for x in keys[:0:-1]] names.append(keys[0][0]) return ('-'.join(names),) + keys[0][1:] else: return None def fuse_linear(dsk, keys=None, dependencies=None, rename_keys=True): """ Return new dask graph with linear sequence of tasks fused together. If specified, the keys in ``keys`` keyword argument are *not* fused. 
Supply ``dependencies`` from output of ``cull`` if available to avoid recomputing dependencies. **This function is mostly superseded by ``fuse``** Parameters ---------- dsk: dict keys: list dependencies: dict, optional {key: [list-of-keys]}. Must be a list to provide count of each key This optional input often comes from ``cull`` rename_keys: bool or func, optional Whether to rename fused keys with ``default_fused_linear_keys_renamer`` or not. Renaming fused keys can keep the graph more understandable and comprehensive, but it comes at the cost of additional processing. If False, then the top-most key will be used. For advanced usage, a func is also accepted, ``new_key = rename_keys(fused_key_list)``. Examples -------- >>> d = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b')} >>> dsk, dependencies = fuse(d) >>> dsk # doctest: +SKIP {'a-b-c': (inc, (inc, 1)), 'c': 'a-b-c'} >>> dsk, dependencies = fuse(d, rename_keys=False) >>> dsk # doctest: +SKIP {'c': (inc, (inc, 1))} >>> dsk, dependencies = fuse(d, keys=['b'], rename_keys=False) >>> dsk # doctest: +SKIP {'b': (inc, 1), 'c': (inc, 'b')} Returns ------- dsk: output graph with keys fused dependencies: dict mapping dependencies after fusion. Useful side effect to accelerate other downstream optimizations. """ if keys is not None and not isinstance(keys, set): if not isinstance(keys, list): keys = [keys] keys = set(flatten(keys)) if dependencies is None: dependencies = {k: get_dependencies(dsk, k, as_list=True) for k in dsk} # locate all members of linear chains child2parent = {} unfusible = set() for parent in dsk: deps = dependencies[parent] has_many_children = len(deps) > 1 for child in deps: if keys is not None and child in keys: unfusible.add(child) elif child in child2parent: del child2parent[child] unfusible.add(child) elif has_many_children: unfusible.add(child) elif child not in unfusible: child2parent[child] = parent # construct the chains from ancestor to descendant chains = [] parent2child = dict(map(reversed, child2parent.items())) while child2parent: child, parent = child2parent.popitem() chain = [child, parent] while parent in child2parent: parent = child2parent.pop(parent) del parent2child[parent] chain.append(parent) chain.reverse() while child in parent2child: child = parent2child.pop(child) del child2parent[child] chain.append(child) chains.append(chain) dependencies = {k: set(v) for k, v in dependencies.items()} if rename_keys is True: key_renamer = default_fused_linear_keys_renamer elif rename_keys is False: key_renamer = None else: key_renamer = rename_keys # create a new dask with fused chains rv = {} fused = set() aliases = set() is_renamed = False for chain in chains: if key_renamer is not None: new_key = key_renamer(chain) is_renamed = (new_key is not None and new_key not in dsk and new_key not in rv) child = chain.pop() val = dsk[child] while chain: parent = chain.pop() dependencies[parent].update(dependencies.pop(child)) dependencies[parent].remove(child) val = subs(dsk[parent], child, val) fused.add(child) child = parent fused.add(child) if is_renamed: rv[new_key] = val rv[child] = new_key dependencies[new_key] = dependencies[child] dependencies[child] = {new_key} aliases.add(child) else: rv[child] = val for key, val in dsk.items(): if key not in fused: rv[key] = val if aliases: for key, deps in dependencies.items(): for old_key in deps & aliases: new_key = rv[old_key] deps.remove(old_key) deps.add(new_key) rv[key] = subs(rv[key], old_key, new_key) if keys is not None: for key in aliases - keys: del rv[key] del 
dependencies[key] return rv, dependencies def _flat_set(x): if x is None: return set() elif isinstance(x, set): return x elif not isinstance(x, (list, set)): x = [x] return set(x) def inline(dsk, keys=None, inline_constants=True, dependencies=None): """ Return new dask with the given keys inlined with their values. Inlines all constants if ``inline_constants`` keyword is True. Note that the constant keys will remain in the graph, to remove them follow ``inline`` with ``cull``. Examples -------- >>> d = {'x': 1, 'y': (inc, 'x'), 'z': (add, 'x', 'y')} >>> inline(d) # doctest: +SKIP {'x': 1, 'y': (inc, 1), 'z': (add, 1, 'y')} >>> inline(d, keys='y') # doctest: +SKIP {'x': 1, 'y': (inc, 1), 'z': (add, 1, (inc, 1))} >>> inline(d, keys='y', inline_constants=False) # doctest: +SKIP {'x': 1, 'y': (inc, 1), 'z': (add, 'x', (inc, 'x'))} """ if dependencies and isinstance(next(iter(dependencies.values())), list): dependencies = {k: set(v) for k, v in dependencies.items()} keys = _flat_set(keys) if dependencies is None: dependencies = {k: get_dependencies(dsk, k) for k in dsk} if inline_constants: keys.update(k for k, v in dsk.items() if (ishashable(v) and v in dsk) or (not dependencies[k] and not istask(v))) # Keys may depend on other keys, so determine replace order with toposort. # The values stored in `keysubs` do not include other keys. replaceorder = toposort(dict((k, dsk[k]) for k in keys if k in dsk), dependencies=dependencies) keysubs = {} for key in replaceorder: val = dsk[key] for dep in keys & dependencies[key]: if dep in keysubs: replace = keysubs[dep] else: replace = dsk[dep] val = subs(val, dep, replace) keysubs[key] = val # Make new dask with substitutions dsk2 = keysubs.copy() for key, val in dsk.items(): if key not in dsk2: for item in keys & dependencies[key]: val = subs(val, item, keysubs[item]) dsk2[key] = val return dsk2 def inline_functions(dsk, output, fast_functions=None, inline_constants=False, dependencies=None): """ Inline cheap functions into larger operations Examples -------- >>> dsk = {'out': (add, 'i', 'd'), # doctest: +SKIP ... 'i': (inc, 'x'), ... 'd': (double, 'y'), ... 'x': 1, 'y': 1} >>> inline_functions(dsk, [], [inc]) # doctest: +SKIP {'out': (add, (inc, 'x'), 'd'), 'd': (double, 'y'), 'x': 1, 'y': 1} Protect output keys. In the example below ``i`` is not inlined because it is marked as an output key. 
>>> inline_functions(dsk, ['i', 'out'], [inc, double]) # doctest: +SKIP {'out': (add, 'i', (double, 'y')), 'i': (inc, 'x'), 'x': 1, 'y': 1} """ if not fast_functions: return dsk output = set(output) fast_functions = set(fast_functions) if dependencies is None: dependencies = {k: get_dependencies(dsk, k) for k in dsk} dependents = reverse_dict(dependencies) keys = [k for k, v in dsk.items() if istask(v) and functions_of(v).issubset(fast_functions) and dependents[k] and k not in output ] if keys: dsk = inline(dsk, keys, inline_constants=inline_constants, dependencies=dependencies) for k in keys: del dsk[k] return dsk def unwrap_partial(func): while hasattr(func, 'func'): func = func.func return func def functions_of(task): """ Set of functions contained within nested task Examples -------- >>> task = (add, (mul, 1, 2), (inc, 3)) # doctest: +SKIP >>> functions_of(task) # doctest: +SKIP set([add, mul, inc]) """ funcs = set() work = [task] sequence_types = {list, tuple} while work: new_work = [] for task in work: if type(task) in sequence_types: if istask(task): funcs.add(unwrap_partial(task[0])) new_work += task[1:] else: new_work += task work = new_work return funcs def fuse_selections(dsk, head1, head2, merge): """Fuse selections with lower operation. Handles graphs of the form: ``{key1: (head1, key2, ...), key2: (head2, ...)}`` Parameters ---------- dsk : dict dask graph head1 : function The first element of task1 head2 : function The first element of task2 merge : function Takes ``task1`` and ``task2`` and returns a merged task to replace ``task1``. Examples -------- >>> def load(store, partition, columns): ... pass >>> dsk = {'x': (load, 'store', 'part', ['a', 'b']), ... 'y': (getitem, 'x', 'a')} >>> merge = lambda t1, t2: (load, t2[1], t2[2], t1[2]) >>> dsk2 = fuse_selections(dsk, getitem, load, merge) >>> cull(dsk2, 'y')[0] {'y': (, 'store', 'part', 'a')} """ dsk2 = dict() for k, v in dsk.items(): try: if (istask(v) and v[0] == head1 and v[1] in dsk and istask(dsk[v[1]]) and dsk[v[1]][0] == head2): dsk2[k] = merge(v, dsk[v[1]]) else: dsk2[k] = v except TypeError: dsk2[k] = v return dsk2 def fuse_getitem(dsk, func, place): """ Fuse getitem with lower operation Parameters ---------- dsk: dict dask graph func: function A function in a task to merge place: int Location in task to insert the getitem key Examples -------- >>> def load(store, partition, columns): ... pass >>> dsk = {'x': (load, 'store', 'part', ['a', 'b']), ... 
'y': (getitem, 'x', 'a')} >>> dsk2 = fuse_getitem(dsk, load, 3) # columns in arg place 3 >>> cull(dsk2, 'y')[0] {'y': (, 'store', 'part', 'a')} """ return fuse_selections(dsk, getitem, func, lambda a, b: tuple(b[:place]) + (a[2], ) + tuple(b[place + 1:])) def default_fused_keys_renamer(keys): """Create new keys for ``fuse`` tasks""" it = reversed(keys) first_key = next(it) typ = type(first_key) if typ is str or typ is unicode: first_name = key_split(first_key) names = {key_split(k) for k in it} names.discard(first_name) names = sorted(names) names.append(first_key) return '-'.join(names) elif (typ is tuple and len(first_key) > 0 and isinstance(first_key[0], (str, unicode))): first_name = key_split(first_key) names = {key_split(k) for k in it} names.discard(first_name) names = sorted(names) names.append(first_key[0]) return ('-'.join(names),) + first_key[1:] def fuse(dsk, keys=None, dependencies=None, ave_width=None, max_width=None, max_height=None, max_depth_new_edges=None, rename_keys=None): """ Fuse tasks that form reductions; more advanced than ``fuse_linear`` This trades parallelism opportunities for faster scheduling by making tasks less granular. It can replace ``fuse_linear`` in optimization passes. This optimization applies to all reductions--tasks that have at most one dependent--so it may be viewed as fusing "multiple input, single output" groups of tasks into a single task. There are many parameters to fine tune the behavior, which are described below. ``ave_width`` is the natural parameter with which to compare parallelism to granularity, so it should always be specified. Reasonable values for other parameters with be determined using ``ave_width`` if necessary. Parameters ---------- dsk: dict dask graph keys: list or set, optional Keys that must remain in the returned dask graph dependencies: dict, optional {key: [list-of-keys]}. Must be a list to provide count of each key This optional input often comes from ``cull`` ave_width: float (default 2) Upper limit for ``width = num_nodes / height``, a good measure of parallelizability max_width: int Don't fuse if total width is greater than this max_height: int Don't fuse more than this many levels max_depth_new_edges: int Don't fuse if new dependencies are added after this many levels rename_keys: bool or func, optional Whether to rename the fused keys with ``default_fused_keys_renamer`` or not. Renaming fused keys can keep the graph more understandable and comprehensive, but it comes at the cost of additional processing. If False, then the top-most key will be used. For advanced usage, a function to create the new name is also accepted. Returns ------- dsk: output graph with keys fused dependencies: dict mapping dependencies after fusion. Useful side effect to accelerate other downstream optimizations. 
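    Examples
    --------
    A small, hand-checked sketch (output shown with ``rename_keys=False`` so
    the surviving key keeps its original name)::

        >>> d = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b')}
        >>> dsk, dependencies = fuse(d, ave_width=1, rename_keys=False)
        >>> dsk  # doctest: +SKIP
        {'c': (inc, (inc, 1))}

    Raising ``ave_width`` also lets small reductions collapse, e.g.
    ``fuse({'a': 1, 'b': 2, 'c': (add, 'a', 'b')}, ave_width=2,
    rename_keys=False)`` leaves only ``{'c': (add, 1, 2)}``.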
""" if keys is not None and not isinstance(keys, set): if not isinstance(keys, list): keys = [keys] keys = set(flatten(keys)) # Assign reasonable, not too restrictive defaults if ave_width is None: if _globals.get('fuse_ave_width') is None: ave_width = 1 else: ave_width = _globals['fuse_ave_width'] if max_height is None: if _globals.get('fuse_max_height') is None: max_height = len(dsk) else: max_height = _globals['fuse_max_height'] max_depth_new_edges = ( max_depth_new_edges or _globals.get('fuse_max_depth_new_edges') or ave_width + 1.5 ) max_width = ( max_width or _globals.get('fuse_max_width') or 1.5 + ave_width * math.log(ave_width + 1) ) if not ave_width or not max_height: return dsk, dependencies if rename_keys is None: rename_keys = _globals.get('fuse_rename_keys', True) if rename_keys is True: key_renamer = default_fused_keys_renamer elif rename_keys is False: key_renamer = None else: key_renamer = rename_keys if dependencies is None: deps = {k: get_dependencies(dsk, k, as_list=True) for k in dsk} else: deps = dict(dependencies) rdeps = {} for k, vals in deps.items(): for v in vals: if v not in rdeps: rdeps[v] = [k] else: rdeps[v].append(k) deps[k] = set(vals) reducible = {k for k, vals in rdeps.items() if len(vals) == 1} if keys: reducible -= keys if not reducible: return dsk, deps rv = dsk.copy() fused_trees = {} # These are the stacks we use to store data as we traverse the graph info_stack = [] children_stack = [] # For speed deps_pop = deps.pop reducible_add = reducible.add reducible_pop = reducible.pop reducible_remove = reducible.remove fused_trees_pop = fused_trees.pop info_stack_append = info_stack.append info_stack_pop = info_stack.pop children_stack_append = children_stack.append children_stack_extend = children_stack.extend children_stack_pop = children_stack.pop while reducible: parent = reducible_pop() reducible_add(parent) while parent in reducible: # Go to the top parent = rdeps[parent][0] children_stack_append(parent) children_stack_extend(reducible & deps[parent]) while True: child = children_stack[-1] if child != parent: children = reducible & deps[child] while children: # Depth-first search children_stack_extend(children) parent = child child = children_stack[-1] children = reducible & deps[child] children_stack_pop() # This is a leaf node in the reduction region # key, task, fused_keys, height, width, number of nodes, fudge, set of edges info_stack_append((child, rv[child], None if key_renamer is None else [child], 1, 1, 1, 0, deps[child] - reducible)) else: children_stack_pop() # Calculate metrics and fuse as appropriate deps_parent = deps[parent] edges = deps_parent - reducible children = deps_parent - edges num_children = len(children) if num_children == 1: (child_key, child_task, child_keys, height, width, num_nodes, fudge, children_edges) = info_stack_pop() num_children_edges = len(children_edges) if fudge > num_children_edges - 1 >= 0: fudge = num_children_edges - 1 edges |= children_edges no_new_edges = len(edges) == num_children_edges if not no_new_edges: fudge += 1 if ( (num_nodes + fudge) / height <= ave_width and # Sanity check; don't go too deep if new levels introduce new edge dependencies (no_new_edges or height < max_depth_new_edges) ): # Perform substitutions as we go val = subs(dsk[parent], child_key, child_task) deps_parent.remove(child_key) deps_parent |= deps_pop(child_key) del rv[child_key] reducible_remove(child_key) if key_renamer is not None: child_keys.append(parent) fused_trees[parent] = child_keys fused_trees_pop(child_key, None) if 
children_stack: if no_new_edges: # Linear fuse info_stack_append((parent, val, child_keys, height, width, num_nodes, fudge, edges)) else: info_stack_append((parent, val, child_keys, height + 1, width, num_nodes + 1, fudge, edges)) else: rv[parent] = val break else: rv[child_key] = child_task reducible_remove(child_key) if children_stack: # Allow the parent to be fused, but only under strict circumstances. # Ensure that linear chains may still be fused. if fudge > int(ave_width - 1): fudge = int(ave_width - 1) # This task *implicitly* depends on `edges` info_stack_append((parent, rv[parent], None if key_renamer is None else [parent], 1, width, 1, fudge, edges)) else: break else: child_keys = [] height = 1 width = 0 num_single_nodes = 0 num_nodes = 0 fudge = 0 children_edges = set() max_num_edges = 0 children_info = info_stack[-num_children:] del info_stack[-num_children:] for cur_key, cur_task, cur_keys, cur_height, cur_width, cur_num_nodes, cur_fudge, \ cur_edges in children_info: if cur_height == 1: num_single_nodes += 1 elif cur_height > height: height = cur_height width += cur_width num_nodes += cur_num_nodes fudge += cur_fudge if len(cur_edges) > max_num_edges: max_num_edges = len(cur_edges) children_edges |= cur_edges # Fudge factor to account for possible parallelism with the boundaries num_children_edges = len(children_edges) fudge += min(num_children - 1, max(0, num_children_edges - max_num_edges)) if fudge > num_children_edges - 1 >= 0: fudge = num_children_edges - 1 edges |= children_edges no_new_edges = len(edges) == num_children_edges if not no_new_edges: fudge += 1 if ( (num_nodes + fudge) / height <= ave_width and num_single_nodes <= ave_width and width <= max_width and height <= max_height and # Sanity check; don't go too deep if new levels introduce new edge dependencies (no_new_edges or height < max_depth_new_edges) ): # Perform substitutions as we go val = dsk[parent] children_deps = set() for child_info in children_info: cur_child = child_info[0] val = subs(val, cur_child, child_info[1]) del rv[cur_child] children_deps |= deps_pop(cur_child) reducible_remove(cur_child) if key_renamer is not None: fused_trees_pop(cur_child, None) child_keys.extend(child_info[2]) deps_parent -= children deps_parent |= children_deps if key_renamer is not None: child_keys.append(parent) fused_trees[parent] = child_keys if children_stack: info_stack_append((parent, val, child_keys, height + 1, width, num_nodes + 1, fudge, edges)) else: rv[parent] = val break else: for child_info in children_info: rv[child_info[0]] = child_info[1] reducible_remove(child_info[0]) if children_stack: # Allow the parent to be fused, but only under strict circumstances. # Ensure that linear chains may still be fused. if width > max_width: width = max_width if fudge > int(ave_width - 1): fudge = int(ave_width - 1) # key, task, height, width, number of nodes, fudge, set of edges # This task *implicitly* depends on `edges` info_stack_append((parent, rv[parent], None if key_renamer is None else [parent], 1, width, 1, fudge, edges)) else: break # Traverse upwards parent = rdeps[parent][0] if key_renamer is not None: for root_key, fused_keys in fused_trees.items(): alias = key_renamer(fused_keys) if alias is not None and alias not in rv: rv[alias] = rv[root_key] rv[root_key] = alias deps[alias] = deps[root_key] deps[root_key] = {alias} return rv, deps # Defining `key_split` (used by key renamers in `fuse`) in utils.py # results in messy circular imports, so define it here instead. 
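# Note: ``key_split`` below stops accumulating name words at the first
# non-alphabetic piece or 8-character hex (a-f) chunk, and reports bare
# 32-character hex names as 'data'; see its doctests.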
hex_pattern = re.compile('[a-f]+') def key_split(s): """ >>> key_split('x') u'x' >>> key_split('x-1') u'x' >>> key_split('x-1-2-3') u'x' >>> key_split(('x-2', 1)) 'x' >>> key_split("('x-2', 1)") u'x' >>> key_split('hello-world-1') u'hello-world' >>> key_split(b'hello-world-1') u'hello-world' >>> key_split('ae05086432ca935f6eba409a8ecd4896') 'data' >>> key_split('>> key_split(None) 'Other' >>> key_split('x-abcdefab') # ignores hex u'x' """ if type(s) is bytes: s = s.decode() if type(s) is tuple: s = s[0] try: words = s.split('-') if not words[0][0].isalpha(): result = words[0].lstrip("'(\"") else: result = words[0] for word in words[1:]: if word.isalpha() and not (len(word) == 8 and hex_pattern.match(word) is not None): result += '-' + word else: break if len(result) == 32 and re.match(r'[a-f0-9]{32}', result): return 'data' else: if result[0] == '<': result = result.strip('<>').split()[0].split('.')[-1] return result except Exception: return 'Other' dask-0.16.0/dask/order.py000066400000000000000000000143721320364734500151450ustar00rootroot00000000000000""" Static order of nodes in dask graph We can make decisions on what tasks to run next both * Dynamically at runtime * Statically before runtime Dask's async scheduler runs dynamically and prefers to run tasks that were just made available. However when several tasks become available at the same time we have an opportunity to break ties in an intelligent way d | b c \ / a E.g. when we run ``a`` we can choose to run either ``b`` or ``c`` next. In this case we may choose to start with ``c``, because it has other dependencies. This is particularly important at the beginning of the computation when we often dump hundreds of leaf nodes onto the scheduler at once. The order in which we start this computation can significantly change performance. Breaking Ties ------------- And so we create a total ordering over all nodes to serve as a tie breaker. We represent this ordering with a dictionary. Lower scores have higher priority. {'d': 0, 'c': 1, 'a': 2, 'b': 3} There are several ways in which we might order our keys. In practice we have found the following objectives important: 1. **Finish subtrees before starting new subtrees:** Often our computation consists of many independent subtrees (e.g. reductions in an array). We want to work on and finish individual subtrees before moving on to others in order to keep a low memory footprint. 2. **Run heavily depended-on tasks first**: Some tasks produce data that is required by many other tasks, either in a deep linear chain (critical path) or in a shallow but broad nexus (critical point). By preferring these we allow other computations to flow to completion more easily. Approach: Depth First Search with Intelligent Tie-Breaking ---------------------------------------------------------- To satisfy concern (1) we perform a depth first search (``dfs``). To satisfy concern (2) we prefer to traverse down children in the order of which child has the descendent on whose result the most tasks depend. """ from __future__ import absolute_import, division, print_function from .core import get_dependencies, reverse_dict, get_deps # noqa: F401 from .utils_test import add, inc # noqa: F401 def order(dsk, dependencies=None): """ Order nodes in dask graph The ordering will be a toposort but will also have other convenient properties 1. Depth first search 2. 
DFS prefers nodes that enable the most data >>> dsk = {'a': 1, 'b': 2, 'c': (inc, 'a'), 'd': (add, 'b', 'c')} >>> order(dsk) {'a': 2, 'c': 1, 'b': 3, 'd': 0} """ if dependencies is None: dependencies = {k: get_dependencies(dsk, k) for k in dsk} dependents = reverse_dict(dependencies) ndeps = ndependents(dependencies, dependents) maxes = child_max(dependencies, dependents, ndeps) def key(x): return -maxes.get(x, 0), str(x) return dfs(dependencies, dependents, key=key) def ndependents(dependencies, dependents): """ Number of total data elements that depend on key For each key we return the number of data that can only be run after this key is run. The root nodes have value 1 while deep child nodes will have larger values. Examples -------- >>> dsk = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b')} >>> dependencies, dependents = get_deps(dsk) >>> sorted(ndependents(dependencies, dependents).items()) [('a', 3), ('b', 2), ('c', 1)] """ result = dict() num_needed = dict((k, len(v)) for k, v in dependents.items()) current = set(k for k, v in num_needed.items() if v == 0) while current: key = current.pop() result[key] = 1 + sum(result[parent] for parent in dependents[key]) for child in dependencies[key]: num_needed[child] -= 1 if num_needed[child] == 0: current.add(child) return result def child_max(dependencies, dependents, scores): """ Maximum-ish of scores of children This takes a dictionary of scores per key and returns a new set of scores per key that is the maximum of the scores of all children of that node plus its own score. In some sense this ranks each node by the maximum importance of their children plus their own value. This is generally fed the result from ``ndependents`` Examples -------- >>> dsk = {'a': 1, 'b': 2, 'c': (inc, 'a'), 'd': (add, 'b', 'c')} >>> scores = {'a': 3, 'b': 2, 'c': 2, 'd': 1} >>> dependencies, dependents = get_deps(dsk) >>> sorted(child_max(dependencies, dependents, scores).items()) [('a', 3), ('b', 2), ('c', 5), ('d', 6)] """ result = dict() num_needed = dict((k, len(v)) for k, v in dependencies.items()) current = set(k for k, v in num_needed.items() if v == 0) while current: key = current.pop() score = scores[key] children = dependencies[key] if children: score += max(result[child] for child in children) result[key] = score for parent in dependents[key]: num_needed[parent] -= 1 if num_needed[parent] == 0: current.add(parent) return result def dfs(dependencies, dependents, key=lambda x: x): """ Depth First Search of dask graph This traverses from root/output nodes down to leaf/input nodes in a depth first manner. At each node it traverses down its immediate children by the order determined by maximizing the key function. As inputs it takes dependencies and dependents as can be computed from ``get_deps(dsk)``. 
Examples -------- >>> dsk = {'a': 1, 'b': 2, 'c': (inc, 'a'), 'd': (add, 'b', 'c')} >>> dependencies, dependents = get_deps(dsk) >>> sorted(dfs(dependencies, dependents).items()) [('a', 3), ('b', 1), ('c', 2), ('d', 0)] """ result = dict() i = 0 roots = [k for k, v in dependents.items() if not v] stack = sorted(roots, key=key, reverse=True) seen = set() while stack: item = stack.pop() if item in seen: continue seen.add(item) result[item] = i deps = dependencies[item] if deps: deps = deps - seen deps = sorted(deps, key=key, reverse=True) stack.extend(deps) i += 1 return result dask-0.16.0/dask/rewrite.py000066400000000000000000000306651320364734500155160ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from collections import deque from dask.core import istask, subs def head(task): """Return the top level node of a task""" if istask(task): return task[0] elif isinstance(task, list): return list else: return task def args(task): """Get the arguments for the current task""" if istask(task): return task[1:] elif isinstance(task, list): return task else: return () class Traverser(object): """Traverser interface for tasks. Class for storing the state while performing a preorder-traversal of a task. Parameters ---------- term : task The task to be traversed Attributes ---------- term The current element in the traversal current The head of the current element in the traversal. This is simply `head` applied to the attribute `term`. """ def __init__(self, term, stack=None): self.term = term if not stack: self._stack = deque([END]) else: self._stack = stack def __iter__(self): while self.current is not END: yield self.current self.next() def copy(self): """Copy the traverser in its current state. This allows the traversal to be pushed onto a stack, for easy backtracking.""" return Traverser(self.term, deque(self._stack)) def next(self): """Proceed to the next term in the preorder traversal.""" subterms = args(self.term) if not subterms: # No subterms, pop off stack self.term = self._stack.pop() else: self.term = subterms[0] self._stack.extend(reversed(subterms[1:])) @property def current(self): return head(self.term) def skip(self): """Skip over all subterms of the current level in the traversal""" self.term = self._stack.pop() class Token(object): """A token object. Used to express certain objects in the traversal of a task or pattern.""" def __init__(self, name): self.name = name def __repr__(self): return self.name # A variable to represent *all* variables in a discrimination net VAR = Token('?') # Represents the end of the traversal of an expression. We can't use `None`, # 'False', etc... here, as anything may be an argument to a function. END = Token('end') class Node(tuple): """A Discrimination Net node.""" __slots__ = () def __new__(cls, edges=None, patterns=None): edges = edges if edges else {} patterns = patterns if patterns else [] return tuple.__new__(cls, (edges, patterns)) @property def edges(self): """A dictionary, where the keys are edges, and the values are nodes""" return self[0] @property def patterns(self): """A list of all patterns that currently match at this node""" return self[1] class RewriteRule(object): """A rewrite rule. Expresses `lhs` -> `rhs`, for variables `vars`. Parameters ---------- lhs : task The left-hand-side of the rewrite rule. rhs : task or function The right-hand-side of the rewrite rule. If it's a task, variables in `rhs` will be replaced by terms in the subject that match the variables in `lhs`. 
If it's a function, the function will be called with a dict of such matches. vars: tuple, optional Tuple of variables found in the lhs. Variables can be represented as any hashable object; a good convention is to use strings. If there are no variables, this can be omitted. Examples -------- Here's a `RewriteRule` to replace all nested calls to `list`, so that `(list, (list, 'x'))` is replaced with `(list, 'x')`, where `'x'` is a variable. >>> lhs = (list, (list, 'x')) >>> rhs = (list, 'x') >>> variables = ('x',) >>> rule = RewriteRule(lhs, rhs, variables) Here's a more complicated rule that uses a callable right-hand-side. A callable `rhs` takes in a dictionary mapping variables to their matching values. This rule replaces all occurrences of `(list, 'x')` with `'x'` if `'x'` is a list itself. >>> lhs = (list, 'x') >>> def repl_list(sd): ... x = sd['x'] ... if isinstance(x, list): ... return x ... else: ... return (list, x) >>> rule = RewriteRule(lhs, repl_list, variables) """ def __init__(self, lhs, rhs, vars=()): if not isinstance(vars, tuple): raise TypeError("vars must be a tuple of variables") self.lhs = lhs if callable(rhs): self.subs = rhs else: self.subs = self._apply self.rhs = rhs self._varlist = [t for t in Traverser(lhs) if t in vars] # Reduce vars down to just variables found in lhs self.vars = tuple(sorted(set(self._varlist))) def _apply(self, sub_dict): term = self.rhs for key, val in sub_dict.items(): term = subs(term, key, val) return term def __str__(self): return "RewriteRule({0}, {1}, {2})".format(self.lhs, self.rhs, self.vars) def __repr__(self): return str(self) class RuleSet(object): """A set of rewrite rules. Forms a structure for fast rewriting over a set of rewrite rules. This allows for syntactic matching of terms to patterns for many patterns at the same time. Examples -------- >>> def f(*args): pass >>> def g(*args): pass >>> def h(*args): pass >>> from operator import add >>> rs = RuleSet( # Make RuleSet with two Rules ... RewriteRule((add, 'x', 0), 'x', ('x',)), ... RewriteRule((f, (g, 'x'), 'y'), ... (h, 'x', 'y'), ... ('x', 'y'))) >>> rs.rewrite((add, 2, 0)) # Apply ruleset to single task 2 >>> rs.rewrite((f, (g, 'a', 3))) # doctest: +SKIP (h, 'a', 3) >>> dsk = {'a': (add, 2, 0), # Apply ruleset to full dask graph ... 'b': (f, (g, 'a', 3))} >>> from toolz import valmap >>> valmap(rs.rewrite, dsk) # doctest: +SKIP {'a': 2, 'b': (h, 'a', 3)} Attributes ---------- rules : list A list of `RewriteRule`s included in the `RuleSet`. """ def __init__(self, *rules): """Create a `RuleSet` for a number of rules Parameters ---------- rules One or more instances of RewriteRule """ self._net = Node() self.rules = [] for p in rules: self.add(p) def add(self, rule): """Add a rule to the RuleSet. Parameters ---------- rule : RewriteRule """ if not isinstance(rule, RewriteRule): raise TypeError("rule must be instance of RewriteRule") vars = rule.vars curr_node = self._net ind = len(self.rules) # List of variables, in order they appear in the POT of the term for t in Traverser(rule.lhs): prev_node = curr_node if t in vars: t = VAR if t in curr_node.edges: curr_node = curr_node.edges[t] else: curr_node.edges[t] = Node() curr_node = curr_node.edges[t] # We've reached a leaf node. Add the term index to this leaf. prev_node.edges[t].patterns.append(ind) self.rules.append(rule) def iter_matches(self, term): """A generator that lazily finds matchings for term from the RuleSet. 
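        For example (a small sketch reusing the ``add`` rule from the class
        docstring above):

        >>> from operator import add
        >>> rs = RuleSet(RewriteRule((add, 'x', 0), 'x', ('x',)))
        >>> [subs for _, subs in rs.iter_matches((add, 2, 0))]
        [{'x': 2}]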
Parameters ---------- term : task Yields ------ Tuples of `(rule, subs)`, where `rule` is the rewrite rule being matched, and `subs` is a dictionary mapping the variables in the lhs of the rule to their matching values in the term.""" S = Traverser(term) for m, syms in _match(S, self._net): for i in m: rule = self.rules[i] subs = _process_match(rule, syms) if subs is not None: yield rule, subs def _rewrite(self, term): """Apply the rewrite rules in RuleSet to top level of term""" for rule, sd in self.iter_matches(term): # We use for (...) because it's fast in all cases for getting the # first element from the match iterator. As we only want that # element, we break here term = rule.subs(sd) break return term def rewrite(self, task, strategy="bottom_up"): """Apply the `RuleSet` to `task`. This applies the most specific matching rule in the RuleSet to the task, using the provided strategy. Parameters ---------- term: a task The task to be rewritten strategy: str, optional The rewriting strategy to use. Options are "bottom_up" (default), or "top_level". Examples -------- Suppose there was a function `add` that returned the sum of 2 numbers, and another function `double` that returned twice its input: >>> add = lambda x, y: x + y >>> double = lambda x: 2*x Now suppose `double` was *significantly* faster than `add`, so you'd like to replace all expressions `(add, x, x)` with `(double, x)`, where `x` is a variable. This can be expressed as a rewrite rule: >>> rule = RewriteRule((add, 'x', 'x'), (double, 'x'), ('x',)) >>> rs = RuleSet(rule) This can then be applied to terms to perform the rewriting: >>> term = (add, (add, 2, 2), (add, 2, 2)) >>> rs.rewrite(term) # doctest: +SKIP (double, (double, 2)) If we only wanted to apply this to the top level of the term, the `strategy` kwarg can be set to "top_level". >>> rs.rewrite(term) # doctest: +SKIP (double, (add, 2, 2)) """ return strategies[strategy](self, task) def _top_level(net, term): return net._rewrite(term) def _bottom_up(net, term): if istask(term): term = (head(term),) + tuple(_bottom_up(net, t) for t in args(term)) elif isinstance(term, list): term = [_bottom_up(net, t) for t in args(term)] return net._rewrite(term) strategies = {'top_level': _top_level, 'bottom_up': _bottom_up} def _match(S, N): """Structural matching of term S to discrimination net node N.""" stack = deque() restore_state_flag = False # matches are stored in a tuple, because all mutations result in a copy, # preventing operations from changing matches stored on the stack. matches = () while True: if S.current is END: yield N.patterns, matches try: # This try-except block is to catch hashing errors from un-hashable # types. This allows for variables to be matched with un-hashable # objects. n = N.edges.get(S.current, None) if n and not restore_state_flag: stack.append((S.copy(), N, matches)) N = n S.next() continue except TypeError: pass n = N.edges.get(VAR, None) if n: restore_state_flag = False matches = matches + (S.term,) S.skip() N = n continue try: # Backtrack here (S, N, matches) = stack.pop() restore_state_flag = True except Exception: return def _process_match(rule, syms): """Process a match to determine if it is correct, and to find the correct substitution that will convert the term into the pattern. Parameters ---------- rule : RewriteRule syms : iterable Iterable of subterms that match a corresponding variable. Returns ------- A dictionary of {vars : subterms} describing the substitution to make the pattern equivalent with the term. 
Returns `None` if the match is invalid.""" subs = {} varlist = rule._varlist if not len(varlist) == len(syms): raise RuntimeError("length of varlist doesn't match length of syms.") for v, s in zip(varlist, syms): if v in subs and subs[v] != s: return None else: subs[v] = s return subs dask-0.16.0/dask/sharedict.py000066400000000000000000000053321320364734500157740ustar00rootroot00000000000000from toolz import concat, unique, count from collections import Mapping class ShareDict(Mapping): """ A Mapping composed of other Mappings This is a union of other disjoint mappings. It allows the combination of many dicts into a single dict-like object without creating copies of the underlying dicts. It provides cheap ``update``, ``len`` and ``__iter__`` operations as well as a fairly cheap ``__getitem__`` operation (linear in the number of constituent mappings). This class is optimized for Dask's use, and may not be generally useful. Users may want to consider the standard ``collections.ChainMap`` data structure. This class makes the following assumptions: 1. Constituent mappings are disjoint. No key is in more than one mapping. 2. Constituent mappings will not be modified Note that ShareDict does not enforce these assumptions. It is up to the user to guarantee them. Examples -------- >>> a = {'x': 1, 'y': 2} >>> b = {'z': 3} >>> s = ShareDict() >>> s.update(a) >>> s.update(b) >>> dict(s) # doctest: +SKIP {'x': 1, 'y': 2, 'z': 3} These dictionaries are stored within an internal dictionary of dictionaries >>> list(s.dicts.values()) # doctest: +SKIP [{'x': 1, 'y': 2}, {'z': 3}] By default these are named by their object id. However, you can also provide explicit names. >>> s = ShareDict() >>> s.update_with_key(a, key='a') >>> s.update_with_key(b, key='b') >>> s.dicts # doctest: +SKIP {'a': {'x': 1, 'y': 2}, 'b': {'z': 3}} """ def __init__(self): self.dicts = dict() def update_with_key(self, arg, key=None): if type(arg) is ShareDict: assert key is None self.dicts.update(arg.dicts) return if key is None: key = id(arg) assert isinstance(arg, dict) if arg: self.dicts[key] = arg def update(self, arg): self.update_with_key(arg) def __getitem__(self, key): for d in self.dicts.values(): if key in d: return d[key] raise KeyError(key) def __len__(self): return count(iter(self)) def items(self): seen = set() for d in self.dicts.values(): for key in d: if key not in seen: seen.add(key) yield (key, d[key]) def __iter__(self): return unique(concat(self.dicts.values())) def merge(*dicts): result = ShareDict() for d in dicts: if isinstance(d, tuple): key, d = d result.update_with_key(d, key=key) else: result.update_with_key(d) return result dask-0.16.0/dask/sizeof.py000066400000000000000000000035731320364734500153320ustar00rootroot00000000000000from __future__ import print_function, division, absolute_import import sys from .utils import Dispatch try: # PyPy does not support sys.getsizeof sys.getsizeof(1) getsizeof = sys.getsizeof except (AttributeError, TypeError): # Monkey patch getsizeof = lambda x: 100 sizeof = Dispatch(name='sizeof') @sizeof.register(object) def sizeof_default(o): return getsizeof(o) @sizeof.register(list) @sizeof.register(tuple) @sizeof.register(set) @sizeof.register(frozenset) def sizeof_python_collection(seq): return getsizeof(seq) + sum(map(sizeof, seq)) @sizeof.register_lazy("numpy") def register_numpy(): import numpy as np @sizeof.register(np.ndarray) def sizeof_numpy_ndarray(x): return int(x.nbytes) @sizeof.register_lazy("pandas") def register_pandas(): import pandas as pd 
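    # The registrations below are heuristic estimates rather than exact byte
    # counts: they start from pandas' own memory_usage(), add roughly 100
    # bytes per element for object-dtype columns and indexes, and tack on a
    # small constant overhead per container.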
@sizeof.register(pd.DataFrame) def sizeof_pandas_dataframe(df): p = int(df.memory_usage(index=True).sum()) obj = int((df.dtypes == object).sum() * len(df) * 100) if df.index.dtype == object: obj += len(df) * 100 return int(p + obj) + 1000 @sizeof.register(pd.Series) def sizeof_pandas_series(s): p = int(s.memory_usage(index=True)) if s.dtype == object: p += len(s) * 100 if s.index.dtype == object: p += len(s) * 100 return int(p) + 1000 @sizeof.register(pd.Index) def sizeof_pandas_index(i): p = int(i.memory_usage()) obj = len(i) * 100 if i.dtype == object else 0 return int(p + obj) + 1000 @sizeof.register_lazy("scipy") def register_spmatrix(): from scipy import sparse @sizeof.register(sparse.dok_matrix) def sizeof_spmatrix_dok(s): return s.__sizeof__() @sizeof.register(sparse.spmatrix) def sizeof_spmatrix(s): return sum( sizeof(v) for v in s.__dict__.values() ) dask-0.16.0/dask/store/000077500000000000000000000000001320364734500146055ustar00rootroot00000000000000dask-0.16.0/dask/store/__init__.py000066400000000000000000000000301320364734500167070ustar00rootroot00000000000000from .core import Store dask-0.16.0/dask/store/core.py000066400000000000000000000055061320364734500161150ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from collections import defaultdict, MutableMapping from operator import getitem from datetime import datetime from time import time from ..core import istask, ishashable from ..utils_test import add # noqa: F401 class Store(MutableMapping): """ Store - A storage of data and computation Examples -------- Store data like a dictionary >>> import dask.store as ds >>> s = ds.Store() >>> s['x'] = 10 >>> s['x'] 10 Also store computation on that data >>> s['y'] = (add, 'x', 5) Accessing these keys results in computations. Results may be cached for reuse. >>> s['y'] 15 Design ------ A Store maintains the following state dsk: dict A dask to define all computation cache: dict-like Stores both ground data and cached intermediate values data: set The keys in the cache that can not be removed for correctness. 
compute_time: dict:: {key: float} dict mapping the time it took to compute each key access_times: dict:: {key: [datetimes]} The times at which a key was accessed """ def __init__(self, cache=None): self.dsk = dict() if cache is None: cache = dict() self.cache = cache self.data = set() self.compute_time = dict() self.access_times = defaultdict(list) def __setitem__(self, key, value): if key in self.dsk: if (self.dsk[key] == value or self.dsk[key] == (getitem, self.cache, key) and self.cache[key] == value): return else: raise KeyError("Can not overwrite data") if istask(value): self.dsk[key] = value else: self.cache[key] = value self.dsk[key] = (getitem, self.cache, key) self.data.add(key) def __getitem__(self, key): if isinstance(key, list): return (self[item] for item in key) if not ishashable(key): return key if key not in self.dsk: return key self.access_times[key].append(datetime.now()) if key in self.cache: return self.cache[key] task = self.dsk[key] func, args = task[0], task[1:] if func == getitem and args[0] is self.cache: return self.cache[args[1]] args = [self[arg] for arg in args] start = time() result = func(*args) end = time() self.cache[key] = result self.compute_time[key] = end - start return result def __len__(self): return len(self.dsk) def __iter__(self): return iter(self.dsk) def __delitem__(self, key): raise ValueError("Dask Store does not support deletion") dask-0.16.0/dask/store/tests/000077500000000000000000000000001320364734500157475ustar00rootroot00000000000000dask-0.16.0/dask/store/tests/__init__.py000066400000000000000000000000001320364734500200460ustar00rootroot00000000000000dask-0.16.0/dask/store/tests/test_store.py000066400000000000000000000020561320364734500205170ustar00rootroot00000000000000import pytest from dask.store import Store from dask.utils_test import inc, add def test_basic(): s = Store() s['x'] = 1 s['y'] = (inc, 'x') s['z'] = (add, 'x', 'y') assert s.data == set(['x']) assert s['z'] == 3 assert 'x' in s.data assert s.cache['z'] == 3 assert s.cache['y'] == 2 assert len(s.access_times['z']) == 1 assert len(s.access_times['y']) == 1 assert len(s.access_times['x']) == 2 assert s.compute_time['z'] < 0.1 cache = s.cache.copy() assert s['z'] == 3 assert s.cache == cache assert len(s.access_times['z']) == 2 assert len(s.access_times['y']) == 1 assert len(s.access_times['x']) == 2 assert s[5] == 5 assert list(s[['x', 'y']]) == [s['x'], s['y']] def reassign(): s['x'] = 2 pytest.raises(Exception, reassign) def test_update(): s = Store() dsk = {'x': 1, 'y': (inc, 'x')} s.update(dsk) assert s['y'] == 2 pytest.raises(Exception, lambda: s.update({'x': 2})) # Test that it doesn't raise s.update({'x': 1}) dask-0.16.0/dask/tests/000077500000000000000000000000001320364734500146135ustar00rootroot00000000000000dask-0.16.0/dask/tests/__init__.py000066400000000000000000000000001320364734500167120ustar00rootroot00000000000000dask-0.16.0/dask/tests/test_base.py000066400000000000000000000433361320364734500171470ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import pytest from operator import add, mul import subprocess import sys from toolz import merge import dask from dask import delayed from dask.base import (compute, tokenize, normalize_token, normalize_function, visualize, persist, function_cache, is_dask_collection, DaskMethodsMixin) from dask.delayed import Delayed from dask.utils import tmpdir, tmpfile, ignoring from dask.utils_test import inc, dec from dask.compatibility import long, unicode def import_or_none(path): with ignoring(): return 
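# Editor's note: a brief, hedged usage sketch for the Store class defined in
# dask/store/core.py above; it follows that class's docstring and test_store.py
# and uses only the APIs shown there.
from operator import add
from dask.store import Store

s = Store()
s['x'] = 10                # plain data goes straight into the cache
s['y'] = (add, 'x', 5)     # tasks are recorded in the internal graph
assert s['y'] == 15        # accessing 'y' computes it...
assert s.cache['y'] == 15  # ...and caches the result for reuse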
pytest.importorskip(path) return None tz = pytest.importorskip('toolz') da = import_or_none('dask.array') db = import_or_none('dask.bag') dd = import_or_none('dask.dataframe') np = import_or_none('numpy') pd = import_or_none('pandas') def test_normalize_function(): def f1(a, b, c=1): pass def f2(a, b=1, c=2): pass def f3(a): pass assert normalize_function(f2) assert normalize_function(lambda a: a) assert (normalize_function(tz.partial(f2, b=2)) == normalize_function(tz.partial(f2, b=2))) assert (normalize_function(tz.partial(f2, b=2)) != normalize_function(tz.partial(f2, b=3))) assert (normalize_function(tz.partial(f1, b=2)) != normalize_function(tz.partial(f2, b=2))) assert (normalize_function(tz.compose(f2, f3)) == normalize_function(tz.compose(f2, f3))) assert (normalize_function(tz.compose(f2, f3)) != normalize_function(tz.compose(f2, f1))) assert normalize_function(tz.curry(f2)) == normalize_function(tz.curry(f2)) assert normalize_function(tz.curry(f2)) != normalize_function(tz.curry(f1)) assert (normalize_function(tz.curry(f2, b=1)) == normalize_function(tz.curry(f2, b=1))) assert (normalize_function(tz.curry(f2, b=1)) != normalize_function(tz.curry(f2, b=2))) def test_tokenize(): a = (1, 2, 3) assert isinstance(tokenize(a), (str, bytes)) @pytest.mark.skipif('not np') def test_tokenize_numpy_array_consistent_on_values(): assert (tokenize(np.random.RandomState(1234).random_sample(1000)) == tokenize(np.random.RandomState(1234).random_sample(1000))) @pytest.mark.skipif('not np') def test_tokenize_numpy_array_supports_uneven_sizes(): tokenize(np.random.random(7).astype(dtype='i2')) @pytest.mark.skipif('not np') def test_tokenize_discontiguous_numpy_array(): tokenize(np.random.random(8)[::2]) @pytest.mark.skipif('not np') def test_tokenize_numpy_datetime(): tokenize(np.array(['2000-01-01T12:00:00'], dtype='M8[ns]')) @pytest.mark.skipif('not np') def test_tokenize_numpy_scalar(): assert tokenize(np.array(1.0, dtype='f8')) == tokenize(np.array(1.0, dtype='f8')) assert (tokenize(np.array([(1, 2)], dtype=[('a', 'i4'), ('b', 'i8')])[0]) == tokenize(np.array([(1, 2)], dtype=[('a', 'i4'), ('b', 'i8')])[0])) @pytest.mark.skipif('not np') def test_tokenize_numpy_array_on_object_dtype(): assert (tokenize(np.array(['a', 'aa', 'aaa'], dtype=object)) == tokenize(np.array(['a', 'aa', 'aaa'], dtype=object))) assert (tokenize(np.array(['a', None, 'aaa'], dtype=object)) == tokenize(np.array(['a', None, 'aaa'], dtype=object))) assert (tokenize(np.array([(1, 'a'), (1, None), (1, 'aaa')], dtype=object)) == tokenize(np.array([(1, 'a'), (1, None), (1, 'aaa')], dtype=object))) if sys.version_info[0] == 2: assert (tokenize(np.array([unicode("Rebeca Alón", encoding="utf-8")], dtype=object)) == tokenize(np.array([unicode("Rebeca Alón", encoding="utf-8")], dtype=object))) @pytest.mark.skipif('not np') def test_tokenize_numpy_memmap(): with tmpfile('.npy') as fn: x = np.arange(5) np.save(fn, x) y = tokenize(np.load(fn, mmap_mode='r')) with tmpfile('.npy') as fn: x = np.arange(5) np.save(fn, x) z = tokenize(np.load(fn, mmap_mode='r')) assert y != z with tmpfile('.npy') as fn: x = np.random.normal(size=(10, 10)) np.save(fn, x) mm = np.load(fn, mmap_mode='r') mm2 = np.load(fn, mmap_mode='r') a = tokenize(mm[0, :]) b = tokenize(mm[1, :]) c = tokenize(mm[0:3, :]) d = tokenize(mm[:, 0]) assert len(set([a, b, c, d])) == 4 assert tokenize(mm) == tokenize(mm2) assert tokenize(mm[1, :]) == tokenize(mm2[1, :]) @pytest.mark.skipif('not np') def test_tokenize_numpy_memmap_no_filename(): # GH 1562: with tmpfile('.npy') as fn1, 
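# Editor's note: a hedged sketch of the tokenize/normalize_token behaviour the
# surrounding tests check; `Point` is a hypothetical user type added only for
# illustration.
from dask.base import tokenize, normalize_token

assert tokenize([1, 2, 3]) == tokenize([1, 2, 3])   # deterministic
assert tokenize({'x': 1}) != tokenize({'x': 2})

class Point(object):
    def __init__(self, x, y):
        self.x, self.y = x, y

# Teach the tokenizer to hash Points by value rather than by identity.
normalize_token.register(Point, lambda p: (p.x, p.y))
assert tokenize(Point(1, 2)) == tokenize(Point(1, 2))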
tmpfile('.npy') as fn2: x = np.arange(5) np.save(fn1, x) np.save(fn2, x) a = np.load(fn1, mmap_mode='r') b = a + a assert tokenize(b) == tokenize(b) @pytest.mark.skipif('not np') def test_tokenize_numpy_ufunc_consistent(): assert tokenize(np.sin) == '02106e2c67daf452fb480d264e0dac21' assert tokenize(np.cos) == 'c99e52e912e4379882a9a4b387957a0b' # Make a ufunc that isn't in the numpy namespace. Similar to # any found in other packages. inc = np.frompyfunc(lambda x: x + 1, 1, 1) assert tokenize(inc) == tokenize(inc) def test_normalize_base(): for i in [1, long(1), 1.1, '1', slice(1, 2, 3)]: assert normalize_token(i) is i @pytest.mark.skipif('not pd') def test_tokenize_pandas(): a = pd.DataFrame({'x': [1, 2, 3], 'y': ['4', 'asd', None]}, index=[1, 2, 3]) b = pd.DataFrame({'x': [1, 2, 3], 'y': ['4', 'asd', None]}, index=[1, 2, 3]) assert tokenize(a) == tokenize(b) b.index.name = 'foo' assert tokenize(a) != tokenize(b) a = pd.DataFrame({'x': [1, 2, 3], 'y': ['a', 'b', 'a']}) b = pd.DataFrame({'x': [1, 2, 3], 'y': ['a', 'b', 'a']}) a['z'] = a.y.astype('category') assert tokenize(a) != tokenize(b) b['z'] = a.y.astype('category') assert tokenize(a) == tokenize(b) def test_tokenize_kwargs(): assert tokenize(5, x=1) == tokenize(5, x=1) assert tokenize(5) != tokenize(5, x=1) assert tokenize(5, x=1) != tokenize(5, x=2) assert tokenize(5, x=1) != tokenize(5, y=1) def test_tokenize_same_repr(): class Foo(object): def __init__(self, x): self.x = x def __repr__(self): return 'a foo' assert tokenize(Foo(1)) != tokenize(Foo(2)) def test_tokenize_method(): class Foo(object): def __init__(self, x): self.x = x def __dask_tokenize__(self): return self.x a, b = Foo(1), Foo(2) assert tokenize(a) == tokenize(a) assert tokenize(a) != tokenize(b) # dispatch takes precedence before = tokenize(a) normalize_token.register(Foo, lambda self: self.x + 1) after = tokenize(a) assert before != after @pytest.mark.skipif('not np') def test_tokenize_sequences(): assert tokenize([1]) != tokenize([2]) assert tokenize([1]) != tokenize((1,)) assert tokenize([1]) == tokenize([1]) x = np.arange(2000) # long enough to drop information in repr y = np.arange(2000) y[1000] = 0 # middle isn't printed in repr assert tokenize([x]) != tokenize([y]) def test_tokenize_dict(): assert tokenize({'x': 1, 1: 'x'}) == tokenize({'x': 1, 1: 'x'}) def test_tokenize_set(): assert tokenize({1, 2, 'x', (1, 'x')}) == tokenize({1, 2, 'x', (1, 'x')}) def test_tokenize_ordered_dict(): with ignoring(ImportError): from collections import OrderedDict a = OrderedDict([('a', 1), ('b', 2)]) b = OrderedDict([('a', 1), ('b', 2)]) c = OrderedDict([('b', 2), ('a', 1)]) assert tokenize(a) == tokenize(b) assert tokenize(a) != tokenize(c) @pytest.mark.skipif('not np') def test_tokenize_object_array_with_nans(): a = np.array([u'foo', u'Jos\xe9', np.nan], dtype='O') assert tokenize(a) == tokenize(a) @pytest.mark.parametrize('x', [1, True, 'a', b'a', 1.0, 1j, 1.0j, [], (), {}, None, str, int]) def test_tokenize_base_types(x): assert tokenize(x) == tokenize(x), x def test_is_dask_collection(): class DummyCollection(object): def __init__(self, dsk=None): self.dask = dsk def __dask_graph__(self): return self.dask x = delayed(1) + 2 assert is_dask_collection(x) assert not is_dask_collection(2) assert is_dask_collection(DummyCollection({})) assert not is_dask_collection(DummyCollection()) assert not is_dask_collection(DummyCollection) class Tuple(DaskMethodsMixin): __slots__ = ('_dask', '_keys') __dask_scheduler__ = staticmethod(dask.threaded.get) def __init__(self, dsk, keys): 
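# Editor's note: an aside, separate from the Tuple test fixture being defined
# around this point -- a minimal sketch of the __dask_tokenize__ hook that
# test_tokenize_method exercises; `Record` is a hypothetical name.
class Record(object):
    def __init__(self, payload):
        self.payload = payload

    def __dask_tokenize__(self):
        # tokenize() hashes this value instead of the object's identity,
        # so equal payloads yield equal tokens.
        return self.payload

# tokenize(Record(1)) == tokenize(Record(1)) once this hook is defined.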
self._dask = dsk self._keys = keys def __add__(self, other): if isinstance(other, Tuple): return Tuple(merge(self._dask, other._dask), self._keys + other._keys) return NotImplemented def __dask_graph__(self): return self._dask def __dask_keys__(self): return self._keys def __dask_tokenize__(self): return self._keys def __dask_postcompute__(self): return tuple, () def __dask_postpersist__(self): return Tuple, (self._keys,) def test_custom_collection(): dsk = {'a': 1, 'b': 2} dsk2 = {'c': (add, 'a', 'b'), 'd': (add, 'c', 1)} dsk2.update(dsk) dsk3 = {'e': (add, 'a', 4), 'f': (inc, 'e')} dsk3.update(dsk) x = Tuple(dsk, ['a', 'b']) y = Tuple(dsk2, ['c', 'd']) z = Tuple(dsk3, ['e', 'f']) # __slots__ defined on base mixin class propogates with pytest.raises(AttributeError): x.foo = 1 # is_dask_collection assert is_dask_collection(x) # tokenize assert tokenize(x) == tokenize(x) assert tokenize(x) != tokenize(y) # compute assert x.compute() == (1, 2) assert dask.compute(x, [y, z]) == ((1, 2), [(3, 4), (5, 6)]) t = x + y + z assert t.compute() == (1, 2, 3, 4, 5, 6) # persist t2 = t.persist() assert isinstance(t2, Tuple) assert t2._dask == dict(zip('abcdef', range(1, 7))) assert t2.compute() == (1, 2, 3, 4, 5, 6) x2, y2, z2 = dask.persist(x, y, z) t3 = x2 + y2 + z2 assert t2._dask == t3._dask @pytest.mark.skipif('not db') def test_compute_no_opt(): # Bag does `fuse` by default. Test that with `optimize_graph=False` that # doesn't get called. We check this by using a callback to track the keys # that are computed. from dask.callbacks import Callback b = db.from_sequence(range(100), npartitions=4) add1 = tz.partial(add, 1) mul2 = tz.partial(mul, 2) o = b.map(add1).map(mul2) # Check that with the kwarg, the optimization doesn't happen keys = [] with Callback(pretask=lambda key, *args: keys.append(key)): o.compute(get=dask.get, optimize_graph=False) assert len([k for k in keys if 'mul' in k[0]]) == 4 assert len([k for k in keys if 'add' in k[0]]) == 4 # Check that without the kwarg, the optimization does happen keys = [] with Callback(pretask=lambda key, *args: keys.append(key)): o.compute(get=dask.get) # Names of fused tasks have been merged, and the original key is an alias. # Otherwise, the lengths below would be 4 and 0. assert len([k for k in keys if 'mul' in k[0]]) == 8 assert len([k for k in keys if 'add' in k[0]]) == 4 assert len([k for k in keys if 'add-map-mul' in k[0]]) == 4 # See? 
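# Editor's note: a hedged sketch of the Callback-based key tracking used in
# test_compute_no_opt above; only dask.callbacks.Callback and the synchronous
# scheduler, both appearing in these tests, are assumed.
from operator import add
from dask.callbacks import Callback
from dask.local import get_sync

executed = []
with Callback(pretask=lambda key, *args: executed.append(key)):
    get_sync({'a': 1, 'b': (add, 'a', 10)}, 'b')
# `executed` now lists the keys the scheduler actually ran, which is how the
# test above tells optimized and unoptimized graphs apart.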
Renamed @pytest.mark.skipif('not da') def test_compute_array(): arr = np.arange(100).reshape((10, 10)) darr = da.from_array(arr, chunks=(5, 5)) darr1 = darr + 1 darr2 = darr + 2 out1, out2 = compute(darr1, darr2) assert np.allclose(out1, arr + 1) assert np.allclose(out2, arr + 2) @pytest.mark.skipif('not da') def test_persist_array(): from dask.array.utils import assert_eq arr = np.arange(100).reshape((10, 10)) x = da.from_array(arr, chunks=(5, 5)) x = (x + 1) - x.mean(axis=0) y = x.persist() assert_eq(x, y) assert set(y.dask).issubset(x.dask) assert len(y.dask) == y.npartitions @pytest.mark.skipif('not dd') def test_compute_dataframe(): df = pd.DataFrame({'a': [1, 2, 3, 4], 'b': [5, 5, 3, 3]}) ddf = dd.from_pandas(df, npartitions=2) ddf1 = ddf.a + 1 ddf2 = ddf.a + ddf.b out1, out2 = compute(ddf1, ddf2) pd.util.testing.assert_series_equal(out1, df.a + 1) pd.util.testing.assert_series_equal(out2, df.a + df.b) @pytest.mark.skipif('not dd or not da') def test_compute_array_dataframe(): arr = np.arange(100).reshape((10, 10)) darr = da.from_array(arr, chunks=(5, 5)) + 1 df = pd.DataFrame({'a': [1, 2, 3, 4], 'b': [5, 5, 3, 3]}) ddf = dd.from_pandas(df, npartitions=2).a + 2 arr_out, df_out = compute(darr, ddf) assert np.allclose(arr_out, arr + 1) pd.util.testing.assert_series_equal(df_out, df.a + 2) @pytest.mark.skipif('not da or not db') def test_compute_array_bag(): x = da.arange(5, chunks=2) b = db.from_sequence([1, 2, 3]) pytest.raises(ValueError, lambda: compute(x, b)) xx, bb = compute(x, b, get=dask.get) assert np.allclose(xx, np.arange(5)) assert bb == [1, 2, 3] @pytest.mark.skipif('not da') def test_compute_with_literal(): x = da.arange(5, chunks=2) y = 10 xx, yy = compute(x, y) assert (xx == x.compute()).all() assert yy == y assert compute(5) == (5,) def test_compute_nested(): a = delayed(1) + 5 b = a + 1 c = a + 2 assert (compute({'a': a, 'b': [1, 2, b]}, (c, 2)) == ({'a': 6, 'b': [1, 2, 7]}, (8, 2))) res = compute([a, b], c, traverse=False) assert res[0][0] is a assert res[0][1] is b assert res[1] == 8 @pytest.mark.skipif('not da') @pytest.mark.skipif(sys.flags.optimize, reason="graphviz exception with Python -OO flag") def test_visualize(): pytest.importorskip('graphviz') with tmpdir() as d: x = da.arange(5, chunks=2) x.visualize(filename=os.path.join(d, 'mydask')) assert os.path.exists(os.path.join(d, 'mydask.png')) x.visualize(filename=os.path.join(d, 'mydask.pdf')) assert os.path.exists(os.path.join(d, 'mydask.pdf')) visualize(x, 1, 2, filename=os.path.join(d, 'mydask.png')) assert os.path.exists(os.path.join(d, 'mydask.png')) dsk = {'a': 1, 'b': (add, 'a', 2), 'c': (mul, 'a', 1)} visualize(x, dsk, filename=os.path.join(d, 'mydask.png')) assert os.path.exists(os.path.join(d, 'mydask.png')) x = Tuple(dsk, ['a', 'b', 'c']) visualize(x, filename=os.path.join(d, 'mydask.png')) assert os.path.exists(os.path.join(d, 'mydask.png')) def test_use_cloudpickle_to_tokenize_functions_in__main__(): import sys from textwrap import dedent defn = dedent(""" def inc(): return x """) __main__ = sys.modules['__main__'] exec(compile(defn, '', 'exec'), __main__.__dict__) f = __main__.inc t = normalize_token(f) assert b'cloudpickle' in t def test_optimizations_keyword(): def inc_to_dec(dsk, keys): for key in dsk: if dsk[key][0] == inc: dsk[key] = (dec,) + dsk[key][1:] return dsk x = dask.delayed(inc)(1) assert x.compute() == 2 with dask.set_options(optimizations=[inc_to_dec]): assert x.compute() == 0 assert x.compute() == 2 def test_default_imports(): """ Startup time: `import dask` should not import too 
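# Editor's note: a short, hedged illustration of the pattern the surrounding
# tests rely on -- dask.compute evaluating several collections in one shared
# pass; numpy is assumed, as in those tests.
import dask
import dask.array as da
from dask import delayed

x = da.ones(10, chunks=5)
total = delayed(sum)([1, 2, 3])
arr, tot = dask.compute(x + 1, total)   # one graph walk, two concrete results
assert tot == 6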
many modules. """ code = """if 1: import dask import sys print(sorted(sys.modules)) """ out = subprocess.check_output([sys.executable, '-c', code]) modules = set(eval(out.decode())) assert 'dask' in modules blacklist = ['dask.array', 'dask.dataframe', 'numpy', 'pandas', 'partd', 's3fs', 'distributed'] for mod in blacklist: assert mod not in modules def test_persist_literals(): assert persist(1, 2, 3) == (1, 2, 3) def test_persist_delayed(): x1 = delayed(1) x2 = delayed(inc)(x1) x3 = delayed(inc)(x2) xx, = persist(x3) assert isinstance(xx, Delayed) assert xx.key == x3.key assert len(xx.dask) == 1 assert x3.compute() == xx.compute() @pytest.mark.skipif('not da or not db') def test_persist_array_bag(): x = da.arange(5, chunks=2) + 1 b = db.from_sequence([1, 2, 3]).map(inc) with pytest.raises(ValueError): persist(x, b) xx, bb = persist(x, b, get=dask.get) assert isinstance(xx, da.Array) assert isinstance(bb, db.Bag) assert xx.name == x.name assert bb.name == b.name assert len(xx.dask) == xx.npartitions < len(x.dask) assert len(bb.dask) == bb.npartitions < len(b.dask) assert np.allclose(x, xx) assert list(b) == list(bb) def test_normalize_function_limited_size(): for i in range(1000): normalize_function(lambda x: x) assert 50 < len(function_cache) < 600 def test_optimize_globals(): da = pytest.importorskip('dask.array') db = pytest.importorskip('dask.bag') x = da.ones(10, chunks=(5,)) def optimize_double(dsk, keys): return {k: (mul, 2, v) for k, v in dsk.items()} from dask.array.utils import assert_eq assert_eq(x + 1, np.ones(10) + 1) with dask.set_options(array_optimize=optimize_double): assert_eq(x + 1, (np.ones(10) * 2 + 1) * 2) assert_eq(x + 1, np.ones(10) + 1) b = db.range(10, npartitions=2) with dask.set_options(array_optimize=optimize_double): xx, bb = dask.compute(x + 1, b.map(inc), get=dask.get) assert_eq(xx, (np.ones(10) * 2 + 1) * 2) def test_optimize_None(): da = pytest.importorskip('dask.array') x = da.ones(10, chunks=(5,)) y = x[:9][1:8][::2] + 1 # normally these slices would be fused def my_get(dsk, keys): assert dsk == dict(y.dask) # but they aren't return dask.get(dsk, keys) with dask.set_options(array_optimize=None, get=my_get): y.compute() dask-0.16.0/dask/tests/test_cache.py000066400000000000000000000023101320364734500172630ustar00rootroot00000000000000from dask.cache import Cache from dask.local import get_sync from dask.threaded import get from operator import add from dask.context import _globals from time import sleep import pytest cachey = pytest.importorskip('cachey') flag = [] def inc(x): flag.append(x) return x + 1 def test_cache(): c = cachey.Cache(10000) cc = Cache(c) with cc: assert get({'x': (inc, 1)}, 'x') == 2 assert flag == [1] assert c.data['x'] == 2 assert not cc.starttimes assert not cc.durations while flag: flag.pop() dsk = {'x': (inc, 1), 'y': (inc, 2), 'z': (add, 'x', 'y')} with cc: assert get(dsk, 'z') == 5 assert flag == [2] # no x present assert not _globals['callbacks'] def test_cache_with_number(): c = Cache(10000, limit=1) assert isinstance(c.cache, cachey.Cache) assert c.cache.available_bytes == 10000 assert c.cache.limit == 1 def f(duration, size, *args): sleep(duration) return [0] * size def test_prefer_cheap_dependent(): dsk = {'x': (f, 0.01, 10), 'y': (f, 0.000001, 1, 'x')} c = Cache(10000) with c: get_sync(dsk, 'y') assert c.cache.scorer.cost['x'] < c.cache.scorer.cost['y'] dask-0.16.0/dask/tests/test_callbacks.py000066400000000000000000000046331320364734500201510ustar00rootroot00000000000000from dask.local import get_sync from dask.context 
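# Editor's note: a hedged sketch of the opportunistic caching exercised in
# test_cache.py above; like that test, it assumes the optional `cachey`
# dependency is installed.
from operator import add
from dask.cache import Cache
from dask.threaded import get

cache = Cache(10000)                      # byte budget, as in the test
with cache:
    get({'x': 1, 'y': (add, 'x', 1)}, 'y')
# Inside the context manager, selected intermediates land in cache.cache and
# can be reused by later computations.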
import _globals from dask.threaded import get as get_threaded from dask.callbacks import Callback from dask.utils_test import add def test_start_callback(): flag = [False] class MyCallback(Callback): def _start(self, dsk): flag[0] = True with MyCallback(): get_sync({'x': 1}, 'x') assert flag[0] is True def test_start_state_callback(): flag = [False] class MyCallback(Callback): def _start_state(self, dsk, state): flag[0] = True assert dsk['x'] == 1 assert len(state['cache']) == 1 with MyCallback(): get_sync({'x': 1}, 'x') assert flag[0] is True def test_finish_always_called(): flag = [False] class MyCallback(Callback): def _finish(self, dsk, state, errored): flag[0] = True assert errored dsk = {'x': (lambda: 1 / 0,)} # `raise_on_exception=True` try: with MyCallback(): get_sync(dsk, 'x') except Exception as e: assert isinstance(e, ZeroDivisionError) assert flag[0] # `raise_on_exception=False` flag[0] = False try: with MyCallback(): get_threaded(dsk, 'x') except Exception as e: assert isinstance(e, ZeroDivisionError) assert flag[0] # KeyboardInterrupt def raise_keyboard(): raise KeyboardInterrupt() dsk = {'x': (raise_keyboard,)} flag[0] = False try: with MyCallback(): get_sync(dsk, 'x') except BaseException as e: assert isinstance(e, KeyboardInterrupt) assert flag[0] def test_nested_schedulers(): class MyCallback(Callback): def _start(self, dsk): self.dsk = dsk def _pretask(self, key, dsk, state): assert key in self.dsk inner_callback = MyCallback() inner_dsk = {'x': (add, 1, 2), 'y': (add, 'x', 3)} def nested_call(x): assert not _globals['callbacks'] with inner_callback: return get_threaded(inner_dsk, 'y') + x outer_callback = MyCallback() outer_dsk = {'a': (nested_call, 1), 'b': (add, 'a', 2)} with outer_callback: get_threaded(outer_dsk, 'b') assert not _globals['callbacks'] assert outer_callback.dsk == outer_dsk assert inner_callback.dsk == inner_dsk assert not _globals['callbacks'] dask-0.16.0/dask/tests/test_context.py000066400000000000000000000025021320364734500177070ustar00rootroot00000000000000from dask.context import set_options, _globals, globalmethod import dask.array as da import dask def test_with_get(): var = [0] def myget(dsk, keys, **kwargs): var[0] = var[0] + 1 return dask.get(dsk, keys, **kwargs) x = da.ones(10, chunks=(5,)) assert x.sum().compute() == 10 assert var[0] == 0 with set_options(get=myget): assert x.sum().compute() == 10 assert var[0] == 1 # Make sure we've cleaned up assert x.sum().compute() == 10 assert var[0] == 1 def test_set_options_context_manger(): with set_options(foo='bar'): assert _globals['foo'] == 'bar' assert _globals['foo'] is None try: set_options(foo='baz') assert _globals['foo'] == 'baz' finally: del _globals['foo'] def foo(): return 'foo' def bar(): return 'bar' class Foo(object): @globalmethod(key='f') def f(): return 1 g = globalmethod(foo, key='g', falsey=bar) def test_globalmethod(): x = Foo() assert x.f() == 1 with dask.set_options(f=lambda: 2): assert x.f() == 2 with dask.set_options(f=foo): assert x.f is foo assert x.f() == 'foo' assert x.g is foo assert x.g() == 'foo' with dask.set_options(g=False): assert x.g is bar assert x.g() == 'bar' dask-0.16.0/dask/tests/test_core.py000066400000000000000000000143531320364734500171620ustar00rootroot00000000000000from collections import namedtuple import pytest import pickle from dask.utils_test import GetFunctionTestMixin, inc, add from dask import core from dask.core import (istask, get_dependencies, get_deps, flatten, subs, preorder_traversal, literal, quote, has_tasks) def contains(a, b): """ 
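# Editor's note: a minimal sketch, closely following test_with_get above, of
# temporarily routing .compute() through a custom get function via
# dask.set_options.
import dask
import dask.array as da

calls = [0]

def counting_get(dsk, keys, **kwargs):
    calls[0] += 1
    return dask.get(dsk, keys, **kwargs)

x = da.ones(10, chunks=5)
with dask.set_options(get=counting_get):
    assert x.sum().compute() == 10
assert calls[0] == 1   # the override applied only inside the block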
>>> contains({'x': 1, 'y': 2}, {'x': 1}) True >>> contains({'x': 1, 'y': 2}, {'z': 3}) False """ return all(a.get(k) == v for k, v in b.items()) def test_istask(): assert istask((inc, 1)) assert not istask(1) assert not istask((1, 2)) f = namedtuple('f', ['x', 'y']) assert not istask(f(sum, 2)) def test_has_tasks(): dsk = {'a': [1, 2, 3], 'b': 'a', 'c': [1, (inc, 1)], 'd': [(sum, 'a')], 'e': ['a', 'b'], 'f': [['a', 'b'], 2, 3]} assert not has_tasks(dsk, dsk['a']) assert has_tasks(dsk, dsk['b']) assert has_tasks(dsk, dsk['c']) assert has_tasks(dsk, dsk['d']) assert has_tasks(dsk, dsk['e']) assert has_tasks(dsk, dsk['f']) def test_preorder_traversal(): t = (add, 1, 2) assert list(preorder_traversal(t)) == [add, 1, 2] t = (add, (add, 1, 2), (add, 3, 4)) assert list(preorder_traversal(t)) == [add, add, 1, 2, add, 3, 4] t = (add, (sum, [1, 2]), 3) assert list(preorder_traversal(t)) == [add, sum, list, 1, 2, 3] class TestGet(GetFunctionTestMixin): get = staticmethod(core.get) class TestRecursiveGet(GetFunctionTestMixin): get = staticmethod(lambda d, k: core.get(d, k, recursive=True)) def test_get_stack_limit(self): # will blow stack in recursive mode pass def test_GetFunctionTestMixin_class(): class TestCustomGetFail(GetFunctionTestMixin): get = staticmethod(lambda x, y: 1) custom_testget = TestCustomGetFail() pytest.raises(AssertionError, custom_testget.test_get) class TestCustomGetPass(GetFunctionTestMixin): get = staticmethod(core.get) custom_testget = TestCustomGetPass() custom_testget.test_get() def test_get_dependencies_nested(): dsk = {'x': 1, 'y': 2, 'z': (add, (inc, [['x']]), 'y')} assert get_dependencies(dsk, 'z') == set(['x', 'y']) assert sorted(get_dependencies(dsk, 'z', as_list=True)) == ['x', 'y'] def test_get_dependencies_empty(): dsk = {'x': (inc,)} assert get_dependencies(dsk, 'x') == set() assert get_dependencies(dsk, 'x', as_list=True) == [] def test_get_dependencies_list(): dsk = {'x': 1, 'y': 2, 'z': ['x', [(inc, 'y')]]} assert get_dependencies(dsk, 'z') == set(['x', 'y']) assert sorted(get_dependencies(dsk, 'z', as_list=True)) == ['x', 'y'] def test_get_dependencies_task(): dsk = {'x': 1, 'y': 2, 'z': ['x', [(inc, 'y')]]} assert get_dependencies(dsk, task=(inc, 'x')) == set(['x']) assert get_dependencies(dsk, task=(inc, 'x'), as_list=True) == ['x'] def test_get_dependencies_nothing(): with pytest.raises(ValueError): get_dependencies({}) def test_get_dependencies_many(): dsk = {'a': [1, 2, 3], 'b': 'a', 'c': [1, (inc, 1)], 'd': [(sum, 'c')], 'e': ['a', 'b', 'zzz'], 'f': [['a', 'b'], 2, 3]} tasks = [dsk[k] for k in ('d', 'f')] s = get_dependencies(dsk, task=tasks) assert s == {'a', 'b', 'c'} s = get_dependencies(dsk, task=tasks, as_list=True) assert sorted(s) == ['a', 'b', 'c'] s = get_dependencies(dsk, task=[]) assert s == set() s = get_dependencies(dsk, task=[], as_list=True) assert s == [] def test_get_deps(): """ >>> dsk = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b')} >>> dependencies, dependents = get_deps(dsk) >>> dependencies {'a': set([]), 'c': set(['b']), 'b': set(['a'])} >>> dependents {'a': set(['b']), 'c': set([]), 'b': set(['c'])} """ dsk = {'a': [1, 2, 3], 'b': 'a', 'c': [1, (inc, 1)], 'd': [(sum, 'c')], 'e': ['b', 'zzz', 'b'], 'f': [['a', 'b'], 2, 3]} dependencies, dependents = get_deps(dsk) assert dependencies == {'a': set(), 'b': {'a'}, 'c': set(), 'd': {'c'}, 'e': {'b'}, 'f': {'a', 'b'}, } assert dependents == {'a': {'b', 'f'}, 'b': {'e', 'f'}, 'c': {'d'}, 'd': set(), 'e': set(), 'f': set(), } def test_flatten(): assert list(flatten(())) == [] assert 
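# Editor's note: a hedged sketch of the dependency helpers covered above;
# get_dependencies inspects a single key, get_deps builds both directions for
# the whole graph.
from dask.core import get_dependencies, get_deps
from dask.utils_test import inc, add

dsk = {'a': 1, 'b': (inc, 'a'), 'c': (add, 'a', 'b')}
assert get_dependencies(dsk, 'c') == {'a', 'b'}
dependencies, dependents = get_deps(dsk)
assert dependents['a'] == {'b', 'c'}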
list(flatten('foo')) == ['foo'] def test_subs(): assert subs((sum, [1, 'x']), 'x', 2) == (sum, [1, 2]) assert subs((sum, [1, ['x']]), 'x', 2) == (sum, [1, [2]]) class MutateOnEq(object): hit_eq = 0 def __eq__(self, other): self.hit_eq += 1 return False def test_subs_no_key_data_eq(): # Numpy throws a deprecation warning on bool(array == scalar), which # pollutes the terminal. This test checks that `subs` never tries to # compare keys (scalars) with values (which could be arrays)`subs` never # tries to compare keys (scalars) with values (which could be arrays). a = MutateOnEq() subs(a, 'x', 1) assert a.hit_eq == 0 subs((add, a, 'x'), 'x', 1) assert a.hit_eq == 0 def test_subs_with_unfriendly_eq(): try: import numpy as np except ImportError: return else: task = (np.sum, np.array([1, 2])) assert (subs(task, (4, 5), 1) == task) is True class MyException(Exception): pass class F(): def __eq__(self, other): raise MyException() task = F() assert subs(task, 1, 2) is task def test_subs_with_surprisingly_friendly_eq(): try: import pandas as pd except ImportError: return else: df = pd.DataFrame() assert subs(df, 'x', 1) is df def test_quote(): literals = [[1, 2, 3], (add, 1, 2), [1, [2, 3]], (add, 1, (add, 2, 3))] for l in literals: assert core.get({'x': quote(l)}, 'x') == l def test_literal_serializable(): l = literal((add, 1, 2)) assert pickle.loads(pickle.dumps(l)).data == (add, 1, 2) dask-0.16.0/dask/tests/test_delayed.py000066400000000000000000000327161320364734500176440ustar00rootroot00000000000000from collections import namedtuple from operator import add, setitem import pickle from random import random from toolz import identity, partial, merge import pytest import dask from dask import set_options, compute from dask.compatibility import PY2, PY3 from dask.delayed import delayed, to_task_dask, Delayed from dask.utils_test import inc class Tuple(object): __dask_scheduler__ = staticmethod(dask.threaded.get) def __init__(self, dsk, keys): self._dask = dsk self._keys = keys def __dask_tokenize__(self): return self._keys def __dask_graph__(self): return self._dask def __dask_keys__(self): return self._keys def __dask_postcompute__(self): return tuple, () def test_to_task_dask(): a = delayed(1, name='a') b = delayed(2, name='b') task, dask = to_task_dask([a, b, 3]) assert task == ['a', 'b', 3] task, dask = to_task_dask((a, b, 3)) assert task == (tuple, ['a', 'b', 3]) assert dict(dask) == merge(a.dask, b.dask) task, dask = to_task_dask({a: 1, b: 2}) assert (task == (dict, [['b', 2], ['a', 1]]) or task == (dict, [['a', 1], ['b', 2]])) assert dict(dask) == merge(a.dask, b.dask) f = namedtuple('f', ['x', 'y']) x = f(1, 2) task, dask = to_task_dask(x) assert task == x assert dict(dask) == {} task, dask = to_task_dask(slice(a, b, 3)) assert task == (slice, 'a', 'b', 3) assert dict(dask) == merge(a.dask, b.dask) # Issue https://github.com/dask/dask/issues/2107 class MyClass(dict): pass task, dask = to_task_dask(MyClass()) assert type(task) is MyClass assert dict(dask) == {} # Custom dask objects x = Tuple({'a': 1, 'b': 2, 'c': (add, 'a', 'b')}, ['a', 'b', 'c']) task, dask = to_task_dask(x) assert task in dask f = dask.pop(task) assert f == (tuple, ['a', 'b', 'c']) assert dask == x._dask def test_delayed(): add2 = delayed(add) assert add2(1, 2).compute() == 3 assert (add2(1, 2) + 3).compute() == 6 assert add2(add2(1, 2), 3).compute() == 6 a = delayed(1) assert a.compute() == 1 assert 1 in a.dask.values() b = add2(add2(a, 2), 3) assert a.key in b.dask def test_operators(): a = delayed([1, 2, 3]) assert 
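# Editor's note: a compact, hedged refresher on the delayed API these tests
# exercise; only dask.delayed itself is used.
from dask import delayed

@delayed
def fetch(i):
    return i + 1

parts = [fetch(i) for i in range(3)]
total = delayed(sum)(parts)       # builds a graph; nothing has run yet
assert total.compute() == 6       # 1 + 2 + 3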
a[0].compute() == 1 assert (a + a).compute() == [1, 2, 3, 1, 2, 3] b = delayed(2) assert a[:b].compute() == [1, 2] a = delayed(10) assert (a + 1).compute() == 11 assert (1 + a).compute() == 11 assert (a >> 1).compute() == 5 assert (a > 2).compute() assert (a ** 2).compute() == 100 def test_methods(): a = delayed("a b c d e") assert a.split(' ').compute() == ['a', 'b', 'c', 'd', 'e'] assert a.upper().replace('B', 'A').split().count('A').compute() == 2 assert a.split(' ', pure=True).key == a.split(' ', pure=True).key o = a.split(' ', dask_key_name='test') assert o.key == 'test' def test_attributes(): a = delayed(2 + 1j) assert a.real._key == a.real._key assert a.real.compute() == 2 assert a.imag.compute() == 1 assert (a.real + a.imag).compute() == 3 def test_method_getattr_call_same_task(): a = delayed([1, 2, 3]) o = a.index(1) # Don't getattr the method, then call in separate task assert getattr not in set(v[0] for v in o.__dask_graph__().values()) def test_delayed_errors(): a = delayed([1, 2, 3]) # Immutable pytest.raises(TypeError, lambda: setattr(a, 'foo', 1)) pytest.raises(TypeError, lambda: setitem(a, 1, 0)) # Can't iterate, or check if contains pytest.raises(TypeError, lambda: 1 in a) pytest.raises(TypeError, lambda: list(a)) # No dynamic generation of magic/hidden methods pytest.raises(AttributeError, lambda: a._hidden()) # Truth of delayed forbidden pytest.raises(TypeError, lambda: bool(a)) def test_common_subexpressions(): a = delayed([1, 2, 3]) res = a[0] + a[0] assert a[0].key in res.dask assert a.key in res.dask assert len(res.dask) == 3 def test_lists(): a = delayed(1) b = delayed(2) c = delayed(sum)([a, b]) assert c.compute() == 3 def test_literates(): a = delayed(1) b = a + 1 lit = (a, b, 3) assert delayed(lit).compute() == (1, 2, 3) lit = [a, b, 3] assert delayed(lit).compute() == [1, 2, 3] lit = set((a, b, 3)) assert delayed(lit).compute() == set((1, 2, 3)) lit = {a: 'a', b: 'b', 3: 'c'} assert delayed(lit).compute() == {1: 'a', 2: 'b', 3: 'c'} assert delayed(lit)[a].compute() == 'a' lit = {'a': a, 'b': b, 'c': 3} assert delayed(lit).compute() == {'a': 1, 'b': 2, 'c': 3} assert delayed(lit)['a'].compute() == 1 def test_literates_keys(): a = delayed(1) b = a + 1 lit = (a, b, 3) assert delayed(lit).key != delayed(lit).key assert delayed(lit, pure=True).key == delayed(lit, pure=True).key def test_lists_are_concrete(): a = delayed(1) b = delayed(2) c = delayed(max)([[a, 10], [b, 20]], key=lambda x: x[0])[1] assert c.compute() == 20 def test_iterators(): a = delayed(1) b = delayed(2) c = delayed(sum)(iter([a, b])) assert c.compute() == 3 def f(seq): return sum(seq) c = delayed(f)(iter([a, b])) assert c.compute() == 3 def test_traverse_false(): # Create a list with a dask value, and test that it's not computed def fail(*args): raise ValueError("shouldn't have computed") a = delayed(fail)() # list x = [a, 1, 2, 3] res = delayed(x, traverse=False).compute() assert len(res) == 4 assert res[0] is a assert res[1:] == x[1:] # tuple that looks like a task x = (fail, a, (fail, a)) res = delayed(x, traverse=False).compute() assert isinstance(res, tuple) assert res[0] == fail assert res[1] is a # list containing task-like-things x = [1, (fail, a), a] res = delayed(x, traverse=False).compute() assert isinstance(res, list) assert res[0] == 1 assert res[1][0] == fail and res[1][1] is a assert res[2] is a # traverse=False still hits top level b = delayed(1) x = delayed(b, traverse=False) assert x.compute() == 1 def test_pure(): v1 = delayed(add, pure=True)(1, 2) v2 = delayed(add, pure=True)(1, 
2) assert v1.key == v2.key myrand = delayed(random) assert myrand().key != myrand().key def test_pure_global_setting(): # delayed functions func = delayed(add) with set_options(delayed_pure=True): assert func(1, 2).key == func(1, 2).key with set_options(delayed_pure=False): assert func(1, 2).key != func(1, 2).key func = delayed(add, pure=True) with set_options(delayed_pure=False): assert func(1, 2).key == func(1, 2).key # delayed objects assert delayed(1).key != delayed(1).key with set_options(delayed_pure=True): assert delayed(1).key == delayed(1).key with set_options(delayed_pure=False): assert delayed(1, pure=True).key == delayed(1, pure=True).key # delayed methods data = delayed([1, 2, 3]) assert data.index(1).key != data.index(1).key with set_options(delayed_pure=True): assert data.index(1).key == data.index(1).key assert data.index(1, pure=False).key != data.index(1, pure=False).key with set_options(delayed_pure=False): assert data.index(1, pure=True).key == data.index(1, pure=True).key # magic methods always pure with set_options(delayed_pure=False): assert data.index.key == data.index.key element = data[0] assert (element + element).key == (element + element).key def test_nout(): func = delayed(lambda x: (x, -x), nout=2, pure=True) x = func(1) assert len(x) == 2 a, b = x assert compute(a, b) == (1, -1) assert a._length is None assert b._length is None pytest.raises(TypeError, lambda: len(a)) pytest.raises(TypeError, lambda: list(a)) pytest.raises(ValueError, lambda: delayed(add, nout=-1)) pytest.raises(ValueError, lambda: delayed(add, nout=True)) func = delayed(add, nout=None) a = func(1) assert a._length is None pytest.raises(TypeError, lambda: list(a)) pytest.raises(TypeError, lambda: len(a)) func = delayed(lambda x: (x,), nout=1, pure=True) x = func(1) assert len(x) == 1 a, = x assert a.compute() == 1 assert a._length is None pytest.raises(TypeError, lambda: len(a)) func = delayed(lambda x: tuple(), nout=0, pure=True) x = func(1) assert len(x) == 0 assert x.compute() == tuple() def test_kwargs(): def mysum(a, b, c=(), **kwargs): return a + b + sum(c) + sum(kwargs.values()) dmysum = delayed(mysum) ten = dmysum(1, 2, c=[delayed(3), 0], four=dmysum(2, 2)) assert ten.compute() == 10 dmysum = delayed(mysum, pure=True) c = [delayed(3), 0] ten = dmysum(1, 2, c=c, four=dmysum(2, 2)) assert ten.compute() == 10 assert dmysum(1, 2, c=c, four=dmysum(2, 2)).key == ten.key assert dmysum(1, 2, c=c, four=dmysum(2, 3)).key != ten.key assert dmysum(1, 2, c=c, four=4).key != ten.key assert dmysum(1, 2, c=c, four=4).key != dmysum(2, 2, c=c, four=4).key def test_custom_delayed(): x = Tuple({'a': 1, 'b': 2, 'c': (add, 'a', 'b')}, ['a', 'b', 'c']) x2 = delayed(add, pure=True)(x, (4, 5, 6)) n = delayed(len, pure=True)(x) assert delayed(len, pure=True)(x).key == n.key assert x2.compute() == (1, 2, 3, 4, 5, 6) assert compute(n, x2, x) == (3, (1, 2, 3, 4, 5, 6), (1, 2, 3)) def test_array_delayed(): np = pytest.importorskip('numpy') da = pytest.importorskip('dask.array') arr = np.arange(100).reshape((10, 10)) darr = da.from_array(arr, chunks=(5, 5)) val = delayed(sum)([arr, darr, 1]) assert isinstance(val, Delayed) assert np.allclose(val.compute(), arr + arr + 1) assert val.sum().compute() == (arr + arr + 1).sum() assert val[0, 0].compute() == (arr + arr + 1)[0, 0] task, dsk = to_task_dask(darr) orig = set(darr.dask) final = set(dsk) assert orig.issubset(final) diff = final.difference(orig) assert len(diff) == 1 delayed_arr = delayed(darr) assert (delayed_arr.compute() == arr).all() def 
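# Editor's note: a hedged sketch of the `nout` behaviour checked in test_nout
# above -- declaring how many outputs a call returns so the Delayed can be
# unpacked lazily.
from dask import compute, delayed

quotient, remainder = delayed(divmod, nout=2)(17, 5)
assert compute(quotient, remainder) == (3, 2)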
test_array_bag_delayed(): db = pytest.importorskip('dask.bag') da = pytest.importorskip('dask.array') np = pytest.importorskip('numpy') arr1 = np.arange(100).reshape((10, 10)) arr2 = arr1.dot(arr1.T) darr1 = da.from_array(arr1, chunks=(5, 5)) darr2 = da.from_array(arr2, chunks=(5, 5)) b = db.from_sequence([1, 2, 3]) seq = [arr1, arr2, darr1, darr2, b] out = delayed(sum)([i.sum() for i in seq]) assert out.compute() == 2 * arr1.sum() + 2 * arr2.sum() + sum([1, 2, 3]) def test_delayed_picklable(): # Delayed x = delayed(divmod, nout=2, pure=True)(1, 2) y = pickle.loads(pickle.dumps(x)) assert x.dask == y.dask assert x._key == y._key assert x._length == y._length # DelayedLeaf x = delayed(1j + 2) y = pickle.loads(pickle.dumps(x)) assert x.dask == y.dask assert x._key == y._key assert x._nout == y._nout assert x._pure == y._pure # DelayedAttr x = x.real y = pickle.loads(pickle.dumps(x)) assert x._obj._key == y._obj._key assert x._obj.dask == y._obj.dask assert x._attr == y._attr assert x._key == y._key def test_delayed_compute_forward_kwargs(): x = delayed(1) + 2 x.compute(bogus_keyword=10) def test_delayed_method_descriptor(): delayed(bytes.decode)(b'') # does not err def test_delayed_callable(): f = delayed(add, pure=True) v = f(1, 2) assert v.dask == {v.key: (add, 1, 2)} assert f.dask == {f.key: add} assert f.compute() == add def test_delayed_name_on_call(): f = delayed(add, pure=True) assert f(1, 2, dask_key_name='foo')._key == 'foo' def test_callable_obj(): class Foo(object): def __init__(self, a): self.a = a def __call__(self): return 2 foo = Foo(1) f = delayed(foo) assert f.compute() is foo assert f.a.compute() == 1 assert f().compute() == 2 def test_name_consistent_across_instances(): func = delayed(identity, pure=True) data = {'x': 1, 'y': 25, 'z': [1, 2, 3]} if PY2: assert func(data)._key == 'identity-6700b857eea9a7d3079762c9a253ffbd' if PY3: assert func(data)._key == 'identity-84c5e2194036c17d1d97c4e3a2b90482' data = {'x': 1, 1: 'x'} assert func(data)._key == func(data)._key if PY2: assert func(1)._key == 'identity-91f02358e13dca18cde218a63fee436a' if PY3: assert func(1)._key == 'identity-7126728842461bf3d2caecf7b954fa3b' def test_sensitive_to_partials(): assert (delayed(partial(add, 10), pure=True)(2)._key != delayed(partial(add, 20), pure=True)(2)._key) def test_delayed_name(): assert delayed(1)._key.startswith('int-') assert delayed(1, pure=True)._key.startswith('int-') assert delayed(1, name='X')._key == 'X' def myfunc(x): return x + 1 assert delayed(myfunc)(1).key.startswith('myfunc') def test_finalize_name(): da = pytest.importorskip('dask.array') x = da.ones(10, chunks=5) v = delayed([x]) assert set(x.dask).issubset(v.dask) def key(s): if isinstance(s, tuple): s = s[0] return s.split('-')[0] assert all(key(k).isalpha() for k in v.dask) def test_keys_from_array(): da = pytest.importorskip('dask.array') from dask.array.utils import _check_dsk X = da.ones((10, 10), chunks=5).to_delayed().flatten() xs = [delayed(inc)(x) for x in X] _check_dsk(xs[0].dask) dask-0.16.0/dask/tests/test_distributed.py000066400000000000000000000067131320364734500205550ustar00rootroot00000000000000import pytest distributed = pytest.importorskip('distributed') from functools import partial import inspect from tornado import gen import dask from dask import persist, delayed from distributed.client import wait, Client from distributed.utils_test import gen_cluster, inc, cluster, loop # flake8: noqa if 'should_check_state' in inspect.getargspec(gen_cluster).args: gen_cluster = partial(gen_cluster, 
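# Editor's note: a hedged sketch of the dask.distributed usage these tests
# drive through gen_cluster/cluster fixtures; here a plain local Client is
# assumed instead, and exact startup options may differ between versions.
from dask import delayed
from dask.distributed import Client

def example():
    client = Client(processes=False)      # small in-process cluster (assumed)
    x = delayed(sum)([1, 2, 3])
    # With a Client active, .compute() runs on the distributed scheduler.
    return x.compute()                    # -> 6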
should_check_state=False) cluster = partial(cluster, should_check_state=False) def test_can_import_client(): from dask.distributed import Client # noqa: F401 @gen_cluster(client=True) def test_persist(c, s, a, b): x = delayed(inc)(1) x2, = persist(x) yield wait(x2) assert x2.key in a.data or x2.key in b.data y = delayed(inc)(10) y2, one = persist(y, 1) yield wait(y2) assert y2.key in a.data or y2.key in b.data def test_futures_to_delayed_dataframe(loop): pd = pytest.importorskip('pandas') dd = pytest.importorskip('dask.dataframe') df = pd.DataFrame({'x': [1, 2, 3]}) with cluster() as (s, [a, b]): with Client(s['address'], loop=loop) as c: futures = c.scatter([df, df]) ddf = dd.from_delayed(futures) dd.utils.assert_eq(ddf.compute(), pd.concat([df, df], axis=0)) with pytest.raises(TypeError): ddf = dd.from_delayed([1, 2]) def test_futures_to_delayed_bag(loop): db = pytest.importorskip('dask.bag') L = [1, 2, 3] with cluster() as (s, [a, b]): with Client(s['address'], loop=loop) as c: futures = c.scatter([L, L]) b = db.from_delayed(futures) assert list(b) == L + L def test_futures_to_delayed_array(loop): da = pytest.importorskip('dask.array') from dask.array.utils import assert_eq np = pytest.importorskip('numpy') x = np.arange(5) with cluster() as (s, [a, b]): with Client(s['address'], loop=loop) as c: futures = c.scatter([x, x]) A = da.concatenate([da.from_delayed(f, shape=x.shape, dtype=x.dtype) for f in futures], axis=0) assert_eq(A.compute(), np.concatenate([x, x], axis=0)) @gen_cluster(client=True) def test_local_get_with_distributed_active(c, s, a, b): with dask.set_options(get=dask.get): x = delayed(inc)(1).persist() yield gen.sleep(0.01) assert not s.task_state # scheduler hasn't done anything y = delayed(inc)(2).persist(get=dask.get) yield gen.sleep(0.01) assert not s.task_state # scheduler hasn't done anything def test_to_hdf_distributed(loop): from ..dataframe.io.tests.test_hdf import test_to_hdf with cluster() as (s, [a, b]): with distributed.Client(s['address'], loop=loop): test_to_hdf() @pytest.mark.xfail(reason='HDF not multi-process safe') @pytest.mark.parametrize('npartitions', [1, 4, 10]) def test_to_hdf_scheduler_distributed(npartitions, loop): from ..dataframe.io.tests.test_hdf import test_to_hdf_schedulers with cluster() as (s, [a, b]): with distributed.Client(s['address'], loop=loop): test_to_hdf_schedulers(None, npartitions) @gen_cluster(client=True) def test_serializable_groupby_agg(c, s, a, b): pd = pytest.importorskip('pandas') dd = pytest.importorskip('dask.dataframe') df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [1, 0, 1, 0]}) ddf = dd.from_pandas(df, npartitions=2) result = ddf.groupby('y').agg('count') yield c.compute(result) dask-0.16.0/dask/tests/test_dot.py000066400000000000000000000124651320364734500170220ustar00rootroot00000000000000import os from functools import partial import re from operator import add, neg import sys import pytest optimize2 = (sys.flags.optimize == 2) if not optimize2: pytest.importorskip("graphviz") from dask.dot import dot_graph, task_label, label, to_graphviz else: pytestmark = pytest.mark.skipif(True, reason="graphviz exception with Python -OO flag") from dask import delayed from dask.utils import ensure_not_exists from IPython.display import Image, SVG # Since graphviz doesn't store a graph, we need to parse the output label_re = re.compile('.*\[label=(.*?) 
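# Editor's note: a hedged sketch of the visualization helpers tested here; it
# assumes the optional graphviz dependency, exactly as the tests above do.
from operator import add, neg
from dask.dot import dot_graph, to_graphviz

dsk = {'a': 1, 'b': (neg, 'a'), 'c': (add, 'a', 'b')}
g = to_graphviz(dsk, rankdir='LR')                 # a graphviz.Digraph
dot_graph(dsk, filename='example', format='svg')   # writes example.svg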
shape=.*\]') def get_label(line): m = label_re.match(line) if m: return m.group(1) dsk = {'a': 1, 'b': 2, 'c': (neg, 'a'), 'd': (neg, 'b'), 'e': (add, 'c', 'd'), 'f': (sum, ['a', 'e'])} def test_task_label(): assert task_label((partial(add, 1), 1)) == 'add' assert task_label((add, 1)) == 'add' assert task_label((add, (add, 1, 2))) == 'add(...)' def test_label(): assert label('x') == 'x' assert label('elemwise-ffcd9aa2231d466b5aa91e8bfa9e9487') == 'elemwise-#' cache = {} result = label('elemwise-ffcd9aa2231d466b5aa91e8bfa9e9487', cache=cache) assert result == 'elemwise-#0' # cached result = label('elemwise-ffcd9aa2231d466b5aa91e8bfa9e9487', cache=cache) assert result == 'elemwise-#0' assert len(cache) == 1 result = label('elemwise-e890b510984f344edea9a5e5fe05c0db', cache=cache) assert result == 'elemwise-#1' assert len(cache) == 2 result = label('elemwise-ffcd9aa2231d466b5aa91e8bfa9e9487', cache=cache) assert result == 'elemwise-#0' assert len(cache) == 2 assert label('x', cache=cache) == 'x' assert len(cache) == 2 def test_to_graphviz(): g = to_graphviz(dsk) labels = list(filter(None, map(get_label, g.body))) assert len(labels) == 10 # 10 nodes total funcs = set(('add', 'sum', 'neg')) assert set(labels).difference(dsk) == funcs assert set(labels).difference(funcs) == set(dsk) def test_to_graphviz_attributes(): assert to_graphviz(dsk).graph_attr['rankdir'] == 'BT' assert to_graphviz(dsk, rankdir='LR').graph_attr['rankdir'] == 'LR' assert to_graphviz(dsk, node_attr={'color': 'white'}).node_attr['color'] == 'white' assert to_graphviz(dsk, edge_attr={'color': 'white'}).edge_attr['color'] == 'white' def test_aliases(): g = to_graphviz({'x': 1, 'y': 'x'}) labels = list(filter(None, map(get_label, g.body))) assert len(labels) == 2 assert len(g.body) - len(labels) == 1 # Single edge def test_dot_graph(tmpdir): # Use a name that the shell would interpret specially to ensure that we're # not vulnerable to shell injection when interacting with `dot`. filename = str(tmpdir.join('$(touch should_not_get_created.txt)')) # Map from format extension to expected return type. result_types = { 'png': Image, 'jpeg': Image, 'dot': type(None), 'pdf': type(None), 'svg': SVG, } for format in result_types: target = '.'.join([filename, format]) ensure_not_exists(target) try: result = dot_graph(dsk, filename=filename, format=format) assert not os.path.exists('should_not_get_created.txt') assert os.path.isfile(target) assert isinstance(result, result_types[format]) finally: ensure_not_exists(target) def test_dot_graph_no_filename(tmpdir): # Map from format extension to expected return type. result_types = { 'png': Image, 'jpeg': Image, 'dot': type(None), 'pdf': type(None), 'svg': SVG, } for format in result_types: before = tmpdir.listdir() result = dot_graph(dsk, filename=None, format=format) # We shouldn't write any files if filename is None. after = tmpdir.listdir() assert before == after assert isinstance(result, result_types[format]) def test_dot_graph_defaults(): # Test with default args. 
default_name = 'mydask' default_format = 'png' target = '.'.join([default_name, default_format]) ensure_not_exists(target) try: result = dot_graph(dsk) assert os.path.isfile(target) assert isinstance(result, Image) finally: ensure_not_exists(target) def test_filenames_and_formats(): # Test with a variety of user provided args filenames = ['mydaskpdf', 'mydask.pdf', 'mydask.pdf', 'mydaskpdf', 'mydask.pdf.svg'] formats = ['svg', None, 'svg', None, None] targets = ['mydaskpdf.svg', 'mydask.pdf', 'mydask.pdf.svg', 'mydaskpdf.png', 'mydask.pdf.svg'] result_types = { 'png': Image, 'jpeg': Image, 'dot': type(None), 'pdf': type(None), 'svg': SVG, } for filename, format, target in zip(filenames, formats, targets): expected_result_type = result_types[target.split('.')[-1]] result = dot_graph(dsk, filename=filename, format=format) assert os.path.isfile(target) assert isinstance(result, expected_result_type) ensure_not_exists(target) def test_delayed_kwargs_apply(): def f(x, y=True): return x + y x = delayed(f)(1, y=2) label = task_label(x.dask[x.key]) assert 'f' in label assert 'apply' not in label dask-0.16.0/dask/tests/test_hashing.py000066400000000000000000000021461320364734500176500ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import pytest from dask.hashing import hashers, hash_buffer, hash_buffer_hex np = pytest.importorskip('numpy') buffers = [ b'abc', bytearray(b'123'), memoryview(b'456'), np.array(42), np.ones((100, 100)), np.zeros((100, 100), dtype=[('a', 'i4'), ('b', 'i2')]), np.ones(10000, dtype=np.int8)[1:], # unaligned ] @pytest.mark.parametrize('x', buffers) def test_hash_buffer(x): for hasher in [None] + hashers: h = hash_buffer(x, hasher=hasher) assert isinstance(h, bytes) assert 8 <= len(h) < 32 assert h == hash_buffer(x, hasher=hasher) @pytest.mark.parametrize('x', buffers) def test_hash_buffer_hex(x): for hasher in [None] + hashers: h = hash_buffer_hex(x, hasher=hasher) assert isinstance(h, str) assert 16 <= len(h) < 64 assert h == hash_buffer_hex(x, hasher=hasher) @pytest.mark.parametrize('hasher', hashers) def test_hashers(hasher): # Sanity check x = b'x' h = hasher(x) assert isinstance(h, bytes) assert 8 <= len(h) < 32 dask-0.16.0/dask/tests/test_local.py000066400000000000000000000127501320364734500173230ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import dask from dask.local import start_state_from_dask, get_sync, finish_task, sortkey from dask.order import order from dask.utils_test import GetFunctionTestMixin, inc, add fib_dask = {'f0': 0, 'f1': 1, 'f2': 1, 'f3': 2, 'f4': 3, 'f5': 5, 'f6': 8} def test_start_state(): dsk = {'x': 1, 'y': 2, 'z': (inc, 'x'), 'w': (add, 'z', 'y')} result = start_state_from_dask(dsk) expected = {'cache': {'x': 1, 'y': 2}, 'dependencies': {'w': set(['y', 'z']), 'x': set([]), 'y': set([]), 'z': set(['x'])}, 'dependents': {'w': set([]), 'x': set(['z']), 'y': set(['w']), 'z': set(['w'])}, 'finished': set([]), 'released': set([]), 'running': set([]), 'ready': ['z'], 'waiting': {'w': set(['z'])}, 'waiting_data': {'x': set(['z']), 'y': set(['w']), 'z': set(['w'])}} assert result == expected def test_start_state_looks_at_cache(): dsk = {'b': (inc, 'a')} cache = {'a': 1} result = start_state_from_dask(dsk, cache) assert result['dependencies']['b'] == set(['a']) assert result['ready'] == ['b'] def test_start_state_with_redirects(): dsk = {'x': 1, 'y': 'x', 'z': (inc, 'y')} result = start_state_from_dask(dsk) assert result['cache'] == {'x': 1} def 
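# Editor's note: a tiny, hedged illustration of the dask.hashing helpers
# covered by test_hashing.py above; byte-like inputs hash deterministically
# within a process.
from dask.hashing import hash_buffer_hex

assert hash_buffer_hex(b'abc') == hash_buffer_hex(b'abc')
assert hash_buffer_hex(b'abc') != hash_buffer_hex(b'abd')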
test_start_state_with_independent_but_runnable_tasks(): assert start_state_from_dask({'x': (inc, 1)})['ready'] == ['x'] def test_start_state_with_tasks_no_deps(): dsk = {'a': [1, (inc, 2)], 'b': [1, 2, 3, 4], 'c': (inc, 3)} state = start_state_from_dask(dsk) assert list(state['cache'].keys()) == ['b'] assert 'a' in state['ready'] and 'c' in state['ready'] deps = dict((k, set()) for k in 'abc') assert state['dependencies'] == deps assert state['dependents'] == deps def test_finish_task(): dsk = {'x': 1, 'y': 2, 'z': (inc, 'x'), 'w': (add, 'z', 'y')} sortkey = order(dsk).get state = start_state_from_dask(dsk) state['ready'].remove('z') state['running'] = set(['z', 'other-task']) task = 'z' result = 2 state['cache']['z'] = result finish_task(dsk, task, state, set(), sortkey) assert state == {'cache': {'y': 2, 'z': 2}, 'dependencies': {'w': set(['y', 'z']), 'x': set([]), 'y': set([]), 'z': set(['x'])}, 'finished': set(['z']), 'released': set(['x']), 'running': set(['other-task']), 'dependents': {'w': set([]), 'x': set(['z']), 'y': set(['w']), 'z': set(['w'])}, 'ready': ['w'], 'waiting': {}, 'waiting_data': {'y': set(['w']), 'z': set(['w'])}} class TestGetAsync(GetFunctionTestMixin): get = staticmethod(get_sync) def test_get_sync_num_workers(self): self.get({'x': (inc, 'y'), 'y': 1}, 'x', num_workers=2) def test_cache_options(): try: from chest import Chest except ImportError: return cache = Chest() def inc2(x): assert 'y' in cache return x + 1 with dask.set_options(cache=cache): get_sync({'x': (inc2, 'y'), 'y': 1}, 'x') def test_sort_key(): L = ['x', ('x', 1), ('z', 0), ('x', 0)] assert sorted(L, key=sortkey) == ['x', ('x', 0), ('x', 1), ('z', 0)] def test_callback(): f = lambda x: x + 1 dsk = {'a': (f, 1)} from dask.threaded import get def start_callback(key, d, state): assert key == 'a' or key is None assert d == dsk assert isinstance(state, dict) def end_callback(key, value, d, state, worker_id): assert key == 'a' or key is None assert value == 2 or value is None assert d == dsk assert isinstance(state, dict) get(dsk, 'a', start_callback=start_callback, end_callback=end_callback) def test_order_of_startstate(): dsk = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b'), 'x': 1, 'y': (inc, 'x')} result = start_state_from_dask(dsk) assert result['ready'] == ['y', 'b'] dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y'), 'a': 1, 'b': (inc, 'a')} result = start_state_from_dask(dsk) assert result['ready'] == ['b', 'y'] def test_exceptions_propagate(): class MyException(Exception): def __init__(self, a, b): self.a = a self.b = b def __str__(self): return "My Exception!" def f(): raise MyException(1, 2) from dask.threaded import get try: get({'x': (f,)}, 'x') assert False except MyException as e: assert "My Exception!" 
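# Editor's note: a hedged sketch of driving the synchronous scheduler directly
# on a plain dict graph, as the surrounding tests do.
from operator import add
from dask.local import get_sync

dsk = {'x': 1,
       'y': (add, 'x', 10),
       'z': (add, 'y', 100)}
assert get_sync(dsk, 'z') == 111
assert get_sync(dsk, ['x', 'y']) == (1, 11)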
in str(e) assert 'a' in dir(e) assert e.a == 1 assert e.b == 2 def test_ordering(): L = [] def append(i): L.append(i) dsk = {('x', i): (append, i) for i in range(10)} x_keys = sorted(dsk) dsk['y'] = (lambda *args: None, list(x_keys)) get_sync(dsk, 'y') assert L == sorted(L) dask-0.16.0/dask/tests/test_multiprocessing.py000066400000000000000000000060121320364734500214520ustar00rootroot00000000000000import multiprocessing from operator import add import pickle import random import numpy as np import pytest from dask import compute, delayed from dask.context import set_options from dask.multiprocessing import get, _dumps, _loads, remote_exception from dask.utils_test import inc def test_pickle_globals(): """ For the function f(x) defined below, the only globals added in pickling should be 'np' and '__builtins__'""" def f(x): return np.sin(x) + np.cos(x) assert set(['np', '__builtins__']) == set( _loads(_dumps(f)).__globals__.keys()) def bad(): raise ValueError("12345") def test_errors_propagate(): dsk = {'x': (bad,)} try: get(dsk, 'x') except Exception as e: assert isinstance(e, ValueError) assert "12345" in str(e) def test_remote_exception(): e = TypeError("hello") a = remote_exception(e, 'traceback-body') b = remote_exception(e, 'traceback-body') assert type(a) == type(b) assert isinstance(a, TypeError) assert 'hello' in str(a) assert 'Traceback' in str(a) assert 'traceback-body' in str(a) def make_bad_result(): return lambda x: x + 1 def test_unpicklable_results_generate_errors(): dsk = {'x': (make_bad_result,)} try: get(dsk, 'x') except Exception as e: # can't use type because pickle / cPickle distinction assert type(e).__name__ in ('PicklingError', 'AttributeError') class NotUnpickleable(object): def __getstate__(self): return () def __setstate__(self, state): raise ValueError("Can't unpickle me") def test_unpicklable_args_generate_errors(): a = NotUnpickleable() def foo(a): return 1 dsk = {'x': (foo, a)} try: get(dsk, 'x') except Exception as e: assert isinstance(e, ValueError) dsk = {'x': (foo, 'a'), 'a': a} try: get(dsk, 'x') except Exception as e: assert isinstance(e, ValueError) def test_reuse_pool(): pool = multiprocessing.Pool() with set_options(pool=pool): assert get({'x': (inc, 1)}, 'x') == 2 assert get({'x': (inc, 1)}, 'x') == 2 def test_dumps_loads(): with set_options(func_dumps=pickle.dumps, func_loads=pickle.loads): assert get({'x': 1, 'y': (add, 'x', 2)}, 'y') == 3 def test_fuse_doesnt_clobber_intermediates(): d = {'x': 1, 'y': (inc, 'x'), 'z': (add, 10, 'y')} assert get(d, ['y', 'z']) == (2, 12) def test_optimize_graph_false(): from dask.callbacks import Callback d = {'x': 1, 'y': (inc, 'x'), 'z': (add, 10, 'y')} keys = [] with Callback(pretask=lambda key, *args: keys.append(key)): get(d, 'z', optimize_graph=False) assert len(keys) == 2 @pytest.mark.parametrize('random', [np.random, random]) def test_random_seeds(random): def f(): return tuple(random.randint(0, 10000) for i in range(5)) N = 10 with set_options(get=get): results, = compute([delayed(f, pure=False)() for i in range(N)]) assert len(set(results)) == N dask-0.16.0/dask/tests/test_optimize.py000066400000000000000000001013741320364734500200720ustar00rootroot00000000000000from operator import getitem from functools import partial import pytest from dask.utils_test import add, inc from dask.core import get_dependencies from dask.optimize import (cull, fuse, inline, inline_functions, functions_of, fuse_getitem, fuse_selections, fuse_linear) def double(x): return x * 2 def test_cull(): # 'out' depends on 'x' and 'y', 
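# Editor's note: a hedged sketch of graph culling as exercised in test_cull
# just above; cull trims the graph to the tasks needed for the requested keys.
from dask.optimize import cull
from dask.utils_test import inc, add

d = {'w': 1,
     'x': (inc, 'w'),
     'y': (inc, 'w'),
     'out': (add, 'x', 10)}
culled, dependencies = cull(d, 'out')
assert set(culled) == {'w', 'x', 'out'}   # 'y' is dropped entirely
assert dependencies['out'] == ['x']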
but not 'z' d = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'x'), 'out': (add, 'y', 10)} culled, dependencies = cull(d, 'out') assert culled == {'x': 1, 'y': (inc, 'x'), 'out': (add, 'y', 10)} assert dependencies == {'x': [], 'y': ['x'], 'out': ['y']} assert cull(d, 'out') == cull(d, ['out']) assert cull(d, ['out', 'z'])[0] == d assert cull(d, [['out'], ['z']]) == cull(d, ['out', 'z']) pytest.raises(KeyError, lambda: cull(d, 'badkey')) def fuse2(*args, **kwargs): """Run both ``fuse`` and ``fuse_linear`` and compare results""" rv1 = fuse_linear(*args, **kwargs) if kwargs.get('rename_keys') is not False: return rv1 rv2 = fuse(*args, **kwargs) assert rv1 == rv2 return rv1 def with_deps(dsk): return dsk, {k: get_dependencies(dsk, k) for k in dsk} def test_fuse(): fuse = fuse2 # tests both `fuse` and `fuse_linear` d = { 'w': (inc, 'x'), 'x': (inc, 'y'), 'y': (inc, 'z'), 'z': (add, 'a', 'b'), 'a': 1, 'b': 2, } assert fuse(d, rename_keys=False) == with_deps({ 'w': (inc, (inc, (inc, (add, 'a', 'b')))), 'a': 1, 'b': 2, }) assert fuse(d, rename_keys=True) == with_deps({ 'z-y-x-w': (inc, (inc, (inc, (add, 'a', 'b')))), 'a': 1, 'b': 2, 'w': 'z-y-x-w', }) d = { 'NEW': (inc, 'y'), 'w': (inc, 'x'), 'x': (inc, 'y'), 'y': (inc, 'z'), 'z': (add, 'a', 'b'), 'a': 1, 'b': 2, } assert fuse(d, rename_keys=False) == with_deps({ 'NEW': (inc, 'y'), 'w': (inc, (inc, 'y')), 'y': (inc, (add, 'a', 'b')), 'a': 1, 'b': 2, }) assert fuse(d, rename_keys=True) == with_deps({ 'NEW': (inc, 'z-y'), 'x-w': (inc, (inc, 'z-y')), 'z-y': (inc, (add, 'a', 'b')), 'a': 1, 'b': 2, 'w': 'x-w', 'y': 'z-y', }) d = { 'v': (inc, 'y'), 'u': (inc, 'w'), 'w': (inc, 'x'), 'x': (inc, 'y'), 'y': (inc, 'z'), 'z': (add, 'a', 'b'), 'a': (inc, 'c'), 'b': (inc, 'd'), 'c': 1, 'd': 2, } assert fuse(d, rename_keys=False) == with_deps({ 'u': (inc, (inc, (inc, 'y'))), 'v': (inc, 'y'), 'y': (inc, (add, 'a', 'b')), 'a': (inc, 1), 'b': (inc, 2), }) assert fuse(d, rename_keys=True) == with_deps({ 'x-w-u': (inc, (inc, (inc, 'z-y'))), 'v': (inc, 'z-y'), 'z-y': (inc, (add, 'c-a', 'd-b')), 'c-a': (inc, 1), 'd-b': (inc, 2), 'a': 'c-a', 'b': 'd-b', 'u': 'x-w-u', 'y': 'z-y', }) d = { 'a': (inc, 'x'), 'b': (inc, 'x'), 'c': (inc, 'x'), 'd': (inc, 'c'), 'x': (inc, 'y'), 'y': 0, } assert fuse(d, rename_keys=False) == with_deps({ 'a': (inc, 'x'), 'b': (inc, 'x'), 'd': (inc, (inc, 'x')), 'x': (inc, 0) }) assert fuse(d, rename_keys=True) == with_deps({ 'a': (inc, 'y-x'), 'b': (inc, 'y-x'), 'c-d': (inc, (inc, 'y-x')), 'y-x': (inc, 0), 'd': 'c-d', 'x': 'y-x', }) d = { 'a': 1, 'b': (inc, 'a'), 'c': (add, 'b', 'b'), } assert fuse(d, rename_keys=False) == with_deps({ 'b': (inc, 1), 'c': (add, 'b', 'b'), }) assert fuse(d, rename_keys=True) == with_deps({ 'a-b': (inc, 1), 'c': (add, 'a-b', 'a-b'), 'b': 'a-b', }) def test_fuse_keys(): fuse = fuse2 # tests both `fuse` and `fuse_linear` d = { 'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b'), } keys = ['b'] assert fuse(d, keys, rename_keys=False) == with_deps({ 'b': (inc, 1), 'c': (inc, 'b'), }) assert fuse(d, keys, rename_keys=True) == with_deps({ 'a-b': (inc, 1), 'c': (inc, 'a-b'), 'b': 'a-b', }) d = { 'w': (inc, 'x'), 'x': (inc, 'y'), 'y': (inc, 'z'), 'z': (add, 'a', 'b'), 'a': 1, 'b': 2, } keys = ['x', 'z'] assert fuse(d, keys, rename_keys=False) == with_deps({ 'w': (inc, 'x'), 'x': (inc, (inc, 'z')), 'z': (add, 'a', 'b'), 'a': 1, 'b': 2 , }) assert fuse(d, keys, rename_keys=True) == with_deps({ 'w': (inc, 'y-x'), 'y-x': (inc, (inc, 'z')), 'z': (add, 'a', 'b'), 'a': 1, 'b': 2 , 'x': 'y-x', }) def test_inline(): d = {'a': 1, 'b': (inc, 'a'), 'c': 
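# --- Illustrative sketch (graph and key names invented for the example) ------
# A minimal sketch of what `cull` does, mirroring `test_cull` above: only the
# tasks reachable from the requested key survive.
def _example_cull():
    from dask.optimize import cull
    from dask.utils_test import add, inc

    dsk = {'a': 1,
           'b': (inc, 'a'),
           'unused': (inc, 'a'),          # not reachable from 'result'
           'result': (add, 'b', 100)}
    culled, dependencies = cull(dsk, 'result')
    assert culled == {'a': 1, 'b': (inc, 'a'), 'result': (add, 'b', 100)}
    assert dependencies == {'a': [], 'b': ['a'], 'result': ['b']}
# -----------------------------------------------------------------------------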
(inc, 'b'), 'd': (add, 'a', 'c')} assert inline(d) == {'a': 1, 'b': (inc, 1), 'c': (inc, 'b'), 'd': (add, 1, 'c')} assert inline(d, ['a', 'b', 'c']) == {'a': 1, 'b': (inc, 1), 'c': (inc, (inc, 1)), 'd': (add, 1, (inc, (inc, 1)))} d = {'x': 1, 'y': (inc, 'x'), 'z': (add, 'x', 'y')} assert inline(d) == {'x': 1, 'y': (inc, 1), 'z': (add, 1, 'y')} assert inline(d, keys='y') == {'x': 1, 'y': (inc, 1), 'z': (add, 1, (inc, 1))} assert inline(d, keys='y', inline_constants=False) == {'x': 1, 'y': (inc, 'x'), 'z': (add, 'x', (inc, 'x'))} d = {'a': 1, 'b': 'a', 'c': 'b', 'd': ['a', 'b', 'c'], 'e': (add, (len, 'd'), 'a')} assert inline(d, 'd') == {'a': 1, 'b': 1, 'c': 1, 'd': [1, 1, 1], 'e': (add, (len, [1, 1, 1]), 1)} assert inline(d, 'a', inline_constants=False) == {'a': 1, 'b': 1, 'c': 'b', 'd': [1, 'b', 'c'], 'e': (add, (len, 'd'), 1)} def test_inline_functions(): x, y, i, d = 'xyid' dsk = {'out': (add, i, d), i: (inc, x), d: (double, y), x: 1, y: 1} result = inline_functions(dsk, [], fast_functions=set([inc])) expected = {'out': (add, (inc, x), d), d: (double, y), x: 1, y: 1} assert result == expected def test_inline_ignores_curries_and_partials(): dsk = {'x': 1, 'y': 2, 'a': (partial(add, 1), 'x'), 'b': (inc, 'a')} result = inline_functions(dsk, [], fast_functions=set([add])) assert result['b'] == (inc, dsk['a']) assert 'a' not in result def test_inline_doesnt_shrink_fast_functions_at_top(): dsk = {'x': (inc, 'y'), 'y': 1} result = inline_functions(dsk, [], fast_functions=set([inc])) assert result == dsk def test_inline_traverses_lists(): x, y, i, d = 'xyid' dsk = {'out': (sum, [i, d]), i: (inc, x), d: (double, y), x: 1, y: 1} expected = {'out': (sum, [(inc, x), d]), d: (double, y), x: 1, y: 1} result = inline_functions(dsk, [], fast_functions=set([inc])) assert result == expected def test_inline_functions_protects_output_keys(): dsk = {'x': (inc, 1), 'y': (double, 'x')} assert inline_functions(dsk, [], [inc]) == {'y': (double, (inc, 1))} assert inline_functions(dsk, ['x'], [inc]) == {'y': (double, 'x'), 'x': (inc, 1)} def test_functions_of(): a = lambda x: x b = lambda x: x assert functions_of((a, 1)) == set([a]) assert functions_of((a, (b, 1))) == set([a, b]) assert functions_of((a, [(b, 1)])) == set([a, b]) assert functions_of((a, [[[(b, 1)]]])) == set([a, b]) assert functions_of(1) == set() assert functions_of(a) == set() assert functions_of((a,)) == set([a]) def test_fuse_getitem(): def load(*args): pass dsk = {'x': (load, 'store', 'part', ['a', 'b']), 'y': (getitem, 'x', 'a')} dsk2 = fuse_getitem(dsk, load, 3) dsk2, dependencies = cull(dsk2, 'y') assert dsk2 == {'y': (load, 'store', 'part', 'a')} def test_fuse_selections(): def load(*args): pass dsk = {'x': (load, 'store', 'part', ['a', 'b']), 'y': (getitem, 'x', 'a')} merge = lambda t1, t2: (load, t2[1], t2[2], t1[2]) dsk2 = fuse_selections(dsk, getitem, load, merge) dsk2, dependencies = cull(dsk2, 'y') assert dsk2 == {'y': (load, 'store', 'part', 'a')} def test_inline_cull_dependencies(): d = {'a': 1, 'b': 'a', 'c': 'b', 'd': ['a', 'b', 'c'], 'e': (add, (len, 'd'), 'a')} d2, dependencies = cull(d, ['d', 'e']) inline(d2, {'b'}, dependencies=dependencies) def test_fuse_reductions_single_input(): def f(*args): return args d = { 'a': 1, 'b1': (f, 'a'), 'b2': (f, 'a', 'a'), 'c': (f, 'b1', 'b2'), } assert fuse(d, ave_width=1.9, rename_keys=False) == with_deps(d) assert fuse(d, ave_width=1.9, rename_keys=True) == with_deps(d) assert fuse(d, ave_width=2, rename_keys=False) == with_deps({ 'a': 1, 'c': (f, (f, 'a'), (f, 'a', 'a')), }) assert 
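# --- Illustrative sketch (graph and key names invented for the example) ------
# A minimal sketch of `inline_functions`, following `test_inline_functions`
# above: tasks whose callable is listed as "fast" are folded into their
# consumers instead of being kept as separate keys.
def _example_inline_functions():
    from dask.optimize import inline_functions
    from dask.utils_test import add, inc

    dsk = {'x': 1,
           'i': (inc, 'x'),
           'out': (add, 'i', 'x')}
    result = inline_functions(dsk, [], fast_functions=set([inc]))
    # 'i' disappears; its task is embedded directly inside 'out'.
    assert result == {'x': 1, 'out': (add, (inc, 'x'), 'x')}
# -----------------------------------------------------------------------------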
fuse(d, ave_width=2, rename_keys=True) == with_deps({ 'a': 1, 'b1-b2-c': (f, (f, 'a'), (f, 'a', 'a')), 'c': 'b1-b2-c', }) d = { 'a': 1, 'b1': (f, 'a'), 'b2': (f, 'a', 'a'), 'b3': (f, 'a', 'a', 'a'), 'c': (f, 'b1', 'b2', 'b3'), } assert fuse(d, ave_width=2.9, rename_keys=False) == with_deps(d) assert fuse(d, ave_width=2.9, rename_keys=True) == with_deps(d) assert fuse(d, ave_width=3, rename_keys=False) == with_deps({ 'a': 1, 'c': (f, (f, 'a'), (f, 'a', 'a'), (f, 'a', 'a', 'a')), }) assert fuse(d, ave_width=3, rename_keys=True) == with_deps({ 'a': 1, 'b1-b2-b3-c': (f, (f, 'a'), (f, 'a', 'a'), (f, 'a', 'a', 'a')), 'c': 'b1-b2-b3-c', }) d = { 'a': 1, 'b1': (f, 'a'), 'b2': (f, 'a'), 'c': (f, 'a', 'b1', 'b2'), } assert fuse(d, ave_width=1.9, rename_keys=False) == with_deps(d) assert fuse(d, ave_width=1.9, rename_keys=True) == with_deps(d) assert fuse(d, ave_width=2, rename_keys=False) == with_deps({ 'a': 1, 'c': (f, 'a', (f, 'a'), (f, 'a')), }) assert fuse(d, ave_width=2, rename_keys=True) == with_deps({ 'a': 1, 'b1-b2-c': (f, 'a', (f, 'a'), (f, 'a')), 'c': 'b1-b2-c', }) d = { 'a': 1, 'b1': (f, 'a'), 'b2': (f, 'a'), 'c': (f, 'b1', 'b2'), 'd1': (f, 'c'), 'd2': (f, 'c'), 'e': (f, 'd1', 'd2'), } assert fuse(d, ave_width=1.9, rename_keys=False) == with_deps(d) assert fuse(d, ave_width=1.9, rename_keys=True) == with_deps(d) assert fuse(d, ave_width=2, rename_keys=False) == with_deps({ 'a': 1, 'c': (f, (f, 'a'), (f, 'a')), 'e': (f, (f, 'c'), (f, 'c')), }) assert fuse(d, ave_width=2, rename_keys=True) == with_deps({ 'a': 1, 'b1-b2-c': (f, (f, 'a'), (f, 'a')), 'd1-d2-e': (f, (f, 'c'), (f, 'c')), 'c': 'b1-b2-c', 'e': 'd1-d2-e', }) d = { 'a': 1, 'b1': (f, 'a'), 'b2': (f, 'a'), 'b3': (f, 'a'), 'b4': (f, 'a'), 'c1': (f, 'b1', 'b2'), 'c2': (f, 'b3', 'b4'), 'd': (f, 'c1', 'c2'), } assert fuse(d, ave_width=1.9, rename_keys=False) == with_deps(d) assert fuse(d, ave_width=1.9, rename_keys=True) == with_deps(d) expected = with_deps({ 'a': 1, 'c1': (f, (f, 'a'), (f, 'a')), 'c2': (f, (f, 'a'), (f, 'a')), 'd': (f, 'c1', 'c2'), }) assert fuse(d, ave_width=2, rename_keys=False) == expected assert fuse(d, ave_width=2.9, rename_keys=False) == expected expected = with_deps({ 'a': 1, 'b1-b2-c1': (f, (f, 'a'), (f, 'a')), 'b3-b4-c2': (f, (f, 'a'), (f, 'a')), 'd': (f, 'c1', 'c2'), 'c1': 'b1-b2-c1', 'c2': 'b3-b4-c2', }) assert fuse(d, ave_width=2, rename_keys=True) == expected assert fuse(d, ave_width=2.9, rename_keys=True) == expected assert fuse(d, ave_width=3, rename_keys=False) == with_deps({ 'a': 1, 'd': (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), }) assert fuse(d, ave_width=3, rename_keys=True) == with_deps({ 'a': 1, 'b1-b2-b3-b4-c1-c2-d': (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), 'd': 'b1-b2-b3-b4-c1-c2-d', }) d = { 'a': 1, 'b1': (f, 'a'), 'b2': (f, 'a'), 'b3': (f, 'a'), 'b4': (f, 'a'), 'b5': (f, 'a'), 'b6': (f, 'a'), 'b7': (f, 'a'), 'b8': (f, 'a'), 'c1': (f, 'b1', 'b2'), 'c2': (f, 'b3', 'b4'), 'c3': (f, 'b5', 'b6'), 'c4': (f, 'b7', 'b8'), 'd1': (f, 'c1', 'c2'), 'd2': (f, 'c3', 'c4'), 'e': (f, 'd1', 'd2'), } assert fuse(d, ave_width=1.9, rename_keys=False) == with_deps(d) assert fuse(d, ave_width=1.9, rename_keys=True) == with_deps(d) expected = with_deps({ 'a': 1, 'c1': (f, (f, 'a'), (f, 'a')), 'c2': (f, (f, 'a'), (f, 'a')), 'c3': (f, (f, 'a'), (f, 'a')), 'c4': (f, (f, 'a'), (f, 'a')), 'd1': (f, 'c1', 'c2'), 'd2': (f, 'c3', 'c4'), 'e': (f, 'd1', 'd2'), }) assert fuse(d, ave_width=2, rename_keys=False) == expected assert fuse(d, ave_width=2.9, rename_keys=False) == expected expected = 
with_deps({ 'a': 1, 'b1-b2-c1': (f, (f, 'a'), (f, 'a')), 'b3-b4-c2': (f, (f, 'a'), (f, 'a')), 'b5-b6-c3': (f, (f, 'a'), (f, 'a')), 'b7-b8-c4': (f, (f, 'a'), (f, 'a')), 'd1': (f, 'c1', 'c2'), 'd2': (f, 'c3', 'c4'), 'e': (f, 'd1', 'd2'), 'c1': 'b1-b2-c1', 'c2': 'b3-b4-c2', 'c3': 'b5-b6-c3', 'c4': 'b7-b8-c4', }) assert fuse(d, ave_width=2, rename_keys=True) == expected assert fuse(d, ave_width=2.9, rename_keys=True) == expected expected = with_deps({ 'a': 1, 'd1': (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), 'd2': (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), 'e': (f, 'd1', 'd2'), }) assert fuse(d, ave_width=3, rename_keys=False) == expected assert fuse(d, ave_width=4.6, rename_keys=False) == expected expected = with_deps({ 'a': 1, 'b1-b2-b3-b4-c1-c2-d1': (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), 'b5-b6-b7-b8-c3-c4-d2': (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), 'e': (f, 'd1', 'd2'), 'd1': 'b1-b2-b3-b4-c1-c2-d1', 'd2': 'b5-b6-b7-b8-c3-c4-d2', }) assert fuse(d, ave_width=3, rename_keys=True) == expected assert fuse(d, ave_width=4.6, rename_keys=True) == expected assert fuse(d, ave_width=4.7, rename_keys=False) == with_deps({ 'a': 1, 'e': (f, (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a')))) }) assert fuse(d, ave_width=4.7, rename_keys=True) == with_deps({ 'a': 1, 'b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e': ( f, (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))) ), 'e': 'b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e', }) d = { 'a': 1, 'b1': (f, 'a'), 'b2': (f, 'a'), 'b3': (f, 'a'), 'b4': (f, 'a'), 'b5': (f, 'a'), 'b6': (f, 'a'), 'b7': (f, 'a'), 'b8': (f, 'a'), 'b9': (f, 'a'), 'b10': (f, 'a'), 'b11': (f, 'a'), 'b12': (f, 'a'), 'b13': (f, 'a'), 'b14': (f, 'a'), 'b15': (f, 'a'), 'b16': (f, 'a'), 'c1': (f, 'b1', 'b2'), 'c2': (f, 'b3', 'b4'), 'c3': (f, 'b5', 'b6'), 'c4': (f, 'b7', 'b8'), 'c5': (f, 'b9', 'b10'), 'c6': (f, 'b11', 'b12'), 'c7': (f, 'b13', 'b14'), 'c8': (f, 'b15', 'b16'), 'd1': (f, 'c1', 'c2'), 'd2': (f, 'c3', 'c4'), 'd3': (f, 'c5', 'c6'), 'd4': (f, 'c7', 'c8'), 'e1': (f, 'd1', 'd2'), 'e2': (f, 'd3', 'd4'), 'f': (f, 'e1', 'e2'), } assert fuse(d, ave_width=1.9, rename_keys=False) == with_deps(d) assert fuse(d, ave_width=1.9, rename_keys=True) == with_deps(d) expected = with_deps({ 'a': 1, 'c1': (f, (f, 'a'), (f, 'a')), 'c2': (f, (f, 'a'), (f, 'a')), 'c3': (f, (f, 'a'), (f, 'a')), 'c4': (f, (f, 'a'), (f, 'a')), 'c5': (f, (f, 'a'), (f, 'a')), 'c6': (f, (f, 'a'), (f, 'a')), 'c7': (f, (f, 'a'), (f, 'a')), 'c8': (f, (f, 'a'), (f, 'a')), 'd1': (f, 'c1', 'c2'), 'd2': (f, 'c3', 'c4'), 'd3': (f, 'c5', 'c6'), 'd4': (f, 'c7', 'c8'), 'e1': (f, 'd1', 'd2'), 'e2': (f, 'd3', 'd4'), 'f': (f, 'e1', 'e2'), }) assert fuse(d, ave_width=2, rename_keys=False) == expected assert fuse(d, ave_width=2.9, rename_keys=False) == expected expected = with_deps({ 'a': 1, 'b1-b2-c1': (f, (f, 'a'), (f, 'a')), 'b3-b4-c2': (f, (f, 'a'), (f, 'a')), 'b5-b6-c3': (f, (f, 'a'), (f, 'a')), 'b7-b8-c4': (f, (f, 'a'), (f, 'a')), 'b10-b9-c5': (f, (f, 'a'), (f, 'a')), 'b11-b12-c6': (f, (f, 'a'), (f, 'a')), 'b13-b14-c7': (f, (f, 'a'), (f, 'a')), 'b15-b16-c8': (f, (f, 'a'), (f, 'a')), 'd1': (f, 'c1', 'c2'), 'd2': (f, 'c3', 'c4'), 'd3': (f, 'c5', 'c6'), 'd4': (f, 'c7', 'c8'), 'e1': (f, 'd1', 'd2'), 'e2': (f, 'd3', 'd4'), 'f': (f, 'e1', 'e2'), 'c1': 'b1-b2-c1', 'c2': 'b3-b4-c2', 'c3': 'b5-b6-c3', 'c4': 'b7-b8-c4', 'c5': 'b10-b9-c5', 'c6': 'b11-b12-c6', 'c7': 'b13-b14-c7', 'c8': 
'b15-b16-c8', }) assert fuse(d, ave_width=2, rename_keys=True) == expected assert fuse(d, ave_width=2.9, rename_keys=True) == expected expected = with_deps({ 'a': 1, 'd1': (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), 'd2': (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), 'd3': (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), 'd4': (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), 'e1': (f, 'd1', 'd2'), 'e2': (f, 'd3', 'd4'), 'f': (f, 'e1', 'e2'), }) assert fuse(d, ave_width=3, rename_keys=False) == expected assert fuse(d, ave_width=4.6, rename_keys=False) == expected expected = with_deps({ 'a': 1, 'b1-b2-b3-b4-c1-c2-d1': (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), 'b5-b6-b7-b8-c3-c4-d2': (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), 'b10-b11-b12-b9-c5-c6-d3': (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), 'b13-b14-b15-b16-c7-c8-d4': (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), 'e1': (f, 'd1', 'd2'), 'e2': (f, 'd3', 'd4'), 'f': (f, 'e1', 'e2'), 'd1': 'b1-b2-b3-b4-c1-c2-d1', 'd2': 'b5-b6-b7-b8-c3-c4-d2', 'd3': 'b10-b11-b12-b9-c5-c6-d3', 'd4': 'b13-b14-b15-b16-c7-c8-d4', }) assert fuse(d, ave_width=3, rename_keys=True) == expected assert fuse(d, ave_width=4.6, rename_keys=True) == expected expected = with_deps({ 'a': 1, 'e1': (f, (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a')))), 'e2': (f, (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a')))), 'f': (f, 'e1', 'e2'), }) assert fuse(d, ave_width=4.7, rename_keys=False) == expected assert fuse(d, ave_width=7.4, rename_keys=False) == expected expected = with_deps({ 'a': 1, 'b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e1': ( f, (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))) ), 'b10-b11-b12-b13-b14-b15-b16-b9-c5-c6-c7-c8-d3-d4-e2': ( f, (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))) ), 'f': (f, 'e1', 'e2'), 'e1': 'b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e1', 'e2': 'b10-b11-b12-b13-b14-b15-b16-b9-c5-c6-c7-c8-d3-d4-e2', }) assert fuse(d, ave_width=4.7, rename_keys=True) == expected assert fuse(d, ave_width=7.4, rename_keys=True) == expected assert fuse(d, ave_width=7.5, rename_keys=False) == with_deps({ 'a': 1, 'f': (f, (f, (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a')))), (f, (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))))), }) assert fuse(d, ave_width=7.5, rename_keys=True) == with_deps({ 'a': 1, 'b1-b10-b11-b12-b13-b14-b15-b16-b2-b3-b4-b5-b6-b7-b8-b9-c1-c2-c3-c4-c5-c6-c7-c8-d1-d2-d3-d4-e1-e2-f': ( f, (f, (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a')))), (f, (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a'))), (f, (f, (f, 'a'), (f, 'a')), (f, (f, 'a'), (f, 'a')))) ), 'f': 'b1-b10-b11-b12-b13-b14-b15-b16-b2-b3-b4-b5-b6-b7-b8-b9-c1-c2-c3-c4-c5-c6-c7-c8-d1-d2-d3-d4-e1-e2-f', }) d = { 'a': 1, 'b': (f, 'a'), } assert fuse(d, ave_width=1, rename_keys=False) == with_deps({ 'b': (f, 1) }) assert fuse(d, ave_width=1, rename_keys=True) == with_deps({ 'a-b': (f, 1), 'b': 'a-b', }) d = { 'a': 1, 'b': (f, 'a'), 'c': (f, 'b'), 'd': (f, 'c'), } assert fuse(d, ave_width=1, rename_keys=False) == with_deps({ 'd': (f, (f, (f, 1))) }) assert fuse(d, ave_width=1, rename_keys=True) == 
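# --- Illustrative sketch (function and key names invented for the example) ---
# A minimal sketch of `fuse` on a linear chain, matching the case exercised
# right here: with rename_keys=True the chain collapses under a hyphen-joined
# name and the original output key becomes an alias to it.
def _example_fuse_chain():
    from dask.optimize import fuse

    def f(*args):
        return args

    dsk = {'a': 1, 'b': (f, 'a'), 'c': (f, 'b'), 'd': (f, 'c')}
    fused, dependencies = fuse(dsk, ave_width=1, rename_keys=True)
    assert fused == {'a-b-c-d': (f, (f, (f, 1))), 'd': 'a-b-c-d'}
    # `dependencies` maps each surviving key to the keys it still refers to.
# -----------------------------------------------------------------------------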
with_deps({ 'a-b-c-d': (f, (f, (f, 1))), 'd': 'a-b-c-d', }) d = { 'a': 1, 'b': (f, 'a'), 'c': (f, 'a', 'b'), 'd': (f, 'a', 'c'), } assert fuse(d, ave_width=1, rename_keys=False) == with_deps({ 'a': 1, 'd': (f, 'a', (f, 'a', (f, 'a'))), }) assert fuse(d, ave_width=1, rename_keys=True) == with_deps({ 'a': 1, 'b-c-d': (f, 'a', (f, 'a', (f, 'a'))), 'd': 'b-c-d', }) d = { 'a': 1, 'b1': (f, 'a'), 'b2': (f, 'a'), 'c1': (f, 'b1'), 'd1': (f, 'c1'), 'e1': (f, 'd1'), 'f': (f, 'e1', 'b2'), } expected = with_deps({ 'a': 1, 'b2': (f, 'a'), 'e1': (f, (f, (f, (f, 'a')))), 'f': (f, 'e1', 'b2'), }) assert fuse(d, ave_width=1, rename_keys=False) == expected assert fuse(d, ave_width=1.9, rename_keys=False) == expected expected = with_deps({ 'a': 1, 'b2': (f, 'a'), 'b1-c1-d1-e1': (f, (f, (f, (f, 'a')))), 'f': (f, 'e1', 'b2'), 'e1': 'b1-c1-d1-e1', }) assert fuse(d, ave_width=1, rename_keys=True) == expected assert fuse(d, ave_width=1.9, rename_keys=True) == expected assert fuse(d, ave_width=2, rename_keys=False) == with_deps({ 'a': 1, 'f': (f, (f, (f, (f, (f, 'a')))), (f, 'a')), }) assert fuse(d, ave_width=2, rename_keys=True) == with_deps({ 'a': 1, 'b1-b2-c1-d1-e1-f': (f, (f, (f, (f, (f, 'a')))), (f, 'a')), 'f': 'b1-b2-c1-d1-e1-f', }) d = { 'a': 1, 'b1': (f, 'a'), 'b2': (f, 'a'), 'c1': (f, 'a', 'b1'), 'd1': (f, 'a', 'c1'), 'e1': (f, 'a', 'd1'), 'f': (f, 'a', 'e1', 'b2'), } expected = with_deps({ 'a': 1, 'b2': (f, 'a'), 'e1': (f, 'a', (f, 'a', (f, 'a', (f, 'a')))), 'f': (f, 'a', 'e1', 'b2'), }) assert fuse(d, ave_width=1, rename_keys=False) == expected assert fuse(d, ave_width=1.9, rename_keys=False) == expected expected = with_deps({ 'a': 1, 'b2': (f, 'a'), 'b1-c1-d1-e1': (f, 'a', (f, 'a', (f, 'a', (f, 'a')))), 'f': (f, 'a', 'e1', 'b2'), 'e1': 'b1-c1-d1-e1', }) assert fuse(d, ave_width=1, rename_keys=True) == expected assert fuse(d, ave_width=1.9, rename_keys=True) == expected assert fuse(d, ave_width=2, rename_keys=False) == with_deps({ 'a': 1, 'f': (f, 'a', (f, 'a', (f, 'a', (f, 'a', (f, 'a')))), (f, 'a')), }) assert fuse(d, ave_width=2, rename_keys=True) == with_deps({ 'a': 1, 'b1-b2-c1-d1-e1-f': (f, 'a', (f, 'a', (f, 'a', (f, 'a', (f, 'a')))), (f, 'a')), 'f': 'b1-b2-c1-d1-e1-f', }) d = { 'a': 1, 'b1': (f, 'a'), 'b2': (f, 'a'), 'b3': (f, 'a'), 'c1': (f, 'b1'), 'c2': (f, 'b2'), 'c3': (f, 'b3'), 'd1': (f, 'c1'), 'd2': (f, 'c2'), 'd3': (f, 'c3'), 'e': (f, 'd1', 'd2', 'd3'), 'f': (f, 'e'), 'g': (f, 'f'), } assert fuse(d, ave_width=1, rename_keys=False) == with_deps({ 'a': 1, 'd1': (f, (f, (f, 'a'))), 'd2': (f, (f, (f, 'a'))), 'd3': (f, (f, (f, 'a'))), 'g': (f, (f, (f, 'd1', 'd2', 'd3'))), }) assert fuse(d, ave_width=1, rename_keys=True) == with_deps({ 'a': 1, 'b1-c1-d1': (f, (f, (f, 'a'))), 'b2-c2-d2': (f, (f, (f, 'a'))), 'b3-c3-d3': (f, (f, (f, 'a'))), 'e-f-g': (f, (f, (f, 'd1', 'd2', 'd3'))), 'd1': 'b1-c1-d1', 'd2': 'b2-c2-d2', 'd3': 'b3-c3-d3', 'g': 'e-f-g', }) d = { 'a': 1, 'b': (f, 'a'), 'c': (f, 'b'), 'd': (f, 'b', 'c'), 'e': (f, 'd'), 'f': (f, 'e'), 'g': (f, 'd', 'f'), } assert fuse(d, ave_width=1, rename_keys=False) == with_deps({ 'b': (f, 1), 'd': (f, 'b', (f, 'b')), 'g': (f, 'd', (f, (f, 'd'))), }) assert fuse(d, ave_width=1, rename_keys=True) == with_deps({ 'a-b': (f, 1), 'c-d': (f, 'b', (f, 'b')), 'e-f-g': (f, 'd', (f, (f, 'd'))), 'b': 'a-b', 'd': 'c-d', 'g': 'e-f-g', }) def test_fuse_stressed(): def f(*args): return args d = { 'array-original-27b9f9d257a80fa6adae06a98faf71eb': 1, ('cholesky-upper-26a6b670a8aabb7e2f8936db7ccb6a88', 0, 0): ( f, ('cholesky-26a6b670a8aabb7e2f8936db7ccb6a88', 0, 0), ), 
('cholesky-26a6b670a8aabb7e2f8936db7ccb6a88', 1, 0): ( f, ('cholesky-upper-26a6b670a8aabb7e2f8936db7ccb6a88', 0, 1), ), ('array-27b9f9d257a80fa6adae06a98faf71eb', 0, 0): ( f, 'array-original-27b9f9d257a80fa6adae06a98faf71eb', (slice(0, 10, None), slice(0, 10, None)), ), ('cholesky-upper-26a6b670a8aabb7e2f8936db7ccb6a88', 1, 0): ('cholesky-26a6b670a8aabb7e2f8936db7ccb6a88', 0, 1), ('cholesky-26a6b670a8aabb7e2f8936db7ccb6a88', 1, 1): ( f, (f, ('array-27b9f9d257a80fa6adae06a98faf71eb', 1, 1), (f, [('cholesky-lt-dot-26a6b670a8aabb7e2f8936db7ccb6a88', 1, 0, 1, 0)])) ), ('cholesky-lt-dot-26a6b670a8aabb7e2f8936db7ccb6a88', 1, 0, 1, 0): ( f, ('cholesky-26a6b670a8aabb7e2f8936db7ccb6a88', 1, 0), ('cholesky-upper-26a6b670a8aabb7e2f8936db7ccb6a88', 0, 1), ), ('array-27b9f9d257a80fa6adae06a98faf71eb', 0, 1): ( f, 'array-original-27b9f9d257a80fa6adae06a98faf71eb', (slice(0, 10, None), slice(10, 20, None)), ), ('cholesky-upper-26a6b670a8aabb7e2f8936db7ccb6a88', 1, 1): ( f, ('cholesky-26a6b670a8aabb7e2f8936db7ccb6a88', 1, 1) ), ('cholesky-26a6b670a8aabb7e2f8936db7ccb6a88', 0, 1): ( f, (10, 10) ), ('array-27b9f9d257a80fa6adae06a98faf71eb', 1, 1): ( f, 'array-original-27b9f9d257a80fa6adae06a98faf71eb', (slice(10, 20, None), slice(10, 20, None)), ), ('cholesky-upper-26a6b670a8aabb7e2f8936db7ccb6a88', 0, 1): ( f, ('cholesky-26a6b670a8aabb7e2f8936db7ccb6a88', 0, 0), ('array-27b9f9d257a80fa6adae06a98faf71eb', 0, 1), ), ('cholesky-26a6b670a8aabb7e2f8936db7ccb6a88', 0, 0): ( f, ('array-27b9f9d257a80fa6adae06a98faf71eb', 0, 0), ), } keys = { ('cholesky-upper-26a6b670a8aabb7e2f8936db7ccb6a88', 0, 0), ('cholesky-upper-26a6b670a8aabb7e2f8936db7ccb6a88', 0, 1), ('cholesky-upper-26a6b670a8aabb7e2f8936db7ccb6a88', 1, 0), ('cholesky-upper-26a6b670a8aabb7e2f8936db7ccb6a88', 1, 1), } rv = fuse(d, keys=keys, ave_width=2, rename_keys=True) assert rv == with_deps(rv[0]) def test_fuse_reductions_multiple_input(): def f(*args): return args d = { 'a1': 1, 'a2': 2, 'b': (f, 'a1', 'a2'), 'c': (f, 'b'), } assert fuse(d, ave_width=2, rename_keys=False) == with_deps({ 'c': (f, (f, 1, 2)), }) assert fuse(d, ave_width=2, rename_keys=True) == with_deps({ 'a1-a2-b-c': (f, (f, 1, 2)), 'c': 'a1-a2-b-c', }) assert fuse(d, ave_width=1, rename_keys=False) == with_deps({ 'a1': 1, 'a2': 2, 'c': (f, (f, 'a1', 'a2')), }) assert fuse(d, ave_width=1, rename_keys=True) == with_deps({ 'a1': 1, 'a2': 2, 'b-c': (f, (f, 'a1', 'a2')), 'c': 'b-c', }) d = { 'a1': 1, 'a2': 2, 'b1': (f, 'a1'), 'b2': (f, 'a1', 'a2'), 'b3': (f, 'a2'), 'c': (f, 'b1', 'b2', 'b3'), } expected = with_deps(d) assert fuse(d, ave_width=1, rename_keys=False) == expected assert fuse(d, ave_width=2.9, rename_keys=False) == expected assert fuse(d, ave_width=1, rename_keys=True) == expected assert fuse(d, ave_width=2.9, rename_keys=True) == expected assert fuse(d, ave_width=3, rename_keys=False) == with_deps({ 'a1': 1, 'a2': 2, 'c': (f, (f, 'a1'), (f, 'a1', 'a2'), (f, 'a2')), }) assert fuse(d, ave_width=3, rename_keys=True) == with_deps({ 'a1': 1, 'a2': 2, 'b1-b2-b3-c': (f, (f, 'a1'), (f, 'a1', 'a2'), (f, 'a2')), 'c': 'b1-b2-b3-c', }) d = { 'a1': 1, 'a2': 2, 'b1': (f, 'a1'), 'b2': (f, 'a1', 'a2'), 'b3': (f, 'a2'), 'c1': (f, 'b1', 'b2'), 'c2': (f, 'b2', 'b3'), } assert fuse(d, ave_width=1, rename_keys=False) == with_deps(d) assert fuse(d, ave_width=1, rename_keys=True) == with_deps(d) assert fuse(d, ave_width=2, rename_keys=False) == with_deps({ 'a1': 1, 'a2': 2, 'b2': (f, 'a1', 'a2'), 'c1': (f, (f, 'a1'), 'b2'), 'c2': (f, 'b2', (f, 'a2')), }) assert fuse(d, ave_width=2, rename_keys=True) 
== with_deps({ 'a1': 1, 'a2': 2, 'b2': (f, 'a1', 'a2'), 'b1-c1': (f, (f, 'a1'), 'b2'), 'b3-c2': (f, 'b2', (f, 'a2')), 'c1': 'b1-c1', 'c2': 'b3-c2', }) d = { 'a1': 1, 'a2': 2, 'b1': (f, 'a1'), 'b2': (f, 'a1', 'a2'), 'b3': (f, 'a2'), 'c1': (f, 'b1', 'b2'), 'c2': (f, 'b2', 'b3'), 'd': (f, 'c1', 'c2'), } assert fuse(d, ave_width=1, rename_keys=False) == with_deps(d) assert fuse(d, ave_width=1, rename_keys=True) == with_deps(d) # A more aggressive heuristic could do this at `ave_width=2`. Perhaps # we can improve this. Nevertheless, this is behaving as intended. assert fuse(d, ave_width=3, rename_keys=False) == with_deps({ 'a1': 1, 'a2': 2, 'b2': (f, 'a1', 'a2'), 'd': (f, (f, (f, 'a1'), 'b2'), (f, 'b2', (f, 'a2'))), }) assert fuse(d, ave_width=3, rename_keys=True) == with_deps({ 'a1': 1, 'a2': 2, 'b2': (f, 'a1', 'a2'), 'b1-b3-c1-c2-d': (f, (f, (f, 'a1'), 'b2'), (f, 'b2', (f, 'a2'))), 'd': 'b1-b3-c1-c2-d', }) dask-0.16.0/dask/tests/test_order.py000066400000000000000000000105101320364734500173340ustar00rootroot00000000000000from itertools import chain from dask.order import child_max, ndependents, order from dask.core import get_deps from dask.utils_test import add, inc def issorted(L, reverse=False): return sorted(L, reverse=reverse) == L def f(*args): pass def test_ordering_keeps_groups_together(): a, b, c = 'abc' d = dict(((a, i), (f,)) for i in range(4)) d.update({(b, 0): (f, (a, 0), (a, 1)), (b, 1): (f, (a, 2), (a, 3))}) o = order(d) assert abs(o[(a, 0)] - o[(a, 1)]) == 1 assert abs(o[(a, 2)] - o[(a, 3)]) == 1 d = dict(((a, i), (f,)) for i in range(4)) d.update({(b, 0): (f, (a, 0), (a, 2)), (b, 1): (f, (a, 1), (a, 3))}) o = order(d) assert abs(o[(a, 0)] - o[(a, 2)]) == 1 assert abs(o[(a, 1)] - o[(a, 3)]) == 1 def test_prefer_broker_nodes(): """ b0 b1 b2 | \ / a0 a1 a1 should be run before a0 """ a, b, c = 'abc' dsk = {(a, 0): (f,), (a, 1): (f,), (b, 0): (f, (a, 0)), (b, 1): (f, (a, 1)), (b, 2): (f, (a, 1))} o = order(dsk) assert o[(a, 1)] < o[(a, 0)] # Switch name of 0, 1 to ensure that this isn't due to string comparison dsk = {(a, 0): (f,), (a, 1): (f,), (b, 0): (f, (a, 0)), (b, 1): (f, (a, 1)), (b, 2): (f, (a, 0))} o = order(dsk) assert o[(a, 1)] > o[(a, 0)] def test_base_of_reduce_preferred(): """ a3 /| a2 | /| | a1 | | /| | | a0 | | | | | | | b0 b1 b2 b3 \ \ / / c We really want to run b0 quickly """ dsk = dict((('a', i), (f, ('a', i - 1), ('b', i))) for i in [1, 2, 3]) dsk[('a', 0)] = (f, ('b', 0)) dsk.update(dict((('b', i), (f, 'c', 1)) for i in [0, 1, 2, 3])) dsk['c'] = 1 o = order(dsk) assert o == {('a', 3): 0, ('a', 2): 1, ('a', 1): 2, ('a', 0): 3, ('b', 0): 4, 'c': 5, ('b', 1): 6, ('b', 2): 7, ('b', 3): 8} # ('b', 0) is the most important out of ('b', i) assert min([('b', i) for i in [0, 1, 2, 3]], key=o.get) == ('b', 0) def test_deep_bases_win_over_dependents(): """ d should come before e and probably before one of b and c a / | \ . 
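# --- Illustrative sketch (graph invented; the callable is irrelevant) --------
# A minimal sketch of `dask.order.order`, mirroring `test_prefer_deep` below.
# It returns a dict mapping each key to a rank; smaller ranks mark the more
# important tasks, so the deep chain ending at 'c' wins over the shallow one
# ending at 'y'.
def _example_order():
    from dask.order import order
    from dask.utils_test import inc

    dsk = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b'),
           'x': 1, 'y': (inc, 'x')}
    assert order(dsk) == {'c': 0, 'b': 1, 'a': 2, 'y': 3, 'x': 4}
# -----------------------------------------------------------------------------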
b c | / \ | / e d """ dsk = {'a': (f, 'b', 'c', 'd'), 'b': (f, 'd', 'e'), 'c': (f, 'd'), 'd': 1, 'e': 2} o = order(dsk) assert o['d'] < o['e'] assert o['d'] < o['b'] or o['d'] < o['c'] def test_prefer_deep(): """ c | y b | | x a Prefer longer chains first so we should start with c """ dsk = {'a': 1, 'b': (f, 'a'), 'c': (f, 'b'), 'x': 1, 'y': (f, 'x')} o = order(dsk) assert o == {'c': 0, 'b': 1, 'a': 2, 'y': 3, 'x': 4} def test_stacklimit(): dsk = dict(('x%s' % (i + 1), (inc, 'x%s' % i)) for i in range(10000)) dependencies, dependents = get_deps(dsk) scores = dict.fromkeys(dsk, 1) child_max(dependencies, dependents, scores) ndependents(dependencies, dependents) def test_ndependents(): a, b, c = 'abc' dsk = dict(chain((((a, i), i * 2) for i in range(5)), (((b, i), (add, i, (a, i))) for i in range(5)), (((c, i), (add, i, (b, i))) for i in range(5)))) result = ndependents(*get_deps(dsk)) expected = dict(chain((((a, i), 3) for i in range(5)), (((b, i), 2) for i in range(5)), (((c, i), 1) for i in range(5)))) assert result == expected dsk = {a: 1, b: 1} deps = get_deps(dsk) assert ndependents(*deps) == dsk dsk = {a: 1, b: (add, a, 1), c: (add, b, a)} assert ndependents(*get_deps(dsk)) == {a: 4, b: 2, c: 1} dsk = {a: 1, b: a, c: b} deps = get_deps(dsk) assert ndependents(*deps) == {a: 3, b: 2, c: 1} def test_break_ties_by_str(): dsk = {('x', i): (inc, i) for i in range(10)} x_keys = sorted(dsk) dsk['y'] = list(x_keys) o = order(dsk) expected = {'y': 0} expected.update({k: i + 1 for i, k in enumerate(x_keys)}) assert o == expected def test_order_doesnt_fail_on_mixed_type_keys(): order({'x': (inc, 1), ('y', 0): (inc, 2), 'z': (add, 'x', ('y', 0))}) dask-0.16.0/dask/tests/test_rewrite.py000066400000000000000000000105751320364734500177150ustar00rootroot00000000000000from dask.rewrite import RewriteRule, RuleSet, head, args, VAR, Traverser from dask.utils_test import inc, add def double(x): return x * 2 def test_head(): assert head((inc, 1)) == inc assert head((add, 1, 2)) == add assert head((add, (inc, 1), (inc, 1))) == add assert head([1, 2, 3]) == list def test_args(): assert args((inc, 1)) == (1,) assert args((add, 1, 2)) == (1, 2) assert args(1) == () assert args([1, 2, 3]) == [1, 2, 3] def test_traverser(): term = (add, (inc, 1), (double, (inc, 1), 2)) t = Traverser(term) t2 = t.copy() assert t.current == add t.next() assert t.current == inc # Ensure copies aren't advanced when the original advances assert t2.current == add t.skip() assert t.current == double t.next() assert t.current == inc assert list(t2) == [add, inc, 1, double, inc, 1, 2] vars = ("a", "b", "c") # add(a, 1) -> inc(a) rule1 = RewriteRule((add, "a", 1), (inc, "a"), vars) # add(a, a) -> double(a) rule2 = RewriteRule((add, "a", "a"), (double, "a"), vars) # add(inc(a), inc(a)) -> add(double(a), 2) rule3 = RewriteRule((add, (inc, "a"), (inc, "a")), (add, (double, "a"), 2), vars) # add(inc(b), inc(a)) -> add(add(a, b), 2) rule4 = RewriteRule((add, (inc, "b"), (inc, "a")), (add, (add, "a", "b"), 2), vars) # sum([c, b, a]) -> add(add(a, b), c) rule5 = RewriteRule((sum, ["c", "b", "a"]), (add, (add, "a", "b"), "c"), vars) # list(x) -> x if x is a list def repl_list(sd): x = sd['x'] if isinstance(x, list): return x else: return (list, x) rule6 = RewriteRule((list, 'x'), repl_list, ('x',)) def test_RewriteRule(): # Test extraneous vars are removed, varlist is correct assert rule1.vars == ("a",) assert rule1._varlist == ["a"] assert rule2.vars == ("a",) assert rule2._varlist == ["a", "a"] assert rule3.vars == ("a",) assert rule3._varlist 
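# --- Illustrative sketch (rule and term invented for the example) ------------
# A minimal sketch of term rewriting with `RewriteRule` / `RuleSet`: the rule
# below rewrites add(a, 1) into inc(a), and `rewrite` applies it bottom-up so
# nested matches are simplified as well.
def _example_rewrite():
    from dask.rewrite import RewriteRule, RuleSet
    from dask.utils_test import add, inc

    rule = RewriteRule((add, 'a', 1), (inc, 'a'), ('a',))
    rs = RuleSet(rule)
    assert rs.rewrite((add, (add, 5, 1), 1)) == (inc, (inc, 5))
# -----------------------------------------------------------------------------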
== ["a", "a"] assert rule4.vars == ("a", "b") assert rule4._varlist == ["b", "a"] assert rule5.vars == ("a", "b", "c") assert rule5._varlist == ["c", "b", "a"] def test_RewriteRuleSubs(): # Test both rhs substitution and callable rhs assert rule1.subs({'a': 1}) == (inc, 1) assert rule6.subs({'x': [1, 2, 3]}) == [1, 2, 3] rules = [rule1, rule2, rule3, rule4, rule5, rule6] rs = RuleSet(*rules) def test_RuleSet(): net = ({add: ({VAR: ({VAR: ({}, [1]), 1: ({}, [0])}, []), inc: ({VAR: ({inc: ({VAR: ({}, [2, 3])}, [])}, [])}, [])}, []), list: ({VAR: ({}, [5])}, []), sum: ({list: ({VAR: ({VAR: ({VAR: ({}, [4])}, [])}, [])}, [])}, [])}, []) assert rs._net == net assert rs.rules == rules def test_matches(): term = (add, 2, 1) matches = list(rs.iter_matches(term)) assert len(matches) == 1 assert matches[0] == (rule1, {'a': 2}) # Test matches specific before general term = (add, 1, 1) matches = list(rs.iter_matches(term)) assert len(matches) == 2 assert matches[0] == (rule1, {'a': 1}) assert matches[1] == (rule2, {'a': 1}) # Test matches unhashable. What it's getting rewritten to doesn't make # sense, this is just to test that it works. :) term = (add, [1], [1]) matches = list(rs.iter_matches(term)) assert len(matches) == 1 assert matches[0] == (rule2, {'a': [1]}) # Test match at depth term = (add, (inc, 1), (inc, 1)) matches = list(rs.iter_matches(term)) assert len(matches) == 3 assert matches[0] == (rule3, {'a': 1}) assert matches[1] == (rule4, {'a': 1, 'b': 1}) assert matches[2] == (rule2, {'a': (inc, 1)}) # Test non-linear pattern checking term = (add, 2, 3) matches = list(rs.iter_matches(term)) assert len(matches) == 0 def test_rewrite(): # Rewrite inside list term = (sum, [(add, 1, 1), (add, 1, 1), (add, 1, 1)]) new_term = rs.rewrite(term) assert new_term == (add, (add, (inc, 1), (inc, 1)), (inc, 1)) # Rules aren't applied to exhaustion, this can be further simplified new_term = rs.rewrite(new_term) assert new_term == (add, (add, (double, 1), 2), (inc, 1)) term = (add, (add, (add, (add, 1, 2), (add, 1, 2)), (add, (add, 1, 2), (add, 1, 2))), 1) assert rs.rewrite(term) == (inc, (double, (double, (add, 1, 2)))) # Callable RewriteRule rhs term = (list, [1, 2, 3]) assert rs.rewrite(term) == [1, 2, 3] term = (list, (map, inc, [1, 2, 3])) assert rs.rewrite(term) == term dask-0.16.0/dask/tests/test_sharedict.py000066400000000000000000000030321320364734500201700ustar00rootroot00000000000000from collections import Mapping import pytest from toolz import merge from dask.sharedict import ShareDict a = {'x': 1, 'y': 2} b = {'z': 3} c = {'w': 2} def test_core(): s = ShareDict() assert isinstance(s, Mapping) s.update(a) s.update(b) assert s['x'] == 1 with pytest.raises(KeyError): s['abc'] with pytest.raises((NotImplementedError, TypeError)): s['abc'] = 123 def test_structure(): s = ShareDict() s.update(a) s.update(b) s.update(c) assert all(any(d is x for d in s.dicts.values()) for x in [a, b, c]) @pytest.mark.skip def test_structure_2(): s = ShareDict() s.update_with_key(a, key='a') s.update_with_key(b, key='b') s.update_with_key(c, key='c') assert s.order == ['a', 'b', 'c'] s.update_with_key(b, key='b') assert s.order == ['a', 'c', 'b'] def test_keys_items(): s = ShareDict() s.update_with_key(a, key='a') s.update_with_key(b, key='b') s.update_with_key(c, key='c') d = merge(a, b, c) for fn in [dict, set, len]: assert fn(s) == fn(d) for fn in [lambda x: x.values(), lambda x: x.keys(), lambda x: x.items()]: assert set(fn(s)) == set(fn(d)) def test_update_with_sharedict(): s = ShareDict() s.update_with_key(a, 
key='a') s.update_with_key(b, key='b') s.update_with_key(c, key='c') d = {'z': 5} s2 = ShareDict() s2.update_with_key(a, key='a') s2.update_with_key(d, key='d') s.update(s2) assert s.dicts['a'] is s.dicts['a'] dask-0.16.0/dask/tests/test_sizeof.py000066400000000000000000000027251320364734500175310ustar00rootroot00000000000000from __future__ import print_function, division, absolute_import import sys import pytest from dask.sizeof import sizeof, getsizeof from dask.utils import funcname def test_base(): assert sizeof(1) == getsizeof(1) def test_name(): assert funcname(sizeof) == 'sizeof' def test_containers(): assert sizeof([1, 2, [3]]) > (getsizeof(3) * 3 + getsizeof([])) def test_numpy(): np = pytest.importorskip('numpy') assert sizeof(np.empty(1000, dtype='f8')) == 8000 dt = np.dtype('f8') assert sizeof(dt) == sys.getsizeof(dt) def test_pandas(): pd = pytest.importorskip('pandas') df = pd.DataFrame({'x': [1, 2, 3], 'y': ['a' * 100, 'b' * 100, 'c' * 100]}, index=[10, 20, 30]) assert sizeof(df) >= sizeof(df.x) + sizeof(df.y) - sizeof(df.index) assert sizeof(df.x) >= sizeof(df.index) if pd.__version__ >= '0.17.1': assert sizeof(df.y) >= 100 * 3 assert sizeof(df.index) >= 20 assert isinstance(sizeof(df), int) assert isinstance(sizeof(df.x), int) assert isinstance(sizeof(df.index), int) def test_sparse_matrix(): sparse = pytest.importorskip('scipy.sparse') sp = sparse.eye(10) # These are the 32-bit Python 2.7 values. assert sizeof(sp.todia()) >= 152 assert sizeof(sp.tobsr()) >= 232 assert sizeof(sp.tocoo()) >= 240 assert sizeof(sp.tocsc()) >= 232 assert sizeof(sp.tocsr()) >= 232 assert sizeof(sp.todok()) >= 192 assert sizeof(sp.tolil()) >= 204 dask-0.16.0/dask/tests/test_threaded.py000066400000000000000000000064561320364734500200170ustar00rootroot00000000000000import os import sys import signal import threading from multiprocessing.pool import ThreadPool from time import time, sleep import pytest from dask.context import set_options from dask.compatibility import PY2 from dask.threaded import get from dask.utils_test import inc, add def test_get(): dsk = {'x': 1, 'y': 2, 'z': (inc, 'x'), 'w': (add, 'z', 'y')} assert get(dsk, 'w') == 4 assert get(dsk, ['w', 'z']) == (4, 2) def test_nested_get(): dsk = {'x': 1, 'y': 2, 'a': (add, 'x', 'y'), 'b': (sum, ['x', 'y'])} assert get(dsk, ['a', 'b']) == (3, 3) def test_get_without_computation(): dsk = {'x': 1} assert get(dsk, 'x') == 1 def bad(x): raise ValueError() def test_exceptions_rise_to_top(): dsk = {'x': 1, 'y': (bad, 'x')} pytest.raises(ValueError, lambda: get(dsk, 'y')) def test_reuse_pool(): pool = ThreadPool() with set_options(pool=pool): assert get({'x': (inc, 1)}, 'x') == 2 assert get({'x': (inc, 1)}, 'x') == 2 def test_threaded_within_thread(): L = [] def f(i): result = get({'x': (lambda: i,)}, 'x', num_workers=2) L.append(result) before = threading.active_count() for i in range(20): t = threading.Thread(target=f, args=(1,)) t.daemon = True t.start() t.join() assert L == [1] del L[:] start = time() # wait for most threads to join while threading.active_count() > before + 10: sleep(0.01) assert time() < start + 5 def test_dont_spawn_too_many_threads(): before = threading.active_count() dsk = {('x', i): (lambda: i,) for i in range(10)} dsk['x'] = (sum, list(dsk)) for i in range(20): get(dsk, 'x', num_workers=4) after = threading.active_count() assert after <= before + 8 def test_thread_safety(): def f(x): return 1 dsk = {'x': (sleep, 0.05), 'y': (f, 'x')} L = [] def test_f(): L.append(get(dsk, 'y')) threads = [] for i in range(20): t = 
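# --- Illustrative sketch (reuses the graph from `test_get` above) ------------
# A minimal sketch of the threaded scheduler; `num_workers` bounds the size of
# the thread pool that executes the tasks.
def _example_threaded_get():
    from dask.threaded import get
    from dask.utils_test import add, inc

    dsk = {'x': 1, 'y': 2, 'z': (inc, 'x'), 'w': (add, 'z', 'y')}
    assert get(dsk, 'w') == 4                    # single key
    assert get(dsk, ['w', 'z']) == (4, 2)        # list of keys -> tuple
    assert get(dsk, 'w', num_workers=2) == 4     # bounded thread pool
# -----------------------------------------------------------------------------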
threading.Thread(target=test_f) t.daemon = True t.start() threads.append(t) for thread in threads: thread.join() assert L == [1] * 20 @pytest.mark.xfail('xdist' in sys.modules, reason=("This test fails intermittently when using " "pytest-xdist (maybe)")) def test_interrupt(): # Python 2 and windows 2 & 3 both implement `queue.get` using polling, # which means we can set an exception to interrupt the call to `get`. # Python 3 on other platforms requires sending SIGINT to the main thread. if PY2: from thread import interrupt_main elif os.name == 'nt': from _thread import interrupt_main else: main_thread = threading.get_ident() def interrupt_main(): signal.pthread_kill(main_thread, signal.SIGINT) def long_task(): sleep(5) dsk = {('x', i): (long_task,) for i in range(20)} dsk['x'] = (len, list(dsk.keys())) try: interrupter = threading.Timer(0.5, interrupt_main) interrupter.start() start = time() get(dsk, 'x') except KeyboardInterrupt: pass except Exception: assert False, "Failed to interrupt" stop = time() if stop - start > 4: assert False, "Failed to interrupt" dask-0.16.0/dask/tests/test_utils.py000066400000000000000000000156461320364734500174000ustar00rootroot00000000000000import functools import operator import pickle import numpy as np import pytest from dask.sharedict import ShareDict from dask.utils import (takes_multiple_arguments, Dispatch, random_state_data, memory_repr, methodcaller, M, skip_doctest, SerializableLock, funcname, ndeepmap, ensure_dict, extra_titles, asciitable, itemgetter, partial_by_order) from dask.utils_test import inc def test_takes_multiple_arguments(): assert takes_multiple_arguments(map) assert not takes_multiple_arguments(sum) def multi(a, b, c): return a, b, c class Singular(object): def __init__(self, a): pass class Multi(object): def __init__(self, a, b): pass assert takes_multiple_arguments(multi) assert not takes_multiple_arguments(Singular) assert takes_multiple_arguments(Multi) def f(): pass assert not takes_multiple_arguments(f) def vararg(*args): pass assert takes_multiple_arguments(vararg) assert not takes_multiple_arguments(vararg, varargs=False) def test_dispatch(): foo = Dispatch() foo.register(int, lambda a: a + 1) foo.register(float, lambda a: a - 1) foo.register(tuple, lambda a: tuple(foo(i) for i in a)) foo.register(object, lambda a: a) class Bar(object): pass b = Bar() assert foo(1) == 2 assert foo.dispatch(int)(1) == 2 assert foo(1.0) == 0.0 assert foo(b) == b assert foo((1, 2.0, b)) == (2, 1.0, b) def test_dispatch_lazy(): # this tests the recursive component of dispatch foo = Dispatch() foo.register(int, lambda a: a) import decimal # keep it outside lazy dec for test def foo_dec(a): return a + 1 @foo.register_lazy("decimal") def register_decimal(): import decimal foo.register(decimal.Decimal, foo_dec) # This test needs to be *before* any other calls assert foo.dispatch(decimal.Decimal) == foo_dec assert foo(decimal.Decimal(1)) == decimal.Decimal(2) assert foo(1) == 1 def test_random_state_data(): seed = 37 state = np.random.RandomState(seed) n = 10000 # Use an integer states = random_state_data(n, seed) assert len(states) == n # Use RandomState object states2 = random_state_data(n, state) for s1, s2 in zip(states, states2): assert s1.shape == (624,) assert (s1 == s2).all() # Consistent ordering states = random_state_data(10, 1234) states2 = random_state_data(20, 1234)[:10] for s1, s2 in zip(states, states2): assert (s1 == s2).all() def test_memory_repr(): for power, mem_repr in enumerate(['1.0 bytes', '1.0 KB', '1.0 MB', '1.0 GB']): assert 
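# --- Illustrative sketch (the `describe` dispatcher and handlers are made up) -
# A minimal sketch of `dask.utils.Dispatch` (exercised by `test_dispatch`
# above): handlers are registered per type, and lookup walks the MRO so an
# `object` handler acts as a fallback.
def _example_dispatch():
    from dask.utils import Dispatch

    describe = Dispatch('describe')
    describe.register(int, lambda x: 'int: %d' % x)
    describe.register(list, lambda x: 'list of %d items' % len(x))
    describe.register(object, lambda x: 'some other object')

    assert describe(3) == 'int: 3'
    assert describe([1, 2]) == 'list of 2 items'
    assert describe(3.5) == 'some other object'   # falls back via the MRO
# -----------------------------------------------------------------------------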
memory_repr(1024 ** power) == mem_repr def test_method_caller(): a = [1, 2, 3, 3, 3] f = methodcaller('count') assert f(a, 3) == a.count(3) assert methodcaller('count') is f assert M.count is f assert pickle.loads(pickle.dumps(f)) is f assert 'count' in dir(M) assert 'count' in str(methodcaller('count')) assert 'count' in repr(methodcaller('count')) def test_skip_doctest(): example = """>>> xxx >>> >>> # comment >>> xxx""" res = skip_doctest(example) assert res == """>>> xxx # doctest: +SKIP >>> >>> # comment >>> xxx # doctest: +SKIP""" assert skip_doctest(None) == '' def test_extra_titles(): example = """ Notes ----- hello Foo --- Notes ----- bar """ expected = """ Notes ----- hello Foo --- Extra Notes ----------- bar """ assert extra_titles(example) == expected def test_asciitable(): res = asciitable(['fruit', 'color'], [('apple', 'red'), ('banana', 'yellow'), ('tomato', 'red'), ('pear', 'green')]) assert res == ('+--------+--------+\n' '| fruit | color |\n' '+--------+--------+\n' '| apple | red |\n' '| banana | yellow |\n' '| tomato | red |\n' '| pear | green |\n' '+--------+--------+') def test_SerializableLock(): a = SerializableLock() b = SerializableLock() with a: pass with a: with b: pass with a: assert not a.acquire(False) a2 = pickle.loads(pickle.dumps(a)) a3 = pickle.loads(pickle.dumps(a)) a4 = pickle.loads(pickle.dumps(a2)) for x in [a, a2, a3, a4]: for y in [a, a2, a3, a4]: with x: assert not y.acquire(False) b2 = pickle.loads(pickle.dumps(b)) b3 = pickle.loads(pickle.dumps(b2)) for x in [a, a2, a3, a4]: for y in [b, b2, b3]: with x: with y: pass with y: with x: pass def test_SerializableLock_name_collision(): a = SerializableLock('a') b = SerializableLock('b') c = SerializableLock('a') d = SerializableLock() assert a.lock is not b.lock assert a.lock is c.lock assert d.lock not in (a.lock, b.lock, c.lock) def test_funcname(): def foo(a, b, c): pass assert funcname(foo) == 'foo' assert funcname(functools.partial(foo, a=1)) == 'foo' assert funcname(M.sum) == 'sum' assert funcname(lambda: 1) == 'lambda' class Foo(object): pass assert funcname(Foo) == 'Foo' assert 'Foo' in funcname(Foo()) def test_funcname_toolz(): toolz = pytest.importorskip('toolz') @toolz.curry def foo(a, b, c): pass assert funcname(foo) == 'foo' assert funcname(foo(1)) == 'foo' def test_funcname_multipledispatch(): md = pytest.importorskip('multipledispatch') @md.dispatch(int, int, int) def foo(a, b, c): pass assert funcname(foo) == 'foo' assert funcname(functools.partial(foo, a=1)) == 'foo' def test_ndeepmap(): L = 1 assert ndeepmap(0, inc, L) == 2 L = [1] assert ndeepmap(0, inc, L) == 2 L = [1, 2, 3] assert ndeepmap(1, inc, L) == [2, 3, 4] L = [[1, 2], [3, 4]] assert ndeepmap(2, inc, L) == [[2, 3], [4, 5]] L = [[[1, 2], [3, 4, 5]], [[6], []]] assert ndeepmap(3, inc, L) == [[[2, 3], [4, 5, 6]], [[7], []]] def test_ensure_dict(): d = {'x': 1} assert ensure_dict(d) is d sd = ShareDict() sd.update(d) assert type(ensure_dict(sd)) is dict assert ensure_dict(sd) == d class mydict(dict): pass md = mydict() md['x'] = 1 assert type(ensure_dict(md)) is dict assert ensure_dict(md) == d def test_itemgetter(): data = [1, 2, 3] g = itemgetter(1) assert g(data) == 2 g2 = pickle.loads(pickle.dumps(g)) assert g2(data) == 2 assert g2.index == 1 def test_partial_by_order(): assert partial_by_order(5, function=operator.add, other=[(1, 20)]) == 25 dask-0.16.0/dask/threaded.py000066400000000000000000000044571320364734500156150ustar00rootroot00000000000000""" A threaded shared-memory scheduler See scheduler.py """ from __future__ 
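# --- Illustrative sketch (sample data invented for the example) --------------
# A minimal sketch of `methodcaller` and its `M` shortcut from `dask.utils`
# (see `test_method_caller` above): both build a picklable callable that
# invokes a named method on its first argument.
def _example_methodcaller():
    from dask.utils import M, methodcaller

    data = [1, 2, 3, 3]
    assert M.count(data, 3) == data.count(3) == 2
    assert methodcaller('upper')('abc') == 'ABC'
# -----------------------------------------------------------------------------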
import absolute_import, division, print_function import sys from collections import defaultdict from multiprocessing.pool import ThreadPool import threading from threading import current_thread, Lock from .local import get_async from .context import _globals from .utils_test import inc, add # noqa: F401 def _thread_get_id(): return current_thread().ident main_thread = current_thread() default_pool = None pools = defaultdict(dict) pools_lock = Lock() def pack_exception(e, dumps): return e, sys.exc_info()[2] def get(dsk, result, cache=None, num_workers=None, **kwargs): """ Threaded cached implementation of dask.get Parameters ---------- dsk: dict A dask dictionary specifying a workflow result: key or list of keys Keys corresponding to desired data num_workers: integer of thread count The number of threads to use in the ThreadPool that will actually execute tasks cache: dict-like (optional) Temporary storage of results Examples -------- >>> dsk = {'x': 1, 'y': 2, 'z': (inc, 'x'), 'w': (add, 'z', 'y')} >>> get(dsk, 'w') 4 >>> get(dsk, ['w', 'y']) (4, 2) """ global default_pool pool = _globals['pool'] thread = current_thread() with pools_lock: if pool is None: if num_workers is None and thread is main_thread: if default_pool is None: default_pool = ThreadPool() pool = default_pool elif thread in pools and num_workers in pools[thread]: pool = pools[thread][num_workers] else: pool = ThreadPool(num_workers) pools[thread][num_workers] = pool results = get_async(pool.apply_async, len(pool._pool), dsk, result, cache=cache, get_id=_thread_get_id, pack_exception=pack_exception, **kwargs) # Cleanup pools associated to dead threads with pools_lock: active_threads = set(threading.enumerate()) if thread is not main_thread: for t in list(pools): if t not in active_threads: for p in pools.pop(t).values(): p.close() return results dask-0.16.0/dask/utils.py000066400000000000000000000552261320364734500151750ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import functools import inspect import os import shutil import sys import tempfile from errno import ENOENT from collections import Iterator from contextlib import contextmanager from importlib import import_module from numbers import Integral from threading import Lock import multiprocessing as mp import uuid from weakref import WeakValueDictionary from .compatibility import getargspec, PY3, unicode, bind_method from .core import get_deps from .context import _globals from .optimize import key_split # noqa: F401 system_encoding = sys.getdefaultencoding() if system_encoding == 'ascii': system_encoding = 'utf-8' def deepmap(func, *seqs): """ Apply function inside nested lists >>> inc = lambda x: x + 1 >>> deepmap(inc, [[1, 2], [3, 4]]) [[2, 3], [4, 5]] >>> add = lambda x, y: x + y >>> deepmap(add, [[1, 2], [3, 4]], [[10, 20], [30, 40]]) [[11, 22], [33, 44]] """ if isinstance(seqs[0], (list, Iterator)): return [deepmap(func, *items) for items in zip(*seqs)] else: return func(*seqs) def homogeneous_deepmap(func, seq): if not seq: return seq n = 0 tmp = seq while isinstance(tmp, list): n += 1 tmp = tmp[0] return ndeepmap(n, func, seq) def ndeepmap(n, func, seq): """ Call a function on every element within a nested container >>> def inc(x): ... 
return x + 1 >>> L = [[1, 2], [3, 4, 5]] >>> ndeepmap(2, inc, L) [[2, 3], [4, 5, 6]] """ if n == 1: return [func(item) for item in seq] elif n > 1: return [ndeepmap(n - 1, func, item) for item in seq] elif isinstance(seq, list): return func(seq[0]) else: return func(seq) @contextmanager def ignoring(*exceptions): try: yield except exceptions: pass def import_required(mod_name, error_msg): """Attempt to import a required dependency. Raises a RuntimeError if the requested module is not available. """ try: return import_module(mod_name) except ImportError: raise RuntimeError(error_msg) @contextmanager def tmpfile(extension='', dir=None): extension = '.' + extension.lstrip('.') handle, filename = tempfile.mkstemp(extension, dir=dir) os.close(handle) os.remove(filename) try: yield filename finally: if os.path.exists(filename): if os.path.isdir(filename): shutil.rmtree(filename) else: with ignoring(OSError): os.remove(filename) @contextmanager def tmpdir(dir=None): dirname = tempfile.mkdtemp(dir=dir) try: yield dirname finally: if os.path.exists(dirname): if os.path.isdir(dirname): with ignoring(OSError): shutil.rmtree(dirname) else: with ignoring(OSError): os.remove(dirname) @contextmanager def filetext(text, extension='', open=open, mode='w'): with tmpfile(extension=extension) as filename: f = open(filename, mode=mode) try: f.write(text) finally: try: f.close() except AttributeError: pass yield filename @contextmanager def changed_cwd(new_cwd): old_cwd = os.getcwd() os.chdir(new_cwd) try: yield finally: os.chdir(old_cwd) @contextmanager def tmp_cwd(dir=None): with tmpdir(dir) as dirname: with changed_cwd(dirname): yield dirname @contextmanager def noop_context(): yield class IndexCallable(object): """ Provide getitem syntax for functions >>> def inc(x): ... return x + 1 >>> I = IndexCallable(inc) >>> I[3] 4 """ __slots__ = 'fn', def __init__(self, fn): self.fn = fn def __getitem__(self, key): return self.fn(key) @contextmanager def filetexts(d, open=open, mode='t', use_tmpdir=True): """ Dumps a number of textfiles to disk d - dict a mapping from filename to text like {'a.csv': '1,1\n2,2'} Since this is meant for use in tests, this context manager will automatically switch to a temporary current directory, to avoid race conditions when running tests in parallel. 
""" with (tmp_cwd() if use_tmpdir else noop_context()): for filename, text in d.items(): f = open(filename, 'w' + mode) try: f.write(text) finally: try: f.close() except AttributeError: pass yield list(d) for filename in d: if os.path.exists(filename): with ignoring(OSError): os.remove(filename) def concrete(seq): """ Make nested iterators concrete lists >>> data = [[1, 2], [3, 4]] >>> seq = iter(map(iter, data)) >>> concrete(seq) [[1, 2], [3, 4]] """ if isinstance(seq, Iterator): seq = list(seq) if isinstance(seq, (tuple, list)): seq = list(map(concrete, seq)) return seq def pseudorandom(n, p, random_state=None): """ Pseudorandom array of integer indexes >>> pseudorandom(5, [0.5, 0.5], random_state=123) array([1, 0, 0, 1, 1], dtype=int8) >>> pseudorandom(10, [0.5, 0.2, 0.2, 0.1], random_state=5) array([0, 2, 0, 3, 0, 1, 2, 1, 0, 0], dtype=int8) """ import numpy as np p = list(p) cp = np.cumsum([0] + p) assert np.allclose(1, cp[-1]) assert len(p) < 256 if not isinstance(random_state, np.random.RandomState): random_state = np.random.RandomState(random_state) x = random_state.random_sample(n) out = np.empty(n, dtype='i1') for i, (low, high) in enumerate(zip(cp[:-1], cp[1:])): out[(x >= low) & (x < high)] = i return out def random_state_data(n, random_state=None): """Return a list of arrays that can initialize ``np.random.RandomState``. Parameters ---------- n : int Number of arrays to return. random_state : int or np.random.RandomState, optional If an int, is used to seed a new ``RandomState``. """ import numpy as np if not isinstance(random_state, np.random.RandomState): random_state = np.random.RandomState(random_state) random_data = random_state.bytes(624 * n * 4) # `n * 624` 32-bit integers l = list(np.frombuffer(random_data, dtype=np.uint32).reshape((n, -1))) assert len(l) == n return l def is_integer(i): """ >>> is_integer(6) True >>> is_integer(42.0) True >>> is_integer('abc') False """ return isinstance(i, Integral) or (isinstance(i, float) and i.is_integer()) ONE_ARITY_BUILTINS = set([abs, all, any, bool, bytearray, bytes, callable, chr, classmethod, complex, dict, dir, enumerate, eval, float, format, frozenset, hash, hex, id, int, iter, len, list, max, min, next, oct, open, ord, range, repr, reversed, round, set, slice, sorted, staticmethod, str, sum, tuple, type, vars, zip, memoryview]) if PY3: ONE_ARITY_BUILTINS.add(ascii) # noqa: F821 MULTI_ARITY_BUILTINS = set([compile, delattr, divmod, filter, getattr, hasattr, isinstance, issubclass, map, pow, setattr]) def takes_multiple_arguments(func, varargs=True): """ Does this function take multiple arguments? >>> def f(x, y): pass >>> takes_multiple_arguments(f) True >>> def f(x): pass >>> takes_multiple_arguments(f) False >>> def f(x, y=None): pass >>> takes_multiple_arguments(f) False >>> def f(*args): pass >>> takes_multiple_arguments(f) True >>> class Thing(object): ... 
def __init__(self, a): pass >>> takes_multiple_arguments(Thing) False """ if func in ONE_ARITY_BUILTINS: return False elif func in MULTI_ARITY_BUILTINS: return True try: spec = getargspec(func) except Exception: return False try: is_constructor = spec.args[0] == 'self' and isinstance(func, type) except Exception: is_constructor = False if varargs and spec.varargs: return True ndefaults = 0 if spec.defaults is None else len(spec.defaults) return len(spec.args) - ndefaults - is_constructor > 1 class Dispatch(object): """Simple single dispatch.""" def __init__(self, name=None): self._lookup = {} self._lazy = {} if name: self.__name__ = name def register(self, type, func=None): """Register dispatch of `func` on arguments of type `type`""" def wrapper(func): if isinstance(type, tuple): for t in type: self.register(t, func) else: self._lookup[type] = func return func return wrapper(func) if func is not None else wrapper def register_lazy(self, toplevel, func=None): """ Register a registration function which will be called if the *toplevel* module (e.g. 'pandas') is ever loaded. """ def wrapper(func): self._lazy[toplevel] = func return func return wrapper(func) if func is not None else wrapper def dispatch(self, cls): """Return the function implementation for the given ``cls``""" # Fast path with direct lookup on cls lk = self._lookup try: impl = lk[cls] except KeyError: pass else: return impl # Is a lazy registration function present? toplevel, _, _ = cls.__module__.partition('.') try: register = self._lazy.pop(toplevel) except KeyError: pass else: register() return self.dispatch(cls) # recurse # Walk the MRO and cache the lookup result for cls2 in inspect.getmro(cls)[1:]: if cls2 in lk: lk[cls] = lk[cls2] return lk[cls2] raise TypeError("No dispatch for {0}".format(cls)) def __call__(self, arg): """ Call the corresponding method based on type of argument. """ meth = self.dispatch(type(arg)) return meth(arg) def ensure_not_exists(filename): """ Ensure that a file does not exist. """ try: os.unlink(filename) except OSError as e: if e.errno != ENOENT: raise def _skip_doctest(line): # NumPy docstring contains cursor and comment only example stripped = line.strip() if stripped == '>>>' or stripped.startswith('>>> #'): return stripped elif '>>>' in stripped and '+SKIP' not in stripped: return line + ' # doctest: +SKIP' else: return line def skip_doctest(doc): if doc is None: return '' return '\n'.join([_skip_doctest(line) for line in doc.split('\n')]) def extra_titles(doc): lines = doc.split('\n') titles = {i: lines[i].strip() for i in range(len(lines) - 1) if lines[i + 1] and all(c == '-' for c in lines[i + 1].strip())} seen = set() for i, title in sorted(titles.items()): if title in seen: new_title = 'Extra ' + title lines[i] = lines[i].replace(title, new_title) lines[i + 1] = lines[i + 1].replace('-' * len(title), '-' * len(new_title)) else: seen.add(title) return '\n'.join(lines) def derived_from(original_klass, version=None, ua_args=[]): """Decorator to attach original class's docstring to the wrapped method. Parameters ---------- original_klass: type Original class which the method is derived from version : str Original package version which supports the wrapped method ua_args : list List of keywords which Dask doesn't support. Keywords existing in original but not in Dask will automatically be added. 
""" def wrapper(method): method_name = method.__name__ try: # do not use wraps here, as it hides keyword arguments displayed # in the doc original_method = getattr(original_klass, method_name) doc = original_method.__doc__ if doc is None: doc = '' try: method_args = getargspec(method).args original_args = getargspec(original_method).args not_supported = [m for m in original_args if m not in method_args] except TypeError: not_supported = [] if len(ua_args) > 0: not_supported.extend(ua_args) if len(not_supported) > 0: note = ("\n Notes\n -----\n" " Dask doesn't support the following argument(s).\n\n") args = ''.join([' * {0}\n'.format(a) for a in not_supported]) doc = doc + note + args doc = skip_doctest(doc) doc = extra_titles(doc) method.__doc__ = doc return method except AttributeError: module_name = original_klass.__module__.split('.')[0] @functools.wraps(method) def wrapped(*args, **kwargs): msg = "Base package doesn't support '{0}'.".format(method_name) if version is not None: msg2 = " Use {0} {1} or later to use this method." msg += msg2.format(module_name, version) raise NotImplementedError(msg) return wrapped return wrapper def funcname(func): """Get the name of a function.""" # functools.partial if isinstance(func, functools.partial): return funcname(func.func) # methodcaller if isinstance(func, methodcaller): return func.method module_name = getattr(func, '__module__', None) or '' type_name = getattr(type(func), '__name__', None) or '' # toolz.curry if 'toolz' in module_name and 'curry' == type_name: return func.func_name # multipledispatch objects if 'multipledispatch' in module_name and 'Dispatcher' == type_name: return func.name # All other callables try: name = func.__name__ if name == '': return 'lambda' return name except AttributeError: return str(func) def ensure_bytes(s): """ Turn string or bytes to bytes >>> ensure_bytes(u'123') '123' >>> ensure_bytes('123') '123' >>> ensure_bytes(b'123') '123' """ if isinstance(s, bytes): return s if hasattr(s, 'encode'): return s.encode() msg = "Object %s is neither a bytes object nor has an encode method" raise TypeError(msg % s) def ensure_unicode(s): """ Turn string or bytes to bytes >>> ensure_unicode(u'123') u'123' >>> ensure_unicode('123') u'123' >>> ensure_unicode(b'123') u'123' """ if isinstance(s, unicode): return s if hasattr(s, 'decode'): return s.decode() msg = "Object %s is neither a bytes object nor has an encode method" raise TypeError(msg % s) def digit(n, k, base): """ >>> digit(1234, 0, 10) 4 >>> digit(1234, 1, 10) 3 >>> digit(1234, 2, 10) 2 >>> digit(1234, 3, 10) 1 """ return n // base**k % base def insert(tup, loc, val): """ >>> insert(('a', 'b', 'c'), 0, 'x') ('x', 'b', 'c') """ L = list(tup) L[loc] = val return tuple(L) def dependency_depth(dsk): import toolz deps, _ = get_deps(dsk) @toolz.memoize def max_depth_by_deps(key): if not deps[key]: return 1 d = 1 + max(max_depth_by_deps(dep_key) for dep_key in deps[key]) return d return max(max_depth_by_deps(dep_key) for dep_key in deps.keys()) def memory_repr(num): for x in ['bytes', 'KB', 'MB', 'GB', 'TB']: if num < 1024.0: return "%3.1f %s" % (num, x) num /= 1024.0 def asciitable(columns, rows): """Formats an ascii table for given columns and rows. Parameters ---------- columns : list The column names rows : list of tuples The rows in the table. Each tuple must be the same length as ``columns``. 
""" rows = [tuple(str(i) for i in r) for r in rows] columns = tuple(str(i) for i in columns) widths = tuple(max(max(map(len, x)), len(c)) for x, c in zip(zip(*rows), columns)) row_template = ('|' + (' %%-%ds |' * len(columns))) % widths header = row_template % tuple(columns) bar = '+%s+' % '+'.join('-' * (w + 2) for w in widths) data = '\n'.join(row_template % r for r in rows) return '\n'.join([bar, header, bar, data, bar]) def put_lines(buf, lines): if any(not isinstance(x, unicode) for x in lines): lines = [unicode(x) for x in lines] buf.write('\n'.join(lines)) _method_cache = {} class methodcaller(object): """ Return a callable object that calls the given method on its operand. Unlike the builtin `operator.methodcaller`, instances of this class are serializable """ __slots__ = ('method',) func = property(lambda self: self.method) # For `funcname` to work def __new__(cls, method): if method in _method_cache: return _method_cache[method] self = object.__new__(cls) self.method = method _method_cache[method] = self return self def __call__(self, obj, *args, **kwargs): return getattr(obj, self.method)(*args, **kwargs) def __reduce__(self): return (methodcaller, (self.method,)) def __str__(self): return "<%s: %s>" % (self.__class__.__name__, self.method) __repr__ = __str__ class itemgetter(object): """ Return a callable object that gets an item from the operand Unlike the builtin `operator.itemgetter`, instances of this class are serializable """ __slots__ = ('index',) def __init__(self, index): self.index = index def __call__(self, x): return x[self.index] def __reduce__(self): return (itemgetter, (self.index,)) def __eq__(self, other): return type(self) is type(other) and self.index == other.index class MethodCache(object): """Attribute access on this object returns a methodcaller for that attribute. Examples -------- >>> a = [1, 3, 3] >>> M.count(a, 3) == a.count(3) True """ __getattr__ = staticmethod(methodcaller) __dir__ = lambda self: list(_method_cache) M = MethodCache() class SerializableLock(object): _locks = WeakValueDictionary() """ A Serializable per-process Lock This wraps a normal ``threading.Lock`` object and satisfies the same interface. However, this lock can also be serialized and sent to different processes. It will not block concurrent operations between processes (for this you should look at ``multiprocessing.Lock`` or ``locket.lock_file`` but will consistently deserialize into the same lock. So if we make a lock in one process:: lock = SerializableLock() And then send it over to another process multiple times:: bytes = pickle.dumps(lock) a = pickle.loads(bytes) b = pickle.loads(bytes) Then the deserialized objects will operate as though they were the same lock, and collide as appropriate. This is useful for consistently protecting resources on a per-process level. The creation of locks is itself not threadsafe. 
""" def __init__(self, token=None): self.token = token or str(uuid.uuid4()) if self.token in SerializableLock._locks: self.lock = SerializableLock._locks[self.token] else: self.lock = Lock() SerializableLock._locks[self.token] = self.lock def acquire(self, *args): return self.lock.acquire(*args) def release(self, *args): return self.lock.release(*args) def __enter__(self): self.lock.__enter__() def __exit__(self, *args): self.lock.__exit__(*args) @property def locked(self): return self.locked def __getstate__(self): return self.token def __setstate__(self, token): self.__init__(token) def __str__(self): return "<%s: %s>" % (self.__class__.__name__, self.token) __repr__ = __str__ def effective_get(get=None, collection=None): """Get the effective get method used in a given situation""" return (get or _globals.get('get') or getattr(collection, '__dask_scheduler__', None)) def get_scheduler_lock(get=None, collection=None): """Get an instance of the appropriate lock for a certain situation based on scheduler used.""" from . import multiprocessing actual_get = effective_get(get, collection) if actual_get == multiprocessing.get: return mp.Manager().Lock() return SerializableLock() def ensure_dict(d): if type(d) is dict: return d elif hasattr(d, 'dicts'): result = {} for dd in d.dicts.values(): result.update(dd) return result return dict(d) class OperatorMethodMixin(object): """A mixin for dynamically implementing operators""" @classmethod def _bind_operator(cls, op): """ bind operator to this class """ name = op.__name__ if name.endswith('_'): # for and_ and or_ name = name[:-1] elif name == 'inv': name = 'invert' meth = '__{0}__'.format(name) if name in ('abs', 'invert', 'neg', 'pos'): bind_method(cls, meth, cls._get_unary_operator(op)) else: bind_method(cls, meth, cls._get_binary_operator(op)) if name in ('eq', 'gt', 'ge', 'lt', 'le', 'ne', 'getitem'): return rmeth = '__r{0}__'.format(name) bind_method(cls, rmeth, cls._get_binary_operator(op, inv=True)) @classmethod def _get_unary_operator(cls, op): """ Must return a method used by unary operator """ raise NotImplementedError @classmethod def _get_binary_operator(cls, op, inv=False): """ Must return a method used by binary operator """ raise NotImplementedError def partial_by_order(*args, **kwargs): """ >>> from operator import add >>> partial_by_order(5, function=add, other=[(1, 10)]) 15 """ function = kwargs.pop('function') other = kwargs.pop('other') args2 = list(args) for i, arg in other: args2.insert(i, arg) return function(*args2, **kwargs) dask-0.16.0/dask/utils_test.py000066400000000000000000000100641320364734500162230ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function def inc(x): return x + 1 def dec(x): return x - 1 def add(x, y): return x + y class GetFunctionTestMixin(object): """ The GetFunctionTestCase class can be imported and used to test foreign implementations of the `get` function specification. It aims to enforce all known expectations of `get` functions. To use the class, inherit from it and override the `get` function. For example: > from dask.utils_test import GetFunctionTestMixin > class TestCustomGet(GetFunctionTestMixin): get = staticmethod(myget) Note that the foreign `myget` function has to be explicitly decorated as a staticmethod. 
""" def test_get(self): d = {':x': 1, ':y': (inc, ':x'), ':z': (add, ':x', ':y')} assert self.get(d, ':x') == 1 assert self.get(d, ':y') == 2 assert self.get(d, ':z') == 3 def test_badkey(self): d = {':x': 1, ':y': (inc, ':x'), ':z': (add, ':x', ':y')} try: result = self.get(d, 'badkey') except KeyError: pass else: msg = 'Expected `{}` with badkey to raise KeyError.\n' msg += "Obtained '{}' instead.".format(result) assert False, msg.format(self.get.__name__) def test_nested_badkey(self): d = {'x': 1, 'y': 2, 'z': (sum, ['x', 'y'])} try: result = self.get(d, [['badkey'], 'y']) except KeyError: pass else: msg = 'Expected `{}` with badkey to raise KeyError.\n' msg += "Obtained '{}' instead.".format(result) assert False, msg.format(self.get.__name__) def test_data_not_in_dict_is_ok(self): d = {'x': 1, 'y': (add, 'x', 10)} assert self.get(d, 'y') == 11 def test_get_with_list(self): d = {'x': 1, 'y': 2, 'z': (sum, ['x', 'y'])} assert self.get(d, ['x', 'y']) == (1, 2) assert self.get(d, 'z') == 3 def test_get_with_list_top_level(self): d = {'a': [1, 2, 3], 'b': 'a', 'c': [1, (inc, 1)], 'd': [(sum, 'a')], 'e': ['a', 'b'], 'f': [[[(sum, 'a'), 'c'], (sum, 'b')], 2]} assert self.get(d, 'a') == [1, 2, 3] assert self.get(d, 'b') == [1, 2, 3] assert self.get(d, 'c') == [1, 2] assert self.get(d, 'd') == [6] assert self.get(d, 'e') == [[1, 2, 3], [1, 2, 3]] assert self.get(d, 'f') == [[[6, [1, 2]], 6], 2] def test_get_with_nested_list(self): d = {'x': 1, 'y': 2, 'z': (sum, ['x', 'y'])} assert self.get(d, [['x'], 'y']) == ((1,), 2) assert self.get(d, 'z') == 3 def test_get_works_with_unhashables_in_values(self): f = lambda x, y: x + len(y) d = {'x': 1, 'y': (f, 'x', set([1]))} assert self.get(d, 'y') == 2 def test_nested_tasks(self): d = {'x': 1, 'y': (inc, 'x'), 'z': (add, (inc, 'x'), 'y')} assert self.get(d, 'z') == 4 def test_get_stack_limit(self): d = dict(('x%d' % (i + 1), (inc, 'x%d' % i)) for i in range(10000)) d['x0'] = 0 assert self.get(d, 'x10000') == 10000 # introduce cycle d['x5000'] = (inc, 'x5001') try: self.get(d, 'x10000') except (RuntimeError, ValueError) as e: if isinstance(e, RuntimeError): assert str(e) == 'Cycle detected in Dask: x5001->x5000->x5001' elif isinstance(e, ValueError): assert str(e).startswith('Found no accessible jobs in dask') else: msg = 'dask with infinite cycle should have raised an exception.' assert False, msg assert self.get(d, 'x4999') == 4999 def test_with_sharedict(self): from .sharedict import ShareDict dsk = ShareDict() dsk.update({'x': 1, 'y': (inc, 'x')}) dsk.update({'z': (add, (inc, 'x'), 'y')}) assert self.get(dsk, 'z') == 4 dask-0.16.0/docs/000077500000000000000000000000001320364734500134575ustar00rootroot00000000000000dask-0.16.0/docs/Makefile000066400000000000000000000126751320364734500151320ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = build # Internal variables. 
PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " text to make text files" @echo " man to make manual pages" @echo " texinfo to make Texinfo files" @echo " info to make Texinfo files and run them through makeinfo" @echo " gettext to make PO message catalogs" @echo " changes to make an overview of all changed/added/deprecated items" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: -rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/dask.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/dask.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/dask" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/dask" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." 
latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." dask-0.16.0/docs/README.rst000066400000000000000000000005361320364734500151520ustar00rootroot00000000000000To build a local copy of the dask docs, install the programs in requirements-docs.txt and run 'make html'. If you use the conda package manager these commands suffice:: git clone git@github.com:dask/dask.git cd dask/docs conda create -n daskdocs --file requirements-docs.txt source activate daskdocs make html open build/html/index.html dask-0.16.0/docs/make.bat000066400000000000000000000117551320364734500150750ustar00rootroot00000000000000@ECHO OFF REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set BUILDDIR=build set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source set I18NSPHINXOPTS=%SPHINXOPTS% source if NOT "%PAPER%" == "" ( set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% ) if "%1" == "" goto help if "%1" == "help" ( :help echo.Please use `make ^` where ^ is one of echo. html to make standalone HTML files echo. dirhtml to make HTML files named index.html in directories echo. singlehtml to make a single large HTML file echo. pickle to make pickle files echo. json to make JSON files echo. htmlhelp to make HTML files and a HTML help project echo. qthelp to make HTML files and a qthelp project echo. devhelp to make HTML files and a Devhelp project echo. epub to make an epub echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter echo. text to make text files echo. man to make manual pages echo. texinfo to make Texinfo files echo. gettext to make PO message catalogs echo. changes to make an overview over all changed/added/deprecated items echo. linkcheck to check all external links for integrity echo. 
doctest to run all doctests embedded in the documentation if enabled goto end ) if "%1" == "clean" ( for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i del /q /s %BUILDDIR%\* goto end ) if "%1" == "html" ( %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/html. goto end ) if "%1" == "dirhtml" ( %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. goto end ) if "%1" == "singlehtml" ( %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. goto end ) if "%1" == "pickle" ( %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the pickle files. goto end ) if "%1" == "json" ( %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the JSON files. goto end ) if "%1" == "htmlhelp" ( %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can run HTML Help Workshop with the ^ .hhp project file in %BUILDDIR%/htmlhelp. goto end ) if "%1" == "qthelp" ( %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can run "qcollectiongenerator" with the ^ .qhcp project file in %BUILDDIR%/qthelp, like this: echo.^> qcollectiongenerator %BUILDDIR%\qthelp\dask.qhcp echo.To view the help file: echo.^> assistant -collectionFile %BUILDDIR%\qthelp\dask.ghc goto end ) if "%1" == "devhelp" ( %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp if errorlevel 1 exit /b 1 echo. echo.Build finished. goto end ) if "%1" == "epub" ( %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub if errorlevel 1 exit /b 1 echo. echo.Build finished. The epub file is in %BUILDDIR%/epub. goto end ) if "%1" == "latex" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex if errorlevel 1 exit /b 1 echo. echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. goto end ) if "%1" == "text" ( %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text if errorlevel 1 exit /b 1 echo. echo.Build finished. The text files are in %BUILDDIR%/text. goto end ) if "%1" == "man" ( %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man if errorlevel 1 exit /b 1 echo. echo.Build finished. The manual pages are in %BUILDDIR%/man. goto end ) if "%1" == "texinfo" ( %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo if errorlevel 1 exit /b 1 echo. echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. goto end ) if "%1" == "gettext" ( %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale if errorlevel 1 exit /b 1 echo. echo.Build finished. The message catalogs are in %BUILDDIR%/locale. goto end ) if "%1" == "changes" ( %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes if errorlevel 1 exit /b 1 echo. echo.The overview file is in %BUILDDIR%/changes. goto end ) if "%1" == "linkcheck" ( %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck if errorlevel 1 exit /b 1 echo. echo.Link check complete; look for any errors in the above output ^ or in %BUILDDIR%/linkcheck/output.txt. goto end ) if "%1" == "doctest" ( %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest if errorlevel 1 exit /b 1 echo. 
echo.Testing of doctests in the sources finished, look at the ^ results in %BUILDDIR%/doctest/output.txt. goto end ) :end dask-0.16.0/docs/release-procedure.md000066400000000000000000000015631320364734500174140ustar00rootroot00000000000000There are a variety of other projects related to dask that are often co-released. We may want to check their status while releasing Release per project: * Update release notes in docs/source/changelog.rst * Tag commit git tag -a x.x.x -m 'Version x.x.x' * and push to github git push dask master --tags * Upload to PyPI git clean -xfd python setup.py sdist bdist_wheel --universal twine upload dist/* * Update conda recipe feedstock on `conda-forge `_. * Update conda-smithy and run conda-smithy rerender git clone git@github.com:conda-forge/dask-feedstock cd dask-feedstock conda install conda-smithy conda-smithy rerender * Get sha256 hash from pypi.org * Update version number and hash in recipe * Check dependencies dask-0.16.0/docs/requirements-docs.txt000066400000000000000000000001231320364734500176650ustar00rootroot00000000000000numpydoc==0.6 sphinx sphinx_rtd_theme toolz cloudpickle pandas>=0.19.0 distributed dask-0.16.0/docs/source/000077500000000000000000000000001320364734500147575ustar00rootroot00000000000000dask-0.16.0/docs/source/_static/000077500000000000000000000000001320364734500164055ustar00rootroot00000000000000dask-0.16.0/docs/source/_static/dask-simple.png000066400000000000000000000254151320364734500213320ustar00rootroot00000000000000
dask-0.16.0/docs/source/_static/stacked_profile.html000066400000000000000000001775121320364734500224460ustar00rootroot00000000000000 Bokeh Plot
dask-0.16.0/docs/source/_static/style.css000066400000000000000000000000761320364734500202620ustar00rootroot00000000000000@import url("theme.css"); a.internal em {font-style: normal} dask-0.16.0/docs/source/_templates/000077500000000000000000000000001320364734500171145ustar00rootroot00000000000000dask-0.16.0/docs/source/_templates/layout.html000066400000000000000000000001251320364734500213150ustar00rootroot00000000000000{% extends "!layout.html" %} {% set css_files = css_files + ["_static/style.css"] %} dask-0.16.0/docs/source/array-api.rst000066400000000000000000000265021320364734500174030ustar00rootroot00000000000000API --- .. currentmodule:: dask.array Top level user functions: .. autosummary:: all allclose angle any apply_along_axis apply_over_axes arange arccos arccosh arcsin arcsinh arctan arctan2 arctanh argmax argmin argwhere around array asanyarray asarray atleast_1d atleast_2d atleast_3d bincount broadcast_to coarsen ceil choose clip compress concatenate conj copysign corrcoef cos cosh count_nonzero cov cumprod cumsum deg2rad degrees diag diff digitize dot dstack ediff1d empty empty_like exp expm1 eye fabs fix flatnonzero floor fmax fmin fmod frexp fromfunction full full_like histogram hstack hypot imag indices insert isclose iscomplex isfinite isinf isnan isnull isreal ldexp linspace log log10 log1p log2 logaddexp logaddexp2 logical_and logical_not logical_or logical_xor map_blocks max maximum mean min minimum modf moment nanargmax nanargmin nancumprod nancumsum nanmax nanmean nanmin nanprod nanstd nansum nanvar nextafter nonzero notnull ones ones_like percentile prod ptp rad2deg radians ravel real rechunk repeat reshape result_type rint roll round sign signbit sin sinh sqrt square squeeze stack std sum take tan tanh tensordot tile topk transpose tril triu trunc unique var vnorm vstack where zeros zeros_like Fast Fourier Transforms ~~~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: fft.fft_wrap fft.fft fft.fft2 fft.fftn fft.ifft fft.ifft2 fft.ifftn fft.rfft fft.rfft2 fft.rfftn fft.irfft fft.irfft2 fft.irfftn fft.hfft fft.ihfft fft.fftfreq fft.rfftfreq fft.fftshift fft.ifftshift Linear Algebra ~~~~~~~~~~~~~~ .. autosummary:: linalg.cholesky linalg.inv linalg.lstsq linalg.lu linalg.norm linalg.qr linalg.solve linalg.solve_triangular linalg.svd linalg.svd_compressed linalg.tsqr Masked Arrays ~~~~~~~~~~~~~ .. autosummary:: ma.filled ma.fix_invalid ma.getdata ma.getmaskarray ma.masked_array ma.masked_equal ma.masked_greater ma.masked_greater_equal ma.masked_inside ma.masked_invalid ma.masked_less ma.masked_less_equal ma.masked_not_equal ma.masked_outside ma.masked_values ma.masked_where ma.set_fill_value Random ~~~~~~ .. autosummary:: random.beta random.binomial random.chisquare random.choice random.exponential random.f random.gamma random.geometric random.gumbel random.hypergeometric random.laplace random.logistic random.lognormal random.logseries random.negative_binomial random.noncentral_chisquare random.noncentral_f random.normal random.pareto random.poisson random.power random.random random.random_sample random.rayleigh random.standard_cauchy random.standard_exponential random.standard_gamma random.standard_normal random.standard_t random.triangular random.uniform random.vonmises random.wald random.weibull random.zipf Stats ~~~~~ .. autosummary:: stats.ttest_ind stats.ttest_1samp stats.ttest_rel stats.chisquare stats.power_divergence stats.skew stats.skewtest stats.kurtosis stats.kurtosistest stats.normaltest stats.f_oneway stats.moment Image Support ~~~~~~~~~~~~~ .. 
autosummary:: image.imread Slightly Overlapping Ghost Computations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: ghost.ghost ghost.map_overlap Create and Store Arrays ~~~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: from_array from_delayed from_npy_stack store to_hdf5 to_npy_stack Internal functions ~~~~~~~~~~~~~~~~~~ .. currentmodule:: dask.array.core .. autosummary:: atop top Other functions ~~~~~~~~~~~~~~~ .. currentmodule:: dask.array .. autofunction:: from_array .. autofunction:: from_delayed .. autofunction:: store .. autofunction:: topk .. autofunction:: coarsen .. autofunction:: stack .. autofunction:: concatenate .. autofunction:: all .. autofunction:: allclose .. autofunction:: angle .. autofunction:: any .. autofunction:: apply_along_axis .. autofunction:: apply_over_axes .. autofunction:: arange .. autofunction:: arccos .. autofunction:: arccosh .. autofunction:: arcsin .. autofunction:: arcsinh .. autofunction:: arctan .. autofunction:: arctan2 .. autofunction:: arctanh .. autofunction:: argmax .. autofunction:: argmin .. autofunction:: argwhere .. autofunction:: around .. autofunction:: array .. autofunction:: asanyarray .. autofunction:: asarray .. autofunction:: atleast_1d .. autofunction:: atleast_2d .. autofunction:: atleast_3d .. autofunction:: bincount .. autofunction:: broadcast_to .. autofunction:: coarsen .. autofunction:: ceil .. autofunction:: choose .. autofunction:: clip .. autofunction:: compress .. autofunction:: concatenate .. autofunction:: conj .. autofunction:: copysign .. autofunction:: corrcoef .. autofunction:: cos .. autofunction:: cosh .. autofunction:: count_nonzero .. autofunction:: cov .. autofunction:: cumprod .. autofunction:: cumsum .. autofunction:: deg2rad .. autofunction:: degrees .. autofunction:: diag .. autofunction:: diff .. autofunction:: digitize .. autofunction:: dot .. autofunction:: dstack .. autofunction:: ediff1d .. autofunction:: empty .. autofunction:: empty_like .. autofunction:: exp .. autofunction:: expm1 .. autofunction:: eye .. autofunction:: fabs .. autofunction:: fix .. autofunction:: flatnonzero .. autofunction:: floor .. autofunction:: fmax .. autofunction:: fmin .. autofunction:: fmod .. autofunction:: frexp .. autofunction:: fromfunction .. autofunction:: full .. autofunction:: full_like .. autofunction:: histogram .. autofunction:: hstack .. autofunction:: hypot .. autofunction:: imag .. autofunction:: indices .. autofunction:: insert .. autofunction:: isclose .. autofunction:: iscomplex .. autofunction:: isfinite .. autofunction:: isinf .. autofunction:: isnan .. autofunction:: isnull .. autofunction:: isreal .. autofunction:: ldexp .. autofunction:: linspace .. autofunction:: log .. autofunction:: log10 .. autofunction:: log1p .. autofunction:: log2 .. autofunction:: logaddexp .. autofunction:: logaddexp2 .. autofunction:: logical_and .. autofunction:: logical_not .. autofunction:: logical_or .. autofunction:: logical_xor .. autofunction:: max .. autofunction:: maximum .. autofunction:: mean .. autofunction:: min .. autofunction:: minimum .. autofunction:: modf .. autofunction:: moment .. autofunction:: nanargmax .. autofunction:: nanargmin .. autofunction:: nancumprod .. autofunction:: nancumsum .. autofunction:: nanmax .. autofunction:: nanmean .. autofunction:: nanmin .. autofunction:: nanprod .. autofunction:: nanstd .. autofunction:: nansum .. autofunction:: nanvar .. autofunction:: nextafter .. autofunction:: nonzero .. autofunction:: notnull .. autofunction:: ones .. autofunction:: ones_like .. 
autofunction:: percentile .. autofunction:: prod .. autofunction:: ptp .. autofunction:: rad2deg .. autofunction:: radians .. autofunction:: ravel .. autofunction:: real .. autofunction:: rechunk .. autofunction:: repeat .. autofunction:: reshape .. autofunction:: result_type .. autofunction:: rint .. autofunction:: roll .. autofunction:: round .. autofunction:: sign .. autofunction:: signbit .. autofunction:: sin .. autofunction:: sinh .. autofunction:: sqrt .. autofunction:: square .. autofunction:: squeeze .. autofunction:: stack .. autofunction:: std .. autofunction:: sum .. autofunction:: take .. autofunction:: tan .. autofunction:: tanh .. autofunction:: tensordot .. autofunction:: tile .. autofunction:: topk .. autofunction:: transpose .. autofunction:: tril .. autofunction:: triu .. autofunction:: trunc .. autofunction:: unique .. autofunction:: var .. autofunction:: vnorm .. autofunction:: vstack .. autofunction:: where .. autofunction:: zeros .. autofunction:: zeros_like .. currentmodule:: dask.array.linalg .. autofunction:: cholesky .. autofunction:: inv .. autofunction:: lstsq .. autofunction:: lu .. autofunction:: norm .. autofunction:: qr .. autofunction:: solve .. autofunction:: solve_triangular .. autofunction:: svd .. autofunction:: svd_compressed .. autofunction:: tsqr .. currentmodule:: dask.array.ma .. autofunction:: filled .. autofunction:: fix_invalid .. autofunction:: getdata .. autofunction:: getmaskarray .. autofunction:: masked_array .. autofunction:: masked_equal .. autofunction:: masked_greater .. autofunction:: masked_greater_equal .. autofunction:: masked_inside .. autofunction:: masked_invalid .. autofunction:: masked_less .. autofunction:: masked_less_equal .. autofunction:: masked_not_equal .. autofunction:: masked_outside .. autofunction:: masked_values .. autofunction:: masked_where .. autofunction:: set_fill_value .. currentmodule:: dask.array.ghost .. autofunction:: ghost .. autofunction:: map_overlap .. currentmodule:: dask.array .. autofunction:: from_array .. autofunction:: from_delayed .. autofunction:: from_npy_stack .. autofunction:: store .. autofunction:: to_hdf5 .. autofunction:: to_npy_stack .. currentmodule:: dask.array.fft .. autofunction:: fft_wrap .. autofunction:: fft .. autofunction:: fft2 .. autofunction:: fftn .. autofunction:: ifft .. autofunction:: ifft2 .. autofunction:: ifftn .. autofunction:: rfft .. autofunction:: rfft2 .. autofunction:: rfftn .. autofunction:: irfft .. autofunction:: irfft2 .. autofunction:: irfftn .. autofunction:: hfft .. autofunction:: ihfft .. autofunction:: fftfreq .. autofunction:: rfftfreq .. autofunction:: fftshift .. autofunction:: ifftshift .. currentmodule:: dask.array.random .. autofunction:: beta .. autofunction:: binomial .. autofunction:: chisquare .. autofunction:: exponential .. autofunction:: f .. autofunction:: gamma .. autofunction:: geometric .. autofunction:: gumbel .. autofunction:: hypergeometric .. autofunction:: laplace .. autofunction:: logistic .. autofunction:: lognormal .. autofunction:: logseries .. autofunction:: negative_binomial .. autofunction:: noncentral_chisquare .. autofunction:: noncentral_f .. autofunction:: normal .. autofunction:: pareto .. autofunction:: poisson .. autofunction:: power .. autofunction:: random .. autofunction:: random_sample .. autofunction:: rayleigh .. autofunction:: standard_cauchy .. autofunction:: standard_exponential .. autofunction:: standard_gamma .. autofunction:: standard_normal .. autofunction:: standard_t .. autofunction:: triangular .. 
autofunction:: uniform .. autofunction:: vonmises .. autofunction:: wald .. autofunction:: weibull .. autofunction:: zipf .. currentmodule:: dask.array.stats .. autofunction:: ttest_ind .. autofunction:: ttest_1samp .. autofunction:: ttest_rel .. autofunction:: chisquare .. autofunction:: power_divergence .. autofunction:: skew .. autofunction:: skewtest .. autofunction:: kurtosis .. autofunction:: kurtosistest .. autofunction:: normaltest .. autofunction:: f_oneway .. autofunction:: moment .. currentmodule:: dask.array.image .. autofunction:: imread .. currentmodule:: dask.array.core .. autofunction:: map_blocks .. autofunction:: atop .. autofunction:: top .. currentmodule:: dask.array Array Methods ~~~~~~~~~~~~~ .. autoclass:: Array :members: dask-0.16.0/docs/source/array-creation.rst000066400000000000000000000243641320364734500204420ustar00rootroot00000000000000Create Dask Arrays ================== We store and manipulate large arrays in a wide variety of ways. There are some standards like HDF5 and NetCDF but just as often people use custom storage solutions. This page talks about how to build dask graphs to interact with your array. In principle we need functions that return NumPy arrays. These functions and their arrangement can be as simple or as complex as the situation dictates. Simple case - Format Supports NumPy Slicing ------------------------------------------- Many storage formats have Python projects that expose storage using NumPy slicing syntax. These include HDF5, NetCDF, BColz, Zarr, GRIB, etc.. For example the ``HDF5`` file format has the ``h5py`` Python project, which provides a ``Dataset`` object into which we can slice in NumPy fashion. .. code-block:: Python >>> import h5py >>> f = h5py.File('myfile.hdf5') # HDF5 file >>> d = f['/data/path'] # Pointer on on-disk array >>> d.shape # d can be very large (1000000, 1000000) >>> x = d[:5, :5] # We slice to get numpy arrays It is common for Python wrappers of on-disk array formats to present a NumPy slicing syntax. The full dataset looks like a NumPy array with ``.shape`` and ``.dtype`` attributes even though the data hasn't yet been loaded in and still lives on disk. Slicing in to this array-like object fetches the appropriate data from disk and returns that region as an in-memory NumPy array. For this common case ``dask.array`` presents the convenience function ``da.from_array`` .. code-block:: Python >>> import dask.array as da >>> x = da.from_array(d, chunks=(1000, 1000)) Concatenation and Stacking -------------------------- Often we store data in several different locations and want to stitch them together. .. code-block:: Python >>> filenames = sorted(glob('2015-*-*.hdf5') >>> dsets = [h5py.File(fn)['/data'] for fn in filenames] >>> arrays = [da.from_array(dset, chunks=(1000, 1000)) for dset in dsets] >>> x = da.concatenate(arrays, axis=0) # Concatenate arrays along first axis For more information see :doc:`concatenation and stacking ` docs. Using ``dask.delayed`` ---------------------- You can create a plan to arrange many numpy arrays into a grid with normal for loops using :doc:`dask.delayed` and then convert each of these Dask.delayed objects into a single-chunk Dask array with ``da.from_delayed``. You can then arrange these single-chunk Dask arrays into a larger multiple-chunk Dask array using :doc:`concatenation and stacking `, as described above. See :doc:`documentation on using dask.delayed with collections`. 
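For illustration only, a minimal sketch of this pattern might look like the
following. Here ``load`` is a hypothetical stand-in for whatever function
reads one tile of data and returns it as a NumPy array of known shape and
dtype.

.. code-block:: python

    import numpy as np
    import dask.array as da
    from dask import delayed

    def load(i, j):
        # Placeholder for real I/O -- must return a NumPy array with the
        # shape and dtype promised to ``da.from_delayed`` below.
        return np.full((1000, 1000), i * 4 + j, dtype='f8')

    # Arrange many lazily loaded, single-chunk dask arrays into a grid
    # using normal for loops.
    rows = []
    for i in range(4):
        row = [da.from_delayed(delayed(load)(i, j), shape=(1000, 1000), dtype='f8')
               for j in range(4)]
        rows.append(da.concatenate(row, axis=1))  # stitch each row along columns

    x = da.concatenate(rows, axis=0)              # a single 4000 x 4000 dask array

Because every chunk is wrapped in ``dask.delayed``, no data is read until the
resulting array is actually computed.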
From Dask.dataframe ------------------- You can create dask arrays from dask dataframes using the ``.values`` attribute or the ``.to_records()`` method. .. code-block:: python >>> x = df.values >>> x = df.to_records() However these arrays do not have known chunk sizes (dask.dataframe does not track the number of rows in each partition) and so some operations like slicing will not operate correctly. Interactions with NumPy arrays ------------------------------ Dask.array operations will automatically convert NumPy arrays into single-chunk dask arrays .. code-block:: python >>> x = da.sum(np.ones(5)) >>> x.compute() 5 When NumPy and Dask arrays interact the result will be a Dask array. Automatic rechunking rules will generally slice the NumPy array into the appropriate Dask chunk shape .. code-block:: python >>> x = da.ones(10, chunks=(5,)) >>> y = np.ones(10) >>> z = x + y >>> z dask.array These interactions work not just for NumPy arrays, but for any object that has shape and dtype attributes and implements NumPy slicing syntax. Chunks ------ We always specify a ``chunks`` argument to tell dask.array how to break up the underlying array into chunks. This strongly impacts performance. We can specify ``chunks`` in one of three ways * a blocksize like ``1000`` * a blockshape like ``(1000, 1000)`` * explicit sizes of all blocks along all dimensions, like ``((1000, 1000, 500), (400, 400))`` Your chunks input will be normalized and stored in the third and most explicit form. For performance, a good choice of ``chunks`` follows the following rules: 1. A chunk should be small enough to fit comfortably in memory. We'll have many chunks in memory at once. 2. A chunk must be large enough so that computations on that chunk take significantly longer than the 1ms overhead per task that dask scheduling incurs. A task should take longer than 100ms. 3. Chunks should align with the computation that you want to do. For example if you plan to frequently slice along a particular dimension then it's more efficient if your chunks are aligned so that you have to touch fewer chunks. If you want to add two arrays then its convenient if those arrays have matching chunks patterns. Unknown Chunks ~~~~~~~~~~~~~~ Some arrays have unknown chunk sizes. These are designated using ``np.nan`` rather than an integer. These arrays support many but not all operations. In particular, operations like slicing are not possible and will result in an error. .. 
code-block:: python >>> x.shape (np.nan, np.nan) >>> x[0] ValueError: Array chunk sizes unknown Chunks Examples ~~~~~~~~~~~~~~~ We show of how different inputs for ``chunks=`` cut up the following array:: 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 We show how different ``chunks=`` arguments split the array into different blocks **chunks=3**: Symmetric blocks of size 3:: 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 **chunks=2**: Symmetric blocks of size 2:: 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 **chunks=(3, 2)**: Asymmetric but repeated blocks of size ``(3, 2)``:: 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 **chunks=(1, 6)**: Asymmetric but repeated blocks of size ``(1, 6)``:: 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 **chunks=((2, 4), (3, 3))**: Asymmetric and non-repeated blocks:: 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 **chunks=((2, 2, 1, 1), (3, 2, 1))**: Asymmetric and non-repeated blocks:: 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 **Discussion** The latter examples are rarely provided by users on original data but arise from complex slicing and broadcasting operations. Generally people use the simplest form until they need more complex forms. The choice of chunks should align with the computations you want to do. For example, if you plan to take out thin slices along the first dimension then you might want to make that dimension skinnier than the others. If you plan to do linear algebra then you might want more symmetric blocks. Store Dask Arrays ================= In Memory --------- If you have a small amount of data, you can call ``np.array`` or ``.compute()`` on your Dask array to turn in to a normal NumPy array: .. code-block:: Python >>> x = da.arange(6, chunks=3) >>> y = x**2 >>> np.array(y) array([0, 1, 4, 9, 16, 25]) >>> y.compute() array([0, 1, 4, 9, 16, 25]) HDF5 ---- Use the ``to_hdf5`` function to store data into HDF5 using ``h5py``: .. code-block:: Python >>> da.to_hdf5('myfile.hdf5', '/y', y) # doctest: +SKIP Store several arrays in one computation with the function ``da.to_hdf5`` by passing in a dict: .. code-block:: Python >>> da.to_hdf5('myfile.hdf5', {'/x': x, '/y': y}) # doctest: +SKIP Other On-Disk Storage --------------------- Alternatively, you can store dask arrays in any object that supports numpy-style slice assignment like ``h5py.Dataset``, or ``bcolz.carray``: .. code-block:: Python >>> import bcolz # doctest: +SKIP >>> out = bcolz.zeros(shape=y.shape, rootdir='myfile.bcolz') # doctest: +SKIP >>> da.store(y, out) # doctest: +SKIP You can store several arrays in one computation by passing lists of sources and destinations: .. code-block:: Python >>> da.store([array1, array2], [output1, output2]) # doctest: +SKIP Plugins ======= We can run arbitrary user-defined functions on dask.arrays whenever they are constructed. This allows us to build a variety of custom behaviors that improve debugging, user warning, etc.. You can register a list of functions to run on all dask.arrays to the global ``array_plugins=`` value: .. code-block:: python >>> def f(x): ... print(x.nbytes) >>> with dask.set_options(array_plugins=[f]): ... x = da.ones((10, 1), chunks=(5, 1)) ... y = x.dot(x.T) 80 80 800 800 If the plugin function returns None then the input Dask.array will be returned without change. 
If the plugin function returns something else then that value will be the result of the constructor. Examples -------- Automatically compute ~~~~~~~~~~~~~~~~~~~~~ We may wish to turn some Dask.array code into normal NumPy code. This is useful for example to track down errors immediately that would otherwise be hidden by Dask's lazy semantics. .. code-block:: python >>> with dask.set_options(array_plugins=[lambda x: x.compute()]): ... x = da.arange(5, chunks=2) >>> x # this was automatically converted into a numpy array array([0, 1, 2, 3, 4]) Warn on large chunks ~~~~~~~~~~~~~~~~~~~~ We may wish to warn users if they are creating chunks that are too large .. code-block:: python def warn_on_large_chunks(x): shapes = list(itertools.product(*x.chunks)) nbytes = [x.dtype.itemsize * np.prod(shape) for shape in shapes] if any(nb > 1e9 for nb in nbytes): warnings.warn("Array contains very large chunks") with dask.set_options(array_plugins=[warn_on_large_chunks]): ... Combine ~~~~~~~ You can also combine these plugins into a list. They will run one after the other, chaining results through them. .. code-block:: python with dask.set_options(array_plugins=[warn_on_large_chunks, lambda x: x.compute()]): ... dask-0.16.0/docs/source/array-design.rst000066400000000000000000000106521320364734500201020ustar00rootroot00000000000000Internal Design =============== Overview -------- .. image:: images/array.png :width: 40 % :align: right :alt: A dask array Dask arrays define a large array with a grid of blocks of smaller arrays. These arrays may be concrete, or functions that produce arrays. We define a Dask array with the following components * A Dask graph with a special set of keys designating blocks such as ``('x', 0, 0), ('x', 0, 1), ...`` (See :doc:`Dask graph documentation ` for more details.) * A sequence of chunk sizes along each dimension called ``chunks``, for example ``((5, 5, 5, 5), (8, 8, 8))`` * A name to identify which keys in the dask graph refer to this array, like ``'x'`` * A NumPy dtype Example ~~~~~~~ .. code-block:: python >>> import dask.array as da >>> x = da.arange(0, 15, chunks=(5,)) >>> x.name 'arange-539766a' >>> x.dask # somewhat simplified {('arange-539766a', 0): (np.arange, 0, 5), ('arange-539766a', 1): (np.arange, 5, 10), ('arange-539766a', 2): (np.arange, 10, 15)} >>> x.chunks ((5, 5, 5),) >>> x.dtype dtype('int64') Keys of the Dask graph ---------------------- By special convention we refer to each block of the array with a tuple of the form ``(name, i, j, k)`` for ``i, j, k`` being the indices of the block, ranging from ``0`` to the number of blocks in that dimension. The dask graph must hold key-value pairs referring to these keys. It likely also holds other key-value pairs required to eventually compute the desired values, for example .. code-block:: python { ('x', 0, 0): (add, 1, ('y', 0, 0)), ('x', 0, 1): (add, 1, ('y', 0, 1)), ... ('y', 0, 0): (getitem, dataset, (slice(0, 1000), slice(0, 1000))), ('y', 0, 1): (getitem, dataset, (slice(0, 1000), slice(1000, 2000))) ... } The name of an ``Array`` object can be found in the ``name`` attribute. One can get a nested list of keys with the ``._keys()`` method. One can flatten down this list with ``dask.array.core.flatten()``; this is sometimes useful when building new dictionaries. Chunks ------ We also store the size of each block along each axis. This is a tuple of tuples such that the length of the outer tuple is equal to the dimension and the lengths of the inner tuples are equal to the number of blocks along each dimension. 
In the example illustrated above this value is as follows:: chunks = ((5, 5, 5, 5), (8, 8, 8)) Note that these numbers do not necessarily need to be regular. We often create regularly sized grids but blocks change shape after complex slicing. Beware that some operations do expect certain symmetries in the block-shapes. For example matrix multiplication requires that blocks on each side have anti-symmetric shapes. Some ways in which ``chunks`` reflects properties of our array 1. ``len(x.chunks) == x.ndim``: The length of chunks is the number of dimensions 2. ``tuple(map(sum, x.chunks)) == x.shape``: The sum of each internal chunk, is the length of that dimension. 3. The length of each internal chunk is the number of keys in that dimension. For instance, for ``chunks == ((a, b), (d, e, f))`` and name == ``'x'`` our array has tasks with the following keys:: ('x', 0, 0), ('x', 0, 1), ('x', 0, 2) ('x', 1, 0), ('x', 1, 1), ('x', 1, 2) Create an Array Object ---------------------- So to create an ``da.Array`` object we need a dictionary with these special keys :: dsk = {('x', 0, 0): ...} a name specifying to which keys this array refers :: name = 'x' and a chunks tuple:: chunks = ((5, 5, 5, 5), (8, 8, 8)) Then one can construct an array:: x = da.Array(dsk, name, chunks) So ``dask.array`` operations update dask graphs, update dtypes, and track chunk shapes. Example - ``eye`` function -------------------------- As an example lets build the ``np.eye`` function for ``dask.array`` to make the identity matrix .. code-block:: python def eye(n, blocksize): chunks = ((blocksize,) * (n // blocksize), (blocksize,) * (n // blocksize)) name = 'eye' + next(tokens) # unique identifier dsk = {(name, i, j): (np.eye, blocksize) if i == j else (np.zeros, (blocksize, blocksize)) for i in range(n // blocksize) for j in range(n // blocksize)} dtype = np.eye(0).dtype # take dtype default from numpy return dask.array.Array(dsk, name, chunks, dtype) dask-0.16.0/docs/source/array-ghost.rst000066400000000000000000000126711320364734500177600ustar00rootroot00000000000000Overlapping Blocks with Ghost Cells =================================== Some array operations require communication of borders between neighboring blocks. Example operations include the following: * Convolve a filter across an image * Sliding sum/mean/max, ... * Search for image motifs like a Gaussian blob that might span the border of a block * Evaluate a partial derivative * Play the game of Life_ Dask array supports these operations by creating a new array where each block is slightly expanded by the borders of its neighbors. This costs an excess copy and the communication of many small chunks but allows localized functions to evaluate in an embarrassing manner. We call this process *ghosting*. Ghosting -------- Consider two neighboring blocks in a Dask array. .. image:: images/unghosted-neighbors.png :width: 30% :alt: un-ghosted neighbors We extend each block by trading thin nearby slices between arrays .. image:: images/ghosted-neighbors.png :width: 30% :alt: ghosted neighbors We do this in all directions, including also diagonal interactions with the ghost function: .. image:: images/ghosted-blocks.png :width: 40% :alt: ghosted blocks .. code-block:: python >>> import dask.array as da >>> import numpy as np >>> x = np.arange(64).reshape((8, 8)) >>> d = da.from_array(x, chunks=(4, 4)) >>> d.chunks ((4, 4), (4, 4)) >>> g = da.ghost.ghost(d, depth={0: 2, 1: 1}, ... 
boundary={0: 100, 1: 'reflect'}) >>> g.chunks ((8, 8), (6, 6)) >>> np.array(g) array([[100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100], [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100], [ 0, 0, 1, 2, 3, 4, 3, 4, 5, 6, 7, 7], [ 8, 8, 9, 10, 11, 12, 11, 12, 13, 14, 15, 15], [ 16, 16, 17, 18, 19, 20, 19, 20, 21, 22, 23, 23], [ 24, 24, 25, 26, 27, 28, 27, 28, 29, 30, 31, 31], [ 32, 32, 33, 34, 35, 36, 35, 36, 37, 38, 39, 39], [ 40, 40, 41, 42, 43, 44, 43, 44, 45, 46, 47, 47], [ 16, 16, 17, 18, 19, 20, 19, 20, 21, 22, 23, 23], [ 24, 24, 25, 26, 27, 28, 27, 28, 29, 30, 31, 31], [ 32, 32, 33, 34, 35, 36, 35, 36, 37, 38, 39, 39], [ 40, 40, 41, 42, 43, 44, 43, 44, 45, 46, 47, 47], [ 48, 48, 49, 50, 51, 52, 51, 52, 53, 54, 55, 55], [ 56, 56, 57, 58, 59, 60, 59, 60, 61, 62, 63, 63], [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100], [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100]]) Boundaries ---------- While ghosting you can specify how to handle the boundaries. Current policies include the following: * ``periodic`` - wrap borders around to the other side * ``reflect`` - reflect each border outwards * ``any-constant`` - pad the border with this value So an example boundary kind argument might look like the following .. code-block:: python {0: 'periodic', 1: 'reflect', 2: np.nan} Alternatively you can use functions like ``da.fromfunction`` and ``da.concatenate`` to pad arbitrarily. Map a function across blocks ---------------------------- Ghosting goes hand-in-hand with mapping a function across blocks. This function can now use the additional information copied over from the neighbors that is not stored locally in each block .. code-block:: python >>> from scipy.ndimage.filters import gaussian_filter >>> def func(block): ... return gaussian_filter(block, sigma=1) >>> filt = g.map_blocks(func) While in this case we used a SciPy function above this could have been any arbitrary function. This is a good interaction point with Numba_. If your function does not preserve the shape of the block then you will need to provide a ``chunks`` keyword argument. If your block sizes are regular then this can be a blockshape, such as ``(1000, 1000)`` or if your blocks are irregular then this must be a full chunks tuple, for example ``((1000, 700, 1000), (200, 300))``. .. code-block:: python >>> g.map_blocks(myfunc, chunks=(5, 5)) If your function needs to know the location of the block on which it operates you can give your function a keyword argument ``block_id`` .. code-block:: python def func(block, block_id=None): ... This extra keyword argument will be given a tuple that provides the block location like ``(0, 0)`` for the upper right block or ``(0, 1)`` for the block just to the right of that block. Trim Excess ----------- After mapping a blocked function you may want to trim off the borders from each block by the same amount by which they were expanded. The function ``trim_internal`` is useful here and takes the same ``depth`` argument given to ``ghost``. .. code-block:: python >>> x.chunks ((10, 10, 10, 10), (10, 10, 10, 10)) >>> y = da.ghost.trim_internal(x, {0: 2, 1: 1}) >>> y.chunks ((6, 6, 6, 6), (8, 8, 8, 8)) Full Workflow ------------- And so a pretty typical ghosting workflow includes ``ghost``, ``map_blocks``, and ``trim_internal`` .. code-block:: python >>> x = ... >>> g = da.ghost.ghost(x, depth={0: 2, 1: 2}, ... boundary={0: 'periodic', 1: 'periodic'}) >>> g2 = g.map_blocks(myfunc) >>> result = da.ghost.trim_internal(g2, {0: 2, 1: 2}) .. 
_Life: http://en.wikipedia.org/wiki/Conway%27s_Game_of_Life .. _Numba: http://numba.pydata.org/ dask-0.16.0/docs/source/array-linear-operator.rst000066400000000000000000000013401320364734500217260ustar00rootroot00000000000000LinearOperator ============== Dask arrays implement the SciPy LinearOperator_ interface and so can be used with any SciPy algorithm depending on that interface. Example ------- .. code-block:: python import dask.array as da x = da.random.random(size=(10000, 10000), chunks=(1000, 1000)) from scipy.sparse.linalg.interface import MatrixLinearOperator A = MatrixLinearOperator(x) import numpy as np b = np.random.random(10000) from scipy.sparse.linalg import gmres x = gmres(A, b) *Disclaimer: This is just a toy example and not necessarily the best way to solve this problem for this data.* .. _LinearOperator: http://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.LinearOperator.html dask-0.16.0/docs/source/array-overview.rst000066400000000000000000000057731320364734500205070ustar00rootroot00000000000000Overview ======== Dask Array implements a subset of the NumPy ndarray interface using blocked algorithms, cutting up the large array into many small arrays. This lets us compute on arrays larger than memory using all of our cores. We coordinate these blocked algorithms using dask graphs. Design ------ .. image:: images/dask-array-black-text.svg :alt: Dask arrays coordinate many numpy arrays :align: right Dask arrays coordinate many NumPy arrays arranged into a grid. These NumPy arrays may live on disk or on other machines. Common Uses ----------- Today Dask array is commonly used in the sort of gridded data analysis that arises in weather, climate modeling, or oceanography, especially when data sizes become inconveniently large. Dask array complements large on-disk array stores like HDF5, NetCDF, and BColz. Additionally Dask array is commonly used to speed up expensive in-memory computations using multiple cores, such as you might find in image analysis or statistical and machine learning applications. Scope ----- The ``dask.array`` library supports the following interface from ``numpy``: * Arithmetic and scalar mathematics, ``+, *, exp, log, ...`` * Reductions along axes, ``sum(), mean(), std(), sum(axis=0), ...`` * Tensor contractions / dot products / matrix multiply, ``tensordot`` * Axis reordering / transpose, ``transpose`` * Slicing, ``x[:100, 500:100:-2]`` * Fancy indexing along single axes with lists or numpy arrays, ``x[:, [10, 1, 5]]`` * The array protocol ``__array__`` * Some linear algebra ``svd, qr, solve, solve_triangular, lstsq`` See :doc:`the dask.array API` for a more extensive list of functionality. Execution --------- By default Dask array uses the threaded scheduler in order to avoid data transfer costs and because NumPy releases the GIL well. It is also quite effective on a cluster using the `dask.distributed`_ scheduler. .. _`dask.distributed`: https://distributed.readthedocs.io/en/latest/ Limitations ----------- Dask array does not implement the entire numpy interface. Users expecting this will be disappointed. Notably, Dask array has the following limitations: 1. Dask array does not implement all of ``np.linalg``. This has been done by a number of excellent BLAS/LAPACK implementations, and is the focus of numerous ongoing academic research projects. 2. Dask array with unknown shapes do not support all operations 3. 
Dask array does not attempt operations like ``sort`` which are notoriously difficult to do in parallel, and are of somewhat diminished value on very large data (you rarely actually need a full sort). Often we include parallel-friendly alternatives like ``topk``. 4. Dask array doesn't implement operations like ``tolist`` that would be very inefficient for larger datasets. Likewise it is very inefficient to iterate over a Dask array with for loops. 5. Dask development is driven by immediate need, and so many lesser used functions have not been implemented. Community contributions are encouraged. dask-0.16.0/docs/source/array-slicing.rst000066400000000000000000000027501320364734500202610ustar00rootroot00000000000000Slicing ======= Dask array supports most of the NumPy slicing syntax. In particular it supports the following: * Slicing by integers and slices ``x[0, :5]`` * Slicing by lists/arrays of integers ``x[[1, 2, 4]]`` * Slicing by lists/arrays of booleans ``x[[False, True, True, False, True]]`` It does not currently support the following: * Slicing one ``dask.array`` with another ``x[x > 0]`` * Slicing with lists in multiple axes ``x[[1, 2, 3], [3, 2, 1]]`` Both of these are straightforward to add though. If you have a use case then raise an issue. Efficiency ---------- The normal dask schedulers are smart enough to compute only those blocks that are necessary to achieve the desired slicing. So large operations may be cheap if only a small output is desired. In the example below we create a trillion element Dask array in million element blocks. We then operate on the entire array and finally slice out only a portion of the output. .. code-block:: python >>> Trillion element array of ones, in 1000 by 1000 blocks >>> x = da.ones((1000000, 1000000), chunks=(1000, 1000)) >>> da.exp(x)[:1500, :1500] ... This only needs to compute the top-left four blocks to achieve the result. We are still slightly wasteful on those blocks where we need only partial results. We are also a bit wasteful in that we still need to manipulate the dask-graph with a million or so tasks in it. This can cause an interactive overhead of a second or two. But generally, slicing works well. dask-0.16.0/docs/source/array-sparse.rst000066400000000000000000000062471320364734500201330ustar00rootroot00000000000000Sparse Arrays ============= By swapping out in-memory numpy arrays with in-memory sparse arrays we can reuse the blocked algorithms of Dask.array to achieve parallel and distributed sparse arrays. The blocked algorithms in Dask.array normally parallelize around in-memory numpy arrays. However, if another in-memory array library supports the NumPy interface then it too can take advantage of dask.array's parallel algorithms. In particular the `sparse `_ array library satisfies a subset of the NumPy API and works well with, and is tested against, Dask.array. Example ------- Say we have a dask.array with mostly zeros .. code-block:: python x = da.random.random((100000, 100000), chunks=(1000, 1000)) x[x < 0.95] = 0 We can convert each of these chunks of NumPy arrays into a sparse.COO array. .. code-block:: python import sparse s = x.map_blocks(sparse.COO) Now our array is composed not of many NumPy arrays, but rather of many sparse arrays. Semantically this does not change anything. Operations that work will work identically (assuming that the behavior of ``numpy`` and ``sparse`` are identical) but performance characteristics and storage costs may change significantly .. 
code-block:: python >>> s.sum(axis=0)[:100].compute() >>> _.todense() array([ 4803.06859272, 4913.94964525, 4877.13266438, 4860.7470773 , 4938.94446802, 4849.51326473, 4858.83977856, 4847.81468485, ... ]) Requirements ------------ Any in-memory library that copies the NumPy ndarray interface should work here. The `sparse `_ library is a minimal example. In particular an in-memory library should implement at least the following operations: 1. Simple slicing with slices, lists, and elements (for slicing, rechunking, reshaping, etc). 2. A ``concatenate`` function matching the interface of ``np.concatenate``. This must be registered in ``dask.array.core.concatenate_lookup``. 3. All ufuncs must support the full ufunc interface, including ``dtype=`` and ``out=`` parameters (even if they don't function properly) 4. All reductions must support the full ``axis=`` and ``keepdims=`` keywords and behave like numpy in this respect 5. The array class should follow the ``__array_priority__`` protocol and be prepared to respond to other arrays of lower priority. 6. If ``dot`` support is desired, a ``tensordot`` function matching the interface of ``np.tensordot`` should be registered in ``dask.array.core.tensordot_lookup``. The implementation of other operations like reshape, transpose, etc. should follow standard NumPy conventions regarding shape and dtype. Not implementing these is fine; the parallel ``dask.array`` will err at runtime if these operations are attempted. Mixed Arrays ------------ Dask.array supports mixing different kinds of in-memory arrays. This relies on the in-memory arrays knowing how to interact with each other when necessary. When two arrays interact the functions from the array with the highest ``__array_priority__`` will take precedence (for example for concatenate, tensordot, etc.). dask-0.16.0/docs/source/array-stack.rst000066400000000000000000000025261320364734500177370ustar00rootroot00000000000000Stack and Concatenate ===================== Often we have many arrays stored on disk that we want to stack together and think of as one large array. This is common with geospatial data in which we might have many HDF5/NetCDF files on disk, one for every day, but we want to do operations that span multiple days. To solve this problem we use the functions ``da.stack`` and ``da.concatenate``. Stack ----- We stack many existing Dask arrays into a new array, creating a new dimension as we go. .. code-block:: python >>> import dask.array as da >>> data = [da.from_array(np.ones((4, 4)), chunks=(2, 2)) ... for i in range(3)] # A small stack of dask arrays >>> x = da.stack(data, axis=0) >>> x.shape (3, 4, 4) >>> da.stack(data, axis=1).shape (4, 3, 4) >>> da.stack(data, axis=-1).shape (4, 4, 3) This creates a new dimension with length equal to the number of slices Concatenate ----------- We concatenate existing arrays into a new array, extending them along an existing dimension .. code-block:: python >>> import dask.array as da >>> import numpy as np >>> data = [da.from_array(np.ones((4, 4)), chunks=(2, 2)) ... for i in range(3)] # small stack of dask arrays >>> x = da.concatenate(data, axis=0) >>> x.shape (12, 4) >>> da.concatenate(data, axis=1).shape (4, 12) dask-0.16.0/docs/source/array-stats.rst000066400000000000000000000020651320364734500177660ustar00rootroot00000000000000Stats ===== Dask Array implements a subset of the `scipy.stats`_ package. Statistical Functions --------------------- You can calculate various measures of an array including skewnes, kurtosis, and arbitrary moments. .. 
code-block:: python >>> from dask.array import stats >>> x = da.random.beta(1, 1, size=(1000,), chunks=10) >>> k, s, m = [stats.kurtosis(x), stats.skew(x), stats.moment(x, 5)] >>> dask.compute(k, s, m) (1.7612340817172787, -0.064073498030693302, -0.00054523780628304799) Statistical Tests ----------------- You can perform basic statistical tests on dask arrays. Each of these tests return a ``dask.delayed`` wrapping one of the scipy ``namedtuple`` results. .. code-block:: python >>> a = da.random.uniform(size=(50,), chunks=(25,)) >>> b = a + da.random.uniform(low=-0.15, high=0.15, size=(50,), chunks=(25,)) >>> result = ttest_rel(a, b) >>> result.compute() Ttest_relResult(statistic=-1.5102104380013242, pvalue=0.13741197274874514) .. _scipy.stats: https://docs.scipy.org/doc/scipy-0.19.0/reference/stats.html dask-0.16.0/docs/source/array.rst000066400000000000000000000006351320364734500166330ustar00rootroot00000000000000Array ===== Dask arrays implement a subset of the NumPy interface on large arrays using blocked algorithms and task scheduling. .. toctree:: :maxdepth: 1 array-overview.rst array-creation.rst array-api.rst Other topics .. toctree:: :maxdepth: 1 array-slicing.rst array-stack.rst array-ghost.rst array-design.rst array-linear-operator.rst array-sparse.rst array-stats.rst dask-0.16.0/docs/source/bag-api.rst000066400000000000000000000024031320364734500170100ustar00rootroot00000000000000API === .. currentmodule:: dask.bag Top level user functions: .. autosummary:: Bag Bag.all Bag.any Bag.compute Bag.count Bag.distinct Bag.filter Bag.flatten Bag.fold Bag.foldby Bag.frequencies Bag.groupby Bag.join Bag.map Bag.map_partitions Bag.max Bag.mean Bag.min Bag.pluck Bag.product Bag.reduction Bag.random_sample Bag.remove Bag.repartition Bag.starmap Bag.std Bag.sum Bag.take Bag.to_dataframe Bag.to_delayed Bag.to_textfiles Bag.topk Bag.var Bag.visualize Create Bags ----------- .. autosummary:: from_sequence from_delayed read_text from_url range Top-level functions ------------------- .. autosummary:: concat map map_partitions zip Turn Bags into other things --------------------------- .. autosummary:: Bag.to_textfiles Bag.to_dataframe Bag.to_delayed Bag methods ----------- .. autoclass:: Bag :members: Other functions --------------- .. autofunction:: from_sequence .. autofunction:: from_delayed .. autofunction:: read_text .. autofunction:: from_url .. autofunction:: range .. autofunction:: concat .. autofunction:: map_partitions .. autofunction:: map .. autofunction:: zip dask-0.16.0/docs/source/bag-creation.rst000066400000000000000000000061651320364734500200540ustar00rootroot00000000000000Create Dask Bags ================ There are several ways to create Dask.bags around your data: ``db.from_sequence`` -------------------- You can create a bag from an existing Python iterable: .. code-block:: python >>> import dask.bag as db >>> b = db.from_sequence([1, 2, 3, 4, 5, 6]) You can control the number of partitions into which this data is binned: .. code-block:: python >>> b = db.from_sequence([1, 2, 3, 4, 5, 6], npartitions=2) This controls the granularity of the parallelism that you expose. By default dask will try to partition your data into about 100 partitions. IMPORTANT: do not load your data into Python and then load that data into dask.bag. Instead, use dask.bag to load your data. This parallelizes the loading step and reduces inter-worker communication: .. 
code-block:: python >>> b = db.from_sequence(['1.dat', '2.dat', ...]).map(load_from_filename) ``db.read_text`` ---------------- Dask.bag can load data directly from textfiles. You can pass either a single filename, a list of filenames, or a globstring. The resulting bag will have one item per line, one file per partition: .. code-block:: python >>> b = db.read_text('myfile.txt') >>> b = db.read_text(['myfile.1.txt', 'myfile.2.txt', ...]) >>> b = db.read_text('myfile.*.txt') This handles standard compression libraries like ``gzip``, ``bz2``, ``xz``, or any easily installed compression library that has a File-like object. Compression will be inferred by filename extension, or by using the ``compression='gzip'`` keyword: .. code-block:: python >>> b = db.read_text('myfile.*.txt.gz') The resulting items in the bag are strings. If you have encoded data like line-delimited JSON then you may want to map a decoding or load function across the bag: .. code-block:: python >>> import json >>> b = db.read_text('myfile.*.json').map(json.loads) Or do string munging tasks. For convenience there is a string namespace attached directly to bags with ``.str.methodname``: .. code-block:: python >>> b = db.read_text('myfile.*.csv').str.strip().str.split(',') ``db.from_delayed`` ------------------- You can construct a dask bag from :doc:`dask.delayed ` values using the ``db.from_delayed`` function. See :doc:`documentation on using dask.delayed with collections ` for more information. Store Dask Bags =============== In Memory --------- You can convert a dask bag to a list or Python iterable by calling ``compute()`` or by converting the object into a list .. code-block:: python >>> result = b.compute() or >>> result = list(b) To Textfiles ------------ You can convert a dask bag into a sequence of files on disk by calling the ``.to_textfiles()`` method .. autofunction:: dask.bag.core.to_textfiles To DataFrames ------------- You can convert a dask bag into a :doc:`dask dataframe` and use those storage solutions. .. automethod:: dask.bag.core.Bag.to_dataframe To Delayed Values ----------------- You can convert a dask bag into a list of :doc:`dask delayed values` and custom storage solutions from there. .. automethod:: dask.bag.core.Bag.to_delayed dask-0.16.0/docs/source/bag-overview.rst000066400000000000000000000067311320364734500201150ustar00rootroot00000000000000Overview ======== Dask.Bag implements a operations like ``map``, ``filter``, ``fold``, and ``groupby`` on collections of Python objects. It does this in parallel and in small memory using Python iterators. It is similar to a parallel version of PyToolz_ or a Pythonic version of the `PySpark RDD`_. .. _PyToolz: https://toolz.readthedocs.io/en/latest/ .. _`PySpark RDD`: http://spark.apache.org/docs/latest/api/python/pyspark.html Design ------ Dask bags coordinate many Python lists or Iterators, each of which forms a partition of a larger collection. Common Uses ----------- Dask bags are often used to parallelize simple computations on unstructured or semi-structured data like text data, log files, JSON records, or user defined Python objects. Execution --------- Execution on bags provide two benefits: 1. Parallel: data is split up, allowing multiple cores or machines to execute in parallel. 2. Iterating: data processes lazily, allowing smooth execution of larger-than-memory data, even on a single machine within a single partition Default scheduler ~~~~~~~~~~~~~~~~~ By default ``dask.bag`` uses ``dask.multiprocessing`` for computation. 
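For illustration, here is a minimal sketch of choosing a scheduler per call. It assumes the ``compute(get=...)`` keyword accepted by this version of Dask and uses made-up example data:

.. code-block:: python

   import dask
   import dask.threaded
   import dask.bag as db

   b = db.from_sequence(range(1000), npartitions=8)

   b.sum().compute()                        # multiprocessing scheduler by default
   b.sum().compute(get=dask.threaded.get)   # opt into the threaded scheduler for this call
   b.sum().compute(get=dask.get)            # single-threaded scheduler, handy for debugging

Passing ``get=`` only affects that one call; a default can also be set with ``dask.set_options(get=...)``.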
As a benefit Dask bypasses the GIL_ and uses multiple cores on Pure Python objects. As a drawback Dask.bag doesn't perform well on computations that include a great deal of inter-worker communication. For common operations this is rarely an issue as most Dask.bag workflows are embarrassingly parallel or result in reductions with little data moving between workers. .. _GIL: https://docs.python.org/3/glossary.html#term-gil Shuffle ~~~~~~~ Some operations, like ``groupby``, require substantial inter-worker communication. On a single machine, dask uses partd_ to perform efficient, parallel, spill-to-disk shuffles. When working in a cluster, dask uses a task based shuffle. These shuffle operations are expensive and better handled by projects like ``dask.dataframe``. It is best to use ``dask.bag`` to clean and process data, then transform it into an array or dataframe before embarking on the more complex operations that require shuffle steps. .. _partd: https://github.com/mrocklin/partd Known Limitations ----------------- Bags provide very general computation (any Python function.) This generality comes at cost. Bags have the following known limitations: 1. By default they rely on the multiprocessing scheduler, which has its own set of known limitations (see :doc:`shared`) 2. Bags are immutable and so you can not change individual elements 3. Bag operations tend to be slower than array/dataframe computations in the same way that standard Python containers tend to be slower than NumPy arrays and Pandas dataframes. 4. ``Bag.groupby`` is slow. You should try to use ``Bag.foldby`` if possible. Using ``Bag.foldby`` requires more thought. Name ---- *Bag* is the mathematical name for an unordered collection allowing repeats. It is a friendly synonym to multiset_. A bag or a multiset is a generalization of the concept of a set that, unlike a set, allows multiple instances of the multiset's elements. * ``list``: *ordered* collection *with repeats*, ``[1, 2, 3, 2]`` * ``set``: *unordered* collection *without repeats*, ``{1, 2, 3}`` * ``bag``: *unordered* collection *with repeats*, ``{1, 2, 2, 3}`` So a bag is like a list, but it doesn't guarantee an ordering among elements. There can be repeated elements but you can't ask for the ith element. .. _multiset: http://en.wikipedia.org/wiki/Bag_(mathematics) dask-0.16.0/docs/source/bag.rst000066400000000000000000000004521320364734500162430ustar00rootroot00000000000000Bag === Dask.Bag parallelizes computations across a large collection of generic Python objects. It is particularly useful when dealing with large quantities of semi-structured data like JSON blobs or log files. .. toctree:: :maxdepth: 1 bag-overview.rst bag-creation.rst bag-api.rst dask-0.16.0/docs/source/bytes.rst000066400000000000000000000056621320364734500166500ustar00rootroot00000000000000Internal Data Ingestion ======================= Dask contains internal tools for extensible data ingestion in the ``dask.bytes`` package. *These functions are developer-focused rather than for direct consumption by users. These functions power user facing functions like ``dd.read_csv`` and ``db.read_text`` which are probably more useful for most users.* .. currentmodule:: dask.bytes .. autosummary:: read_bytes open_files open_text_files These functions are extensible in their output formats (bytes, file objects), their input locations (file system, S3, HDFS), line delimiters, and compression formats. These functions provide data as ``dask.delayed`` objects. 
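As a rough sketch of the calling convention (the file paths below are hypothetical):

.. code-block:: python

   from dask.bytes import read_bytes

   # Hypothetical local CSV files; a path like 's3://bucket/2015-*.csv' would
   # dispatch to the S3 backend instead
   sample, blocks = read_bytes('data/2015-*-*.csv', delimiter=b'\n')

   # ``sample`` is a small bytestring from the start of the first file, while
   # ``blocks`` holds one list per file of dask.delayed values, each producing
   # a block of bytes that starts and ends on the b'\n' delimiter
   first_block = blocks[0][0].compute()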
These objects either point to blocks of bytes (``read_bytes``) or open file objects (``open_files``, ``open_text_files``). They can handle different compression formats by prepending protocols like ``s3://`` or ``hdfs://``. They handle compression formats listed in the ``dask.bytes.compression`` module. These functions are not used for all data sources. Some data sources like HDF5 are quite particular and receive custom treatment. Delimiters ---------- The ``read_bytes`` function takes a path (or globstring of paths) and produces a sample of the first file and a list of delayed objects for each of the other files. If passed a delimiter such as ``delimiter=b'\n'`` it will ensure that the blocks of bytes start directly after a delimiter and end directly before a delimiter. This allows other functions, like ``pd.read_csv``, to operate on these delayed values with expected behavior. These delimiters are useful both for typical line-based formats (log files, CSV, JSON) as well as other delimited formats like Avro, which may separate logical chunks by a complex sentinel string. Locations --------- These functions dispatch to other functions that handle different storage backends, like S3 and HDFS. These storage backends register themselves with protocols and so are called whenever the path is prepended with a string like the following:: s3://bucket/keys-*.csv The various back-ends accept optional extra keywords, detailing authentication and other parameters, see :doc:`remote data services ` Compression ----------- These functions support widely available compression technologies like ``gzip``, ``bz2``, ``xz``, ``snappy``, and ``lz4``. More compressions can be easily added by inserting functions into dictionaries available in the ``dask.bytes.compression`` module. This can be done at runtime and need not be added directly to the codebase. However, not all compression technologies are available for all functions. In particular, compression technologies like ``gzip`` do not support efficient random access and so are useful for streaming ``open_files`` but not useful for ``read_bytes`` which splits files at various points. Functions --------- .. autofunction:: read_bytes .. autofunction:: open_files .. autofunction:: open_text_files dask-0.16.0/docs/source/caching.rst000066400000000000000000000130151320364734500171050ustar00rootroot00000000000000Opportunistic Caching ===================== EXPERIMENTAL FEATURE added to Version 0.6.2 and above - see :ref:`disclaimer`. Dask usually removes intermediate values as quickly as possible in order to make space for more data to flow through your computation. However, in some cases, we may want to hold onto intermediate values, because they might be useful for future computations in an interactive session. We need to balance the following concerns: 1. Intermediate results might be useful in future unknown computations 2. Intermediate results also fill up memory, reducing space for the rest of our current computation. Negotiating between these two concerns helps us to leverage the memory that we have available to speed up future, unanticipated computations. Which intermediate results should we keep? This document explains an experimental, opportunistic caching mechanism that automatically picks out and stores useful tasks. Motivating Example ------------------ Consider computing the maximum value of a column in a CSV file: .. 
code-block:: python >>> import dask.dataframe as dd >>> df = dd.read_csv('myfile.csv') >>> df.columns ['first-name', 'last-name', 'amount', 'id', 'timestamp'] >>> df.amount.max().compute() 1000 Even though our full dataset may be too large to fit in memory, the single ``df.amount`` column may be small enough to hold in memory just in case it might be useful in the future. This is often the case during data exploration, because we investigate the same subset of our data repeatedly before moving on. For example, we may now want to find the minimum of the amount column: .. code-block:: python >>> df.amount.min().compute() -1000 Under normal operations, this would need to read through the entire CSV file over again. This is somewhat wasteful, and stymies interactive data exploration. Two Simple Solutions -------------------- If we know ahead of time that we want both the maximum and minimum, we can compute them simultaneously. Dask will share intermediates intelligently, reading through the dataset only once: .. code-block:: python >>> dd.compute(df.amount.max(), df.amount.min()) (1000, -1000) If we know that this column fits in memory then we can also explicitly compute the column and then continue forward with straight Pandas: .. code-block:: python >>> amount = df.amount.compute() >>> amount.max() 1000 >>> amount.min() -1000 If either of these solutions work for you, great. Otherwise, continue on for a third approach. Automatic Opportunistic Caching ------------------------------- Another approach is to watch *all* intermediate computations, and *guess* which ones might be valuable to keep for the future. Dask has an *opportunistic caching mechanism* that stores intermediate tasks that show the following characteristics: 1. Expensive to compute 2. Cheap to store 3. Frequently used We can activate a fixed sized cache as a callback_. .. _callback: diagnostics.rst .. code-block:: python >>> from dask.cache import Cache >>> cache = Cache(2e9) # Leverage two gigabytes of memory >>> cache.register() # Turn cache on globally Now the cache will watch every small part of the computation and judge the value of that part based on the three characteristics listed above (expensive to compute, cheap to store, and frequently used). Dask will hold on to 2GB of the best intermediate results it can find, evicting older results as better results come in. If the ``df.amount`` column fits in 2GB then probably all of it will be stored while we keep working on it. If we start work on something else, then the ``df.amount`` column will likely be evicted to make space for other more timely results: .. code-block:: python >>> df.amount.max().compute() # slow the first time 1000 >>> df.amount.min().compute() # fast because df.amount is in the cache -1000 >>> df.id.nunique().compute() # starts to push out df.amount from cache Cache tasks, not expressions ---------------------------- This caching happens at the low-level scheduling layer, not the high-level dask.dataframe or dask.array layer. We don't explicitly cache the column ``df.amount``. Instead, we cache the hundreds of small pieces of that column that form the dask graph. It could be that we end up caching only a fraction of the column. This means that the opportunistic caching mechanism described above works for *all* dask computations, as long as those computations employ a consistent naming scheme (as all of dask.dataframe, dask.array, and dask.delayed do.) 
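As a small illustration, the same mechanism can speed up repeated work on a dask.array. This is only a sketch, with arbitrarily chosen array sizes and a cache large enough to hold the chunks we revisit:

.. code-block:: python

   >>> import dask.array as da
   >>> from dask.cache import Cache
   >>> cache = Cache(2e9)
   >>> cache.register()

   >>> x = da.random.random((20000, 20000), chunks=(1000, 1000))
   >>> y = x[:5000, :5000]        # a piece we keep coming back to

   >>> y.mean().compute()         # computes the underlying chunks of y
   >>> y.std().compute()          # may reuse cached chunks instead of rebuilding them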
You can see which tasks are held by the cache by inspecting the following attributes of the cache object: .. code-block:: python >>> cache.cache.data >>> cache.cache.heap.heap >>> cache.cache.nbytes The cache object is powered by cachey_, a tiny library for opportunistic caching. .. _cachey: https://github.com/blaze/cachey .. _disclaimer: Disclaimer ---------- This feature is still experimental, and can cause your computation to fill up RAM. Restricting your cache to a fixed size like 2GB requires dask to accurately count the size of each of our objects in memory. This can be tricky, particularly for Pythonic objects like lists and tuples, and for DataFrames that contain object dtypes. It is entirely possible that the caching mechanism will *undercount* the size of objects, causing it to use up more memory than anticipated which can lead to blowing up RAM and crashing your session. dask-0.16.0/docs/source/changelog.rst000066400000000000000000000703541320364734500174510ustar00rootroot00000000000000Changelog ========= 0.16.0 / 2017-11-17 ------------------- This is a major release. It includes breaking changes, new protocols, and a large number of bug fixes. Array +++++ - Add ``atleast_1d``, ``atleast_2d``, and ``atleast_3d`` (:pr:`2760`) (:pr:`2765`) `John A Kirkham`_ - Add ``allclose`` (:pr:`2771`) by `John A Kirkham`_ - Remove ``random.different_seeds`` from Dask Array API docs (:pr:`2772`) `John A Kirkham`_ - Deprecate ``vnorm`` in favor of ``dask.array.linalg.norm`` (:pr:`2773`) `John A Kirkham`_ - Reimplement ``unique`` to be lazy (:pr:`2775`) `John A Kirkham`_ - Support broadcasting of Dask Arrays with 0-length dimensions (:pr:`2784`) `John A Kirkham`_ - Add ``asarray`` and ``asanyarray`` to Dask Array API docs (:pr:`2787`) `James Bourbeau`_ - Support ``unique``'s ``return_*`` arguments (:pr:`2779`) `John A Kirkham`_ - Simplify ``_unique_interlal`` (:pr:`2850`) (:pr:`2855`) `John A Kirkham`_ - Avoid removing some getter calls in array optimizations (:pr:`2826`) `Jim Crist`_ DataFrame +++++++++ - Support ``pyarrow`` in ``dd.to_parquet`` (:pr:`2868`) `Jim Crist`_ - Fixed ``DataFrame.quantile`` and ``Series.quantile`` returning ``nan`` when missing values are present (:pr:`2791`:) `Tom Augspurger`_ - Fixed ``DataFrame.quantile`` losing the result ``.name`` when ``q`` is a scalar (:pr:`2791`:) `Tom Augspurger`_ - Fixed ``dd.concat`` return a ``dask.Dataframe`` when concatenating a single series along the columns, matching pandas' behavior (:pr:`2800`) `James Munroe`_ - Fixed default inplace parameter for ``DataFrame.eval`` to match the pandas defualt for pandas >= 0.21.0 (:pr:`2838`) `Tom Augspurger`_ - Fix exception when calling ``DataFrame.set_index`` on text column where one of the partitions was empty (:pr:`2831`) `Jesse Vogt`_ - Do not raise exception when calling ``DataFrame.set_index`` on empty dataframe (:pr:`2827`) `Jess Vogt`_ - Fixed bug in ``Dataframe.fillna`` when filling with a ``Series`` value (:pr:`2810`) `Tom Augspurger`_ - Deprecate old argument ordering in ``dd.to_parquet`` to better match convention of putting the dataframe first (:pr:`2867`) `Jim Crist`_ - df.astype(categorical_dtype -> known categoricals (:pr:`2835`) `Jim Crist`_ - Test against Pandas release candidate (:pr:`2814`) `Tom Augspurger`_ - Add more tests for read_parquet(engine='pyarrow') (:pr:`2822`) `Uwe Korn`_ - Remove unnecessary map_partitions in aggregate (:pr:`2712`) `Christopher Prohm`_ - Fix bug calling sample on empty partitions (:pr:`2818`) `@xwang777`_ - Error nicely when parsing dates in 
read_csv (:pr:`2863`) `Jim Crist`_ - Cleanup handling of passing filesystem objects to PyArrow readers (:pr:`2527`) `@fjetter`_ - Support repartitioning even if there are no divisions (:pr:`2873`) `@Ced4`_ - Support reading/writing to hdfs using ``pyarrow`` in ``dd.to_parquet`` (:pr:`2894`:, :pr:`2881`:) `Jim Crist`_ Core ++++ - Allow tuples as sharedict keys (:pr:`2763`) `Matthew Rocklin`_ - Calling compute within a dask.distributed task defaults to distributed scheduler (:pr:`2762`) `Matthew Rocklin`_ - Auto-import gcsfs when gcs:// protocol is used (:pr:`2776`) `Matthew Rocklin`_ - Fully remove dask.async module, use dask.local instead (:pr:`2828`) `Thomas Caswell`_ - Compatability with bokeh 0.12.10 (:pr:`:2844`) `Tom Augspurger`_ - Reduce test memory usage (:pr:`2782`) `Jim Crist`_ - Add Dask collection interface (:pr:`2748`) `Jim Crist`_ - Update Dask collection interface during XArray integration (:pr:`2847`) `Matthew Rocklin`_ - Close resource profiler process on __exit__ (:pr:`2871`) `Jim Crist`_ - Fix S3 tests (:pr:`2875`) `Jim Crist`_ - Fix port for bokeh dashboard in docs (:pr:`2889`) `Ian Hopkinson`_ - Wrap Dask filesystems for PyArrow compatibility (:pr:`2881`) `Jim Crist`_ 0.15.4 / 2017-10-06 ------------------- Array +++++ - ``da.random.choice`` now works with array arguments (:pr:`2781`) - Support indexing in arrays with np.int (fixes regression) (:pr:`2719`) - Handle zero dimension with rechunking (:pr:`2747`) - Support -1 as an alias for "size of the dimension" in ``chunks`` (:pr:`2749`) - Call mkdir in array.to_npy_stack (:pr:`2709`) DataFrame +++++++++ - Added the `.str` accessor to Categoricals with string categories (:pr:`2743`) - Support int96 (spark) datetimes in parquet writer (:pr:`2711`) - Pass on file scheme to fastparquet (:pr:`2714`) - Support Pandas 0.21 (:pr:`2737`) Bag +++ - Add tree reduction support for foldby (:pr: `2710`) Core ++++ - Drop s3fs from ``pip install dask[complete]`` (:pr:`2750`) 0.15.3 / 2017-09-24 ------------------- Array +++++ - Add masked arrays (:pr:`2301`) - Add ``*_like array creation functions`` (:pr:`2640`) - Indexing with unsigned integer array (:pr:`2647`) - Improved slicing with boolean arrays of different dimensions (:pr:`2658`) - Support literals in ``top`` and ``atop`` (:pr:`2661`) - Optional axis argument in cumulative functions (:pr:`2664`) - Improve tests on scalars with ``assert_eq`` (:pr:`2681`) - Fix norm keepdims (:pr:`2683`) - Add ``ptp`` (:pr:`2691`) - Add apply_along_axis (:pr:`2690`) and apply_over_axes (:pr:`2702`) DataFrame +++++++++ - Added ``Series.str[index]`` (:pr:`2634`) - Allow the groupby by param to handle columns and index levels (:pr:`2636`) - ``DataFrame.to_csv`` and ``Bag.to_textfiles`` now return the filenames to which they have written (:pr:`2655`) - Fix combination of ``partition_on`` and ``append`` in ``to_parquet`` (:pr:`2645`) - Fix for parquet file schemes (:pr:`2667`) - Repartition works with mixed categoricals (:pr:`2676`) Core ++++ - ``python setup.py test`` now runs tests (:pr:`2641`) - Added new cheatsheet (:pr:`2649`) - Remove resize tool in Bokeh plots (:pr:`2688`) 0.15.2 / 2017-08-25 ------------------- Array +++++ - Remove spurious keys from map_overlap graph (:pr:`2520`) - where works with non-bool condition and scalar values (:pr:`2543`) (:pr:`2549`) - Improve compress (:pr:`2541`) (:pr:`2545`) (:pr:`2555`) - Add argwhere, _nonzero, and where(cond) (:pr:`2539`) - Generalize vindex in dask.array to handle multi-dimensional indices (:pr:`2573`) - Add choose method (:pr:`2584`) - Split 
code into reorganized files (:pr:`2595`) - Add linalg.norm (:pr:`2597`) - Add diff, ediff1d (:pr:`2607`), (:pr:`2609`) - Improve dtype inference and reflection (:pr:`2571`) Bag +++ - Remove deprecated Bag behaviors (:pr:`2525`) DataFrame +++++++++ - Support callables in assign (:pr:`2513`) - better error messages for read_csv (:pr:`2522`) - Add dd.to_timedelta (:pr:`2523`) - Verify metadata in from_delayed (:pr:`2534`) (:pr:`2591`) - Add DataFrame.isin (:pr:`2558`) - Read_hdf supports iterables of files (:pr:`2547`) Core ++++ - Remove bare ``except:`` blocks everywhere (:pr:`2590`) 0.15.1 / 2017-07-08 ------------------- - Add storage_options to to_textfiles and to_csv (:pr:`2466`) - Rechunk and simplify rfftfreq (:pr:`2473`), (:pr:`2475`) - Better support ndarray subclasses (:pr:`2486`) - Import star in dask.distributed (:pr:`2503`) - Threadsafe cache handling with tokenization (:pr:`2511`) 0.15.0 / 2017-06-09 ------------------- Array +++++ - Add dask.array.stats submodule (:pr:`2269`) - Support ``ufunc.outer`` (:pr:`2345`) - Optimize fancy indexing by reducing graph overhead (:pr:`2333`) (:pr:`2394`) - Faster array tokenization using alternative hashes (:pr:`2377`) - Added the matmul ``@`` operator (:pr:`2349`) - Improved coverage of the ``numpy.fft`` module (:pr:`2320`) (:pr:`2322`) (:pr:`2327`) (:pr:`2323`) - Support NumPy's ``__array_ufunc__`` protocol (:pr:`2438`) Bag +++ - Fix bug where reductions on bags with no partitions would fail (:pr:`2324`) - Add broadcasting and variadic ``db.map`` top-level function. Also remove auto-expansion of tuples as map arguments (:pr:`2339`) - Rename ``Bag.concat`` to ``Bag.flatten`` (:pr:`2402`) DataFrame +++++++++ - Parquet improvements (:pr:`2277`) (:pr:`2422`) Core ++++ - Move dask.async module to dask.local (:pr:`2318`) - Support callbacks with nested scheduler calls (:pr:`2397`) - Support pathlib.Path objects as uris (:pr:`2310`) 0.14.3 / 2017-05-05 ------------------- DataFrame +++++++++ - Pandas 0.20.0 support 0.14.2 / 2017-05-03 ------------------- Array +++++ - Add da.indices (:pr:`2268`), da.tile (:pr:`2153`), da.roll (:pr:`2135`) - Simultaneously support drop_axis and new_axis in da.map_blocks (:pr:`2264`) - Rechunk and concatenate work with unknown chunksizes (:pr:`2235`) and (:pr:`2251`) - Support non-numpy container arrays, notably sparse arrays (:pr:`2234`) - Tensordot contracts over multiple axes (:pr:`2186`) - Allow delayed targets in da.store (:pr:`2181`) - Support interactions against lists and tuples (:pr:`2148`) - Constructor plugins for debugging (:pr:`2142`) - Multi-dimensional FFTs (single chunk) (:pr:`2116`) Bag +++ - to_dataframe enforces consistent types (:pr:`2199`) DataFrame +++++++++ - Set_index always fully sorts the index (:pr:`2290`) - Support compatibility with pandas 0.20.0 (:pr:`2249`), (:pr:`2248`), and (:pr:`2246`) - Support Arrow Parquet reader (:pr:`2223`) - Time-based rolling windows (:pr:`2198`) - Repartition can now create more partitions, not just less (:pr:`2168`) Core ++++ - Always use absolute paths when on POSIX file system (:pr:`2263`) - Support user provided graph optimizations (:pr:`2219`) - Refactor path handling (:pr:`2207`) - Improve fusion performance (:pr:`2129`), (:pr:`2131`), and (:pr:`2112`) 0.14.1 / 2017-03-22 ------------------- Array +++++ - Micro-optimize optimizations (:pr:`2058`) - Change slicing optimizations to avoid fusing raw numpy arrays (:pr:`2075`) (:pr:`2080`) - Dask.array operations now work on numpy arrays (:pr:`2079`) - Reshape now works in a much broader set of cases 
(:pr:`2089`) - Support deepcopy python protocol (:pr:`2090`) - Allow user-provided FFT implementations in ``da.fft`` (:pr:`2093`) Bag +++ DataFrame +++++++++ - Fix to_parquet with empty partitions (:pr:`2020`) - Optional ``npartitions='auto'`` mode in ``set_index`` (:pr:`2025`) - Optimize shuffle performance (:pr:`2032`) - Support efficient repartitioning along time windows like ``repartition(freq='12h')`` (:pr:`2059`) - Improve speed of categorize (:pr:`2010`) - Support single-row dataframe arithmetic (:pr:`2085`) - Automatically avoid shuffle when setting index with a sorted column (:pr:`2091`) - Improve handling of integer-na handling in read_csv (:pr:`2098`) Delayed +++++++ - Repeated attribute access on delayed objects uses the same key (:pr:`2084`) Core ++++ - Improve naming of nodes in dot visuals to avoid generic ``apply`` (:pr:`2070`) - Ensure that worker processes have different random seeds (:pr:`2094`) 0.14.0 / 2017-02-24 ------------------- Array +++++ - Fix corner cases with zero shape and misaligned values in ``arange`` (:pr:`1902`), (:pr:`1904`), (:pr:`1935`), (:pr:`1955`), (:pr:`1956`) - Improve concatenation efficiency (:pr:`1923`) - Avoid hashing in ``from_array`` if name is provided (:pr:`1972`) Bag +++ - Repartition can now increase number of partitions (:pr:`1934`) - Fix bugs in some reductions with empty partitions (:pr:`1939`), (:pr:`1950`), (:pr:`1953`) DataFrame +++++++++ - Support non-uniform categoricals (:pr:`1877`), (:pr:`1930`) - Groupby cumulative reductions (:pr:`1909`) - DataFrame.loc indexing now supports lists (:pr:`1913`) - Improve multi-level groupbys (:pr:`1914`) - Improved HTML and string repr for DataFrames (:pr:`1637`) - Parquet append (:pr:`1940`) - Add ``dd.demo.daily_stock`` function for teaching (:pr:`1992`) Delayed +++++++ - Add ``traverse=`` keyword to delayed to optionally avoid traversing nested data structures (:pr:`1899`) - Support Futures in from_delayed functions (:pr:`1961`) - Improve serialization of decorated delayed functions (:pr:`1969`) Core ++++ - Improve windows path parsing in corner cases (:pr:`1910`) - Rename tasks when fusing (:pr:`1919`) - Add top level ``persist`` function (:pr:`1927`) - Propagate ``errors=`` keyword in byte handling (:pr:`1954`) - Dask.compute traverses Python collections (:pr:`1975`) - Structural sharing between graphs in dask.array and dask.delayed (:pr:`1985`) 0.13.0 / 2017-01-02 ------------------- Array +++++ - Mandatory dtypes on dask.array. All operations maintain dtype information and UDF functions like map_blocks now require a dtype= keyword if it can not be inferred. (:pr:`1755`) - Support arrays without known shapes, such as arises when slicing arrays with arrays or converting dataframes to arrays (:pr:`1838`) - Support mutation by setting one array with another (:pr:`1840`) - Tree reductions for covariance and correlations. 
(:pr:`1758`) - Add SerializableLock for better use with distributed scheduling (:pr:`1766`) - Improved atop support (:pr:`1800`) - Rechunk optimization (:pr:`1737`), (:pr:`1827`) Bag +++ - Avoid wrong results when recomputing the same groupby twice (:pr:`1867`) DataFrame +++++++++ - Add ``map_overlap`` for custom rolling operations (:pr:`1769`) - Add ``shift`` (:pr:`1773`) - Add Parquet support (:pr:`1782`) (:pr:`1792`) (:pr:`1810`), (:pr:`1843`), (:pr:`1859`), (:pr:`1863`) - Add missing methods combine, abs, autocorr, sem, nsmallest, first, last, prod, (:pr:`1787`) - Approximate nunique (:pr:`1807`), (:pr:`1824`) - Reductions with multiple output partitions (for operations like drop_duplicates) (:pr:`1808`), (:pr:`1823`) (:pr:`1828`) - Add delitem and copy to DataFrames, increasing mutation support (:pr:`1858`) Delayed +++++++ - Changed behaviour for ``delayed(nout=0)`` and ``delayed(nout=1)``: ``delayed(nout=1)`` does not default to ``out=None`` anymore, and ``delayed(nout=0)`` is also enabled. I.e. functions with return tuples of length 1 or 0 can be handled correctly. This is especially handy, if functions with a variable amount of outputs are wrapped by ``delayed``. E.g. a trivial example: ``delayed(lambda *args: args, nout=len(vals))(*vals)`` Core ++++ - Refactor core byte ingest (:pr:`1768`), (:pr:`1774`) - Improve import time (:pr:`1833`) 0.12.0 / 2016-11-03 ------------------- DataFrame +++++++++ - Return a series when functions given to ``dataframe.map_partitions`` return scalars (:pr:`1515`) - Fix type size inference for series (:pr:`1513`) - ``dataframe.DataFrame.categorize`` no longer includes missing values in the ``categories``. This is for compatibility with a `pandas change `_ (:pr:`1565`) - Fix head parser error in ``dataframe.read_csv`` when some lines have quotes (:pr:`1495`) - Add ``dataframe.reduction`` and ``series.reduction`` methods to apply generic row-wise reduction to dataframes and series (:pr:`1483`) - Add ``dataframe.select_dtypes``, which mirrors the `pandas method `_ (:pr:`1556`) - ``dataframe.read_hdf`` now supports reading ``Series`` (:pr:`1564`) - Support Pandas 0.19.0 (:pr:`1540`) - Implement ``select_dtypes`` (:pr:`1556`) - String accessor works with indexes (:pr:`1561`) - Add pipe method to dask.dataframe (:pr:`1567`) - Add ``indicator`` keyword to merge (:pr:`1575`) - Support Series in ``read_hdf`` (:pr:`1575`) - Support Categories with missing values (:pr:`1578`) - Support inplace operators like ``df.x += 1`` (:pr:`1585`) - Str accessor passes through args and kwargs (:pr:`1621`) - Improved groupby support for single-machine multiprocessing scheduler (:pr:`1625`) - Tree reductions (:pr:`1663`) - Pivot tables (:pr:`1665`) - Add clip (:pr:`1667`), align (:pr:`1668`), combine_first (:pr:`1725`), and any/all (:pr:`1724`) - Improved handling of divisions on dask-pandas merges (:pr:`1666`) - Add ``groupby.aggregate`` method (:pr:`1678`) - Add ``dd.read_table`` function (:pr:`1682`) - Improve support for multi-level columns (:pr:`1697`) (:pr:`1712`) - Support 2d indexing in ``loc`` (:pr:`1726`) - Extend ``resample`` to include DataFrames (:pr:`1741`) - Support dask.array ufuncs on dask.dataframe objects (:pr:`1669`) Array +++++ - Add information about how ``dask.array`` ``chunks`` argument work (:pr:`1504`) - Fix field access with non-scalar fields in ``dask.array`` (:pr:`1484`) - Add concatenate= keyword to atop to concatenate chunks of contracted dimensions - Optimized slicing performance (:pr:`1539`) (:pr:`1731`) - Extend ``atop`` with a 
``concatenate=`` (:pr:`1609`) ``new_axes=`` (:pr:`1612`) and ``adjust_chunks=`` (:pr:`1716`) keywords - Add clip (:pr:`1610`) swapaxes (:pr:`1611`) round (:pr:`1708`) repeat - Automatically align chunks in ``atop``-backed operations (:pr:`1644`) - Cull dask.arrays on slicing (:pr:`1709`) Bag ++++ - Fix issue with callables in ``bag.from_sequence`` being interpreted as tasks (:pr:`1491`) - Avoid non-lazy memory use in reductions (:pr:`1747`) Administration ++++++++++++++ - Added changelog (:pr:`1526`) - Create new threadpool when operating from thread (:pr:`1487`) - Unify example documentation pages into one (:pr:`1520`) - Add versioneer for git-commit based versions (:pr:`1569`) - Pass through node_attr and edge_attr keywords in dot visualization (:pr:`1614`) - Add continuous testing for Windows with Appveyor (:pr:`1648`) - Remove use of multiprocessing.Manager (:pr:`1653`) - Add global optimizations keyword to compute (:pr:`1675`) - Micro-optimize get_dependencies (:pr:`1722`) 0.11.0 / 2016-08-24 ------------------- Major Points ++++++++++++ DataFrames now enforce knowing full metadata (columns, dtypes) everywhere. Previously we would operate in an ambiguous state when functions lost dtype information (such as ``apply``). Now all dataframes always know their dtypes and raise errors asking for information if they are unable to infer (which they usually can). Some internal attributes like ``_pd`` and ``_pd_nonempty`` have been moved. The internals of the distributed scheduler have been refactored to transition tasks between explicit states. This improves resilience, reasoning about scheduling, plugin operation, and logging. It also makes the scheduler code easier to understand for newcomers. Breaking Changes ++++++++++++++++ - The ``distributed.s3`` and ``distributed.hdfs`` namespaces are gone. Use protocols in normal methods like ``read_text('s3://...'`` instead. - ``Dask.array.reshape`` now errs in some cases where previously it would have create a very large number of tasks 0.10.2 / 2016-07-27 ------------------- - More Dataframe shuffles now work in distributed settings, ranging from setting-index to hash joins, to sorted joins and groupbys. - Dask passes the full test suite when run when under in Python's optimized-OO mode. - On-disk shuffles were found to produce wrong results in some highly-concurrent situations, especially on Windows. This has been resolved by a fix to the partd library. - Fixed a growth of open file descriptors that occurred under large data communications - Support ports in the ``--bokeh-whitelist`` option ot dask-scheduler to better routing of web interface messages behind non-trivial network settings - Some improvements to resilience to worker failure (though other known failures persist) - You can now start an IPython kernel on any worker for improved debugging and analysis - Improvements to ``dask.dataframe.read_hdf``, especially when reading from multiple files and docs 0.10.0 / 2016-06-13 ------------------- Major Changes +++++++++++++ - This version drops support for Python 2.6 - Conda packages are built and served from conda-forge - The ``dask.distributed`` executables have been renamed from dfoo to dask-foo. For example dscheduler is renamed to dask-scheduler - Both Bag and DataFrame include a preliminary distributed shuffle. Bag ++++ - Add task-based shuffle for distributed groupbys - Add accumulate for cumulative reductions DataFrame +++++++++ - Add a task-based shuffle suitable for distributed joins, groupby-applys, and set_index operations. 
The single-machine shuffle remains untouched (and much more efficient.) - Add support for new Pandas rolling API with improved communication performance on distributed systems. - Add ``groupby.std/var`` - Pass through S3/HDFS storage options in ``read_csv`` - Improve categorical partitioning - Add eval, info, isnull, notnull for dataframes Distributed +++++++++++ - Rename executables like dscheduler to dask-scheduler - Improve scheduler performance in the many-fast-tasks case (important for shuffling) - Improve work stealing to be aware of expected function run-times and data sizes. The drastically increases the breadth of algorithms that can be efficiently run on the distributed scheduler without significant user expertise. - Support maximum buffer sizes in streaming queues - Improve Windows support when using the Bokeh diagnostic web interface - Support compression of very-large-bytestrings in protocol - Support clean cancellation of submitted futures in Joblib interface Other +++++ - All dask-related projects (dask, distributed, s3fs, hdfs, partd) are now building conda packages on conda-forge. - Change credential handling in s3fs to only pass around delegated credentials if explicitly given secret/key. The default now is to rely on managed environments. This can be changed back by explicitly providing a keyword argument. Anonymous mode must be explicitly declared if desired. 0.9.0 / 2016-05-11 ------------------ API Changes +++++++++++ - ``dask.do`` and ``dask.value`` have been renamed to ``dask.delayed`` - ``dask.bag.from_filenames`` has been renamed to ``dask.bag.read_text`` - All S3/HDFS data ingest functions like ``db.from_s3`` or ``distributed.s3.read_csv`` have been moved into the plain ``read_text``, ``read_csv functions``, which now support protocols, like ``dd.read_csv('s3://bucket/keys*.csv')`` Array +++++ - Add support for ``scipy.LinearOperator`` - Improve optional locking to on-disk data structures - Change rechunk to expose the intermediate chunks Bag ++++ - Rename ``from_filename``s to ``read_text`` - Remove ``from_s3`` in favor of ``read_text('s3://...')`` DataFrame +++++++++ - Fixed numerical stability issue for correlation and covariance - Allow no-hash ``from_pandas`` for speedy round-trips to and from-pandas objects - Generally reengineered ``read_csv`` to be more in line with Pandas behavior - Support fast ``set_index`` operations for sorted columns Delayed +++++++ - Rename ``do/value`` to ``delayed`` - Rename ``to/from_imperative`` to ``to/from_delayed`` Distributed +++++++++++ - Move s3 and hdfs functionality into the dask repository - Adaptively oversubscribe workers for very fast tasks - Improve PyPy support - Improve work stealing for unbalanced workers - Scatter data efficiently with tree-scatters Other +++++ - Add lzma/xz compression support - Raise a warning when trying to split unsplittable compression types, like gzip or bz2 - Improve hashing for single-machine shuffle operations - Add new callback method for start state - General performance tuning 0.8.1 / 2016-03-11 ------------------ Array +++++ - Bugfix for range slicing that could periodically lead to incorrect results. - Improved support and resiliency of ``arg`` reductions (``argmin``, ``argmax``, etc.) 
Bag ++++ - Add ``zip`` function DataFrame +++++++++ - Add ``corr`` and ``cov`` functions - Add ``melt`` function - Bugfixes for io to bcolz and hdf5 0.8.0 / 2016-02-20 ------------------ Array +++++ - Changed default array reduction split from 32 to 4 - Linear algebra, ``tril``, ``triu``, ``LU``, ``inv``, ``cholesky``, ``solve``, ``solve_triangular``, eye``, ``lstsq``, ``diag``, ``corrcoef``. Bag ++++ - Add tree reductions - Add range function - drop ``from_hdfs`` function (better functionality now exists in hdfs3 and distributed projects) DataFrame +++++++++ - Refactor ``dask.dataframe`` to include a full empty pandas dataframe as metadata. Drop the ``.columns`` attribute on Series - Add Series categorical accessor, series.nunique, drop the ``.columns`` attribute for series. - ``read_csv`` fixes (multi-column parse_dates, integer column names, etc. ) - Internal changes to improve graph serialization Other +++++ - Documentation updates - Add from_imperative and to_imperative functions for all collections - Aesthetic changes to profiler plots - Moved the dask project to a new dask organization 0.7.6 / 2016-01-05 ------------------ Array +++++ - Improve thread safety - Tree reductions - Add ``view``, ``compress``, ``hstack``, ``dstack``, ``vstack`` methods - ``map_blocks`` can now remove and add dimensions DataFrame +++++++++ - Improve thread safety - Extend sampling to include replacement options Imperative ++++++++++ - Removed optimization passes that fused results. Core ++++ - Removed ``dask.distributed`` - Improved performance of blocked file reading - Serialization improvements - Test Python 3.5 0.7.4 / 2015-10-23 ------------------ This was mostly a bugfix release. Some notable changes: - Fix minor bugs associated with the release of numpy 1.10 and pandas 0.17 - Fixed a bug with random number generation that would cause repeated blocks due to the birthday paradox - Use locks in ``dask.dataframe.read_hdf`` by default to avoid concurrency issues - Change ``dask.get`` to point to ``dask.async.get_sync`` by default - Allow visualization functions to accept general graphviz graph options like rankdir='LR' - Add reshape and ravel to ``dask.array`` - Support the creation of ``dask.arrays`` from ``dask.imperative`` objects Deprecation +++++++++++ This release also includes a deprecation warning for ``dask.distributed``, which will be removed in the next version. Future development in distributed computing for dask is happening here: https://distributed.readthedocs.io . General feedback on that project is most welcome from this community. 0.7.3 / 2015-09-25 ------------------ Diagnostics +++++++++++ - A utility for profiling memory and cpu usage has been added to the ``dask.diagnostics`` module. DataFrame +++++++++ This release improves coverage of the pandas API. Among other things it includes ``nunique``, ``nlargest``, ``quantile``. Fixes encoding issues with reading non-ascii csv files. Performance improvements and bug fixes with resample. More flexible read_hdf with globbing. And many more. Various bug fixes in ``dask.imperative`` and ``dask.bag``. 0.7.0 / 2015-08-15 ------------------ DataFrame +++++++++ This release includes significant bugfixes and alignment with the Pandas API. This has resulted both from use and from recent involvement by Pandas core developers. 
- New operations: query, rolling operations, drop - Improved operations: quantiles, arithmetic on full dataframes, dropna, constructor logic, merge/join, elemwise operations, groupby aggregations Bag ++++ - Fixed a bug in fold where with a null default argument Array +++++ - New operations: da.fft module, da.image.imread Infrastructure ++++++++++++++ - The array and dataframe collections create graphs with deterministic keys. These tend to be longer (hash strings) but should be consistent between computations. This will be useful for caching in the future. - All collections (Array, Bag, DataFrame) inherit from common subclass 0.6.1 / 2015-07-23 ------------------ Distributed +++++++++++ - Improved (though not yet sufficient) resiliency for ``dask.distributed`` when workers die DataFrame +++++++++ - Improved writing to various formats, including to_hdf, to_castra, and to_csv - Improved creation of dask DataFrames from dask Arrays and Bags - Improved support for categoricals and various other methods Array +++++ - Various bug fixes - Histogram function Scheduling ++++++++++ - Added tie-breaking ordering of tasks within parallel workloads to better handle and clear intermediate results Other +++++ - Added the dask.do function for explicit construction of graphs with normal python code - Traded pydot for graphviz library for graph printing to support Python3 - There is also a gitter chat room and a stackoverflow tag .. _`John A Kirkham`: https://github.com/jakirkham .. _`Matthew Rocklin`: https://github.com/mrocklin .. _`Jim Crist`: https://github.com/jcrist .. _`James Bourbeau`: https://github.com/jrbourbeau .. _`James Munroe`: https://github.com/jmunroe .. _`Thomas Caswell`: https://github.com/tacaswell .. _`Tom Augspurger`: https://github.com/tomaugspurger .. _`Jesse Vogt`: https://github.com/jessevogt .. _`Uwe Korn`: https://github.com/xhochy .. _`Christopher Prohm`: https://github.com/chmp .. _`@xwang777`: https://github.com/xwang777 .. _`@fjetter`: https://github.com/fjetter .. _`@Ced4`: https://github.com/Ced4 .. _`Ian Hopkinson`: https://https://github.com/IanHopkinson dask-0.16.0/docs/source/cheatsheet.rst000066400000000000000000000002651320364734500176310ustar00rootroot00000000000000Dask Cheat Sheet ================ The 300KB pdf :download:`dask cheat sheet ` is a single page summary of all the most important information about using dask. dask-0.16.0/docs/source/cite.rst000066400000000000000000000027751320364734500164500ustar00rootroot00000000000000Citations ========= Dask is developed by many people from many institutions. Some of these developers are academics who depend on academic citations to justify their efforts. Unfortunately, no single citation can do all of these developers (and the developers to come) sufficient justice. Instead, we choose to use a single blanket citation for all developers past and present. To cite Dask in publications, please use the following:: Dask Development Team (2016). Dask: Library for dynamic task scheduling URL http://dask.pydata.org A BibTeX entry for LaTeX users follows:: @Manual{, title = {Dask: Library for dynamic task scheduling}, author = {{Dask Development Team}}, year = {2016}, url = {http://dask.pydata.org}, } The full author list is available using git, or by looking at the `AUTHORS file `_. Papers about parts of Dask -------------------------- Rocklin, Matthew. "Dask: Parallel Computation with Blocked algorithms and Task Scheduling." (2015). `PDF `_. 
:: @InProceedings{ matthew_rocklin-proc-scipy-2015, author = { Matthew Rocklin }, title = { Dask: Parallel Computation with Blocked algorithms and Task Scheduling }, booktitle = { Proceedings of the 14th Python in Science Conference }, pages = { 130 - 136 }, year = { 2015 }, editor = { Kathryn Huff and James Bergstra } } dask-0.16.0/docs/source/conf.py000066400000000000000000000224661320364734500162700ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # dask documentation build configuration file, created by # sphinx-quickstart on Sun Jan 4 08:58:22 2015. # # This file is execfile()d with the current directory set to its containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import sys, os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. #sys.path.insert(0, os.path.abspath('.')) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. #needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = ['sphinx.ext.autodoc', 'sphinx.ext.mathjax', 'sphinx.ext.autosummary', 'sphinx.ext.extlinks', 'numpydoc'] numpydoc_show_class_members = False # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix of source filenames. source_suffix = '.rst' # The encoding of source files. #source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' # General information about the project. project = u'dask' copyright = u'2017, Anaconda' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. # version = '0.1.0' from dask import __version__ as version # The full version, including alpha/beta/rc tags. # release = '0.1.0' release = version # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. #language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: #today = '' # Else, today_fmt is used as the format for a strftime call. #today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = [] # The reST default role (used for this markup: `text`) to use for all documents. #default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. #add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). #add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. #show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. 
#modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # Taken from docs.readthedocs.io: # on_rtd is whether we are on readthedocs.io on_rtd = os.environ.get('READTHEDOCS', None) == 'True' if not on_rtd: # only import and set the theme if we're building docs locally import sphinx_rtd_theme html_theme = 'sphinx_rtd_theme' html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. #html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". #html_title = None # A shorter title for the navigation bar. Default is the same as html_title. #html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. #html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. #html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. #html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. #html_use_smartypants = True # Custom sidebar templates, maps document names to template names. #html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. #html_additional_pages = {} # If false, no module index is generated. #html_domain_indices = True # If false, no index is generated. #html_use_index = True # If true, the index is split into individual pages for each letter. #html_split_index = False # If true, links to the reST sources are added to the pages. #html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. #html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. #html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. #html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). #html_file_suffix = None # Output file base name for HTML help builder. htmlhelp_basename = 'daskdoc' # -- Options for LaTeX output -------------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). #'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). #'pointsize': '10pt', # Additional stuff for the LaTeX preamble. #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'dask.tex', u'dask Documentation', u'Dask Development Team', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. 
#latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. #latex_use_parts = False # If true, show page references after internal links. #latex_show_pagerefs = False # If true, show URL addresses after external links. #latex_show_urls = False # Documents to append as an appendix to all manuals. #latex_appendices = [] # If false, no module index is generated. #latex_domain_indices = True # -- Options for manual page output -------------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ ('index', 'dask', u'dask Documentation', [u'Dask Development Team'], 1) ] # If true, show URL addresses after external links. #man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ('index', 'dask', u'dask Documentation', u'Dask Development Team', 'dask', 'One line description of project.', 'Miscellaneous'), ] # Documents to append as an appendix to all manuals. #texinfo_appendices = [] # If false, no module index is generated. #texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. #texinfo_show_urls = 'footnote' # -- Options for Epub output --------------------------------------------------- # Bibliographic Dublin Core info. epub_title = u'dask' epub_author = u'Dask Development Team' epub_publisher = u'Anaconda Inc' epub_copyright = u'2017, Anaconda Inc' # The language of the text. It defaults to the language option # or en if the language is not set. #epub_language = '' # The scheme of the identifier. Typical schemes are ISBN or URL. #epub_scheme = '' # The unique identifier of the text. This can be an ISBN number # or the project homepage. #epub_identifier = '' # A unique identification for the text. #epub_uid = '' # A tuple containing the cover image and cover page html template filenames. #epub_cover = () # HTML files that should be inserted before the pages created by sphinx. # The format is a list of tuples containing the path and title. #epub_pre_files = [] # HTML files that should be inserted after the pages created by sphinx. # The format is a list of tuples containing the path and title. #epub_post_files = [] # A list of files that should not be packed into the epub file. #epub_exclude_files = [] # The depth of the table of contents in toc.ncx. #epub_tocdepth = 3 # Allow duplicate toc entries. #epub_tocdup = True extlinks = { 'issue': ('https://github.com/dask/dask/issues/%s', 'GH#'), 'pr': ('https://github.com/dask/dask/pull/%s', 'GH#') } dask-0.16.0/docs/source/custom-collections.rst000066400000000000000000000465021320364734500213450ustar00rootroot00000000000000Custom Collections ================== For many problems the built-in dask collections (``dask.array``, ``dask.dataframe``, ``dask.bag``, and ``dask.delayed``) are sufficient. For cases where they aren't, it's possible to create your own dask collections. Here we describe the required methods to fulfill the dask collection interface. .. warning:: The custom collection API is experimental and subject to change without going through a deprecation cycle. .. note:: This is considered an advanced feature. For most cases the built-in collections are probably sufficient.
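To give a rough sense of the overall shape of the interface described below, here is a minimal sketch. The class name ``MyCollection``, the list-based finalizer, and the choice of the threaded scheduler are placeholder assumptions for illustration only; a complete working example is developed in the Example Dask Collection section later in this document.

.. code:: python

    # A minimal sketch of the collection interface described below
    # (illustrative only; ``MyCollection`` and its particular choices
    # are assumptions, not a prescribed implementation).
    import dask.threaded
    from dask.base import DaskMethodsMixin

    class MyCollection(DaskMethodsMixin):
        def __init__(self, dsk, keys):
            self._dsk = dsk      # the task graph (a mapping)
            self._keys = keys    # the output keys of that graph

        def __dask_graph__(self):
            return self._dsk

        def __dask_keys__(self):
            return self._keys

        @staticmethod
        def __dask_optimize__(dsk, keys, **kwargs):
            # No optimization in this sketch; return the graph unchanged
            return dsk

        # Default scheduler ``get`` for this collection
        __dask_scheduler__ = staticmethod(dask.threaded.get)

        def __dask_postcompute__(self):
            # Finalize computed results into a plain list; no extra arguments
            return list, ()

        def __dask_postpersist__(self):
            # Rebuild by calling the class with the persisted graph and keys
            return MyCollection, (self._keys,)

        def __dask_tokenize__(self):
            return tuple(self._keys)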
Before reading this you should read and understand: - :doc:`overview ` - :doc:`graph specification ` - :doc:`custom graphs ` **Contents** - :ref:`Description of the dask collection interface ` - :ref:`How this interface is used to implement the core dask methods ` - :ref:`How to add the core methods to your class ` - :ref:`example-dask-collection` - :ref:`How to check if something is a dask collection ` - :ref:`How to make tokenize work with your collection ` .. _collection-interface: The Dask Collection Interface ----------------------------- To create your own dask collection, you need to fulfill the following interface. Note that there is no required base class. It's recommended to also read :ref:`core-method-internals` to see how this interface is used inside dask. .. method:: __dask_graph__(self) The dask graph. Returns ------- dsk : MutableMapping, None The dask graph. If ``None``, this instance will not be interpreted as a dask collection, and none of the remaining interface methods will be called. .. method:: __dask_keys__(self) The output keys for the dask graph. Returns ------- keys : list A possibly nested list of keys that represent the outputs of the graph. After computation, the results will be returned in the same layout, with the keys replaced with their corresponding outputs. .. staticmethod:: __dask_optimize__(dsk, keys, \*\*kwargs) Given a graph and keys, return a new optimized graph. This method can be either a ``staticmethod`` or a ``classmethod``, but not an instancemethod. Note that graphs and keys are merged before calling ``__dask_optimize__``; as such the graph and keys passed to this method may represent more than one collection sharing the same optimize method. If not implemented, defaults to returning the graph unchanged. Parameters ---------- dsk : MutableMapping The merged graphs from all collections sharing the same ``__dask_optimize__`` method. keys : list A list of the outputs from ``__dask_keys__`` from all collections sharing the same ``__dask_optimize__`` method. \*\*kwargs Extra keyword arguments forwarded from the call to ``compute`` or ``persist``. Can be used or ignored as needed. Returns ------- optimized_dsk : MutableMapping The optimized dask graph. .. staticmethod:: __dask_scheduler__(dsk, keys, \*\*kwargs) The default scheduler ``get`` to use for this object. Usually attached to the class as a staticmethod, e.g. >>> import dask.threaded >>> class MyCollection(object): ... # Use the threaded scheduler by default ... __dask_scheduler__ = staticmethod(dask.threaded.get) .. method:: __dask_postcompute__(self) Return the finalizer and (optional) extra arguments to convert the computed results into their in-memory representation. Used to implement ``dask.compute``. Returns ------- finalize : callable A function with the signature ``finalize(results, *extra_args)``. Called with the computed results in the same structure as the corresponding keys from ``__dask_keys__``, as well as any extra arguments as specified in ``extra_args``. Should perform any necessary finalization before returning the corresponding in-memory collection from ``compute``. For example, the ``finalize`` function for ``dask.array.Array`` concatenates all the individual array chunks into one large numpy array, which is then the result of ``compute``. extra_args : tuple Any extra arguments to pass to ``finalize`` after ``results``. If there are no extra arguments, this should be an empty tuple. ..
method:: __dask_postpersist__(self) Return the rebuilder and (optional) extra arguments to rebuild an equivalent dask collection from a persisted graph. Used to implement ``dask.persist``. Returns ------- rebuild : callable A function with the signature ``rebuild(dsk, *extra_args)``. Called with a persisted graph containing only the keys and results from ``__dask_keys__``, as well as any extra arguments as specified in ``extra_args``. Should return an equivalent dask collection with the same keys as ``self``, but with the results already computed. For example, the ``rebuild`` function for ``dask.array.Array`` is just the ``__init__`` method called with the new graph but the same metadata. extra_args : tuple Any extra arguments to pass to ``rebuild`` after ``dsk``. If there are no extra arguments, this should be an empty tuple. .. note:: It's also recommended to define ``__dask_tokenize__``, see :ref:`deterministic-hashing`. .. _core-method-internals: Internals of the Core Dask Methods ---------------------------------- Dask has a few *core* functions (and corresponding methods) that implement common operations: - ``compute``: convert one or more dask collections into their in-memory counterparts - ``persist``: convert one or more dask collections into equivalent dask collections with their results already computed and cached in memory. - ``visualize``: given one or more dask collections, draw out the graph that would be passed to the scheduler during a call to ``compute`` or ``persist`` Here we briefly describe the internals of these functions to illustrate how they relate to the above interface. Compute ~~~~~~~ The operation of ``compute`` can be broken into three stages: 1. **Graph Merging & Optimization** First the individual collections are converted to a single large graph and nested list of keys. How this happens depends on the value of the ``optimize_graph`` keyword, which each function takes: - If ``optimize_graph`` is ``True`` (default) then the collections are first grouped by their ``__dask_optimize__`` methods. All collections with the same ``__dask_optimize__`` method have their graphs merged and keys concatenated, and then a single call to each respective ``__dask_optimize__`` is made with the merged graphs and keys. The resulting graphs are then merged. - If ``optimize_graph`` is ``False`` then all the graphs are merged and all the keys concatenated. After this stage there is a single large graph and nested list of keys which represents all the collections. 2. **Computation** After the graphs are merged and any optimizations performed, the resulting large graph and nested list of keys are passed on to the scheduler. The scheduler to use is chosen as follows: - If a ``get`` function is specified directly as a keyword, use that. - Otherwise, if a global scheduler is set, use that. - Otherwise fall back to the default scheduler for the given collections. Note that if the collections don't all share the same ``__dask_scheduler__`` then an error will be raised. Once the appropriate scheduler ``get`` function is determined, it's called with the merged graph, keys, and extra keyword arguments. After this stage ``results`` is a nested list of values. The structure of this list mirrors that of ``keys``, with each key substituted with its corresponding result. 3. **Postcompute** After the results are generated, the output values of ``compute`` need to be built. This is what the ``__dask_postcompute__`` method is for.
``__dask_postcompute__`` returns two things: - A ``finalize`` function, which takes in the results for the corresponding keys - A tuple of extra arguments to pass to ``finalize`` after the results To build the outputs, the list of collections and results is iterated over, and the finalizer for each collection is called on its respective results. In pseudocode this process looks like: .. code:: python def compute(*collections, **kwargs): # 1. Graph Merging & Optimization # ------------------------------- if kwargs.pop('optimize_graph', True): # If optimization is turned on, group the collections by # optimization method, and apply each method only once to the merged # sub-graphs. optimization_groups = groupby_optimization_methods(collections) graphs = [] for optimize_method, cols in optimization_groups: # Merge the graphs and keys for the subset of collections that # share this optimization method sub_graph = merge_graphs([x.__dask_graph__() for x in cols]) sub_keys = [x.__dask_keys__() for x in cols] # kwargs are forwarded to ``__dask_optimize__`` from compute optimized_graph = optimize_method(sub_graph, sub_keys, **kwargs) graphs.append(optimized_graph) graph = merge_graphs(graphs) else: graph = merge_graphs([x.__dask_graph__() for x in collections]) # Keys are always the same keys = [x.__dask_keys__() for x in collections] # 2. Computation # -------------- # Determine appropriate get function based on collections, global # settings, and keyword arguments get = determine_get_function(collections, **kwargs) # Pass the merged graph, keys, and kwargs to ``get`` results = get(graph, keys, **kwargs) # 3. Postcompute # -------------- output = [] # Iterate over the results and collections for res, collection in zip(results, collections): finalize, extra_args = collection.__dask_postcompute__() out = finalize(res, *extra_args) output.append(out) # `dask.compute` always returns tuples return tuple(output) Persist ~~~~~~~ Persist is very similar to ``compute``, except for how the return values are created. It too has three stages: 1. **Graph Merging & Optimization** Same as in ``compute``. 2. **Computation** Same as in ``compute``, except in the case of the distributed scheduler, where the values in ``results`` are futures instead of values. 3. **Postpersist** Similar to ``__dask_postcompute__``, ``__dask_postpersist__`` is used to rebuild values in a call to ``persist``. ``__dask_postpersist__`` returns two things: - A ``rebuild`` function, which takes in a persisted graph. The keys of this graph are the same as ``__dask_keys__`` for the corresponding collection, and the values are computed results (for the single machine scheduler) or futures (for the distributed scheduler). - A tuple of extra arguments to pass to ``rebuild`` after the graph To build the outputs of ``persist``, the list of collections and results is iterated over, and the rebuilder for each collection is called on the graph for its respective results. In pseudocode this looks like: .. code:: python def persist(*collections, **kwargs): # 1. Graph Merging & Optimization # ------------------------------- # **Same as in compute** graph = ... keys = ... # 2. Computation # -------------- # **Same as in compute** results = ... # 3. Postpersist # -------------- output = [] # Iterate over the results and collections for res, collection in zip(results, collections): # res has the same structure as keys keys = collection.__dask_keys__() # Get the computed graph for this collection.
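# For illustration only (hypothetical values): if keys == [['a', 'b'], ['c']] and res == [[1, 2], [3]], then flatten(keys) == ['a', 'b', 'c'] and flatten(res) == [1, 2, 3], so the graph built below maps each output key to its computed result.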
# Here flatten converts a nested list into a single list graph = {k: r for (k, r) in zip(flatten(keys), flatten(res))} # Rebuild the output dask collection with the computed graph rebuild, extra_args = collection.__dask_postpersist__() out = rebuild(graph, *extra_args) output.append(out) # dask.persist always returns tuples return tuple(output) Visualize ~~~~~~~~~ Visualize is the simplest of the 3 core methods. It only has two stages: 1. **Graph Merging & Optimization** Same as in ``compute`` 2. **Graph Drawing** The resulting merged graph is drawn using ``graphviz`` and output to the specified file. In pseudocode this looks like: .. code:: python def visualize(*collections, **kwargs): # 1. Graph Merging & Optimization # ------------------------------- # **Same as in compute** graph = ... keys = ... # 2. Graph Drawing # ---------------- # Draw the graph with graphviz's `dot` tool and return the result. return dot_graph(graph, **kwargs) .. _adding-methods-to-class: Adding the Core Dask Methods to Your Class ------------------------------------------ Defining the above interface will allow your object to be used by the core dask functions (``dask.compute``, ``dask.persist``, ``dask.visualize``, etc...). To add corresponding method versions of these, subclass from ``dask.base.DaskMethodsMixin``, which adds implementations of ``compute``, ``persist``, and ``visualize`` based on the interface above. .. _example-dask-collection: Example Dask Collection ----------------------- Here we create a dask collection representing a tuple. Every element in the tuple is represented as a task in the graph. Note that this is for illustration purposes only - the same user experience could be achieved using normal tuples with elements of ``dask.delayed``. .. code:: python # Saved as dask_tuple.py import dask.threaded from dask.base import DaskMethodsMixin from dask.optimize import cull # We subclass from DaskMethodsMixin to add common dask methods to our # class. This is nice but not necessary for creating a dask collection. class Tuple(DaskMethodsMixin): def __init__(self, dsk, keys): # The init method takes in a dask graph and a set of keys to use # as outputs. self._dsk = dsk self._keys = keys def __dask_graph__(self): return self._dsk def __dask_keys__(self): return self._keys @staticmethod def __dask_optimize__(dsk, keys, **kwargs): # We cull unnecessary tasks here. Note that this isn't necessary, # dask will do this automatically, this just shows one optimization # you could do. dsk2, _ = cull(dsk, keys) return dsk2 # Use the threaded scheduler by default. __dask_scheduler__ = staticmethod(dask.threaded.get) def __dask_postcompute__(self): # We want to return the results as a tuple, so our finalize # function is `tuple`. There are no extra arguments, so we also # return an empty tuple. return tuple, () def __dask_postpersist__(self): # Since our __init__ takes a graph as its first argument, our # rebuild function can just be the class itself. For extra # arguments we also return a tuple containing just the keys. return Tuple, (self._keys,) def __dask_tokenize__(self): # For tokenize to work we want to return a value that fully # represents this object. In this case it's the list of keys # to be computed. return tuple(self._keys) Demonstrating this class: .. code:: python >>> from dask_tuple import Tuple >>> from operator import add, mul # Define a dask graph >>> dsk = {'a': 1, ... 'b': 2, ... 'c': (add, 'a', 'b'), ... 'd': (mul, 'b', 2), ...
'e': (add, 'b', 'c')} # The output keys for this graph >>> keys = ['b', 'c', 'd', 'e'] >>> x = Tuple(dsk, keys) # Compute turns Tuple into a tuple >>> x.compute() (2, 3, 4, 5) # Persist turns Tuple into a Tuple, with each task already computed >>> x2 = x.persist() >>> isinstance(x2, Tuple) True >>> x2.__dask_graph__() {'b': 2, 'c': 3, 'd': 4, 'e': 5} >>> x2.compute() (2, 3, 4, 5) .. _is-dask-collection: Checking if an object is a dask collection ------------------------------------------ To check if an object is a dask collection, use ``dask.base.is_dask_collection``: .. code:: python >>> from dask.base import is_dask_collection >>> from dask import delayed >>> x = delayed(sum)([1, 2, 3]) >>> is_dask_collection(x) True >>> is_dask_collection(1) False .. _deterministic-hashing: Implementing Deterministic Hashing ---------------------------------- Dask implements its own deterministic hash function to generate keys based on the value of arguments. This function is available as ``dask.base.tokenize``. Many common types already have implementations of ``tokenize``, which can be found in ``dask/base.py``. When creating your own custom classes you may need to register a ``tokenize`` implementation. There are two ways to do this: .. note:: Both dask collections and normal python objects can have implementations of ``tokenize`` using either of the methods described below. 1. The ``__dask_tokenize__`` method Where possible, it's recommended to define the ``__dask_tokenize__`` method. This method takes no arguments and should return a value fully representative of the object. 2. Register a function with ``dask.base.normalize_token`` If defining a method on the class isn't possible, you can register a tokenize function with the ``normalize_token`` dispatch. The function should have the same signature as described above. In both cases the implementation should be the same, only the location of the definition is different. Example ~~~~~~~ .. code:: python >>> from dask.base import tokenize, normalize_token # Define a tokenize implementation using a method. >>> class Foo(object): ... def __init__(self, a, b): ... self.a = a ... self.b = b ... ... def __dask_tokenize__(self): ... # This tuple fully represents self ... return (Foo, self.a, self.b) >>> x = Foo(1, 2) >>> tokenize(x) '5988362b6e07087db2bc8e7c1c8cc560' >>> tokenize(x) == tokenize(x) # token is deterministic True # Register an implementation with normalize_token >>> class Bar(object): ... def __init__(self, x, y): ... self.x = x ... self.y = y >>> @normalize_token.register(Bar) ... def tokenize_bar(x): ... return (Bar, x.x, x.y) >>> y = Bar(1, 2) >>> tokenize(y) '5a7e9c3645aa44cf13d021c14452152e' >>> tokenize(y) == tokenize(y) True >>> tokenize(y) == tokenize(x) # tokens for different objects aren't equal False For more examples please see ``dask/base.py`` or any of the built-in dask collections. dask-0.16.0/docs/source/custom-graphs.rst000066400000000000000000000056401320364734500203120ustar00rootroot00000000000000Custom Graphs ============= There may be times that you want to do parallel computing, but your application doesn't fit neatly into something like ``dask.array`` or ``dask.bag``. In these cases, you can interact directly with the dask schedulers. These schedulers operate well as standalone modules. This separation provides a release valve for complex situations and allows advanced projects additional opportunities for parallel execution, even if those projects have an internal representation for their computations.
As dask schedulers improve or expand to distributed memory, code written to use dask schedulers will advance as well. .. _custom-graph-example: Example ------- .. figure:: images/pipeline.png :alt: "Dask graph for data pipeline" :align: right As discussed in the :doc:`motivation ` and :doc:`specification ` sections, the schedulers take a task graph which is a dict of tuples of functions, and a list of desired keys from that graph. Here is a mocked out example building a graph for a traditional clean and analyze pipeline: .. code-block:: python def load(filename): ... def clean(data): ... def analyze(sequence_of_data): ... def store(result): with open(..., 'w') as f: f.write(result) dsk = {'load-1': (load, 'myfile.a.data'), 'load-2': (load, 'myfile.b.data'), 'load-3': (load, 'myfile.c.data'), 'clean-1': (clean, 'load-1'), 'clean-2': (clean, 'load-2'), 'clean-3': (clean, 'load-3'), 'analyze': (analyze, ['clean-%d' % i for i in [1, 2, 3]]), 'store': (store, 'analyze')} from dask.multiprocessing import get get(dsk, 'store') # executes in parallel Related Projects ---------------- The following excellent projects also provide parallel execution: * Joblib_ * Multiprocessing_ * `IPython Parallel`_ * `Concurrent.futures`_ * `Luigi`_ Each library lets you dictate how your tasks relate to each other with various levels of sophistication. Each library executes those tasks with some internal logic. Dask schedulers differ in the following ways: 1. You specify the entire graph as a Python dict rather than using a specialized API 2. You get a variety of schedulers ranging from single machine single core, to threaded, to multiprocessing, to distributed, and 3. The dask single-machine schedulers have logic to execute the graph in a way that minimizes memory footprint. But the other projects offer different advantages and different programming paradigms. One should inspect all such projects before selecting one. .. _Joblib: https://pythonhosted.org/joblib/parallel.html .. _Multiprocessing: https://docs.python.org/3/library/multiprocessing.html .. _`IPython Parallel`: https://ipyparallel.readthedocs.io/en/latest/ .. _`Concurrent.futures`: https://docs.python.org/3/library/concurrent.futures.html .. 
_Luigi: https://luigi.readthedocs.io
dask-0.16.0/docs/source/daskcheatsheet.pdf [binary PDF content omitted]
K=20 CMYK PROCESS 17.187500 12.890625 12.890625 0.000000 C=0 M=0 Y=0 K=10 CMYK PROCESS 8.593750 6.250000 6.640625 0.000000 C=0 M=0 Y=0 K=5 CMYK PROCESS 3.906250 2.734375 2.734375 0.000000 Brights 1 C=0 M=100 Y=100 K=0 CMYK PROCESS 1.562500 98.046875 95.312500 0.000000 C=0 M=75 Y=100 K=0 CMYK PROCESS 0.781250 73.828125 97.656250 0.000000 C=0 M=10 Y=95 K=0 CMYK PROCESS 2.343750 9.375000 98.437500 0.000000 C=85 M=10 Y=100 K=0 CMYK PROCESS 83.593750 9.765625 100.000000 0.781250 C=100 M=90 Y=0 K=0 CMYK PROCESS 98.437500 85.156250 6.250000 0.390625 C=60 M=90 Y=0 K=0 CMYK PROCESS 58.593750 88.671875 1.953125 0.000000 Adobe PDF library 10.01 endstream endobj 86 0 obj <> endobj 87 0 obj <>stream H:w|(#wM endstream endobj 88 0 obj <>stream H@t؅5eYy3 v+*a׊^D H&Zӆӡ.Xhܣq2xCArޔf9-x %k(1B)IAPNJtjtQ+u>stream nBoCpDqDrEtFuFvGwHxHyIzJ{K|K}L~MNڀNہOۂP܃Q܅Q܆R݇S݈TމTފUދVߌWߍXߎXYZ[\]]^_`abbcdefghhijklmnoopqrstu endstream endobj 90 0 obj <> endobj 91 0 obj <> endobj 92 0 obj <>stream H:`u~(#-ZA endstream endobj 93 0 obj <>stream `:b:c;dj?k@l@nAoBpCqDsDtEuFvGxHyIzJ{J}K~LMځNڂOۃPۄQ܆R܇S݈T݉UދVތWލWߏXߐYZ[\]^_`bcdefghijklmnopqstu endstream endobj 94 0 obj <> endobj 95 0 obj <> endobj 96 0 obj <> endobj 97 0 obj <> endobj 1 0 obj <>/ExtGState<>/Font<>/ProcSet[/PDF/Text]/Properties<>>>/Rotate 0/Tabs/W/Thumb 20 0 R/TrimBox[0.0 0.0 396.0 612.0]/Type/Page>> endobj 2 0 obj [3 0 R 4 0 R 5 0 R 6 0 R 7 0 R] endobj 3 0 obj <>/Border[0 0 0]/H/N/Rect[224.132 75.553 276.902 65.725]/Subtype/Link/Type/Annot>> endobj 4 0 obj <>/Border[0 0 0]/H/N/Rect[224.132 63.553 314.713 53.725]/Subtype/Link/Type/Annot>> endobj 5 0 obj <>/Border[0 0 0]/H/N/Rect[224.132 51.553 316.569 41.725]/Subtype/Link/Type/Annot>> endobj 6 0 obj <>/Border[0 0 0]/H/N/Rect[219.481 31.2827 266.124 21.3498]/Subtype/Link/Type/Annot>> endobj 7 0 obj <>/Border[0 0 0]/H/N/Rect[273.161 31.2827 289.173 21.3498]/Subtype/Link/Type/Annot>> endobj 8 0 obj <>stream HW[sH~W)#[T65Ʃa3AIItKc5k>}.߹?QbR 0oMߩդ㇪KS#$BHygA>s<#Xh93έ6Cz:Z?/ Y' cHZ+` >g< 7a;:'-- p r-`2k#T ;{1|<uzΪ~=<7Go:ތWժNId:C.3oռ5b0͸MVvu{yt'︬/|4_WF' &|1OF"LƋ^hGڨ"Y K(B^vyjm}|&jW]j:GvA7ڋRMRѳ[~/jؠ落J1B _Pˤݬ\---G`1ϋ`̶m {~9ٶW򡂟W_}3#$8c\}A+; \!|فCW=#[BAS%+: sX_'Md ]b?j<w>3Ðal' !.1>ue/ReO'LccT`c-+wK%;:k%{>^ Dȴ$ht;iT+ȉAԵQ/ G `ob.u.01fѳoDHi% iPTa #(!C%&"=>OnpP pE.^k&k{`Q4^Y6=]dwًX6B" ǟJ c͟SpO.@S5.^NhG'OKrRM@N#K7 v;H]( w)|[!ȆvozXF'!gc]k?hIXj_pL!'.J(@DfưQ9A/%+iiZJ pfV=5jbbJ_m0eŠ bʖ;XncIPPRr0]g2b4LUCR1{M1AM&O]$@[Q $fmX`B.'1NK66;p2eNRED^ 9%h~uXoǬO+vXa:3vRZ`_|)W5O]`lـ)&EURZ@Mm,ϗzʄZ f=ASb7 g6vTnTr|Js3-u3f@dMęG`_zP=1H׃(1B[Ϳٯ9+xﯨizh$Q /DhR!|GˬEZ LWTx䭗Y9cAp2_#*KeS<Ë3i,Z5`q\)^~y:cqH5;Ďxx QD|ZlM?xۣ ֗@zk5./_ͭ, ެtٙ=}p}lhVi~/lnh.tghyg_][w/ 6pg1nA0<1}= ڍN Fزcqv]<*Ko,orݛ<>7OsѣGdAmwtqO}bu~UQ{m39ϮJhtFJ9C ŋ-fxŠ;Z9*ʔ. 
- ӫ7Z3`3۵wkޮvK == W(U#T"EmhWX$r[ wxr,k5=rg]`[ec;P_!/Nwo^߽:.>{,~܁'\m|{tcugZ(aBz2`NNjAx\^.dɖs;캑ә(@X#9ev+/*65tG\92l5=D{m.j`͹8e@yƳv,651lRqTB^il$MhQOa.0x@Kq(4`1I{|'Ʃ\Xxqb,P&JFZt]r V4$HB])6'ƒŋ_Ar#B(Bpg,7X@+F3/x:Z[p.<_6w0VE+Y8)'+dTف٘LѮ_lS)+Z:^=h=EGD>!V>F<_!B!\P=zxG<lgReKi4B۳EĬm>i 2, &3IFҏc_ 4x1i4ް 0,ՓXVlv.SyCK v)hxwlU3߿eX3t"y-IXVvaEp[0jIK_@^ 6bDfl8(.Kp5>&1b 5T#$+̙ OlDr @Vd yThRcgi1/ @xw1e\}ђۘ:9QJHI2"Sg+ADrrR%z)OpXƢsO^ 憒n8Avc7矋EYUwIڂ M\9:Лw .CIdQ~60v6 MGjZs-CpOw9cz[^l6)!)'3hivyBQ#,ٝI)f2%!x^2PnB) JTxPR~01#E-q5&f#̀{4E/ !Eo@PZ9D+Tc /l-ۖiMfB]8ȾܷyHnN#P<<;WHNqyD/&RsvD6rñ+!wr)P6Ťj$ Vޔ#FD; (vƆ)YF]/})z ,(Ed`,Yk \xδt,dU"K.{S&a)LGN܋+RB#`SHcYe̜]/R.Cj 6YT6Az^arQW|[d E F?QX]3x CϽY)^#]cfddRa!m> ˻^"JGR4rV.90엪 ͙@ArfC"ZM"sG d.!A4 7 iGB J H2;Ml(8 tQɌZ~(4x28/LVk`*l{b%q+i(uy DؿbJБCH7kb~ Ԥ߻ nE~`Uw)DkR".FZӔ##ҧv9Ǟ>wJ{&ҭc&T!-rJ+.sv uUzeK^Q !`1wE"kA4wu験FVV?̈Tzna .t,d%&mlc?ݶ(W̢ r!)l'@EcɉbY6$9A=v >=ף8ØNG˲R6E'FBI4,³#{\{)^mj:/L0Ib%k 9k9QށzJ!Uwdx9{KarA&~kKt!@,(TAߥUQ-~ ,HbZIAOhGY Nq…QqT,g (HmHF!Ftj2 Pݡ lP T)g=.ln</N}ҸmڽW\Qք|i{Ttn # $ס EZP*DWi-L\JB5): Z1텴:20j;\ _ #7V04䊭TY,a0; } 84,u Q,Ax4Q0P",l]$k*;V3ln VrLbburkjdR` o'M 7}wldS5ڪV>{B/cT+yR"-_:BJtdYi!ݙɼ;bOU PbسUJ_qJ!/o)`~c\H}ұ6zV( ҤN dL_t:KR wKh?,S`b+uni>kL8Nˢa_+—%l>dz[e&gl4"" fr8)Kɟ9@' _y KrNjq^$eD/+e*~4>ؒ"kCO&|xݤM{/fz>Φ7٪|{2r҆JZ/=(9BLu]aXSc.&i={z癎?=x:Eӳz Okw+ŁU( KƘz,@>s fL԰kwPq`ܟכf65ƌrC8+,1] >(@jgWo΂4UDq[IqR[t(f\oN 招r{iٙE\22tw.bG ?Pͤ*8ؔ$8]_\E2,gx`qVYSMr4s{Ù/Os|JxWGr|)QL]LqQ,ZJ~DD1K}W؟qm o*Dߘs9`>͆OpJKa~*Zp{:~+\V:ky\2_?> v "hK%f_|9vl@('Jm9^j%S]W8KUcbti9Utj~&M6}#WN ^j<\ W'q'NJ% N[=2xenk>k/{ 2i\Pg:.%5\<kY\`E4צS[Ee,򗙹uH%s)NUdVHH|IvF{,`8QMkn(k(dfOp84*a93*%fB%MAAd(t[s=h m.HHP/t/DJ+ym;7CaSXo#=+(X endstream endobj 9 0 obj <>stream application/postscript Print Eric Delin 2016-03-31T15:16:01-04:00 2016-03-31T15:16:01-04:00 2016-03-31T15:16:01-04:00 Adobe Illustrator CC 2015 (Macintosh) xmp.iid:69c3ba62-7e14-44dd-87bc-12655b8182eb xmp.did:69c3ba62-7e14-44dd-87bc-12655b8182eb uuid:5D20892493BFDB11914A8590D31508C8 proof:pdf xmp.iid:83f15bff-bca7-4ffc-a0db-8af97042a358 xmp.did:83f15bff-bca7-4ffc-a0db-8af97042a358 uuid:5D20892493BFDB11914A8590D31508C8 proof:pdf saved xmp.iid:9ac37b17-29bc-4c4b-b4c4-d43d91673115 2016-03-31T14:24:12-04:00 Adobe Illustrator CC 2015 (Macintosh) / saved xmp.iid:c0421c2b-8001-4ff9-9e38-53c41fcddbe4 2016-03-31T14:35:53-04:00 Adobe Illustrator CC 2015 (Macintosh) / converted from application/postscript to application/vnd.adobe.illustrator saved xmp.iid:4843f457-4aa0-4063-b4f9-670eeb2926c9 2016-03-31T14:43:41-04:00 Adobe Illustrator CC 2015 (Macintosh) / saved xmp.iid:69c3ba62-7e14-44dd-87bc-12655b8182eb 2016-03-31T15:16:01-04:00 Adobe Illustrator CC 2015 (Macintosh) / converted from application/postscript to application/vnd.adobe.illustrator Print False False 1 6.000000 1.925946 Inches Default Swatch Group 0 White CMYK PROCESS 0.000000 0.000000 0.000000 0.000000 Black CMYK PROCESS 70.312500 66.406250 64.453125 74.218750 CMYK Red CMYK PROCESS 2.734375 97.656250 94.140625 0.000000 CMYK Yellow CMYK PROCESS 5.468750 0.000000 94.531250 0.000000 CMYK Green CMYK PROCESS 83.203125 8.203125 97.265625 0.390625 CMYK Cyan CMYK PROCESS 70.312500 16.796875 0.000000 0.000000 CMYK Blue CMYK PROCESS 98.437500 93.750000 8.203125 0.781250 CMYK Magenta CMYK PROCESS 3.125000 98.828125 3.515625 0.000000 C=15 M=100 Y=90 K=10 CMYK PROCESS 18.359375 99.218750 88.281250 8.203125 C=0 M=90 Y=85 K=0 CMYK PROCESS 0.000000 89.843750 84.765625 0.000000 C=0 M=80 Y=95 K=0 CMYK PROCESS 0.000000 79.687500 93.359375 0.000000 C=0 M=50 Y=100 K=0 CMYK PROCESS 1.171875 51.171875 97.656250 0.000000 C=0 M=35 
Y=85 K=0 CMYK PROCESS 1.562500 35.937500 84.765625 0.000000 C=5 M=0 Y=90 K=0 CMYK PROCESS 6.250000 1.171875 92.187500 0.000000 C=20 M=0 Y=100 K=0 CMYK PROCESS 20.703125 0.781250 98.828125 0.000000 C=50 M=0 Y=100 K=0 CMYK PROCESS 50.781250 1.171875 98.828125 0.000000 C=75 M=0 Y=100 K=0 CMYK PROCESS 75.390625 1.953125 100.000000 0.000000 C=85 M=10 Y=100 K=10 CMYK PROCESS 85.937500 17.968750 100.000000 4.296875 C=90 M=30 Y=95 K=30 CMYK PROCESS 89.843750 33.203125 97.656250 25.390625 C=75 M=0 Y=75 K=0 CMYK PROCESS 76.171875 2.343750 74.218750 0.000000 C=80 M=10 Y=45 K=0 CMYK PROCESS 79.296875 12.109375 45.312500 0.390625 C=70 M=15 Y=0 K=0 CMYK PROCESS 70.703125 18.359375 0.000000 0.000000 C=85 M=50 Y=0 K=0 CMYK PROCESS 84.765625 49.609375 2.343750 0.000000 C=100 M=95 Y=5 K=0 CMYK PROCESS 98.828125 91.406250 8.984375 0.781250 C=100 M=100 Y=25 K=25 CMYK PROCESS 100.000000 97.656250 31.640625 23.437500 C=75 M=100 Y=0 K=0 CMYK PROCESS 75.000000 97.265625 5.468750 0.390625 C=50 M=100 Y=0 K=0 CMYK PROCESS 52.343750 98.828125 5.078125 0.390625 C=35 M=100 Y=35 K=10 CMYK PROCESS 36.718750 100.000000 35.156250 9.375000 C=10 M=100 Y=50 K=0 CMYK PROCESS 10.546875 99.218750 46.093750 0.390625 C=0 M=95 Y=20 K=0 CMYK PROCESS 2.343750 94.531250 20.312500 0.000000 C=25 M=25 Y=40 K=0 CMYK PROCESS 25.781250 25.781250 41.015625 0.000000 C=40 M=45 Y=50 K=5 CMYK PROCESS 40.234375 45.312500 48.046875 6.640625 C=50 M=50 Y=60 K=25 CMYK PROCESS 51.562500 51.171875 60.937500 23.046875 C=55 M=60 Y=65 K=40 CMYK PROCESS 53.125000 60.546875 64.453125 39.062500 C=25 M=40 Y=65 K=0 CMYK PROCESS 24.218750 40.625000 64.062500 1.953125 C=30 M=50 Y=75 K=10 CMYK PROCESS 31.640625 48.046875 75.781250 9.765625 C=35 M=60 Y=80 K=25 CMYK PROCESS 36.718750 58.593750 80.468750 24.609375 C=40 M=65 Y=90 K=35 CMYK PROCESS 39.453125 62.890625 89.062500 34.765625 C=40 M=70 Y=100 K=50 CMYK PROCESS 42.187500 67.968750 93.750000 48.046875 C=50 M=70 Y=80 K=70 CMYK PROCESS 51.953125 69.531250 77.343750 69.140625 PANTONE 361 C SPOT 100.000000 LAB 63.137257 -51 53 PANTONE 7546 C SPOT 100.000000 LAB 21.960800 -4 -12 Grays 1 C=0 M=0 Y=0 K=100 CMYK PROCESS 70.312500 66.406250 64.453125 74.218750 C=0 M=0 Y=0 K=90 CMYK PROCESS 67.578125 59.765625 58.984375 44.921875 C=0 M=0 Y=0 K=80 CMYK PROCESS 62.890625 55.859375 51.562500 26.953125 C=0 M=0 Y=0 K=70 CMYK PROCESS 57.812500 48.828125 46.484375 14.453125 C=0 M=0 Y=0 K=60 CMYK PROCESS 51.953125 43.359375 41.015625 6.640625 C=0 M=0 Y=0 K=50 CMYK PROCESS 46.093750 36.718750 36.328125 1.953125 C=0 M=0 Y=0 K=40 CMYK PROCESS 37.890625 29.687500 28.515625 0.000000 C=0 M=0 Y=0 K=30 CMYK PROCESS 27.734375 21.093750 20.703125 0.000000 C=0 M=0 Y=0 K=20 CMYK PROCESS 17.578125 12.890625 13.281250 0.000000 C=0 M=0 Y=0 K=10 CMYK PROCESS 8.984375 6.250000 7.421875 0.000000 C=0 M=0 Y=0 K=5 CMYK PROCESS 3.906250 2.734375 2.734375 0.000000 Brights 1 C=0 M=100 Y=100 K=0 CMYK PROCESS 2.734375 97.656250 94.140625 0.000000 C=0 M=75 Y=100 K=0 CMYK PROCESS 1.171875 73.437500 97.265625 0.000000 C=0 M=10 Y=95 K=0 CMYK PROCESS 3.125000 9.375000 100.000000 0.000000 C=85 M=10 Y=100 K=0 CMYK PROCESS 83.593750 10.546875 100.000000 0.781250 C=100 M=90 Y=0 K=0 CMYK PROCESS 98.046875 84.375000 7.421875 0.390625 C=60 M=90 Y=0 K=0 CMYK PROCESS 58.984375 88.281250 2.734375 0.000000 Adobe PDF library 10.01 endstream endobj 10 0 obj <> endobj 11 0 obj <> endobj 12 0 obj <> endobj 13 0 obj <> endobj 14 0 obj <> endobj 15 0 obj <> endobj 16 0 obj [/Indexed/DeviceRGB 255 17 0 R] endobj 17 0 obj <>stream 
dask-0.16.0/docs/source/dataframe-api.rst000066400000000000000000000154341320364734500202130ustar00rootroot00000000000000API --- .. currentmodule:: dask.dataframe Dataframe ~~~~~~~~~ ..
autosummary:: DataFrame DataFrame.add DataFrame.append DataFrame.apply DataFrame.assign DataFrame.astype DataFrame.categorize DataFrame.columns DataFrame.compute DataFrame.corr DataFrame.count DataFrame.cov DataFrame.cummax DataFrame.cummin DataFrame.cumprod DataFrame.cumsum DataFrame.describe DataFrame.div DataFrame.drop DataFrame.drop_duplicates DataFrame.dropna DataFrame.dtypes DataFrame.fillna DataFrame.floordiv DataFrame.get_partition DataFrame.groupby DataFrame.head DataFrame.index DataFrame.iterrows DataFrame.itertuples DataFrame.join DataFrame.known_divisions DataFrame.loc DataFrame.map_partitions DataFrame.mask DataFrame.max DataFrame.mean DataFrame.merge DataFrame.min DataFrame.mod DataFrame.mul DataFrame.ndim DataFrame.nlargest DataFrame.npartitions DataFrame.pow DataFrame.quantile DataFrame.query DataFrame.radd DataFrame.random_split DataFrame.rdiv DataFrame.rename DataFrame.repartition DataFrame.reset_index DataFrame.rfloordiv DataFrame.rmod DataFrame.rmul DataFrame.rpow DataFrame.rsub DataFrame.rtruediv DataFrame.sample DataFrame.set_index DataFrame.std DataFrame.sub DataFrame.sum DataFrame.tail DataFrame.to_bag DataFrame.to_csv DataFrame.to_delayed DataFrame.to_hdf DataFrame.to_records DataFrame.truediv DataFrame.values DataFrame.var DataFrame.visualize DataFrame.where Series ~~~~~~ .. autosummary:: Series Series.add Series.align Series.all Series.any Series.append Series.apply Series.astype Series.autocorr Series.between Series.bfill Series.cat Series.clear_divisions Series.clip Series.clip_lower Series.clip_upper Series.compute Series.copy Series.corr Series.count Series.cov Series.cummax Series.cummin Series.cumprod Series.cumsum Series.describe Series.diff Series.div Series.drop_duplicates Series.dropna Series.dt Series.dtype Series.eq Series.ffill Series.fillna Series.first Series.floordiv Series.ge Series.get_partition Series.groupby Series.gt Series.head Series.idxmax Series.idxmin Series.isin Series.isnull Series.iteritems Series.known_divisions Series.last Series.le Series.loc Series.lt Series.map Series.map_overlap Series.map_partitions Series.mask Series.max Series.mean Series.memory_usage Series.min Series.mod Series.mul Series.nbytes Series.ndim Series.ne Series.nlargest Series.notnull Series.nsmallest Series.nunique Series.nunique_approx Series.persist Series.pipe Series.pow Series.prod Series.quantile Series.radd Series.random_split Series.rdiv Series.reduction Series.repartition Series.resample Series.reset_index Series.rolling Series.round Series.sample Series.sem Series.shift Series.size Series.std Series.str Series.sub Series.sum Series.to_bag Series.to_csv Series.to_delayed Series.to_frame Series.to_hdf Series.to_parquet Series.to_string Series.to_timestamp Series.truediv Series.unique Series.value_counts Series.values Series.var Series.visualize Series.where Groupby Operations ~~~~~~~~~~~~~~~~~~ .. currentmodule:: dask.dataframe.groupby .. autosummary:: DataFrameGroupBy.aggregate DataFrameGroupBy.apply DataFrameGroupBy.count DataFrameGroupBy.cumcount DataFrameGroupBy.cumprod DataFrameGroupBy.cumsum DataFrameGroupBy.get_group DataFrameGroupBy.max DataFrameGroupBy.mean DataFrameGroupBy.min DataFrameGroupBy.size DataFrameGroupBy.std DataFrameGroupBy.sum DataFrameGroupBy.var .. 
autosummary:: SeriesGroupBy.aggregate SeriesGroupBy.apply SeriesGroupBy.count SeriesGroupBy.cumcount SeriesGroupBy.cumprod SeriesGroupBy.cumsum SeriesGroupBy.get_group SeriesGroupBy.max SeriesGroupBy.mean SeriesGroupBy.min SeriesGroupBy.nunique SeriesGroupBy.size SeriesGroupBy.std SeriesGroupBy.sum SeriesGroupBy.var Rolling Operations ~~~~~~~~~~~~~~~~~~ .. currentmodule:: dask.dataframe .. autosummary:: rolling.map_overlap rolling.rolling_apply rolling.rolling_count rolling.rolling_kurt rolling.rolling_max rolling.rolling_mean rolling.rolling_median rolling.rolling_min rolling.rolling_quantile rolling.rolling_skew rolling.rolling_std rolling.rolling_sum rolling.rolling_var rolling.rolling_window Create DataFrames ~~~~~~~~~~~~~~~~~ .. currentmodule:: dask.dataframe .. autosummary:: read_csv read_table read_parquet read_hdf read_sql_table from_array from_bcolz from_dask_array from_delayed from_pandas dask.bag.core.Bag.to_dataframe Store DataFrames ~~~~~~~~~~~~~~~~ .. autosummary:: to_csv to_parquet to_hdf to_records to_bag to_delayed DataFrame Methods ~~~~~~~~~~~~~~~~~ .. currentmodule:: dask.dataframe .. autoclass:: DataFrame :members: :inherited-members: Series Methods ~~~~~~~~~~~~~~ .. autoclass:: Series :members: :inherited-members: .. currentmodule:: dask.dataframe.groupby DataFrameGroupBy ~~~~~~~~~~~~~~~~ .. autoclass:: DataFrameGroupBy :members: :inherited-members: SeriesGroupBy ~~~~~~~~~~~~~ .. autoclass:: SeriesGroupBy :members: :inherited-members: Storage and Conversion ~~~~~~~~~~~~~~~~~~~~~~ .. currentmodule:: dask.dataframe .. autofunction:: read_csv .. autofunction:: read_table .. autofunction:: read_parquet .. autofunction:: read_hdf .. autofunction:: read_sql_table .. autofunction:: from_array .. autofunction:: from_pandas .. autofunction:: from_bcolz .. autofunction:: from_dask_array .. autofunction:: from_delayed .. autofunction:: to_delayed .. autofunction:: to_records .. autofunction:: to_csv .. autofunction:: to_bag .. autofunction:: to_hdf .. autofunction:: to_parquet Rolling ~~~~~~~ .. currentmodule:: dask.dataframe.rolling .. autofunction:: rolling_apply .. autofunction:: map_overlap .. autofunction:: rolling_count .. autofunction:: rolling_kurt .. autofunction:: rolling_max .. autofunction:: rolling_mean .. autofunction:: rolling_median .. autofunction:: rolling_min .. autofunction:: rolling_quantile .. autofunction:: rolling_skew .. autofunction:: rolling_std .. autofunction:: rolling_sum .. autofunction:: rolling_var .. autofunction:: rolling_window Other functions ~~~~~~~~~~~~~~~ .. currentmodule:: dask.dataframe .. autofunction:: compute .. autofunction:: map_partitions .. currentmodule:: dask.dataframe.multi .. autofunction:: concat .. autofunction:: merge dask-0.16.0/docs/source/dataframe-create.rst000066400000000000000000000110611320364734500206750ustar00rootroot00000000000000Create and Store Dask DataFrames ================================ Dask can create dataframes from various data storage formats like CSV, HDF, Apache Parquet, and others. For most formats this data can live on various storage systems including local disk, network file systems (NFS), the Hadoop File System (HDFS), and Amazon's S3 (excepting HDF, which is only available on POSIX like file systems). See the `Overview section `_ for an in depth discussion of ``dask.dataframe`` scope, use, limitations. API --- The following functions provide access to convert between Dask Dataframes, file formats, and other Dask or Python collections. .. currentmodule:: dask.dataframe File Formats: .. 
autosummary:: read_csv read_parquet read_hdf read_sql_table from_bcolz from_array to_csv to_parquet to_hdf Dask Collections: .. autosummary:: from_delayed from_dask_array dask.bag.core.Bag.to_dataframe to_delayed to_records to_bag Pandas: .. autosummary:: from_pandas Locations --------- For text, CSV, and Apache Parquet formats data can come from local disk, from the Hadoop File System, from S3FS, or others, by prepending the filenames with a protocol. .. code-block:: python >>> df = dd.read_csv('my-data-*.csv') >>> df = dd.read_csv('hdfs:///path/to/my-data-*.csv') >>> df = dd.read_csv('s3://bucket-name/my-data-*.csv') For remote systems like HDFS or S3 credentials may be an issue. Usually these are handled by configuration files on disk (such as a ``.boto`` file for S3) but in some cases you may want to pass storage-specific options through to the storage backend. You can do this with the ``storage_options=`` keyword. .. code-block:: python >>> df = dd.read_csv('s3://bucket-name/my-data-*.csv', ... storage_options={'anon': True}) Dask Delayed ------------ For more complex situations not covered by the functions above you may want to use :doc:`dask.delayed`, which lets you construct Dask.dataframes out of arbitrary Python function calls that load dataframes. This can allow you to handle new formats easily, or bake in particular logic around loading data if, for example, your data is stored with some special format. See :doc:`documentation on using dask.delayed with collections` or an `example notebook `_ showing how to create a Dask DataFrame from a nested directory structure of Feather files (as a stand-in for any custom file format). Dask.delayed is particularly useful when simple ``map`` operations aren't sufficient to capture the complexity of your data layout. From Raw Dask Graphs -------------------- This section is mainly for developers wishing to extend dask.dataframe. It discusses internal API not normally needed by users. Everything below can be done just as effectively with :doc:`dask.delayed` described just above. You should never need to create a dataframe object by hand. To construct a DataFrame manually from a dask graph you need the following information: 1. dask: a dask graph with keys like ``{(name, 0): ..., (name, 1): ...}`` as well as any other tasks on which those tasks depend. The tasks corresponding to ``(name, i)`` should produce ``pandas.DataFrame`` objects that correspond to the columns and divisions information discussed below. 2. name: The special name used above 3. columns: A list of column names 4. divisions: A list of index values that separate the different partitions. Alternatively, if you don't know the divisions (this is common) you can provide a list of ``[None, None, None, ...]`` with as many partitions as you have plus one. For more information see the Partitions section in the :doc:`dataframe documentation `. As an example, we build a DataFrame manually that reads several CSV files that have a datetime index separated by day. Note, you should never do this. The ``dd.read_csv`` function does this for you. ..
code-block:: Python dsk = {('mydf', 0): (pd.read_csv, 'data/2000-01-01.csv'), ('mydf', 1): (pd.read_csv, 'data/2000-01-02.csv'), ('mydf', 2): (pd.read_csv, 'data/2000-01-03.csv')} name = 'mydf' columns = ['price', 'name', 'id'] divisions = [Timestamp('2000-01-01 00:00:00'), Timestamp('2000-01-02 00:00:00'), Timestamp('2000-01-03 00:00:00'), Timestamp('2000-01-03 23:59:59')] df = dd.DataFrame(dsk, name, columns, divisions) dask-0.16.0/docs/source/dataframe-design.rst000066400000000000000000000160471320364734500207140ustar00rootroot00000000000000Internal Design =============== Dask dataframes coordinate many Pandas DataFrames/Series arranged along an index. We define a ``dask.dataframe`` object with the following components: - A dask graph with a special set of keys designating partitions, such as ``('x', 0), ('x', 1), ...``. - A name to identify which keys in the dask graph refer to this dataframe, such as ``'x'``. - An empty pandas object containing appropriate metadata (e.g. column names, dtypes, etc...). - A sequence of partition boundaries along the index, called ``divisions``. Metadata -------- Many dataframe operations rely on knowing the name and dtype of columns. To keep track of this information, all ``dask.dataframe`` objects have a ``_meta`` attribute which contains an empty pandas object with the same dtypes and names. For example: .. code-block:: python >>> df = pd.DataFrame({'a': [1, 2, 3], 'b': ['x', 'y', 'z']}) >>> ddf = dd.from_pandas(df, npartitions=2) >>> ddf._meta Empty DataFrame Columns: [a, b] Index: [] >>> ddf._meta.dtypes a int64 b object dtype: object Internally ``dask.dataframe`` does its best to propagate this information through all operations, so most of the time a user shouldn't have to worry about this. Usually this is done by evaluating the operation on a small sample of fake data, which can be found on the ``_meta_nonempty`` attribute: .. code-block:: python >>> ddf._meta_nonempty a b 0 1 foo 1 1 foo Sometimes this operation may fail in user defined functions (e.g. when using ``DataFrame.apply``), or may be prohibitively expensive. For these cases, many functions support an optional ``meta`` keyword, which allows specifying the metadata directly, avoiding the inference step. For convenience, this supports several options: 1. A pandas object with appropriate dtypes and names. If not empty, an empty slice will be taken: .. code-block:: python >>> ddf.map_partitions(foo, meta=pd.DataFrame({'a': [1], 'b': [2]})) 2. A description of the appropriate names and dtypes. This can take several forms: * A ``dict`` of ``{name: dtype}`` or an iterable of ``(name, dtype)`` specifies a dataframe * A tuple of ``(name, dtype)`` specifies a series * A dtype object or string (e.g. ``'f8'``) specifies a scalar This keyword is available on all functions/methods that take user provided callables (e.g. ``DataFrame.map_partitions``, ``DataFrame.apply``, etc...), as well as many creation functions (e.g. ``dd.from_delayed``). Categoricals ------------ Dask dataframe divides `categorical data`_ into two types: - Known categoricals have the ``categories`` known statically (on the ``_meta`` attribute). Each partition **must** have the same categories as found on the ``_meta`` attribute. - Unknown categoricals don't know the categories statically, and may have different categories in each partition. Internally, unknown categoricals are indicated by the presence of ``dd.utils.UNKNOWN_CATEGORIES`` in the categories on the ``_meta`` attribute. 
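A minimal sketch of how this looks in practice (added here as an illustration; it uses only the attributes described above, and the column name is arbitrary):

.. code-block:: python

   import pandas as pd
   import dask.dataframe as dd
   from dask.dataframe.utils import UNKNOWN_CATEGORIES

   ddf = dd.from_pandas(pd.DataFrame({'col': list('abca')}), npartitions=2)

   # astype is lazy, so the result is an *unknown* categorical
   ddf = ddf.astype({'col': 'category'})

   # the metadata carries only the placeholder category, not the real values
   print(UNKNOWN_CATEGORIES in ddf._meta.col.cat.categories)   # expected: True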
Since most dataframe operations propagate the categories, the known/unknown status should propagate through operations (similar to how ``NaN`` propagates). For metadata specified as a description (option 2 above), unknown categoricals are created. Certain operations are only available for known categoricals. For example, ``df.col.cat.categories`` would only work if ``df.col`` has known categories, since the categorical mapping is only known statically on the metadata of known categoricals. The known/unknown status for a categorical column can be found using the ``known`` property on the categorical accessor: .. code-block:: python >>> ddf.col.cat.known False Additionally, an unknown categorical can be converted to known using ``.cat.as_known()``. If you have multiple categorical columns in a dataframe, you may instead want to use ``df.categorize(columns=...)``, which will convert all specified columns to known categoricals. Since getting the categories requires a full scan of the data, using ``df.categorize()`` is more efficient than calling ``.cat.as_known()`` for each column (which would result in multiple scans). .. code-block:: python >>> col_known = ddf.col.cat.as_known() # use for single column >>> col_known.cat.known True >>> ddf_known = ddf.categorize() # use for multiple columns >>> ddf_known.col.cat.known True To convert a known categorical to an unknown categorical, there is also the ``.cat.as_unknown()`` method. This requires no computation, as it's just a change in the metadata. Non-categorical columns can be converted to categoricals in a few different ways: .. code-block:: python # astype operates lazily, and results in unknown categoricals ddf = ddf.astype({'mycol': 'category', ...}) # or ddf['mycol'] = ddf.mycol.astype('category') # categorize requires computation, and results in known categoricals ddf = ddf.categorize(columns=['mycol', ...]) Additionally, with pandas 0.19.2 and up ``dd.read_csv`` and ``dd.read_table`` can read data directly into unknown categorical columns by specifying a column dtype as ``'category'``: .. code-block:: python >>> ddf = dd.read_csv(..., dtype={col_name: 'category'}) .. _`categorical data`: http://pandas.pydata.org/pandas-docs/stable/categorical.html Partitions ---------- Internally a dask dataframe is split into many partitions, and each partition is one pandas dataframe. These dataframes are split vertically along the index. When our index is sorted and we know the values of the divisions of our partitions, then we can be clever and efficient with expensive algorithms (e.g. groupby's, joins, etc...). For example, if we have a time-series index then our partitions might be divided by month. All of January will live in one partition while all of February will live in the next. In these cases operations like ``loc``, ``groupby``, and ``join/merge`` along the index can be *much* more efficient than would otherwise be possible in parallel. You can view the number of partitions and divisions of your dataframe with the following fields: .. code-block:: python >>> df.npartitions 4 >>> df.divisions ['2015-01-01', '2015-02-01', '2015-03-01', '2015-04-01', '2015-04-31'] Divisions includes the minimum value of every partition's index and the maximum value of the last partition's index. In the example above if the user searches for a specific datetime range then we know which partitions we need to inspect and which we can drop: .. 
code-block:: python >>> df.loc['2015-01-20': '2015-02-10'] # Must inspect first two partitions Often we do not have such information about our partitions. When reading CSV files for example we do not know, without extra user input, how the data is divided. In this case ``.divisions`` will be all ``None``: .. code-block:: python >>> df.divisions [None, None, None, None, None] In these cases any operation that requires a cleanly partitioned dataframe with known divisions will have to perform a sort. This can generally achieved by calling ``df.set_index(...)``. dask-0.16.0/docs/source/dataframe-groupby.rst000066400000000000000000000132171320364734500211260ustar00rootroot00000000000000Shuffling for GroupBy and Join ============================== .. currentmodule:: dask.dataframe Operations like ``groupby``, ``join``, and ``set_index`` have special performance considerations that are different from normal Pandas due to the parallel, larger-than-memory, and distributed nature of dask.dataframe. Easy Case --------- To start off, common groupby operations like ``df.groupby(columns).reduction()`` for known reductions like ``mean, sum, std, var, count, nunique`` are all quite fast and efficient, even if partitions are not cleanly divided with known divisions. This is the common case. Additionally, if divisions are known then applying an arbitrary function to groups is efficient when the grouping columns include the index. Joins are also quite fast when joining a Dask dataframe to a Pandas dataframe or when joining two Dask dataframes along their index. No special considerations need to be made when operating in these common cases. So if you're doing common groupby and join operations then you can stop reading this. Everything will scale nicely. Fortunately this is true most of the time. .. code-block:: python >>> df.groupby(columns).known_reduction() # Fast and common case >>> df.groupby(columns_with_index).apply(user_fn) # Fast and common case >>> dask_df.join(pandas_df, on=column) # Fast and common case Difficult Cases --------------- In some cases, such as when applying an arbitrary function to groups (when not grouping on index with known divisions), when joining along non-index columns, or when explicitly setting an unsorted column to be the index, we may need to trigger a full dataset shuffle .. code-block:: python >>> df.groupby(columns_no_index).apply(user_fn) # Requires shuffle >>> lhs.join(rhs, on=column) # Requires shuffle >>> df.set_index(column) # Requires shuffle A shuffle is necessary when we need to re-sort our data along a new index. For example if we have banking records that are organized by time and we now want to organize them by user ID then we'll need to move a lot of data around. In Pandas all of this data fit in memory, so this operation was easy. Now that we don't assume that all data fits in memory we must be a bit more careful. Re-sorting the data can be avoided by restricting yourself to the easy cases mentioned above. Shuffle Methods --------------- There are currently two strategies to shuffle data depending on whether you are on a single machine or on a distributed cluster. Shuffle on Disk ``````````````` When operating on larger-than-memory data on a single machine we shuffle by dumping intermediate results to disk. This is done using the partd_ project for on-disk shuffles. .. _partd: https://github.com/dask/partd Shuffle over the Network ```````````````````````` When operating on a distributed cluster the Dask workers may not have access to a shared hard drive. 
In this case we shuffle data by breaking input partitions into many pieces based on where they will end up and moving these pieces throughout the network. This prolific expansion of intermediate partitions can stress the task scheduler. To manage for many-partitioned datasets this we sometimes shuffle in stages, causing undue copies but reducing the ``n**2`` effect of shuffling to something closer to ``n log(n)`` with ``log(n)`` copies. Selecting methods ````````````````` Dask will use on-disk shuffling by default but will switch to task-based distributed shuffling if the default scheduler is set to use a ``dask.distributed.Client`` such as would be the case if the user sets the Client as default using one of the following two options: .. code-block:: python client = Client('scheduler:8786', set_as_default=True) or dask.set_options(get=client.get) Alternatively, if you prefer to avoid defaults, you can specify a ``method=`` keyword argument to ``groupby`` or ``set_index`` .. code-block:: python df.set_index(column, method='disk') df.set_index(column, method='tasks') Aggregate ========= Dask support Pandas' ``aggregate`` syntax to run multiple reductions on the same groups. Common reductions, such as ``max``, ``sum``, ``mean`` are directly supported: .. code-block:: python >>> df.groupby(columns).aggregate(['sum', 'mean', 'max', 'min']) Dask also supports user defined reductions. To ensure proper performance, the reduction has to be formulated in terms of three independent steps. The ``chunk`` step is applied to each partition independently and reduces the data within a partition. The ``aggregate`` combines the within partition results. The optional ``finalize`` step combines the results returned from the ``aggregate`` step and should return a single final column. For Dask to recognize the reduction, it has to be passed as an instance of ``dask.dataframe.Aggregation``. For example, ``sum`` could be implemented as .. code-block:: python custom_sum = dd.Aggregation('custom_sum', lambda s: s.sum(), lambda s0: s0.sum()) df.groupby('g').agg(custom_sum) The name argument should be different from existing reductions to avoid data corruption. The arguments to each function are pre-grouped series objects, similar to ``df.groupby('g')['value']``. Many reductions can only be implemented with multiple temporaries. To implement these reductions, the steps should return tuples and expect multiple arguments. A mean function can be implemented as .. code-block:: python custom_mean = dd.Aggregation( 'custom_mean', lambda s: (s.count(), s.sum()), lambda count, sum: (count.sum(), sum.sum()), lambda count, sum: sum / count, ) df.groupby('g').agg(custom_mean) dask-0.16.0/docs/source/dataframe-overview.rst000066400000000000000000000135001320364734500213000ustar00rootroot00000000000000Overview ======== Dask Dataframe implements a subset of the Pandas Dataframe interface using blocked algorithms, cutting up the large DataFrame into many small Pandas DataFrames. This lets us compute on dataframes that are larger than memory using all of our cores or on many dataframes spread across a cluster. One operation on a dask.dataframe triggers many operations on the constituent Pandas dataframes. Design ------ .. image:: images/dask-dataframe.svg :alt: Dask DataFrames coordinate many Pandas DataFrames :align: right :width: 40% Dask dataframes coordinate many Pandas DataFrames/Series arranged along the index. Dask.dataframe is partitioned *row-wise*, grouping rows by index value for efficiency. 
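A minimal sketch of this partitioning (an added illustration, not an excerpt from the original overview), built with ``dd.from_pandas``:

.. code-block:: python

   import pandas as pd
   import dask.dataframe as dd

   pdf = pd.DataFrame({'x': range(8)}, index=range(8))
   ddf = dd.from_pandas(pdf, npartitions=4)

   # the dask dataframe is just four pandas dataframes split along the index
   print(ddf.npartitions)
   print(ddf.divisions)    # index values at which the partitions are cut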
These Pandas objects may live on disk or on other machines. Common Uses and Anti-Uses ------------------------- Dask.dataframe is particularly useful in the following situations: * Manipulating large datasets on a single machine, even when those datasets don't fit comfortably into memory. * Fast computation on large workstation machines by parallelizing many Pandas calls across many cores. * Distributed computing of very large tables stored in the Hadoop File System (HDFS), S3, or other parallel file systems. * Parallel groupby, join, or time series computations However in the following situations Dask.dataframe may not be the best choice: * If your dataset fits comfortably into RAM on your laptop then you may be better off just using Pandas_. There may be simpler ways to improve performance than through parallelism. * If your dataset doesn't fit neatly into the Pandas tabular model then you might find more use in :doc:`dask.bag ` or :doc:`dask.array ` * If you need functions that are not implemented in dask.dataframe then you might want to look at :doc:`dask.delayed ` which offers more flexibility. * If you need a proper database with all that databases offer you might prefer something like Postgres_ .. _Pandas: https://pandas.pydata.org/ .. _Postgres: https://www.postgresql.org/ Dask.dataframe copies the pandas API ------------------------------------ Because the ``dask.dataframe`` application programming interface (API) is a subset of the pandas API it should be familiar to pandas users. There are some slight alterations due to the parallel nature of dask: .. code-block:: python >>> import dask.dataframe as dd >>> df = dd.read_csv('2014-*.csv') >>> df.head() x y 0 1 a 1 2 b 2 3 c 3 4 a 4 5 b 5 6 c >>> df2 = df[df.y == 'a'].x + 1 As with all dask collections (for example Array, Bag, DataFrame) one triggers computation by calling the ``.compute()`` method: .. code-block:: python >>> df2.compute() 0 2 3 5 Name: x, dtype: int64 Scope ----- Dask.dataframe covers a small but well-used portion of the pandas API. This limitation is for two reasons: 1. The pandas API is *huge* 2. Some operations are genuinely hard to do in parallel (for example sort). Additionally, some important operations like ``set_index`` work, but are slower than in pandas because they may write out to disk. 
The following class of computations works well: * Trivially parallelizable operations (fast): * Elementwise operations: ``df.x + df.y``, ``df * df`` * Row-wise selections: ``df[df.x > 0]`` * Loc: ``df.loc[4.0:10.5]`` * Common aggregations: ``df.x.max()``, ``df.max()`` * Is in: ``df[df.x.isin([1, 2, 3])]`` * Datetime/string accessors: ``df.timestamp.month`` * Cleverly parallelizable operations (fast): * groupby-aggregate (with common aggregations): ``df.groupby(df.x).y.max()``, ``df.groupby('x').max()`` * groupby-apply on index: ``df.groupby(['idx', 'x']).apply(myfunc)``, where ``idx`` is the index level name * value_counts: ``df.x.value_counts()`` * Drop duplicates: ``df.x.drop_duplicates()`` * Join on index: ``dd.merge(df1, df2, left_index=True, right_index=True)`` * Join with Pandas DataFrames: ``dd.merge(df1, df2, on='id')`` * Elementwise operations with different partitions / divisions: ``df1.x + df2.y`` * Datetime resampling: ``df.resample(...)`` * Rolling averages: ``df.rolling(...)`` * Pearson Correlations: ``df[['col1', 'col2']].corr()`` * Operations requiring a shuffle (slow-ish, unless on index) * Set index: ``df.set_index(df.x)`` * groupby-apply not on index (with anything): ``df.groupby(df.x).apply(myfunc)`` * Join not on the index: ``dd.merge(df1, df2, on='name')`` See :doc:`DataFrame API documentation` for a more extensive list. Execution --------- By default ``dask.dataframe`` uses the multi-threaded scheduler. This exposes some parallelism when pandas or the underlying numpy operations release the global interpreter lock (GIL). Generally pandas is more GIL bound than NumPy, so multi-core speed-ups are not as pronounced for ``dask.dataframe`` as they are for ``dask.array``. This is changing, and the pandas development team is actively working on releasing the GIL. In some cases you may experience speedups by switching to the multiprocessing or distributed scheduler. .. code-block:: python >>> dask.set_options(get=dask.multiprocessing.get) See :doc:`scheduler docs` for more information. Limitations ----------- Dask.DataFrame does not implement the entire Pandas interface. Users expecting this will be disappointed. Notably, dask.dataframe has the following limitations: 1. Setting a new index from an unsorted column is expensive 2. Many operations, like groupby-apply and join on unsorted columns require setting the index, which as mentioned above, is expensive 3. The Pandas API is very large. Dask.dataframe does not attempt to implement many pandas features or any of the more exotic data structures like NDFrames dask-0.16.0/docs/source/dataframe-performance.rst000066400000000000000000000207361320364734500217440ustar00rootroot00000000000000Dask DataFrame Performance Tips =============================== Use Pandas ---------- For data that fits into RAM, Pandas can often be faster and easier to use than Dask.dataframe. While "Big Data" tools can be exciting, they are almost always worse than normal data tools while those remain appropriate. Pandas Performance Tips Apply to Dask.dataframe ------------------------------------------------ Normal Pandas performance tips, like avoiding apply, using vectorized operations, using categoricals, etc. all apply equally to Dask.dataframe. See `Modern Pandas `_ by `Tom Augspurger `_ is a good read here. Use the Index ------------- Dask.dataframe can be optionally sorted along a single index column. Some operations against this column can be very fast. 
For example if your dataset is sorted by time you can quickly select data for a particular day, perform time series joins, etc. You can check if your data is sorted by looking at the ``df.known_divisions`` attribute. You can set an index column using the ``.set_index(columnname)`` method. This operation is expensive though, so use it sparingly (see below). .. code-block:: python df = df.set_index('timestamp') # set the index to make some operations fast df.loc['2001-01-05':'2001-01-12'] # this is very fast if you have an index df.merge(df2, left_index=True, right_index=True) # this is also very fast Avoid Shuffles -------------- Setting an index is an important (see above) but expensive operation. You should do it infrequently and you should persist afterwards (see below). Some operations like ``set_index`` and ``merge/join`` are harder to do in a parallel or distributed setting than they are in-memory on a single machine. In particular *shuffling operations* that rearrange data become much more communication intensive. For example if your data is arranged by customer ID but now you want to arrange it by time all of your partitions will have to talk to each other to exchange shards of data. This can be an intense process, particularly on a cluster. So definitely set the index, but try to do so infrequently. After you set the index then you may want to ``persist`` your data if you are on a cluster. .. code-block:: python df = df.set_index('column-name') # do this infrequently Additionally, set_index has a few options that can accelerate it in some situations. For example if you know that your dataset is sorted or you already know the values by which it is divided you can provide these to accelerate the set_index operation. See the `set_index docstring `_ for more information. .. code-block:: python df2 = df.set_index(df.timestamp, sorted=True) Persist Intelligently --------------------- *This section is only relevant to users on distributed systems.* Often dataframe workloads look like the following: 1. Load data from files 2. Filter data to a particular subset 3. Shuffle data to set an intelligent index 4. Several complex queries on top of this indexed data It is often ideal to load, filter, and shuffle data once and keep this result in memory. Afterwards each of the several complex queries can be based off of this in-memory data rather than have to repeat the full load-filter-shuffle process each time. To do this, use the `client.persist `_ method. .. code-block:: python df = dd.read_csv('s3://bucket/path/to/*.csv') df = df[df.balance < 0] df = client.persist(df) df = df.set_index('timestamp') df = client.persist(df) >>> df.customer_id.nunique().compute() 18452844 >>> df.groupby(df.city).size().compute() ... Persist is important because Dask.dataframe is *lazy by default*. Persist is a way of telling the cluster that it should start computing on the computations that you have defined so far and that it should try to keep those results in memory. You will get back a new dataframe that is semantically equivalent to your old dataframe, but now points to running data. Your old dataframe still points to lazy computations. .. code-block:: python # Don't do this client.persist(df) # Persist doesn't change the input in-place # Do this instead df = client.persist(df) # Replace your old lazy dataframe Repartition to Reduce Overhead ------------------------------ Your Dask.dataframe is split up into many Pandas dataframes. We sometimes call these "partitions".
Often the number of partitions is decided for you; for example it might be the number of CSV files from which you are reading. However over time as you reduce or increase the size of your pandas dataframes by filtering or joining it may be wise to reconsider how many partitions you need. There is a cost to having too many or having too few. Partitions should fit comfortably in memory (smaller than a gigabyte) but also not be too numerous. Every operation on every partition takes the central scheduler a few hundred microseconds to process. If you have a few thousand tasks this is barely noticeable, but it is nice to reduce the number if possible. A common situation is that you load lots of data into reasonably sized partitions (dask's defaults make decent choices) but then you filter down your dataset to only a small fraction of the original. At this point it is wise to regroup your many small partitions into a few larger ones. You can do this with the ``repartition`` method: .. code-block:: python df = dd.read_csv('s3://bucket/path/to/*.csv') df = df[df.name == 'Alice'] # only 1/100th of the data df = df.repartition(npartitions=df.npartitions // 100) df = client.persist(df) # if on a distributed system This helps to reduce overhead and increase the effectiveness of vectorized Pandas operations. You should aim for partitions that have around 100MB of data each. Additionally, reducing partitions is very helpful just before shuffling, which creates ``n log(n)`` tasks relative to the number of partitions. Dataframes with fewer than 100 partitions are much easier to shuffle than dataframes with tens of thousands. Joins ----- Joining two dataframes can be either very expensive or very cheap depending on the situation. It is cheap in the following cases: 1. Joining a Dask.dataframe with a Pandas dataframe 2. Joining a Dask.dataframe with a Dask.dataframe of a single partition. 3. Joining Dask.dataframes along their indexes It is expensive in the following case: 1. Joining Dask.dataframes along columns that are not their index The expensive case requires a shuffle. This is fine, and Dask.dataframe will complete the job well, but it will be more expensive than a typical linear-time operation. .. code-block:: python dd.merge(a, pandas_df) # fast dd.merge(a, b, left_index=True, right_index=True) # fast dd.merge(a, b, left_index=True, right_on='id') # half-fast, half-slow dd.merge(a, b, left_on='id', right_on='id') # slow Store Data in Apache Parquet Format ----------------------------------- HDF5 is a popular choice for Pandas users with high performance needs. We encourage Dask.dataframe users to `store and load data `_ using Parquet instead. `Apache Parquet `_ is a columnar binary format that is easy to split into multiple files (easier for parallel loading) and is generally much simpler to deal with than HDF5 (from the library's perspective). It is also a common format used by other big data systems like `Apache Spark `_ and `Apache Impala (incubating) `_ and so is useful to interchange with other systems. .. code-block:: python df.to_parquet('path/to/my-results/') df = dd.read_parquet('path/to/my-results/') Dask supports reading with multiple implementations of the Apache Parquet format for Python. .. code-block:: python df1 = dd.read_parquet('path/to/my-results/', engine='fastparquet') df2 = dd.read_parquet('path/to/my-results/', engine='arrow') These libraries can be installed using ..
code-block:: shell conda install fastparquet pyarrow -c conda-forge Fastparquet is a Python-based implementation that uses the `Numba `_ Python-to-LLVM compiler. PyArrow is part of the `Apache Arrow `_ project and uses the `C++ implementation of Apache Parquet `_. dask-0.16.0/docs/source/dataframe.rst000066400000000000000000000017261320364734500174430ustar00rootroot00000000000000DataFrame ========= A Dask DataFrame is a large parallel dataframe composed of many smaller Pandas dataframes, split along the index. These pandas dataframes may live on disk for larger-than-memory computing on a single machine, or on many different machines in a cluster. Dask.dataframe implements a commonly used subset of the Pandas_ interface including elementwise operations, reductions, grouping operations, joins, timeseries algorithms, and more. It copies the Pandas interface for these operations exactly and so should be very familiar to Pandas users. Because Dask.dataframe operations merely coordinate Pandas operations they usually exhibit similar performance characteristics as are found in Pandas. .. _Pandas: http://pandas.pydata.org/ .. toctree:: :maxdepth: 1 dataframe-overview.rst dataframe-create.rst dataframe-api.rst dataframe-performance.rst Other topics .. toctree:: :maxdepth: 1 dataframe-design.rst dataframe-groupby.rst dask-0.16.0/docs/source/debugging.rst000066400000000000000000000224741320364734500174550ustar00rootroot00000000000000Debugging ========= Debugging parallel programs is hard. Normal debugging tools like logging and using ``pdb`` to interact with tracebacks stop working normally when exceptions occur in far-away machines or different processes or threads. Dask has a variety of mechanisms to make this process easier. Depending on your situation some of these approaches may be more appropriate than others. These approaches are ordered from lightweight or easy solutions to more involved solutions. Exceptions ---------- When a task in your computation fails the standard way of understanding what went wrong is to look at the exception and traceback. Often people do this with the ``pdb`` module, IPython ``%debug`` or ``%pdb`` magics, or by just looking at the traceback and investigating where in their code the exception occurred. Normally when a computation runs in a separate thread or on a different machine these approaches break down. Dask provides a few mechanisms to recreate the normal Python debugging experience. Inspect Exceptions and Tracebacks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ By default, Dask already copies the exception and traceback wherever they occur and reraises that exception locally. If your task failed with a ``ZeroDivisionError`` remotely then you'll get a ``ZeroDivisionError`` in your interactive session. Similarly you'll see a full traceback of where this error occurred, which, just like in normal Python, can help you to identify the troublesome spot in your code. However, you cannot use the ``pdb`` module or ``%debug`` IPython magics with these tracebacks to look at the value of variables during failure. You can only inspect things visually. Additionally, the top of the traceback may be filled with functions that are dask-specific and not relevant to your problem; you can safely ignore these. Both the single-machine and distributed schedulers do this. Use the Single-Threaded Scheduler ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Dask ships with a simple single-threaded scheduler.
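One common way to select it (a sketch based on the ``get=`` keyword used elsewhere in these docs, not an excerpt from this page; ``x`` stands in for any dask collection):

.. code-block:: python

   import dask
   from dask.local import get_sync   # the single-threaded, synchronous scheduler

   x.compute(get=get_sync)           # run this one computation in the local thread
   dask.set_options(get=get_sync)    # or make it the default for everything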
This doesn't offer any parallel performance improvements, but does run your Dask computation faithfully in your local thread, allowing you to use normal tools like ``pdb``, ``%debug`` IPython magics, the profiling tools like the ``cProfile`` module and `snakeviz `_. This allows you to use all of your normal Python debugging tricks in Dask computations, as long as you don't need parallelism. This only works for single-machine schedulers. It does not work with dask.distributed unless you are comfortable using the Tornado API (look at the `testing infrastructure `_ docs, which accomplish this). Also, because this operates on a single machine it assumes that your computation can run on a single machine without exceeding memory limits. It may be wise to use this approach on smaller versions of your problem if possible. Rerun Failed Task Locally ~~~~~~~~~~~~~~~~~~~~~~~~~ If a remote task fails, we can collect the function and all inputs, bring them to the local thread, and then rerun the function in hopes of triggering the same exception locally, where normal debugging tools can be used. With the single machine schedulers, use the ``rerun_exceptions_locally=True`` keyword. .. code-block:: python x.compute(rerun_exceptions_locally=True) On the distributed scheduler use the ``recreate_error_locally`` method on anything that contains ``Futures`` : .. code-block:: python >>> x.compute() ZeroDivisionError(...) >>> %pdb >>> future = client.compute(x) >>> client.recreate_error_locally(future) Remove Failed Futures Manually ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Sometimes only parts of your computations fail, for example if some rows of a CSV dataset are faulty in some way. When running with the distributed scheduler you can remove chunks of your data that have produced bad results if you switch to dealing with Futures. .. code-block:: python >>> import dask.dataframe as dd >>> df = ... # create dataframe >>> df = df.persist() # start computing on the cluster >>> from distributed.client import futures_of >>> futures = futures_of(df) # get futures behind dataframe >>> futures [ ] >>> # wait until computation is done >>> while any(f.status == 'pending' for f in futures): ... sleep(0.1) >>> # pick out only the successful futures and reconstruct the dataframe >>> good_futures = [f for f in futures if f.status == 'finished'] >>> df = dd.from_delayed(good_futures, meta=df._meta) This is a bit of a hack, but often practical when first exploring messy data. If you are using the concurrent.futures API (map, submit, gather) then this approach is more natural. Inspect Scheduling State ------------------------ Not all errors present themselves as Exceptions. For example in a distributed system workers may die unexpectedly or your computation may be unreasonably slow due to inter-worker communication or scheduler overhead or one of several other issues. Getting feedback about what's going on can help to identify both failures and general performance bottlenecks. For the single-machine scheduler see :doc:`diagnostics ` documentation. The rest of the section will assume that you are using the `distributed scheduler `_ where these issues arise more commonly. Web Diagnostics ~~~~~~~~~~~~~~~ First, the distributed scheduler has a number of `diagnostic web pages `_ showing dozens of recorded metrics like CPU, memory, network, and disk use, a history of previous tasks, allocation of tasks to workers, worker memory pressure, work stealing, open file handle limits, etc.. 
*Many* problems can be correctly diagnosed by inspecting these pages. By default these are available at ``http://scheduler:8787/`` ``http://scheduler:8788/`` and ``http://worker:8789/`` where ``scheduler`` and ``worker`` should be replaced by the addresses of the scheduler and each of the workers. See `web diagnostic docs `_ for more information. Logs ~~~~ The scheduler and workers and client all emits logs using `Python's standard logging module `_. By default these emit to standard error. When Dask is launched by a cluster job scheduler (SGE/SLURM/YARN/Mesos/Marathon/Kubernetes/whatever) that system will track these logs and will have an interface to help you access them. If you are launching Dask on your own they will probably dump to the screen unless you `redirect stderr to a file `_ . You can control the logging verbosity in the ``~/.dask/config.yaml`` file. Defaults currently look like the following: .. code-block:: yaml logging: distributed: info distributed.client: warning bokeh: error So for example you could add a line like ``distributed.worker: debug`` to get *very* verbose output from the workers. LocalCluster ------------ If you are using the distributed scheduler from a single machine you may be setting up workers manually using the command line interface or you may be using `LocalCluster `_ which is what runs when you just call ``Client()`` .. code-block:: python >>> from dask.distributed import Client, LocalCluster >>> client = Client() # This is actually the following two commands >>> cluster = LocalCluster() >>> client = Client(cluster.scheduler.address) LocalCluster is useful because the scheduler and workers are in the same process with you, so you can easily inspect their `state `_ while they run (they are running in a separate thread). .. code-block:: python >>> cluster.scheduler.processing {'worker-one:59858': {'inc-123', 'add-443'}, 'worker-two:48248': {'inc-456'}} You can also do this for the workers *if* you run them without nanny processes. .. code-block:: python >>> cluster = LocalCluster(nanny=False) >>> client = Client(cluster) This can be very helpful if you want to use the dask.distributed API and still want to investigate what is going on directly within the workers. Information is not distilled for you like it is in the web diagnostics, but you have full low-level access. Inspect state with IPython -------------------------- Sometimes you want to inspect the state of your cluster, but you don't have the luxury of operating on a single machine. In these cases you can launch an IPython kernel on the scheduler and on every worker, which lets you inspect state on the scheduler and workers as computations are completing. This does not give you the ability to run ``%pdb`` or ``%debug`` on remote machines, the tasks are still running in separate threads, and so are not easily accessible from an interactive IPython session. For more details, see the `Dask.distributed IPython docs `_. dask-0.16.0/docs/source/delayed-api.rst000066400000000000000000000001411320364734500176630ustar00rootroot00000000000000API === .. currentmodule:: dask.delayed .. autosummary:: delayed .. 
autofunction:: delayed dask-0.16.0/docs/source/delayed-collections.rst000066400000000000000000000027301320364734500214360ustar00rootroot00000000000000Working with Collections ======================== Often we want to do a bit of custom work with ``dask.delayed`` (for example for complex data ingest), then leverage the algorithms in ``dask.array`` or ``dask.dataframe``, and then switch back to custom work. To this end, all collections support ``from_delayed`` functions and ``to_delayed`` methods. As an example, consider the case where we store tabular data in a custom format not known by ``dask.dataframe``. This format is naturally broken apart into pieces and we have a function that reads one piece into a Pandas DataFrame. We use ``dask.delayed`` to lazily read these files into Pandas DataFrames, use ``dd.from_delayed`` to wrap these pieces up into a single ``dask.dataframe``, use the complex algorithms within ``dask.dataframe`` (groupby, join, etc..) and then switch back to delayed to save our results back to the custom format. .. code-block:: python import dask.dataframe as dd from dask.delayed import delayed from my_custom_library import load, save filenames = ... dfs = [delayed(load)(fn) for fn in filenames] df = dd.from_delayed(dfs) df = ... # do work with dask.dataframe dfs = df.to_delayed() writes = [delayed(save)(df, fn) for df, fn in zip(dfs, filenames)] dd.compute(*writes) Data science is often complex, ``dask.delayed`` provides a release valve for users to manage this complexity on their own, and solve the last mile problem for custom formats and complex situations. dask-0.16.0/docs/source/delayed-overview.rst000066400000000000000000000074071320364734500207740ustar00rootroot00000000000000Overview ======== Motivation and Example ---------------------- Dask.delayed lets you parallelize custom code. It is useful whenever your problem doesn't quite fit a high-level parallel object like dask.array or dask.dataframe but could still benefit from parallelism. Dask.delayed works by delaying your function evaluations and putting them into a dask graph. Dask.delayed is useful when wrapping existing code or when handling non-standard problems. Consider the following example: .. code-block:: python def inc(x): return x + 1 def double(x): return x + 2 def add(x, y): return x + y data = [1, 2, 3, 4, 5] output = [] for x in data: a = inc(x) b = double(x) c = add(a, b) output.append(c) total = sum(output) As written this code runs sequentially in a single thread. However we see that a lot of this could be executed in parallel. We use the ``delayed`` function to parallelize this code by turning it into a dask graph. We slightly modify our code by wrapping functions in ``delayed``. This delays the execution of the function and generates a dask graph instead. .. code-block:: python from dask import delayed output = [] for x in data: a = delayed(inc)(x) b = delayed(double)(x) c = delayed(add)(a, b) output.append(c) total = delayed(sum)(output) We used the ``delayed`` function to wrap the function calls that we want to turn into tasks. None of the ``inc``, ``double``, ``add`` or ``sum`` calls have happened yet, instead the object ``total`` is a ``Delayed`` result that contains a task graph of the entire computation. Looking at the graph we see clear opportunities for parallel execution. The dask schedulers will exploit this parallelism, generally improving performance. (although not in this example, because these functions are already very small and fast.) .. 
code-block:: python total.visualize() # see image to the right .. image:: images/delayed-inc-double-add.svg :align: right :alt: simple task graph created with dask.delayed We can now compute this lazy result to execute the graph in parallel: .. code-block:: python >>> total.compute() 45 Delayed Function ---------------- The ``dask.delayed`` interface consists of one function, ``delayed``: - ``delayed`` wraps functions Wraps functions. Can be used as a decorator, or around function calls directly (i.e. ``delayed(foo)(a, b, c)``). Outputs from functions wrapped in ``delayed`` are proxy objects of type ``Delayed`` that contain a graph of all operations done to get to this result. - ``delayed`` wraps objects Wraps objects. Used to create ``Delayed`` proxies directly. ``Delayed`` objects can be thought of as representing a key in the dask. A ``Delayed`` supports *most* python operations, each of which creates another ``Delayed`` representing the result: - Most operators (``*``, ``-``, and so on) - Item access and slicing (``a[0]``) - Attribute access (``a.size``) - Method calls (``a.index(0)``) Operations that aren't supported include: - Mutating operators (``a += 1``) - Mutating magics such as ``__setitem__``/``__setattr__`` (``a[0] = 1``, ``a.foo = 1``) - Iteration. (``for i in a: ...``) - Use as a predicate (``if a: ...``) The last two points in particular mean that ``Delayed`` objects cannot be used for control flow, meaning that no ``Delayed`` can appear in a loop or if statement. In other words you can't iterate over a ``Delayed`` object, or use it as part of a condition in an if statement, but ``Delayed`` object can be used in a body of a loop or if statement (i.e. the example above is fine, but if ``data`` was a ``Delayed`` object it wouldn't be). Even with this limitation, many workflows can easily be parallelized. dask-0.16.0/docs/source/delayed.rst000066400000000000000000000012261320364734500171210ustar00rootroot00000000000000Delayed ======= Sometimes problems don't fit into one of the collections like ``dask.array`` or ``dask.dataframe``. In these cases, users can parallelize custom algorithms using the simpler ``dask.delayed`` interface. This allows one to create graphs directly with a light annotation of normal python code. .. code-block:: python >>> x = dask.delayed(inc)(1) >>> y = dask.delayed(inc)(2) >>> z = dask.delayed(add)(x, y) >>> z.compute() 7 >>> z.vizualize() .. image:: images/inc-add.svg :alt: simple task graph created with dask.delayed .. toctree:: :maxdepth: 1 delayed-overview.rst delayed-api.rst delayed-collections.rst dask-0.16.0/docs/source/develop.rst000066400000000000000000000164451320364734500171610ustar00rootroot00000000000000Development Guidelines ====================== Dask is a community maintained project. We welcome contributions in the form of bug reports, documentation, code, design proposals, and more. This page provides resources on how best to contribute. Where to ask for help --------------------- Dask conversation happens in the following places: 1. `StackOverflow #dask tag`_: for usage questions 2. `Github Issue Tracker`_: for discussions around new features or established bugs 3. `Gitter chat`_: for real-time discussion For usage questions and bug reports we strongly prefer the use of StackOverflow and Github issues over gitter chat. Github and StackOverflow are more easily searchable by future users and so is more efficient for everyone's time. Gitter chat is generally reserved for community discussion. .. 
_`StackOverflow #dask tag`: http://stackoverflow.com/questions/tagged/dask .. _`Github Issue Tracker`: https://github.com/dask/dask/issues/ .. _`Gitter chat`: https://gitter.im/dask/dask Separate Code Repositories -------------------------- Dask maintains code and documentation in a few git repositories hosted on the Github ``dask`` organization, http://github.com/dask. This includes the primary repository and several other repositories for different components. A non-exhaustive list follows: * http://github.com/dask/dask: The main code repository holding parallel algorithms, the single-machine scheduler, and most documentation. * http://github.com/dask/distributed: The distributed memory scheduler * http://github.com/dask/hdfs3: Hadoop Filesystem interface * http://github.com/dask/s3fs: S3 Filesystem interface * http://github.com/dask/dask-ec2: AWS launching * ... Git and Github can be challenging at first. Fortunately good materials exist on the internet. Rather than repeat these materials here we refer you to Pandas' documentation and links on this subject at http://pandas.pydata.org/pandas-docs/stable/contributing.html Issues ------ The community discusses and tracks known bugs and potential features in the `Github Issue Tracker`_. If you have a new idea or have identified a bug then you should raise it there to start public discussion. If you are looking for an introductory issue to get started with development then check out the `introductory label`_, which contains issues that are good for starting developers. Generally familiarity with Python, NumPy, Pandas, and some parallel computing are assumed. .. _`introductory label`: https://github.com/dask/dask/issues?q=is%3Aissue+is%3Aopen+label%3Aintroductory Development Environment ----------------------- Download code ~~~~~~~~~~~~~ Clone the main dask git repository (or whatever repository you're working on.):: git clone git@github.com:dask/dask.git Install ~~~~~~~ You may want to install larger dependencies like NumPy and Pandas using a binary package manager, like conda_. You can skip this step if you already have these libraries, don't care to use them, or have sufficient build environment on your computer to compile them when installing with ``pip``:: conda install -y numpy pandas scipy bokeh cytoolz pytables h5py .. _conda: http://conda.pydata.org/docs/ Install dask and dependencies:: cd dask pip install -e .[complete] For development dask uses the following additional dependencies:: pip install pytest moto mock Run Tests ~~~~~~~~~ Dask uses py.test_ for testing. You can run tests from the main dask directory as follows:: py.test dask --verbose .. _py.test: http://pytest.org/latest/ Contributing to Code -------------------- Dask maintains development standards that are similar to most PyData projects. These standards include language support, testing, documentation, and style. Python Versions ~~~~~~~~~~~~~~~ Dask supports Python versions 2.7, 3.3, 3.4, and 3.5 in a single codebase. Name changes are handled by the :file:`dask/compatibility.py` file. Test ~~~~ Dask employs extensive unit tests to ensure correctness of code both for today and for the future. Test coverage is expected for all code contributions. Tests are written in a py.test style with bare functions. .. 
code-block:: python def test_fibonacci(): assert fib(0) == 0 assert fib(1) == 0 assert fib(10) == 55 assert fib(8) == fib(7) + fib(6) for x in [-3, 'cat', 1.5]: with pytest.raises(ValueError): fib(x) These tests should compromise well between covering all branches and fail cases and running quickly (slow test suites get run less often.) You can run tests locally by running ``py.test`` in the local dask directory:: py.test dask --verbose You can also test certain modules or individual tests for faster response:: py.test dask/dataframe --verbose py.test dask/dataframe/tests/test_dataframe_core.py::test_set_index Tests run automatically on the Travis.ci continuous testing framework on every push to every pull request on GitHub. Docstrings ~~~~~~~~~~ User facing functions should roughly follow the numpydoc_ standard, including sections for ``Parameters``, ``Examples`` and general explanatory prose. By default examples will be doc-tested. Reproducible examples in documentation is valuable both for testing and, more importantly, for communication of common usage to the user. Documentation trumps testing in this case and clear examples should take precedence over using the docstring as testing space. To skip a test in the examples add the comment ``# doctest: +SKIP`` directly after the line. .. code-block:: python def fib(i): """ A single line with a brief explanation A more thorough description of the function, consisting of multiple lines or paragraphs. Parameters ---------- i: int A short description of the argument if not immediately clear Examples -------- >>> fib(4) 3 >>> fib(5) 5 >>> fib(6) 8 >>> fib(-1) # Robust to bad inputs ValueError(...) """ .. _numpydoc: https://github.com/numpy/numpy/blob/master/doc/HOWTO_DOCUMENT.rst.txt Docstrings are currently tested under Python 2.7 on travis.ci. You can test docstrings with pytest as follows:: py.test dask --doctest-modules Docstring testing requires graphviz to be installed. This can be done via:: conda install -y graphviz Style ~~~~~ Dask verifies style uniformity with the ``flake8`` tool.:: pip install flake8 flake8 dask Changelog ~~~~~~~~~ Every significative code contribution should be listed in the :doc:`changelog` under the corresponding version. When submitting a Pull Request in Github please add to that file explaining what was added/modified. Contributing to Documentation ----------------------------- Dask uses Sphinx_ for documentation, hosted on http://readthedocs.org . Documentation is maintained in the RestructuredText markup language (``.rst`` files) in ``dask/docs/source``. The documentation consists both of prose and API documentation. To build the documentation locally, first install requirements:: cd docs/ pip install -r requirements-docs.txt Then build documentation with ``make``:: make html The resulting HTML files end up in the ``build/html`` directory. You can now make edits to rst files and run ``make html`` again to update the affected pages. .. _Sphinx: http://www.sphinx-doc.org/ dask-0.16.0/docs/source/diagnostics.rst000066400000000000000000000231171320364734500200240ustar00rootroot00000000000000Diagnostics =========== Profiling parallel code can be tricky, but ``dask.diagnostics`` provides functionality to aid in profiling and inspecting dask graph execution. Scheduler Callbacks ------------------- Schedulers based on ``dask.local.get_async`` (currently ``dask.get``, ``dask.threaded.get``, and ``dask.multiprocessing.get``) accept five callbacks, allowing for inspection of scheduler execution. The callbacks are: 1. 
``start(dsk)`` Run at the beginning of execution, right before the state is initialized. Receives the dask graph. 2. ``start_state(dsk, state)`` Run at the beginning of execution, right after the state is initialized. Receives the dask graph and scheduler state. 3. ``pretask(key, dsk, state)`` Run every time a new task is started. Receives the key of the task to be run, the dask graph, and the scheduler state. 4. ``posttask(key, result, dsk, state, id)`` Run every time a task is finished. Receives the key of the task that just completed, the result, the dask graph, the scheduler state, and the id of the worker that ran the task. 5. ``finish(dsk, state, errored)`` Run at the end of execution, right before the result is returned. Receives the dask graph, the scheduler state, and a boolean indicating whether or not the exit was due to an error. These are internally represented as tuples of length 5, stored in the order presented above. Callbacks for common use cases are provided in ``dask.diagnostics``. Progress Bar ------------ The ``ProgressBar`` class builds on the scheduler callbacks described above to display a progress bar in the terminal or notebook during computation. This can be a nice feedback during long running graph execution. It can be used as a context manager around calls to ``get`` or ``compute`` to profile the computation: .. code-block:: python >>> from dask.diagnostics import ProgressBar >>> a = da.random.normal(size=(10000, 10000), chunks=(1000, 1000)) >>> res = a.dot(a.T).mean(axis=0) >>> with ProgressBar(): ... out = res.compute() [########################################] | 100% Completed | 17.1 s Or registered globally using the ``register`` method. .. code-block:: python >>> pbar = ProgressBar() >>> pbar.register() >>> out = res.compute() [########################################] | 100% Completed | 17.1 s To unregister from the global callbacks, call the ``unregister`` method: .. code-block:: python >>> pbar.unregister() Profiling --------- Dask provides a few tools for profiling execution. As with the ``ProgressBar``, they each can be used as context managers, or registered globally. Profiler ^^^^^^^^ The ``Profiler`` class is used to profile dask execution at the task level. During execution it records the following information for each task: 1. Key 2. Task 3. Start time in seconds since the epoch 4. Finish time in seconds since the epoch 5. Worker id ResourceProfiler ^^^^^^^^^^^^^^^^ The ``ResourceProfiler`` class is used to profile dask execution at the resource level. During execution it records the following information for each timestep 1. Time in seconds since the epoch 2. Memory usage in MB 3. % CPU usage The default timestep is 1 second, but can be set manually using the ``dt`` keyword. .. code-block:: python >>> from dask.diagnostics import ResourceProfiler >>> rprof = ResourceProfiler(dt=0.5) CacheProfiler ^^^^^^^^^^^^^ The ``CacheProfiler`` class is used to profile dask execution at the scheduler cache level. During execution it records the following information for each task: 1. Key 2. Task 3. Size metric 4. Cache entry time in seconds since the epoch 5. Cache exit time in seconds since the epoch Where the size metric is the output of a function called on the result of each task. The default metric is to count each task (``metric`` is 1 for all tasks). Other functions may be used as a metric instead through the ``metric`` keyword. 
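Any one-argument callable can serve as the metric. As a rough sketch, the builtin ``sys.getsizeof`` could stand in as a crude size estimate (it only measures the top-level object, so treat this as an illustration rather than an accurate memory measure):

.. code-block:: python

    >>> import sys
    >>> from dask.diagnostics import CacheProfiler
    >>> size_prof = CacheProfiler(metric=sys.getsizeof)  # throwaway profiler for this sketch

A metric that understands nested objects gives more meaningful numbers.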
For example, the ``nbytes`` function found in ``cachey`` can be used to measure the number of bytes in the scheduler cache: .. code-block:: python >>> from dask.diagnostics import CacheProfiler >>> from cachey import nbytes >>> cprof = CacheProfiler(metric=nbytes) Example ^^^^^^^ As an example to demonstrate using the diagnostics, we'll profile some linear algebra done with ``dask.array``. We'll create a random array, take its QR decomposition, and then reconstruct the initial array by multiplying the Q and R components together. Note that since the profilers (and all diagnostics) are just context managers, multiple profilers can be used in a with block: .. code-block:: python >>> import dask.array as da >>> from dask.diagnostics import Profiler, ResourceProfiler, CacheProfiler >>> a = da.random.random(size=(10000, 1000), chunks=(1000, 1000)) >>> q, r = da.linalg.qr(a) >>> a2 = q.dot(r) >>> with Profiler() as prof, ResourceProfiler(dt=0.25) as rprof, ... CacheProfiler() as cprof: ... out = a2.compute() The results of each profiler are stored in their ``results`` attribute as a list of ``namedtuple`` objects: .. code-block:: python >>> prof.results[0] TaskData(key=('tsqr-8d16e396b237bf7a731333130d310cb9_QR_st1', 5, 0), task=(qr, (_apply_random, 'random_sample', 1060164455, (1000, 1000), (), {})), start_time=1454368444.493292, end_time=1454368444.902987, worker_id=4466937856) >>> rprof.results[0] ResourceData(time=1454368444.078748, mem=74.100736, cpu=0.0) >>> cprof.results[0] CacheData(key=('tsqr-8d16e396b237bf7a731333130d310cb9_QR_st1', 7, 0), task=(qr, (_apply_random, 'random_sample', 1310656009, (1000, 1000), (), {})), metric=1, cache_time=1454368444.49662, free_time=1454368446.769452) These can be analyzed separately, or viewed in a bokeh plot using the provided ``visualize`` method on each profiler: .. code-block:: python >>> prof.visualize() .. raw:: html To view multiple profilers at the same time, the ``dask.diagnostics.visualize`` function can be used. This takes a list of profilers, and creates a vertical stack of plots aligned along the x-axis: .. code-block:: python >>> from dask.diagnostics import visualize >>> visualize([prof, rprof, cprof]) .. raw:: html Looking at the above figure, from top to bottom: 1. The results from the ``Profiler`` object. This shows the execution time for each task as a rectangle, organized along the y-axis by worker (in this case threads). Similar tasks are grouped by color, and by hovering over each task one can see the key and task that each block represents. 2. The results from the ``ResourceProfiler`` object. This shows two lines, one for total CPU percentage used by all the workers, and one for total memory usage. 3. The results from the ``CacheProfiler`` object. This shows a line for each task group, plotting the sum of the current ``metric`` in the cache against time. In this case it's the default metric (count), and the lines represent the number of each object in the cache at time. Note that the grouping and coloring is the same as for the ``Profiler`` plot, and that the task represented by each line can be found by hovering over the line. From these plots we can see that the initial tasks (calls to ``numpy.random.random`` and ``numpy.linalg.qr`` for each chunk) are run concurrently, but only use slightly more than 100\% CPU. This is because the call to ``numpy.linalg.qr`` currently doesn't release the global interpreter lock, so those calls can't truly be done in parallel. Next, there's a reduction step where all the blocks are combined. 
This requires all the results from the first step to be held in memory, as shown by the increased number of results in the cache, and increase in memory usage. Immediately after this task ends, the number of elements in the cache decreases, showing that they were only needed for this step. Finally, there's an interleaved set of calls to ``dot`` and ``sum``. Looking at the CPU plot shows that these run both concurrently and in parallel, as the CPU percentage spikes up to around 350\%. Custom Callbacks ---------------- Custom diagnostics can be created using the callback mechanism described above. To add your own, subclass the ``Callback`` class, and define your own methods. Here we create a class that prints the name of every key as it's computed: .. code-block:: python from dask.callbacks import Callback class PrintKeys(Callback): def _pretask(self, key, dask, state): """Print the key of every task as it's started""" print("Computing: {0}!".format(repr(key))) This can now be used as a context manager during computation: .. code-block:: python >>> from operator import add, mul >>> dsk = {'a': (add, 1, 2), 'b': (add, 3, 'a'), 'c': (mul, 'a', 'b')} >>> with PrintKeys(): ... get(dsk, 'c') Computing 'a'! Computing 'b'! Computing 'c'! Alternatively, functions may be passed in as keyword arguments to ``Callback``: .. code-block:: python >>> def printkeys(key, dask, state): ... print("Computing: {0}!".format(repr(key))) >>> with Callback(pretask=printkeys): ... get(dsk, 'c') Computing 'a'! Computing 'b'! Computing 'c'! dask-0.16.0/docs/source/distributed.rst000066400000000000000000000004161320364734500200340ustar00rootroot00000000000000Distributed Scheduling ====================== Dask can run on a cluster of hundreds of machines and thousands of cores. Technical documentation for the distributed system is located on a separate website located here: * https://distributed.readthedocs.io/en/latest/ dask-0.16.0/docs/source/examples-tutorials.rst000066400000000000000000000054731320364734500213640ustar00rootroot00000000000000Examples ======== Array ------ :doc:`Array documentation` .. toctree:: :maxdepth: 1 examples/array-numpy.rst examples/array-hdf5.rst examples/array-random.rst examples/array-extend.rst * `Blogpost: Distributed NumPy and Image Analysis on a Cluster, January 2017 `_ * `Use Dask.array to generate task graphs `_ * `Alternating Least Squares for collaborative filtering `_ Bag --- :doc:`Bag documentation` .. toctree:: :maxdepth: 1 examples/bag-json.rst examples/bag-word-count-hdfs.rst DataFrame ---------- :doc:`DataFrame documentation` .. toctree:: :maxdepth: 1 examples/dataframe-csv.rst examples/dataframe-hdf5.rst * `Blogpost: Dataframes on a cluster, January 2017 `_ * `Distributed DataFrames on NYCTaxi data `_ * `Build Parallel Algorithms for Pandas `_ * `Simple distributed joins `_ * `Build Dask.dataframes from custom format, feather `_ Delayed ------- :doc:`Delayed documentation` .. 
toctree:: :maxdepth: 1 examples/delayed-array.rst examples/delayed-custom.rst * `Blogpost: Delayed on a cluster, January 2017 `_ * `Blogpost: Dask and Celery, September 2016 `_ * `Basic Delayed example `_ * `Build Parallel Algorithms for Pandas `_ * `Build Dask.dataframes from custom format, feather `_ Distributed Concurrent.futures ------------------------------ `Concurrent.futures documentation `_ * `Custom workflows `_ * `Ad Hoc Distributed Random Forests `_ * `Web Servers and Asynchronous task scheduling `_ Tutorial -------- A Dask tutorial from July 2015 (fairly old) is available here: https://github.com/dask/dask-tutorial dask-0.16.0/docs/source/examples/000077500000000000000000000000001320364734500165755ustar00rootroot00000000000000dask-0.16.0/docs/source/examples/array-extend.rst000066400000000000000000000065431320364734500217420ustar00rootroot00000000000000Build Custom Dask.Array Function ================================ As discussed in the :doc:`array design document <../array-design>` to create a dask ``Array`` object we need the following: 1. A dask graph 2. A name specifying a set of keys within that graph 3. A ``chunks`` tuple giving chunk shape information 4. A NumPy dtype Often ``dask.array`` functions take other ``Array`` objects as inputs along with parameters, add tasks to a new dask dictionary, create a new ``chunks`` tuple, and then construct and return a new ``Array`` object. The hard parts are invariably creating the right tasks and creating a new ``chunks`` tuple. Careful review of the :doc:`array design document <../array-design>` is suggested. Example `eye` ------------- Consider this simple example with the ``eye`` function. .. code-block:: python from dask.base import tokenize def eye(n, blocksize): chunks = ((blocksize,) * n // blocksize, (blocksize,) * n // blocksize) name = 'eye-' + tokenize(n, blocksize) # unique identifier dsk = {(name, i, j): (np.eye, blocksize) if i == j else (np.zeros, (blocksize, blocksize)) for i in range(n // blocksize) for j in range(n // blocksize)} dtype = np.eye(0).dtype # take dtype default from numpy return Array(dsk, name, chunks, dtype) This example is particularly simple because it doesn't take any ``Array`` objects as input. Example `diag` -------------- Consider the function ``diag`` that takes a 1d vector and produces a 2d matrix with the values of the vector along the diagonal. Consider the case where the input is a 1d array with chunk sizes ``(2, 3, 4)`` in the first dimension like this:: [x_0, x_1], [x_2, x_3, x_4], [x_5, x_6, x_7, x_8] We need to create a 2d matrix with chunks equal to ``((2, 3, 4), (2, 3, 4))`` where the ith block along the diagonal of the output is the result of calling ``np.diag`` on the ``ith`` block of the input and all other blocks are zero. .. 
code-block:: python from dask.base import tokenize def diag(v): """Construct a diagonal array, with ``v`` on the diagonal.""" assert v.ndim == 1 chunks = (v.chunks[0], v.chunks[0]) # repeat chunks twice name = 'diag-' + tokenize(v) # unique identifier dsk = {(name, i, j): (np.diag, (v.name, i)) if i == j else (np.zeros, (v.chunks[0][i], v.chunks[0][j])) for i in range(len(v.chunks[0])) for j in range(len(v.chunks[0]))} dsk.update(v.dask) # include dask graph of the input dtype = v.dtype # output has the same dtype as the input return Array(dsk, name, chunks, dtype) >>> x = da.arange(9, chunks=((2, 3, 4),)) >>> x dask.array >>> M = diag(x) >>> M dask.array >>> M.compute() array([[0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 2, 0, 0, 0, 0, 0, 0], [0, 0, 0, 3, 0, 0, 0, 0, 0], [0, 0, 0, 0, 4, 0, 0, 0, 0], [0, 0, 0, 0, 0, 5, 0, 0, 0], [0, 0, 0, 0, 0, 0, 6, 0, 0], [0, 0, 0, 0, 0, 0, 0, 7, 0], [0, 0, 0, 0, 0, 0, 0, 0, 8]]) dask-0.16.0/docs/source/examples/array-hdf5.rst000066400000000000000000000020631320364734500212720ustar00rootroot00000000000000Creating Dask arrays from HDF5 Datasets ======================================= We can construct dask array objects from other array objects that support numpy-style slicing. In this example, we wrap a dask array around an HDF5 dataset, chunking that dataset into blocks of size ``(1000, 1000)``: .. code-block:: Python >>> import h5py >>> f = h5py.File('myfile.hdf5') >>> dset = f['/data/path'] >>> import dask.array as da >>> x = da.from_array(dset, chunks=(1000, 1000)) Often we have many such datasets. We can use the ``stack`` or ``concatenate`` functions to bind many dask arrays into one: .. code-block:: Python >>> dsets = [h5py.File(fn)['/data'] for fn in sorted(glob('myfiles.*.hdf5')] >>> arrays = [da.from_array(dset, chunks=(1000, 1000)) for dset in dsets] >>> x = da.stack(arrays, axis=0) # Stack along a new first axis Note that none of the data is loaded into memory yet, the dask array just contains a graph of tasks showing how to load the data. This allows ``dask.array`` to do work on datasets that don't fit into RAM. dask-0.16.0/docs/source/examples/array-numpy.rst000066400000000000000000000014001320364734500216060ustar00rootroot00000000000000Creating Dask arrays from NumPy arrays ====================================== We can create Dask arrays from any object that implements NumPy slicing, like a ``numpy.ndarray`` or on-disk formats like h5py or netCDF Dataset objects. This is particularly useful with on disk arrays that don't fit in memory but, for simplicity's sake, we show how this works on a NumPy array. The following example uses ``da.from_array`` to create a Dask array from a NumPy array, which isn't particularly valuable (the NumPy array already works in memory just fine) but is easy to play with. .. code-block:: python >>> import numpy as np >>> import dask.array as da >>> x = np.arange(1000) >>> y = da.from_array(x, chunks=(100)) >>> y.mean().compute() 499.5 dask-0.16.0/docs/source/examples/array-random.rst000066400000000000000000000005041320364734500217220ustar00rootroot00000000000000Creating random arrays ====================== In a simple case, we can create arrays with random data using the ``da.random`` module. .. 
code-block:: python >>> import dask.array as da >>> x = da.random.normal(0, 1, size=(100000,100000), chunks=(1000, 1000)) >>> x.mean().compute() -0.0002280808453825202 dask-0.16.0/docs/source/examples/bag-json.rst000066400000000000000000000011331320364734500210250ustar00rootroot00000000000000Read JSON records from disk =========================== We commonly use ``dask.bag`` to process unstructured or semi-structured data: .. code-block:: python >>> import dask.bag as db >>> import json >>> js = db.read_text('logs/2015-*.json.gz').map(json.loads) >>> js.take(2) ({'name': 'Alice', 'location': {'city': 'LA', 'state': 'CA'}}, {'name': 'Bob', 'location': {'city': 'NYC', 'state': 'NY'}) >>> result = js.pluck('name').frequencies() # just another Bag >>> dict(result) # Evaluate Result {'Alice': 10000, 'Bob': 5555, 'Charlie': ...} dask-0.16.0/docs/source/examples/bag-word-count-hdfs.rst000066400000000000000000000141551320364734500231070ustar00rootroot00000000000000Word count ========== In this example, we'll use ``dask`` to count the number of words in text files (Enron email dataset, 6.4 GB) both locally and on a cluster (along with the `distributed`_ and `hdfs3`_ libraries). Local computation ----------------- Download the first text file (76 MB) in the dataset to your local machine: .. code-block:: bash $ wget https://s3.amazonaws.com/blaze-data/enron-email/edrm-enron-v2_allen-p_xml.zip/merged.txt Import ``dask.bag`` and create a ``bag`` from the single text file: .. code-block:: python >>> import dask.bag as db >>> b = db.read_text('merged.txt', blocksize=10000000) View the first ten lines of the text file with ``.take()``: .. code-block:: python >>> b.take(10) ('Date: Tue, 26 Sep 2000 09:26:00 -0700 (PDT)\r\n', 'From: Phillip K Allen\r\n', 'To: pallen70@hotmail.com\r\n', 'Subject: Investment Structure\r\n', 'X-SDOC: 948896\r\n', 'X-ZLID: zl-edrm-enron-v2-allen-p-1713.eml\r\n', '\r\n', '---------------------- Forwarded by Phillip K Allen/HOU/ECT on 09/26/2000 \r\n', '04:26 PM ---------------------------\r\n', '\r\n') We can write a word count expression using the ``bag`` methods to split the lines into words, concatenate the nested lists of words into a single list, count the frequencies of each word, then list the top 10 words by their count: .. code-block:: python >>> wordcount = b.str.split().flatten().frequencies().topk(10, lambda x: x[1]) Note that the combined operations in the previous expression are lazy. We can trigger the word count computation using ``.compute()``: .. code-block:: python >>> wordcount.compute() [('P', 288093), ('1999', 280917), ('2000', 277093), ('FO', 255844), ('AC', 254962), ('1', 240458), ('0', 233198), ('2', 224739), ('O', 223927), ('3', 221407)] This computation required about 7 seconds to run on a laptop with 8 cores and 16 GB RAM. Cluster computation with HDFS ----------------------------- Next, we'll use ``dask`` along with the `distributed`_ and `hdfs3`_ libraries to count the number of words in all of the text files stored in a Hadoop Distributed File System (HDFS). Copy the text data from Amazon S3 into HDFS on the cluster: .. code-block:: bash $ hadoop distcp s3n://AWS_SECRET_ID:AWS_SECRET_KEY@blaze-data/enron-email hdfs:///tmp/enron where ``AWS_SECRET_ID`` and ``AWS_SECRET_KEY`` are valid AWS credentials. We can now start a ``distributed`` scheduler and workers on the cluster, replacing ``SCHEDULER_IP`` and ``SCHEDULER_PORT`` with the IP address and port of the ``distributed`` scheduler: .. 
code-block:: bash $ dask-scheduler # On the head node $ dask-worker SCHEDULER_IP:SCHEDULER_PORT --nprocs 4 --nthreads 1 # On the compute nodes Because our computations use pure Python rather than numeric libraries (e.g., NumPy, pandas), we started the workers with multiple processes rather than with multiple threads. This helps us avoid issues with the Python Global Interpreter Lock (GIL) and increases efficiency. In Python, import the ``hdfs3`` and the ``distributed`` methods used in this example: .. code-block:: python >>> from dask.distributed import Client, progress Initialize a connection to the ``distributed`` executor: .. code-block:: python >>> client = Client('SCHEDULER_IP:SCHEDULER_PORT') Create a ``bag`` from the text files stored in HDFS. This expression will not read data from HDFS until the computation is triggered: .. code-block:: python >>> import dask.bag as db >>> b = db.read_text('hdfs:///tmp/enron/*/*') We can write a word count expression using the same ``bag`` methods as the local ``dask`` example: .. code-block:: python >>> wordcount = b.str.split().flatten().frequencies().topk(10, lambda x: x[1]) We are ready to count the number of words in all of the text files using ``distributed`` workers. We can map the ``wordcount`` expression to a future that triggers the computation on the cluster. .. code-block:: python >>> future = client.compute(wordcount) Note that the ``compute`` operation is non-blocking, and you can continue to work in the Python shell/notebook while the computations are running. We can check the status of the ``future`` while all of the text files are being processed: .. code-block:: python >>> print(future) >>> progress(future) [########################################] | 100% Completed | 8min 15.2s This computation required about 8 minutes to run on a cluster with three worker machines, each with 4 cores and 16 GB RAM. For comparison, running the same computation locally with ``dask`` required about 20 minutes on a single machine with the same specs. When the ``future`` finishes reading in all of the text files and counting words, the results will exist on each worker. To sum the word counts for all of the text files, we need to gather the results from the ``dask.distributed`` workers: .. code-block:: python >>> results = client.gather(future) Finally, we print the top 10 words from all of the text files: .. code-block:: python >>> print(results) [('0', 67218227), ('the', 19588747), ('-', 14126955), ('to', 11893912), ('N/A', 11814994), ('of', 11725144), ('and', 10254267), ('in', 6685245), ('a', 5470711), ('or', 5227787)] The complete Python script for this example is shown below: .. code-block:: python # word-count.py # Local computation import dask.bag as db b = db.read_text('merged.txt') b.take(10) wordcount = b.str.split().flatten().frequencies().topk(10, lambda x: x[1]) wordcount.compute() # Cluster computation with HDFS from dask.distributed import Client, progress client = Client('SCHEDULER_IP:SCHEDULER_PORT') b = db.read_text('hdfs:///tmp/enron/*/*') wordcount = b.str.split().flatten().frequencies().topk(10, lambda x: x[1]) future = client.compute(wordcount) print(future) progress(future) results = client.gather(future) print(results) .. _distributed: https://distributed.readthedocs.io/en/latest/ ..
_hdfs3: https://hdfs3.readthedocs.io/en/latest/ dask-0.16.0/docs/source/examples/dataframe-csv.rst000066400000000000000000000023771320364734500220550ustar00rootroot00000000000000Dataframes from CSV files ========================= Suppose we have a collection of CSV files with data: **data1.csv:** .. code-block:: none time,temperature,humidity 0,22,58 1,21,57 2,25,57 3,26,55 4,22,53 5,23,59 **data2.csv:** .. code-block:: none time,temperature,humidity 0,24,85 1,26,83 2,27,85 3,25,92 4,25,83 5,23,81 **data3.csv:** .. code-block:: none time,temperature,humidity 0,18,51 1,15,57 2,18,55 3,19,51 4,19,52 5,19,57 and so on. We can create Dask dataframes from CSV files using ``dd.read_csv``. .. code-block:: python >>> import dask.dataframe as dd >>> df = dd.read_csv('data*.csv') We can work with the Dask dataframe as usual, which is composed of Pandas dataframes. We can list the first few rows. .. code-block:: python >>> df.head() time temperature humidity 0 0 22 58 1 1 21 57 2 2 25 57 3 3 26 55 4 4 22 53 Or we can compute values over the entire dataframe. .. code-block:: python >>> df.temperature.mean().compute() 22.055555555555557 >>> df.humidity.std().compute() 14.710829233324224 dask-0.16.0/docs/source/examples/dataframe-hdf5.rst000066400000000000000000000070661320364734500221100ustar00rootroot00000000000000Dataframes from HDF5 files =========================== This section provides working examples of ``dask.dataframe`` methods to read HDF5 files. HDF5 is a unique technology suite that makes possible the management of large and complex data collections. To learn more about HDF5, visit the `HDF Group Tutorial page `_. For an overview of ``dask.dataframe``, its limitations, scope, and use, see the :doc:`DataFrame overview section<../dataframe-overview>`. **Important Note** -- ``dask.dataframe.read_hdf`` uses ``pandas.read_hdf``, thereby inheriting its abilities and limitations. See `pandas HDF5 documentation `_ for more information. Examples Covered ---------------------------------------------- * Use ``dask.dataframe`` to: 1. Create dask DataFrame by loading a specific dataset (key) from a single HDF5 file 2. Create dask DataFrame from a single HDF5 file with multiple datasets (keys) 3. Create dask DataFrame by loading multiple HDF5 files with different datasets (keys) Generate Example Data ---------------------------------------------- Here is some code to generate sample HDF5 files. .. code-block:: python import string, json, random import pandas as pd import numpy as np # dict to keep track of hdf5 filename and each key fileKeys = {} for i in range(10): # randomly pick letter as dataset key groupkey = random.choice(list(string.ascii_lowercase)) # randomly pick a number as hdf5 filename filename = 'my' + str(np.random.randint(100)) + '.h5' # Make a dataframe; 26 rows, 2 columns df = pd.DataFrame({'x': np.random.randint(1, 1000, 26), 'y': np.random.randint(1, 1000, 26)}, index=list(string.ascii_lowercase)) # Write hdf5 to current directory df.to_hdf(filename, key='/' + groupkey, format='table') fileKeys[filename] = groupkey print(fileKeys) # prints hdf5 filenames and keys for each Read single dataset from HDF5 -------------------------------------------- The first order of ``dask.dataframe`` business is creating a dask DataFrame using a single HDF5 file's dataset. The code to accomplish this task is: .. 
code-block:: python import dask.dataframe as dd df = dd.read_hdf('my86.h5', key='/c') Load multiple datasets from single HDF5 file ------------------------------------------------- Loading multiple datasets from a single file requires a small tweak and use of the wildcard character: .. code-block:: python import dask.dataframe as dd df = dd.read_hdf('my86.h5', key='/*') Learn more about ``dask.dataframe`` methods by visiting the :doc:`API documentation<../dataframe-api>`. Create dask DataFrame from multiple HDF5 files -------------------------------------------------- The next example is a natural progression from the previous example (e.g. using a wildcard). Add a wildcard for the `key` and `path` parameters to read multiple files and multiple keys: .. code-block:: python import dask.dataframe as dd df = dd.read_hdf('./*.h5', key='/*') These exercises cover the basics of using ``dask.dataframe`` to work with HDF5 data. For more information on the user functions to manipulate and explore dataframes (visualize, describe, compute, etc.) see :doc:`API documentation<../dataframe-api>`. To explore the other data formats supported by ``dask.dataframe``, visit the :doc:`section on creating dataframes<../dataframe-create>` . dask-0.16.0/docs/source/examples/delayed-array.rst000066400000000000000000000034401320364734500220530ustar00rootroot00000000000000Build Custom Arrays =================== Here we have a serial blocked computation for computing the mean of all positive elements in a large, on disk array: .. code-block:: python x = h5py.File('myfile.hdf5')['/x'] # Trillion element array on disk sums = [] counts = [] for i in range(1000000): # One million times chunk = x[1000000*i:1000000*(i + 1)] # Pull out chunk positive = chunk[chunk > 0] # Filter out negative elements sums.append(positive.sum()) # Sum chunk counts.append(positive.size) # Count chunk result = sum(sums) / sum(counts) # Aggregate results Below is the same code, parallelized using ``dask.delayed``: .. code-block:: python x = delayed(h5py.File('myfile.hdf5')['/x']) # Trillion element array on disk sums = [] counts = [] for i in range(1000000): # One million times chunk = x[1000000*i:1000000*(i + 1)] # Pull out chunk positive = chunk[chunk > 0] # Filter out negative elements sums.append(positive.sum()) # Sum chunk counts.append(positive.size) # Count chunk result = delayed(sum)(sums) / delayed(sum)(counts) # Aggregate results result.compute() # Perform the computation Only 3 lines had to change to make this computation parallel instead of serial. - Wrap the original array in ``delayed``. This makes all the slices on it return ``Delayed`` objects. - Wrap both calls to ``sum`` with ``delayed``. - Call the ``compute`` method on the result. While the for loop above still iterates fully, it's just building up a graph of the computation that needs to happen, without actually doing any computing. dask-0.16.0/docs/source/examples/delayed-custom.rst000066400000000000000000000020521320364734500222450ustar00rootroot00000000000000Data Processing Pipelines ========================= `Example notebook `_. Now, rebuilding the example from :ref:`custom graphs `: .. code-block:: python from dask import delayed, value @delayed def load(filename): ... @delayed def clean(data): ... @delayed def analyze(sequence_of_data): ... 
@delayed def store(result): with open(..., 'w') as f: f.write(result) files = ['myfile.a.data', 'myfile.b.data', 'myfile.c.data'] loaded = [load(i) for i in files] cleaned = [clean(i) for i in loaded] analyzed = analyze(cleaned) stored = store(analyzed) stored.compute() This builds the same graph as seen before, but using normal Python syntax. In fact, the only difference between Python code that would do this in serial, and the parallel version with dask is the ``delayed`` decorators on the functions, and the call to ``compute`` at the end. dask-0.16.0/docs/source/faq.rst000066400000000000000000000121171320364734500162620ustar00rootroot00000000000000Frequently Asked Questions ========================== We maintain most Q&A on `Stack Overflow under the #Dask tag`_. You may find the questions there useful to you. .. _`Stack Overflow under the #Dask tag`: http://stackoverflow.com/questions/tagged/dask 1. **Q: How do I debug my program when using dask?** If you want to inspect the dask graph itself see :doc:`inspect docs `. If you want to dive down with a Python debugger a common cause of frustration is the asynchronous schedulers which, because they run your code on different workers, are unable to provide access to the Python debugger. Fortunately you can change to a synchronous scheduler like ``dask.get`` by providing a ``get=`` keyword to the ``compute`` method:: my_array.compute(get=dask.get) 2. **Q: In ``dask.array`` what is ``chunks``?** Dask.array breaks your large array into lots of little pieces, each of which can fit in memory. ``chunks`` determines the size of those pieces. Users most often interact with chunks when they create an array as in:: >>> x = da.from_array(dataset, chunks=(1000, 1000)) In this case chunks is a tuple defining the shape of each chunk of your array; for example "Please break ``dataset`` into 1000 by 1000 chunks." However internally dask uses a different representation, a tuple of tuples, to handle uneven chunk sizes that inevitably occur during computation. 3. **Q: How do I select a good value for ``chunks``?** Choosing good values for ``chunks`` can strongly impact performance. Here are some general guidelines. The strongest guide is memory: 1. The size of your blocks should fit in memory. 2. Actually, several blocks should fit in memory at once, assuming you want multi-core 3. The size of the blocks should be large enough to hide scheduling overhead, which is a couple of milliseconds per task 4. Generally I shoot for 10MB-100MB sized chunks Additionally the computations you do may also inform your choice of ``chunks``. Some operations like matrix multiply require anti-symmetric chunk shapes. Others like ``svd`` and ``qr`` only work on tall-and-skinny matrices with only a single chunk along all of the columns. Other operations might work but be faster or slower with different chunk shapes. Note that you can ``rechunk()`` an array if necessary. 4. **Q: My computation fills memory, how do I spill to disk?** The schedulers endeavor not to use up all of your memory. However for some algorithms filling up memory is unavoidable. In these cases we can swap out the dictionary used to store intermediate results with a dictionary-like object that spills to disk. The Chest_ project handles this nicely. >>> cache = Chest() # Uses temporary file. 
Deletes on garbage collection or >>> cache = Chest(path='/path/to/dir', available_memory=8e9) # Use 8GB This chest object works just like a normal dictionary but, when available memory runs out (defaults to 1GB) it starts pickling data and sending it to disk, retrieving it as necessary. You can specify your cache when calling ``compute`` >>> x.dot(x.T).compute(cache=cache) Alternatively you can set your cache as a global option. >>> with dask.set_options(cache=cache): # sets state within with block ... y = x.dot(x.T).compute() or >>> dask.set_options(cache=cache) # sets global state >>> y = x.dot(x.T).compute() However, while using an on-disk cache is a great fallback performance, it's always best if we can keep from spilling to disk. You could try one of the following 1. Use a smaller chunk/partition size 2. If you are convinced that a smaller chunk size will not help in your case you could also report your problem on our `issue tracker`_ and work with the dask development team to improve our scheduling policies. 5. **How does Dask serialize functions?** When operating with the single threaded or multithreaded scheduler no function serialization is necessary. When operating with the distributed memory or multiprocessing scheduler Dask uses cloudpickle_ to serialize functions to send to worker processes. cloudpickle supports almost any kind of function, including lambdas, closures, partials and functions defined interactively. Cloudpickle can not serialize things like iterators, open files, locks, or other objects that are heavily tied to your current process. Attempts to serialize these objects (or functions that implicitly rely on these objects) will result in scheduler errors. You can verify that your objects are easily serializable by running them through the ``cloudpickle.dumps/loads`` functions .. code-block:: python from cloudpickle import dumps, loads obj2 = loads(dumps(obj)) assert obj2 == obj .. _cloudpickle: https://github.com/cloudpipe/cloudpickle .. _`Chest`: https://github.com/blaze/chest .. _`issue tracker`: https://github.com/dask/dask/issues/new dask-0.16.0/docs/source/funding.rst000066400000000000000000000013621320364734500171450ustar00rootroot00000000000000Funding ======= Dask receives generous funding and support from the following sources: 1. The time and effort of numerous `open source contributors`_ 2. `The DARPA XData program`_ 3. `The Moore Foundation`_'s Data Driven Discovery program 4. `Anaconda Inc`_ 5. A variety of private companies who sponsor the development of particular open source features We encourage monetary donations to `NumFOCUS`_ to support open source scientific computing software. .. _`The DARPA XData Program`: http://www.darpa.mil/program/xdata .. _`The Moore Foundation`: https://www.moore.org/ .. _`Anaconda Inc`: https://www.anaconda.com/ .. _`open source contributors`: https://github.com/dask/dask/graphs/contributors .. _`NumFOCUS`: http://www.numfocus.org/ dask-0.16.0/docs/source/futures.rst000066400000000000000000000330501320364734500172070ustar00rootroot00000000000000Futures ======= Dask supports a real-time task framework that extends Python's `concurrent.futures `_ interface. This interface is good for arbitrary task scheduling, like :doc:`dask.delayed `, but is immediate rather than lazy, which provides some more flexibility in situations where the computations may evolve over time. These features depend on the second generation task scheduler found in `dask.distributed `_ (which, despite its name, runs very well on a single machine). .. 
currentmodule:: distributed Start Dask Client ----------------- You must start a ``Client`` to use the futures interface. This tracks state among the various worker processes or threads. .. code-block:: python from dask.distributed import Client client = Client() # start local workers as processes # or client = Client(processes=False) # start local workers as threads If you have `Bokeh `_ installed then this starts up a diagnostic dashboard at http://localhost:8787 . Submit Tasks ------------ .. autosummary:: Client.submit Client.map Future.result Then you can submit individual tasks using the ``submit`` method. .. code-block:: python def inc(x): return x + 1 def add(x, y): return x + y a = client.submit(inc, 10) # calls inc(10) in background thread or process b = client.submit(inc, 20) # calls inc(20) in background thread or process Submit returns a ``Future``, which refers to a remote result. This result may not yet be completed: .. code-block:: python >>> a Eventually it will complete. The result stays in the remote thread/process/worker until you ask for it back explicitly. .. code-block:: python >>> a >>> a.result() # blocks until task completes and data arrives 11 You can pass futures as inputs to submit. Dask automatically handles dependency tracking; once all input futures have completed they will be moved onto a single worker (if necessary), and then the computation that depends on them will be started. You do not need to wait for inputs to finish before submitting a new task; Dask will handle this automatically. .. code-block:: python c = client.submit(add, a, b) # calls add on the results of a and b Similar to Python's ``map`` you can use ``Client.map`` to call the same function and many inputs: .. code-block:: python futures = client.map(inc, range(1000)) However note that each task comes with about 1ms of overhead. If you want to map a function over a large number of inputs then you might consider :doc:`dask.bag ` or :doc:`dask.dataframe ` instead. Move Data --------- .. autosummary:: Future.result Client.gather Client.scatter Given any future you can call the ``.result`` method to gather the result. This will block until the future is done computing and then transfer the result back to your local process if necessary. .. code-block:: python >>> c.result() 32 You can gather many results concurrently using the ``Client.gather`` method. This can be more efficient than calling ``.result()`` on each future sequentially. .. code-block:: python >>> # results = [future.result() for future in futures] >>> results = client.gather(futures) # this can be faster If you have important local data that you want to include in your computation you can either include it as a normal input to a submit or map call: .. code-block:: python >>> df = pd.read_csv('training-data.csv') >>> future = client.submit(my_function, df) Or you can ``scatter`` it explicitly. Scattering moves your data to a worker and returns a future pointing to that data: .. code-block:: python >>> remote_df = client.scatter(df) >>> remote_df >>> future = client.submit(my_function, remote_df) Both of these accomplish the same result, but using scatter can sometimes be faster. This is especially true if you use processes or distributed workers (where data transfer is necessary) and you want to use ``df`` in many computations. Scattering the data beforehand avoids excessive data movement. Calling scatter on a list scatters all elements individually. 
Dask will spread these elements evenly throughout workers in a round-robin fashion: .. code-block:: python >>> client.scatter([1, 2, 3]) [<Future: status: finished, type: int, key: ...>, <Future: status: finished, type: int, key: ...>, <Future: status: finished, type: int, key: ...>] References, Cancellation, and Exceptions ---------------------------------------- .. autosummary:: Future.cancel Future.exception Future.traceback Client.cancel Dask will only compute and hold onto results for which there are active futures. In this way your local variables define what is active in Dask. When a future is garbage collected by your local Python session, Dask will feel free to delete that data or stop ongoing computations that were trying to produce it. .. code-block:: python >>> del future # deletes remote data once future is garbage collected You can also explicitly cancel a task using the ``Future.cancel`` or ``Client.cancel`` methods. .. code-block:: python >>> future.cancel() # deletes data even if other futures point to it If a future fails, then Dask will raise the remote exceptions and tracebacks if you try to get the result. .. code-block:: python def div(x, y): return x / y >>> a = client.submit(div, 1, 0) # 1 / 0 raises a ZeroDivisionError >>> a <Future: status: error, key: ...> >>> a.result() 1 def div(x, y): ----> 2 return x / y ZeroDivisionError: division by zero All futures that depend on an erred future also err with the same exception: .. code-block:: python >>> b = client.submit(inc, a) >>> b <Future: status: error, key: ...> You can collect the exception or traceback explicitly with the ``Future.exception`` or ``Future.traceback`` methods. Waiting on Futures ------------------ .. autosummary:: as_completed wait You can wait on a future or collection of futures using the ``wait`` function: .. code-block:: python from dask.distributed import wait >>> wait(futures) This blocks until all futures are finished or have erred. You can also iterate over the futures as they complete using the ``as_completed`` function: .. code-block:: python from dask.distributed import as_completed futures = client.map(score, x_values) best = -1 for future in as_completed(futures): y = future.result() if y > best: best = y For greater efficiency you can also ask ``as_completed`` to gather the results in the background: .. code-block:: python for future in as_completed(futures, results=True): ... Or collect all futures in batches that have arrived since the last iteration: .. code-block:: python for batch in as_completed(futures, results=True).batches(): for future in batch: ... Additionally, for iterative algorithms, you can add more futures into the ``as_completed`` iterator: .. code-block:: python seq = as_completed(futures) for future in seq: y = future.result() if condition(y): new_future = client.submit(...) seq.add(new_future) # add back into the loop Fire and Forget --------------- .. autosummary:: fire_and_forget Sometimes we don't care about gathering the result of a task, and only care about side effects that it might have, like writing a result to a file. .. code-block:: python >>> a = client.submit(load, filename) >>> b = client.submit(process, a) >>> c = client.submit(write, b, out_filename) As noted above, Dask will stop work that doesn't have any active futures. It assumes that, because no one has a pointer to this data, no one cares. You can tell Dask to compute a task anyway, even if there are no active futures, using the ``fire_and_forget`` function: .. code-block:: python from dask.distributed import fire_and_forget >>> fire_and_forget(c) This is particularly useful when a future may go out of scope, for example as part of a function: ..
code-block:: python def process(filename): out_filename = 'out-' + filename a = client.submit(load, filename) b = client.submit(process, a) c = client.submit(write, b, out_filename) fire_and_forget(c) return # here we lose the reference to c, but that's now ok for filename in filenames: process(filename) Submit Tasks from Tasks ----------------------- .. autosummary:: get_client secede Tasks can launch other tasks by getting their own client. This enables complex and highly dynamic workloads. .. code-block:: python from dask.distributed import get_client def my_function(x): ... # Get locally created client client = get_client() # Do normal client operations, asking cluster for computation a = client.submit(...) b = client.submit(...) a, b = client.gather([a, b]) return a + b It also allows you to set up long-running tasks that watch other resources like sockets or physical sensors: .. code-block:: python def monitor(device): client = get_client() while True: data = device.read_data() future = client.submit(process, data) fire_and_forget(future) for device in devices: fire_and_forget(client.submit(monitor, device)) However, each running task takes up a single thread, and so if you launch many tasks that launch other tasks then it is possible to deadlock the system if you are not careful. You can call the ``secede`` function from within a task to have it remove itself from the dedicated thread pool into an administrative thread that does not take up a slot within the Dask worker: .. code-block:: python from dask.distributed import get_client, secede def monitor(device): client = get_client() secede() while True: data = device.read_data() future = client.submit(process, data) fire_and_forget(future) Coordinate Data Between Clients ------------------------------- .. autosummary:: Queue Variable In the section above we saw that you could have multiple clients running at the same time, each of which generated and manipulated futures. These clients can coordinate with each other using Dask ``Queue`` and ``Variable`` objects, which can communicate futures or small bits of data between clients sensibly. Dask queues follow the API for the standard Python Queue, but now move futures or small messages between clients. Queues serialize sensibly and reconnect themselves on remote clients if necessary. .. code-block:: python from dask.distributed import Queue def load_and_submit(filename): data = load(filename) client = get_client() future = client.submit(process, data) queue.put(future) client = Client() queue = Queue() for filename in filenames: future = client.submit(load_and_submit, filename) fire_and_forget(future) while True: future = queue.get() print(future.result()) Queues can also send small pieces of information, anything that is msgpack encodable (ints, strings, bools, lists, dicts, etc.). This can be useful to send back small scores or administrative messages: .. code-block:: python def func(x): try: ... except Exception as e: error_queue.put(str(e)) error_queue = Queue() Variables are like Queues in that they communicate futures and small data between clients. However, variables hold only a single value. You can get or set that value at any time. .. code-block:: python >>> var = Variable('stopping-criterion') >>> var.set(False) >>> var.get() False This is often used to signal stopping criteria or current parameters, etc. between clients. If you want to share large pieces of information then scatter the data first: .. code-block:: python >>> parameters = np.array(...) >>> future = client.scatter(parameters) >>> var.set(future)
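As a minimal sketch of that stopping-criterion pattern (assuming the ``client`` created earlier; the function name below is invented for illustration, and error handling is omitted), a long-running task can poll a ``Variable`` that any client may set:

.. code-block:: python

    from dask.distributed import Variable

    stop = Variable('stopping-criterion')
    stop.set(False)

    def work_until_stopped():
        # Any client connected to the same scheduler can call stop.set(True)
        while not stop.get():
            ...  # do one batch of work here

    future = client.submit(work_until_stopped)

Setting the variable to ``True`` from any client then lets the task wind down cleanly.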
API --- **Client** .. autosummary:: Client Client.cancel Client.compute Client.gather Client.get Client.get_dataset Client.get_executor Client.has_what Client.list_datasets Client.map Client.ncores Client.persist Client.publish_dataset Client.rebalance Client.replicate Client.restart Client.run Client.run_on_scheduler Client.scatter Client.scheduler_info Client.shutdown Client.start_ipython_workers Client.start_ipython_scheduler Client.submit Client.unpublish_dataset Client.upload_file Client.who_has **Future** .. autosummary:: Future Future.add_done_callback Future.cancel Future.cancelled Future.done Future.exception Future.result Future.traceback **Functions** .. autosummary:: as_completed fire_and_forget get_client secede wait .. autofunction:: as_completed .. autofunction:: fire_and_forget .. autofunction:: get_client .. autofunction:: secede .. autofunction:: wait .. autoclass:: Client :members: .. autoclass:: Future :members: .. autoclass:: Queue :members: .. autoclass:: Variable :members: dask-0.16.0/docs/source/graphs.rst000066400000000000000000000057531320364734500170060ustar00rootroot00000000000000Overview ======== An explanation of dask task graphs. Motivation ---------- Normally, humans write programs and then compilers/interpreters interpret them (for example ``python``, ``javac``, ``clang``). Sometimes humans disagree with how these compilers/interpreters choose to interpret and execute their programs. In these cases humans often bring the analysis, optimization, and execution of code into the code itself. Commonly a desire for parallel execution causes this shift of responsibility from compiler to human developer. In these cases, we often represent the structure of our program explicitly as data within the program itself. A common approach to parallel execution in user-space is *task scheduling*. In task scheduling we break our program into many medium-sized tasks or units of computation, often a function call on a non-trivial amount of data. We represent these tasks as nodes in a graph with edges between nodes if one task depends on data produced by another. We call upon a *task scheduler* to execute this graph in a way that respects these data dependencies and leverages parallelism where possible, running multiple independent tasks simultaneously. Many solutions exist. This is a common approach in parallel execution frameworks. Often task scheduling logic hides within other larger frameworks (Luigi, Storm, Spark, IPython Parallel, and so on) and so is often reinvented. Dask is a specification that encodes task schedules with minimal incidental complexity using terms common to all Python projects, namely dicts, tuples, and callables. Ideally this minimal solution is easy for a broad community to adopt and understand. Example ------- .. image:: _static/dask-simple.png :height: 400px :alt: A simple dask dictionary :align: right Consider the following simple program: .. code-block:: python def inc(i): return i + 1 def add(a, b): return a + b x = 1 y = inc(x) z = add(y, 10) We encode this as a dictionary in the following way: .. code-block:: python d = {'x': 1, 'y': (inc, 'x'), 'z': (add, 'y', 10)} While less pleasant than our original code, this representation can be analyzed and executed by other Python code, not just the CPython interpreter. We don't recommend that users write code in this way, but rather that it is an appropriate target for automated systems.
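To make that last point concrete, here is a minimal sketch of the kind of Python code that can execute such a dictionary; the name ``rec_get`` is invented for this illustration and is not part of Dask, and it ignores parallelism and error handling:

.. code-block:: python

    def rec_get(dsk, key):
        # Anything that is not a key in the graph is treated as a literal value
        task = dsk.get(key, key)
        if isinstance(task, tuple) and callable(task[0]):
            # A task is a tuple of a function followed by its arguments,
            # each of which may itself be a key in the graph
            func, args = task[0], task[1:]
            return func(*(rec_get(dsk, a) for a in args))
        return task

    rec_get(d, 'z')  # == add(inc(1), 10) == 12

A real scheduler performs the same resolution, but also tracks dependencies so that independent tasks can run in parallel.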
Also, in non-toy examples, the execution times are likely much larger than for ``inc`` and ``add``, warranting the extra complexity. Schedulers ---------- The ``dask`` library currently contains a few schedulers to execute these graphs. Each scheduler works differently, providing different performance guarantees and operating in different contexts. These implementations are not special; others can easily write schedulers better suited to other applications or architectures. Systems that emit dask graphs (like ``dask.array``, ``dask.bag``, and so on) may leverage the appropriate scheduler for the application and hardware.
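For example, the dictionary from the previous section can be handed directly to one of these schedulers. A brief sketch, assuming the ``d``, ``inc``, and ``add`` definitions from above and using the threaded scheduler's ``get`` function:

.. code-block:: python

    import dask.threaded

    # Compute the key 'z' of the graph with the thread-pool scheduler.
    # Any scheduler exposing the same get(dsk, keys) interface could be used here.
    result = dask.threaded.get(d, 'z')  # runs inc(1), then add(2, 10)
    assert result == 12

Collections such as ``dask.array`` and ``dask.bag`` hand their graphs to a scheduler ``get`` function in much the same way when you call ``compute``.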
dask-0.16.0/docs/source/images/000077500000000000000000000000001320364734500162245ustar00rootroot00000000000000dask-0.16.0/docs/source/images/array.png000066400000000000000000000322601320364734500200530ustar00rootroot00000000000000[binary image data omitted: array.png, an SVG chunk diagram labeling blocks ('x', 0, 0) through ('x', 3, 2) with chunk-size labels 5 and 8, and dask-0.16.0/docs/source/images/async-embarrassing.gif]
rHI]KgN율YUX< @<uH X_Ĝ:~ST,ho 6LE0,Ɩjlm ȘT|cK\QVlk,~̟\sL@aRqƝօ\l`j7{)[6)4#˩b7Vqh2d :9=n*'|?&ʤ`uXfE:` ¼52|Z̔f͇a`(GƱ,BU h =в7NJ8\~ @;Qm<݉P;1RcU4\'<>y49A܎@W~=M=1F u8quP-R\r_$m|'<|͋y<\q7bKR ͢XH;V>֖~x-m9|(ϊɩa+Ca]A٘I/&1Fڽ,uiܩ}ݷxc͡ܤ -=-}shm]=aJlSWߥ@~ ]~K!z_M?]ݑ͔M\U=.ng~ n!N#>%~.)Uʘ|؝$kQ㶑>*8߉ nX҆@^'ݡG[J/1.3nB%;#{yojlnpS・ܡяz|ɛKUK~Q L>f|nە_ܚ |fQhSpJj3X & @뮾 'N9CgUY}^{^[/;w?& >>]Hx!$Y%o'*]+\.2O3_i D䕮98?o`AGjlP?T_%VYhW7 -ҬV[I-ĝ ډiQo_ "=fehnHBtwRFWxoQSޥ|u9MUX|_V^\m9ߨܷ_޼PmaK?^%έ ]-J\S]WKmMuz?_.yIV;-Oe#s۔|g"ٵw9r ë/=M_vz~:o-4 D0䉽 IB+B(jл16Bĩ ./KPqFF}0Ǯ"& j!3+2H$T#jR'wrI ѰI1;K1aQI2c1.,M8s9|tFΌ's]PD44? 0ctRQ,.9+%LDbBSCm)UҀ$0J.Y;$ R\{XcM/]aUt+,a@jWo&d ғmm+í\sKQWP6a# ]!;˵r}ֶrXq:3P 7pS03,?z|8ĉڂ0!}=ι&2ic WԜ2R+Hݥ6hwYxW8-n}4:ͲUc2Vm=ISfeqS9J 4}>Λo9-gCtu1nuɋM.#<{t^Qw\7Z]t6SrӗԒBD*GN&4WEUkQ[SJ)^񹀅{J-_=Sp;Hlї]~NէMELA4PI3 UP?yh"$!/(Y+,?W,adx*8!oC QC$bǬFdb8bhNQE,fQ[han j~E4QkdcG%/э3J38.q.L?"`HhL/ZdE ҁ0 q%I7SO"ɣO&ÄP2x+,IM(dcf .qF_M$,_<&%U%92w̤|>:;,fMEm,K5u"X2!gRX3}  kiKg"+t{]Rnw݊V%oy*^Ve^Z-z{_>l~{JSYo |ݪ峼:\wV)9޵pt7a,qM|А Wbד1X;59+ )B2;RkKp){MTNNg+[2#?`1ոȅG\f }J7N&.\.{LCcٹxOU 3?/JUѫtAj}eǔ^>SMr4,zĴ@V2nkDm$Cg<E~mftaԪfԠkbۥcO dYrDI'A% -#DE\C \ cDId?8YѮI:l?7721$EDPLEU|UDSWl=jCYEE\\Ň>4 aa(L|k_{=DC>25=='|)d ^Ƣrm$l|KC+J +""5n4Aj,eGh4&G!:o @v<>sȯBBG;)c*A3, qC@$sHtT=&N#Ț@1qCHLFtF$R/xD@Fœ?iTp,+D Z\7aAx0$"l(HnDsKzI?*xI4:|.CJJ ./2sH ƤH !!LL.$y@NDBaCJ@DE~FHELHI7̺ $DkDȴ&H`L,CM`C_*NwBd˻ ?7N:? {$>0sD5ژ]jF;,OO$̢ TB]MTN2¥ĹMh о)$ T t?]QL}=Q4H5EQC$H P:\9Q}O} KA̭Ģ[ZV;4+Jw ńG<6 G4MkӮ mO#K(Sso SsJȉtD>LK;]TQ +%)CwKC=.G%*QC5hTO6HM5 $/e|tT(DG閿x#hM61hTw1ea0IH:I5 oVl5 vq' qUI|JZ1-dcAgU$2 lkXkGWՊ\Qס8&J=$W8UҌʩͻN>#)"]?zГр4El YbA}JYyUڼڛBE{Yژ@Eڎ-XZ,N*ZymΡUJ ڦJRΣZE[&3Ӽm>u'0URH" \QBܾ%4"[%8Q\lU ܙ=UQ̻kYRC2UB3PʹYUҵٽVͱ'/TE\]کI -l^$e*ު^慾TsEh]eeʿmK]rQ}Ҥ_9}]H읈 ɦt`x>$}[N~:ߊX_M L -*TNN5eҰ\5K4Ǐa!6X{a4J?T|mBbb hb}2$v*Z3.G"N?ˁ\~1B)ckN뱎طU*Y`k}kd ^ kk/푾UF 쿮62.Ȧll{U&0llF3bعl^mfem>l-X$㹶.'xX߭ Gi_n+K ZZTncC%^h$>!tڃ58,JIm86nFoP4`oVi}wʋMcm.:SmnG>e&6F b*V qcY=LR /~ Įp"NfAF .qPnrq q"ύ ќp hm~rۖI=bpd.W+O5/s eN,6ӦufrS.tEoIO3QB'mJOrflG 8 Nt4tPmH?tSNOuu(f( \u o 1 k(dOve_vfovgve7ٞe5:;6 9^kiqwrinuvP%w{pvsœP(x?xOxO.Px7}mhFWB-xױx籎x_D?(]yyyygyřz/z_O+fz-ozz)Tᚢ.dӃQUQdT~-Ij! Q!J JT0dԪhnDp1P$pB D0q ))3KgvZ!ӤeO;dZ4l]WE:6"؍37O%)ګVNTVZDHKSOhB€$p  cjҶo7{廯~`{6D8 h\%,R%g5 ~TB lAE0 f- ޥ)(P*XP3x#L6#P=cA@8JXW %g'"K9T)\D (4 @z^(ey&iVf9VA tu7'Q gS%9"A5`ш-:)ZzintbiCIVJd'ƅQP$ P-dG& U^B`⫽Vh.i z!IZ,AHp f4/\ؚk칣ʙ.+PľS"LGTz\ BV @J WnS{W V< u4; x%%*lW&p 1hr^~|gsAτ 'R$gȂpeM`e*|cBtDr5.~ 6ʎKDrȂR*Bv @Pd7Ş8$~Lr&I‰Zg4h @{>>x> &ɫXI` v:0in6VȠH 3I²fE gCĤ7X $3өMbOמ_N0g5tRSE؋# bR({ZU" ٩GH_{xSibtOC=oaWBJ"8݃>=\1MOӀ q6LCYdi? UKyܤ6z&=/{Q3UC3X 85hiIB9US2l˜k2`أ54t Va'Uhzci}pjO@6aOwIlG+ HLik Ŷ&dw*]B'EZaIFE hRjyX Ջ>raOU4B&ڲtIKzvGMk̶d6JW; v: s3ꆷ~Sw)5|/to+Fua[1+}QN*46n ~(, >@]Άͽ&]>صIXaIʆ` (c߮-D3FȕGPֲ؏SbMN/}LGd|6M7&k}iW{@ދrmLxJ\V6 d*UFkeB4Ή@"USp˘EVg5f L Yv59VoS,bR@?3H ־$PlsGHKCCZʕX`{$JQnk+Cl<բpf1fmZj^262NI)2^_ jD: 8r#z䘪D#=f'RVrǑV磓`C9#gQ [Gg^?NyccdݳîN׊]**Y)?繩cd;K؋:}y< ֓R6D ~^vү:^ , q puGyNˇ ojSFTYՓJ} >WpCxS` p#Qĸ>AT?$ Foľ95q56dֱ/5 _L:ޕ_*|^|l)\ ƶܟbIh"KWT^M 1hQ)^VS `RABP [ỲaЀ L BH!̀e@(n!j]LTBZ0$hHXNY$͔]֡!)d!QM%ڨ S$e] !:KС9n!. UP! |S D$⒛`iM+^ "(".H+d0- ґvVI3*UTLO$DQhDOōH>-*!%&3>%p"t$>" <@yH xI <gV-H5fUF\vt %bDT @y4@ (BHRy@cD%``eb1l1ATZQTA2$tB(KLMb @NJĀ\Ldue@ F%cZ eOfWH?"A$ S UF@ lBLHy OrΡDzmK %QbQ;.Z&u$hPivUp   p"tB!,$A!$"kAdFĀA 'Xr@yE=YdZv%\ޑF!0%_!gz&QR&Kd uPIB$fRE~^ Y>2Xp\f{AܥL@H~ \S`S>XAYIG •ٱJpH tvmA\׎-zwJ-Rh¬jрC@C,\qJ[AnPW,TX驆ᙸmKn .*ծmQlƞN⦣Ѳ>^nNnζ*JPnh,ڝ}ףʻt.".֬鵈/R-$hWD*~/za. 
m:/o'EoB.|,=/܍侄DۖlWGNh-Z6p,x&J0,(ݦ2A D8A_ǰ mna&An|ΰ,( DzTޒA,d 1WQ,p"1igoԆ11?q1qLб1oqK2 !!D"2#7-J@#O2%$G%W&oeCx2'(ٞ2*(2+(2,DzI Բ-2..r!,h, ̓Ծ.ڈ  <#Jt%ċ3z4C64(I O ry$_ʜٯ%͛lܩ-&ϟ@ JТHLʴ)N6**Vj5Euk֮^Ò*I`&M%ne+'ݺx_Tw ,ˆN\r1㸇KxrʖFY%?κy˧N4 Wî9ڸmo7 o{8_OC/|iĭkeܻ/y~OϾ=9bϿh& 6F(Vhfv ($h(,0(4h8<@)DiH&L6PF)TViXf\v`)dihl:tix矀*蠄j衈&袌6裐F*餔Vj饘f馜v駠*ꨤjꩨꪬ꫰*++%j뭸檖+Jl &L W0 gH8̡w@ H"HL&:PH*ZX̢.z` H2hL6pH:x̣> IB@,9%a?C`&@)6Ʉ(`-@ HE VKpRLT`WH9Lp"]O@\Il&) H$@` Z T`ϬSbpV@"0A$ `4N8U i@ 6~e[j 2dO*ȧډh R,@5_[J{g*x*BP IBL"F:򑐌$'IJZ̤&7Nz (GIRL*WV򕰌,gIZpd\=n)`Q/¥υo0f)ES1ӅhR/̦6Km6ެ/i4j$4A$kHK)0Ӻy>} @iOsMBzi2 =Jъ egsiLq(H%PCj)8GҖ0LEҙ8ͩNwӞT5d ԢRԦ:kl'TWZ5yy)z0zտX%XǺ A<5s]t%\J׺xͫ^׾ `KMbzE$ju,`zV[hGKҚU}HjOF[mN''@lH\pKķMr&ۭ4!ϝHh+ʺnGz xKMz|KrB&X@A O` c 0a`[U^px9l<@5 x  < Np81q:-+n V`$`)p)& lI@ T )fL X#ha39&` p?w0UhUVS(rPP8#' d04ӧ4DI$@Wze '%8NW ELl &u r]O*hR\Xj6j;%Ptm{7~i\޵IzP{-9mKa $L2|+k P q ySr \,yjP)Pq!y9¿ Zq{l&cb:WePyT?~c/~uh {=(AoXJYm-wx7Gx!zjI(ur?x  V(&?axsevslxn"k'a"%Vwu:q0|)/y;tIX &Pk}w. 0|81{vu1?q7!`(&/xUgkO&x#YKt]I)pwX6i~mVkƆ4pRt+kW@ztw<)xavo/P(rR&xa`&|d{gkQ66c)@o2*k*QF'$Db6^fp Xh ]&pb6* X(j9>/((nV~a`gSRb,0c&1 086 `)@*w1x-L1R'a@ bh0" s0P%P"'2 (YFyHJLٔN{P9Ti+#PXZ\ٕ^`b9dY"Wf^2(S9jr94bt ?C et~#h9Yy5h=!,,-H*\ȰÇ#JHŋ3jȱG%>Iɓ(S\ɲ˗0cʜI͛8sɳϟ@ JѣH*]ʴӗJJJVjuׯ`J44jThӪ=u[g}Kݟd˷_s vÈcktǐL9ca+k|qQǜC~y郗>ͺ5Sp-vM*k=4q :$}#[У.>O A{{EA_]<{O;z=??߀S~&ș VO &6BdOVanԃ=uxXjxaOb]"ؘR-JiM*ADꫜݶ&^ˆ뮃a* lA~T̆vl Vkfv+k覫+k,l' 7G,Wlgw ,$lGN,0,4l8-DmH'4ů,-NG-5~Lm5!]uBzl͙^Stdm*lG4v۴wo[5uϭ|߀.Uކ'8/܎G.W6Ԕemo砇.ȝ[:s|i):m݅Nn^b<o9e-|G/ĩOy=o܏=woF>ϳ<2aNR vbl@: Z̠7z GH(L W0 g8̡w@ H"HL&^deNH*ZX.z` H2%;6KT:"&| J&= qEA, Þ! VE:=o$i-SQ+\\q+J-sJ~sEFZbxK^iMzzȶ덯|S6wq/~3]hSY3 w)0%HEՖt+{ GL(NW0gL8αw!PMBЈN ́7s@0' Ȥ7IiKx_7-BӗO͆yjZ԰fu*PkZ׼vWm"c/G %&6;ݒhzC Ybm04FDmi\#Ԥ J4JBx <=mW5azdInV/R&X-nHD` =Nٌ L0g n @Їsl\g"PsnXϺַ ]رނPhOpN;O Oƫr$@?c=9$=ie",aB\"W(xa L  JIW)\&"ؠIwD&BTݙt{NAxWD w!K@Q:Em".HAbOS` a'PwFn$ l Pg{p` # 8J@w ! 8 z (Y%p}|!~#X{ND ! @ @4;3xF@? 988 ͧ3~$! pq,Dns0 o`xHv8cYopH@!W^-x@%1~Xyj' ~0 p0QG0H+i(Yg{ (pB!7(:0~ 1 (/}HoRQ~‡Ìp7Q t;C tx 8{ܨlc u8[B8 a. 83.hިJx@hA9 /1pӑ p}zH߳\*Y6(Il`Y%$ ?%9T0~l)uU>! PyB@dpY BViA!Pay6( t`{ g AAPr0vP A! NT>`?GB,9 hYG5TG㗆9d0 ZI3J9 q Iɔ- B?`C69Aq4 )21p@ϹÄ@iA ٘BNIJM9 Ou$ ay A xIXT iΣ! M`i})TYA@\z)#7 7Qw-?4I: +j^5[(0~'!;¤A5Q i,MND@. D̈́ 8=ٟpzAY%qk$ ;9 P"! h*SJ"Qz*BmHJ թ!thXYE6QXl:qT E{P 0~U(78 %T/8 l1MdHE g{ { 1y` ~9"ᮗ`ux PGy p0GEc%7 p@DZ :ƚ,  58 G P ;al8p p >PR;T[V{XZ\۵^`b;d[f{hjl۶;y6r;t[v;cw|kv5y ;` `@;=k c+#@tsZ۹4~A-kᓺz;+0 +@۹D|682uL>Kf6co!{ |`۽;JyW@x ۾;' *ۿ̿,f" [A ܾqq<j0"<$̿ ;86Zܞ1!&3),8+.1/;&ĺ$3<6n;„2!DtF28f:,<|5AKRT V5 LNPSl0|1p,dL2̂4|j{>@e,.E^~ " 01fWP,y x⼓{  (- Z `1PZ:y܁~0 @㖓{` {P  S(@sw&gfD\0Wwz`80> p0遠n`x1-@rF @@g`XI煿ّp .d@x~#!ʊza{pa-h* of+P 0dA.\g  ,1` 3{^^ zlp >Ȋj! 
~?n ] zn0 nUA ``p 5wM90Q p34(1"f'%3r8o@^2 jϺ8}Z.(}nGuH*-m8`p` Jc~buF7Q1(,"^ Y9 Tx 2*b2 a`tn$uxqx>YY tzH?f4hEm{' J|\h: 0f,p($)_^J]{ԯ%$cʦgo&AdA .4C%NXE5npd蘱B dA^M9uCI&M T?sSQNIoIU$0ѪaŎ%2&DM2qΥ[]t+Ix'\បMC5E\ٲ!?Z@".&]e;?9]’^ TUށq'QTEA#ťO.8uuMT*HY;a'_a3J"iW.Rc-OoєK0V(e[ذ-(/g*d;퍏.kI0œڢ}zZ$XdT338.nnD/ho/jD:p~KgE% `Zr'o 'KT0 5I"<0^DƤ%.$j8KrdH$P%D!Id[iuEшb)ƞ,q!M Ts,$Et4 "5",hg#TFLQdp-豑QB%4rRY,H8$t8 ƇdIJ%=.c+{RA}t\mI00)/Em(4N 3lf5dɹ +KGHnٽ} bsN*߹-J`ewwR2$D ]*p&`џ\0U)4[?YTYPw u%S"HHO%nANq`iK{-XdM}IDlx3J(r.fKQ/27/.SVmHUkt.hեw0% TIDxV0.`M7.C]- .!HzXTb(ѫKThZ)](T2Q ^Pfva!j]8GFz'FĮ@>* VmN2.v:Ev3 ٧y )bsysz:WN d%.dʨ/UEH@ےe!a'{=E/Z5R`"XvVb1RWFC?W|?pA/RuZ;<`CLwB݀ԭ.!Jx"f- DAo0`ɉ)b<,P`ň_];0Dgkh6;E'π\ 8+0BLL~eE/#I<BXK?Hғ>A{>,0蹡?PN(?`IBHE  \BH5c>Hh7 X0)#3K)9Ҙ?P=PCXN@J@D8@P)KЃ>82C1|H8; X@C <.t1IB,,@2 C1AXi=L2xAC@x29K0;Ð k&G?6A0D38?H5&BEWb|KH ĩJF3 iE`FĘNPC FbdLDEm\GvlGw|GxGyGzG{G|̒G~GG HH,Hp  L< $pGZ@JRJR#)$ EP`40% @Jr$hTH)HL)`E~1@/=Q lVL7N `$ $0ibhb`n3`#)rʢ$8!pw H (@ ʟ[,lI  g)²P 21h2AJ @PRX>@x(4`Kta4L Li` x)PLjlT*#ͥtAFZVb `H'.@** 0SwquJ ,'@\43); b)+`ӱ昁*Y8ZZjv%_.q:&Tv-;4*.m9:Ч FrG-I[ ,H8uū&v]2^ |E  5sva$ 5! kuF::Q\84  ӸmaYDzmF@ט-;jv5FtgVp7^]e[`>a[F uA 1ʂ@= I `8=@:@g` Dp"wHf '[ϼ7#0sGOқOWֻgo-Ͻwޓ@OH=s| 2@ߞ}Rݮ}r>({-)[i?9~H?sT{>xGAz >x>؁H8$-V(*,؂.0284X6x8:<؃>@B8DXFxHJ~LX7sTh>wZ؅^b8hdxhjl؆npr8tXvxx |؇~2S8XYX؈yAXXvXECyQ8XxHz؊c8Xx؋8Xx~3ȸ،x:Xxؘڸ؍8Xx蘎긎؎8H6W*qѰ),s8u3Y*icА9鐓iXiT#ypSãoÒ㒷(Ch!) #:y<.{E?y98YEi9Gi.DIY9K9OI9Q S99UW)9Y[9]ي_ 9aɊc8e-C Eg8iْAm8oyu8w-kDy5 ГhgytY`b|܀2(rBuӗٚ9Yyٛ9YB@ 7ǩgpi0vi1Mߢ0Yj)y1-#R2َd``= @A+ 3p0) g:sJr@ Yb^&J3J[p9 r 6,6%;z3?0: QfL2RPyrv' IjBУ\.3 c*f2 R$i2"rZ,`w M}Z2t-J$3<ߢ§ 0 `*-:)0mz-:F%*m:ʒdhSJ0PBRis\xi !*2NR Z(᪹@˪3J +U׺,ڭ{ 0$PZDh8Y.,*i e⬔s 򯤰rI402 0yAĀ/5 Ұ#[-KP@욲B `A2:<۳>@B;D[F{HJL۴NPR9 9Ò\,^a++^kd[|pj.n*`t{+x; {f%}K-8[r͒=B "{'[ h % [U([ `# +һ "g{ƫ(›";+F!,@++H*\ȰÇ#JHŋ3FǏ CIɓ(She˗0cʜI͛8sɳϟ@ JѣH*]ʴӧPbJիXjʵׯ`fAٳhӪ])ǶpEKvݻx-߿۬+È+^wǐ L˘6̹3ɞCMͦS^ zװcE-m VͻsY*>x桋8wN| cνwuQ8yϫ?o~𲡻~Bޟu?U; S}6߃lr)AQE0q K"J,a0(#Rh\/ިuJ}w⿾}觯$;o{}* { `B g@\f4!z%O`Z.U(,R0 gH8̡w@ H"HLB&:PH*ZQT.z` H2hLa؜,dn|Ŧ(Gx̣9e ᢁA/zHKH`YȁLH$I8=tTA8R e* H&+ JS.IwJ`?X+w9B,Y%d˴%2%T-VNęfB3&̦̒6`DO8Η/:·xψ TςM(ZІ:Ԑ }DyBЉZ2J1X> @E*Ҳ'9)IBiUt>!_#tI1)ݼDdь^KH'o$hzA=`R$z4Q(UծgMZָεw^ɾ_߼ NA3Ii[6ڞLneVh x 6 0X"hAy`,ЀXǸ"Ѐ μp@ @$px` = H8p\o 90?dV.+ X p P t|#R@1uV@w8iE2@ X{. q <"s*t [@E4 +`ԏt$X \z7~&xܞ?wŏ\ |60@L?oi9>:~@ (MfԿqfdwCdZؗ=PZ83=|.887?A}4t`}7Q3AAvȂ~7G}g8'<8p(wE7B$>@؄sc=7xV7Ga8_56x@g87^X؆t#mƅ (.(zoH7ӷAч7\(Cn=- H7gsOYXwChmfGjȉC|3HsSAvHu zI(q7|49;br>Ug|R~7NmfJ7Ĉw71Q(GcË3~3 q#؋7[p؏×wxHi|d7 Ȑ,3TgHbcއ/1l3}`yrG,S>|@297vp.1pВ=)6@uA kr") u Ti7`x Q$0~z7_6Dv7q ki7וgRw.Pwu7$p3adK3 1`x k3sip7r,@ nߖ$ 'p{pwp>@ٜrYS R@М ,isn3`Z`:{yHٟ:Zzcl ڠZzwGȦ^<f-dz8(*+7*ڢv%0:76zܧg< %fSnLU@zF>:N-3ӤobnCaT шQz1\Zi9c4on4tZAsx3z=ӧ~& L SLZPZꨑZ(KZE:!* 4Tҩ)Aj11Y6Zzګ:ZzȚʡڬΚ:ejV cա]1E*1T:UíêJH8(Jb.-0(+3;%2 a0[r[++:b;(۲.*04eW²8<۳L>,"KqAK0C{JL۴ %^2 qiarqCG"D%EDm%aA=g'hkZ &xCz/}K(o1[{ @HI;K{"(A+*-L0HR1ۺ u'HkҸ){*" ʻ ۻDrXM+[)mjb/`ڲDɻi<˽.Tj[rlk->k%;Xqٳ *1!)\,!q;6s(µ3*.|,L`?%SU/1 >1g6=\A,D#<¶¼ w{mGkOAHcSUX蒭q :JlŸhj"SW˖'\ɱ(e ]^ XZrN\܏e+JAn*Aypg,cɑ(e_Q2 ܺV5%=2!Le˺%<\|Ȝ̱Sj!,3-MXA*\ȰaB ,HHŋ3jȱǏ CII1\ɲe0cʜI͛8s3&. uiADϣHʬtӧ W(JUFjʵ^aKٳhz}۷pe K.LvU[޿ >`H^̸$Ɛfe}-̹ύV D^s2׋3oM'U%VԺ} z/#8μzy;N8uůk|yW:}{˟O0kv_~XQ| {ٷ~u釠v=hG`ft9q~hVqơ(҅:"cc3a\Z(!w,i䑂WHJe>6YR:WdmOڔe\[Lҗ喥EQ"X#]nEsX:}Vvzf{!p'Ij6nmɨe`)ѕp ӥMWC%{@&)jz:VjWZ밾AkUʬg ܳɊw_bvlH9me~+n\.mnNޮkX n3^+epI.WG_g,V _Q^v,TeSerF.<\e,3O*PMDDD3)hb/4K+ OWݐDG3ne)MԡU/e͉(j`-wcmx|߀.n'7G.Wngw砇.褗n騧ꬷ.n/oܖ7G/yOo}R_w/o觯wDo[kԆ 8!@%Sq[ Q:<|'HAڑ̠]"Xir`L'(*%.HZx9~͆3b'BF>H! 
a)Pu(Z񊫙"I-eZ^ HƓz"5fbex:юU 7=n#Aޑ$r!I *3ɼ7)GHz5|a+AX ,gIZڒt.w^$4cV˕PJ,2̿$=|&4%Mʝr[46nzZD, NS,[$٥.l' O`BLX8zq +?1($zwF%эj )!Ғ(MJWҖ#'LgJӚ8ͩNwӞ@ P+DѡHMRԦ:PTJժZ Vʭz`bWJV-XBZ̵0(\J׺εbH]WkHP Mb(\a, ^1f7zֳWhGE;!Q3`lgKl] k5Z0E+Ьo[ ǽJr{jNV\r.r]]:/mxK^Zy2*-w%޷*կ#Xj$ A}+!w$)* +Si$lW71MD*/I0)hsE%acb'AIy1`xQQ~<;H1miy"(J^DyLEqO *X0#Ň6N9a;8Dts|<ϰs-PBЈNhC4x;ZҘ^,%ىRkn?J\j7AfMZִ~@p^z贰&NfΎMj[<ѵn{۶$MrNv[G*}Š(ޫ}_N2O;[%*Q OXL)@l(E:.?"C2D`NT"p$FHN9 ]ENpoҗt48!?ay9R NN*!"4DŽ1qCAe_!.qJEn`tH~EP]8 L "Ѓ!ʣ'> @bH|G$Q L\bdHEJ˃g 0yы~Xz a{F-D~6P^&$z. =(=L$Ob? 52~ }<)"3$zN*Ar{/L_w%UJ" Pv{=~J9g<HUXv~ 0 }Q gPvPJW"80hЁp|S Hh1(f> Uew mu|wBA\A 0 DZ JG`!wF\! |A\ eG_8`}z`npSxpzyHt}tf{pz8[Ё^H( hxpzPvpR yKHKU3THzX8 8;`'OŨR-* {@苴hY$ЌyZHkԄ8|P^A JwNxWy x8`}aItkxwuixgĘ/o`jБRY戎f1@q )RÅ)0n=1#&Cs xt.Bo6i]pH]!є<49|D8f=FmpqR5*ɒ8 ib7pit|Fgᒀx9tϨ 8ֆfǙp9 ynwfimp YijQ:yIש@aМUQP9#ф\)T HfA ǜ"sSYoПZHF:= =(d@[Q7P)t<՘gXp嶡d 2:HdIv x` 1!Bp8 @IQ 'aH ypzo7\7*0| Vey0!rV!㤓~晽A rJJQ zm:tp4 WlШ{A @%! xgqê0*p{ ;ozB'eztУZ@:0 u Z|? 곥uDr٪yLg tЬB9 J,P:B @ P :Pg@˚<ñcCj%>pW8x @q4 ry(y@8l}LttΊ#1 nqop7 LP>{XlZ?#!o(7q 6G khJ[{+\#t[{۹;[Z9k9&@0{p[MuJ1Jk+K@{ț$ ۼ{1 0[{k۽h71S`蛾껾웾  0ۿ<\/@Y+U ,։+`j jUp"<$\&|(#<-:A<4\6|8:5Ƒ*pq[&F[q-,JZA,]LRȚq\W1IK|i2pKl HlpWѦU|M#wa:H!˶ cqq*sJ|\ h2<.l;mќ{ ɴɩ=_5k<1ʹQUͦq(;ΰT!FaϾ:ϯSF9aЬJ!xE\όSѦȢLblҳ Tp8:<>IZ1`F}HJLG-J5oV}XZ\WyBM͚Pf}hjlm0R}K8vtw9 أsj،؎، ٔ]ًQ1ٞ٠q ]ڦ-0qڬڮڰ۲=۴]۶}۸ۺۼ۾=]}IɽpD]ݐ{&0=laqUи b`2*r- &vq #`Eϒ@ p 0`` PS`MYKe,('@~< d۰ps7ߠaI1"4YpDN/@w8n9!LBZt@lPe@M{~гva31 !Y/-P1&",֚eP~{4 QZ 0w wctpkPY -J0~e[ڄ tp !J3"w~f@6P`B-ۭ^}R` Y+Lڮ;}3;04`"p[s>ށ{ ;q,Yl:1.%q[@j:M[/h$Y98΁!yPX<?GTOH'iϘЁ,XN P:ʥy&Bd6IdPZnYip?9tYh}3/?;Q '}XRS}R!.Vo(kv_0|UbM%aJ.:\n?/|a_LP%j>" Id*{d0d!]]O_! 0G?K&P1⦅7l, $XA *DhD %NXE5nG!E$"@$C` 1eΤYM9uOA}r*RҥId(=X$BLNZՌ&P `%[Yiծev*Q Mt+U_fN2@ =Pcȑ%OpäK:dgСEb1kرeϦm]d D횞G`@lYdxsѩ~S3yΝwU\|pY( :/*)ҤKJ3i}% 189PXtA# n,xpCZhCSTqEZD;kPD@G 2HN&C?(ː!tO@ͬ`I ,0/)DIHKI sZ"OZƣN<TrK* $t"PN*!3uQ{MR<FJ 0^8PGTTSU#qC\]uVBd4 @Z{]ҫ5N.v٢e-c6JMKkzQNѲtLsLw bPuUjD$n3#7t}3}T,L_ojY-y &nxc$Z㑄wG -|Q`ZM-ܜ樸[*4,K;=h`bz/b=krr0>+QMlCS7=KƮ萎'؊Wl%Qo 7\V}P[nq& C+; 7o[ |NBa!Ewޘ$90vM ң`Jދ7uI-<>g=@ɟK&ͲP9.Dl8|G('4}wG*O\% H" B8SYxfPrP=f"Nq`ca edțثxX@V$@Tc! *d !U SIOtSʞE*џK|p* Di< "EIf_d-rCdBR^96ᒈ= іdBxˎ)pP)6K( .l&hMt  ~R2Ak+PP( f Sttsj&lD@@*P^N% 6gX^0VU=RNTBweZE [HH0ծTB;PBiJЉRB'(+O.dMF#TY.p8U9O&E+Ә@V3`& jI{ܿ768^Dp ](XWbی ,*ĩٕػN(v" D8D~ahP'9@ L׃. :٥7'9#U!⿵-GDL"X\ D^"'ЂfHx R VGA e0\Vl]`VH!"; Ab#ÆfD`5'$ЂT`L<@":Kd GksFF;x9> BMtT,NTt%:K ¹%1Cko"\g2SF5 %5eB!J(f4qd"*>oݓ,u@3T fu nCGH `Ob>D|ۃ2A*p xDPl*w=%Pt(,a3,D ~qg\x="*!2"yM~ !2I@y]r3y9Dr\;y}s]C'zэ~t']KgzӝtG]SΨ~ug][z׽u]c'{~v]kg{ۋbuq{~9Bw'>bm&| _x a %8|/wH|-yOX|Eс%;!0Aιb ~ ZB~#w`Pdoۇ0@}{@| QcrC(j~M_Ȅ{-}_D&~:W|#2ʏ+sHbZIx@ I6L@Mp|@$S @ @ @ @ lc A,A \(|AAAAA 4@ B"@H(B%RD? } 3'r.)\+,\/ C23T4B.78, :Cиôà`Cl9 ?@DB D2D@XDJH ID*)M N$E EEVt L#+TTUqE1]|YT Kd~[\G f$g. ^C_|Z`Olcd,"Fj XE)4CF5CustDh8[G 8E)FXF[t 5ԉp|D|z${aFvC6T>䢇ԉ +ÅFHGỷ i Lr|-X\󤹚Ƶ:+P !>M@u40QMTQޑQQQmQ2 Q 8 -RsI?hR'}R('SR+&#R.e̓R1S2-S3=S4MS5]S6mS7}S8S9S:S;SSS?S@ TATB-TC=TDMTE]TFmTG}T7SHTJTKTLuITN9O UQTt/݋M<  X#2ե%]CEC a؃F?0EXz2UYRU„AH7F+M@@8 Edl3^;2KsKAx^E86<еZ}ctKBJU?- zu 4 E-WXf+?`73VD@70xU;Xhm=LJM6.PB42QYٖgS Dp+8` 6Bu N1ִELp.@/jLp-]˲݋#ZC/CX֢ J`:͠KŴuہPI5E0K[>ِ}\Ȫ2Z@pmܟ\\ lX61\?8Y(܈ ]m^ ?Wh+ ٝưZ):50Pݚ(PA%E D H޷d-²E:7-#^ E@Dp_5(Ex 9 `#E\' u .ЃNpYVaD]'> I)S3j _H N!da#n!paX:b(^ a>⎶8 KNbNZ (bA(nc6-X(N`bac7! 5aa=dєI(`0dD&dF bŸFݣ8*Aݩ v MPV 23cC)J CTSefcX2M fcȠ2)(lebQ"OTj P JcƙNkffg{O%PTsn&aƉwy^mV糨pz#~f(n%FfVf;HHhr&e諐݉iGz{藾i곺qaYj"8c"$;"b Mj3]% [5jtށȐqd>kb}Vkn&[jUn>F=E. 
2jVBp OV:n̆fnNlMi`e.mpc(Iܞ0vmf xRS]r]Cb.nYn%9.:nBhᾉmޘEp.N A(ꍁX+oQ= D(7`n^@ Ȃ>饧&E7\ޮ6GCCٚzS!㑄BX,Aol0Hpy]m]qo/JEӃEp 梋Ko0AͩBA;EX7`."?%dzm sEKE󅠄3,:?1"EP$o嵤 EXFӃ\`G.9=7H 8uh2`W3HDNXCWHPBKf \!Ah2C^A6:#UL%7?Ѓ}@C'^L`'NuǬlw_vH' x9v?xHMxh:KuqvڋߘMxiq..y0y@P>Pyg&ooy o!y#7SaS 9T琧Gzzz?WǮ{!ȓX+{G9߉oWuAhH{u{|{x!yzğ |ɏ+ ɿȷ||uqUQ ߋn|0}0ؓʗ%x Կ}X}J7.UշB}ܷG~,Z}~,a'tǟap f~~P~L"u<Ē{HG/z`gG}Ǻ pwf}i`{ ICpwֈU؁ڸ7w?gQ710lj ?g|Ƽ x0_Oh֏ x py s~z_goooooooo/'xxxxxxxxxxxxxxxxx!,%--HfHȰÇ#JHŋ3jȱǏ CIɓ(S\ɲ˗0cʜI͛8sɳϟ@ JѣH*]ʴӧPd Xjׯ`ÊKGfӪmv۷pʝKv-w߿ L8aˆN̸ǐ#\,Qʖ3k̹c̞C-ӨS+%H֮c˞M6FضsGĭ3|8ȓ+_k|yΣKN}(ꩯcνkO<A</s;0(`b W&1F(NAHZ^a($ aY,ؐ.V4֨6"c#ȣw>z?iHRSC&PF)TViXf\v`)dihlp)tix|矀*蠄j衈&袌6裐F*餔Vj饘f馜v駠*ꨤjꩨꪬ꫰Y@"C +D+| `lK@ 1LЮ8 {>plR RA "$  P@ P`,2 lpz&` fP14` lؙT< 21$,ہ ,̲8lʠ3@ 2F<B7e@pd/\f utmz GoTY-i*L4v1h܀r#w4 |/8JxH/3C!U` N_PuH {T.)}MH7w{;o}N @؇/~Cw H({/ Jyo,%m{1d06Ou(G69+@,Ǒ m V=0y IWЏu"ڐq2_(1Y@dQd.`8+;| 3&Zp %5Du 1 г0M+@HP /y $ȀPA8JqZcȮc "%G<>`3$@-J)gy@CX ˢ 0IbL2f:Ќ4IjZ̦6nz 8IrL:vcaO*%O;ЏO[Ͼ{Wڙ.4>OϿ8Xx ؀~l8'X؁ "8$X&x(*,؂.02(Pc28<؃>h#?#rCxHJL؄NPR8TXVxXZ\؅^`LG7'y!{jx?׆#lD1th"w8K`\o!~n1}XwvH[L`>ЈX6b(`["$aR2r8?'|{"'W!,?--H*4B!#JHŋ3jȱǏCI!ɓ(S\rI"[ʜIM/ɳF > )(OF*]J(ӧP:p*իXjʵצ_(ٳhkZM+0[kʝpݻx 2._f 뗬[yI#KbWŖmyⰇ WLӨVNx4듛_˶khb%o\-c#_RmǙKNBWF^w- uӟƮ÷^st= \'߀}tB_) HaDRgՁ ńjU r\jI%r@P*hؘUNb9a`88'gAJ$!E9"KWRYP0v#_b$(XM8u)Ew$?YkjkK#|,iCHL͊5 u$Or⵺Z*(nګzзj(jh-T&QDߞ/B pTpq[B,ZBkb;n>:1[_DK%cSQ^Y/Z:[dh!%I-/,q yl$ol1rv>OvuMnE3Pe}wV, [ANbו= +B,9mp [ %xt{wt!4_8标nV凉J7u>閚7x~|ZĹ@N$ܻ_w/!ROc5ag~S%IGvl'?wD^@5`_T Z̠(= j ̠C(DŽ$L "Ѕ0 gH7!tb͇@ HHL(2&:P"s(*ZX.z`|21Bd`M3qXo#p@:xc.<ڑBS@xƥ3et w6qWզKCXh*|j{ I ֹ>2cqJ=iFZ.9_&%wu󟂸Q i%H&qꦩJdJ8 Y+6t٪h:qqlԫe|7 pj5JVZ4 u zȭhAAE'J o گ77) ) ; ꬛bG lֱ4Kٸ@unx3l.:U^Cȍꅳ;]8%rLL"UJJ+>+.CH B[93HU*f1k ۢ+'NE"H}H$( 4PnKVv5ct y l'*~ꕱ2baK# ;xzKjb^H`X˜+u7fۤDl8.Z_HgKGVa+OV!,S[Y+ ֨hvk$k.dtkI{1ۺT;0"HKbjk3][ָͪlsJso=YC-V ,-Pzh 1dƾٻ'LJIjڗąóq eafS$:\D\G7I|a 𫤝l4ȧ)g|lw6%wE+B.ܽQQ-5;Ў QAX&|~m`,gp\~w_4<ᔶ h\Nf=+Ap4rې {+] Vra=_U]N>8b^.&Nk&^6Ȃϔf;3N^TH\1/%w͛Vlw hؖ^t"aMn')_tߞ 1,0-4pZm*1EE۪$)A?~T̀C`A 8t^םx_Ȭseh ,(wфlPd:B*}_ZBɺ+/,7+^GAl {_:ZiF`om;_/ρ_R)?gFtFm+޿B0PӄHQEr)CE$:px  %NXE5fG;r YI)UJ1eΤYM2_O+w9(NG.-S &4H8@!U?` CZtXgѮe Sm[qeI]!b]ʷ~ۦ[ذS"@2,|CE]nbөU|ׂQ=B عu[;n )`mСE)0Ńv@ܴD}]%o> uaf ɣ^~"X`uA <* 3R, d@<`r"OB + B ;0&-ÞB`AJ 4 $p* (j + $H$T!jRq"lžJ#0$.mI,t6ds8%tONN&x3 :H' 3dq">kGtR(u-ң0}NS4A.cP&?oAeJS`uuVZk[e5 -( VmVfu6eh44Z5#j2ˠ*7 0ȭղv^z ^,w_v% @]H*NSOᕗ_#W'"xg$P'E/c P e/vec5.fb8&%8"@0a67:Z^KX |Lkk5Z&ؒ@:v69l߻mvhr`i%poqM:(Y m`5%gh-F;O:Q7o_lwq2 ھ4xW}ϳyr@:Bu9z*~|7-Ia8BޅL|aN~뗞H>.oO_ (~,'$F M`=|h )O+da m:fq#ג80C!4Ls6 uwJҾ rk4o1 :V`s)ݪD,6τUR08f^dc5f ]H7Q{ "820 C&RT棑`k18`DFf2Wb`h$BϺg (DMr)T+5iK έ"M % SIQ7)Kgŕ/af|LFx$ KDHaP8]B!Nj:fyL 4-%E#Yc4I'NȰ' I2F p,=LGzQi!I˒p#0*-JqPo~hKD'Jx6''zsRH SĞiE * ZԦi3h H0 ԝniy,ƕk%V*%TX58-g*&Sl5]0BX1OI}MXӧ1X{ԓjkMbUR0 )~:@b+td.JZTv'H FY\t41k:O"1˻)ZLCRɢmy?2o93炂%[`TUP`Rp*O^XI8)^^Vxͫa4+H^Ro$& m|2od4"n#*Kq|X5Ey 4`5P ƻ]%Cĭr[:d9" 狕K͑D \fS?9.fdTs;GdT[\ŧ`8)h+~rSzR'܈mOt.)ync 9}5`ߐy*,1,?,/A\ C2d*>՘ h_x ($SDI+1$+4 ER[ DEB+EV$jjűSEXCZE\EE]E_wyE`Fb, FcLFe\Ɖ9$EfF]? TSAmR=-j E :F;}7TJP( Et ӊT ym8P5LjԫƆU"uR OMZ[emۦZE ITRی:YŽִg9O.M8; ɝY ;S͝V5&: ѵ:OlrJӕڿڽ0ܰ@d]]5ۅN\ Yd9M%^^Q {^=CXgE_E= k;Ӎ_N_ ݨ_bT_YOv&]`\}3~16 `R ,ol` `~N{`HlaaNamm"b!Y`,5b\̼Гad‹Rf hz-43N}TÚ`̕>jPV$c ^۲,CT<&3s%&ۚr]-Fc >~c[ bGF(N^B+ۢКjdմl;bI<- e l+ 1ݩТӹea#]cQZ f%EͅTb7_>!因忧HZT\[Y&羭LmfDC{g&M],vbvvTfhn]hHUhȡg(b~fbuύ@ؒND:iW+u78Жn~ߙNi\&6iť``&䄅h`Ԝ@ꋩަ} ꁖ_j fGR} `-QkVkab$Ek-c-G2iȼ>أ^i ˧e%]왾K|:dpa}ݠSN\6,No}%)U޷-=.\__mcmO|vn.@-Rv\ =mxnWL-8/|Y<3g>CCNjg9I5mkfzi,6Z&]UҾl? 
gM,'jVN"*H\:ppn+2EobpcoEͶ(,paXR GLG;Wqdw>leqm94%_b$&?rݶqZ_DCMqvGp" rzf 3W9%$郵O5OoUUӶ7'f SuqMVZŨmЁ<7BL7n.sksssTQ, _ctv;r=xeP欞-"iZ➘6XoM U`/'@gcF%^_EV{Ivva;Mfv<`pl ur._p`huy(w6wǑ|lElG |w}/5?U>xΧi淾nwl?qZv{x^K!bdnGtp~c?*'yawt`K:iwpQdSGjyAwFb8OWzz^Vp'd{'_ζ_{{/F{{||/|?|O|_|o||ȏ|ɟ|ʯBvߖ!,M-=L@*\Ȱa #JPbŋ3jȱǏ CII\ɲeANʜI͛8ssdŌ?{ heˣH]N 1Ơ$J!jjW_Ê₱hӪ][*۝]ߢu+$ݺxeK^6È+^̸cqjd/k̹篙1|a̦S^-x)i۸s#.=[ޘ Xv&C.9ȣG?;􍮁˾nv 4c&9箜5s/<{yi⑷y~OW|FZ}ݧQL'ᆒY8D'v{r.ƨY\2樣X &|$7^يX0.Z4fq->ieO9#k?$C)yeSMPQ6EVt%[%w_f`pufho7g^j?gaFFj&htF]rIz{-Y…*tvAtjܪߤgegSџbd,Z~<-;ѣκU;WFM] Zteni+ߞ&Ej+[ uEְyi{Tͮj o9J.p肪 =\prqIq0k FkZFFlޘذ=AD%1w49&LTOL_CtgS4լ̙;'wN>_)Rcpid.֣Ul-B2;m=^wF3|pҚqm^њ&7~۵GyWan*AcSCO:GEzK>jO+~QDZWyjےy`[uyOCdt߿KR6qSPCQXvGtp}z܆bv`uI&Ov X IpxIեU &@1`F6C?|^ CDJ0 6*ӘZ&_bjAċe\8 $5,аFDwA)-b[c8rdE3H43YF~Ǝ#p(>15)DG鐜RJdo(KQ/)ISZ fi^Q0UM,P+S} Z2SGL"wgZdXWQh3h^F '9ɣlF^Lw:>N5> J^{3FgDNv І.$"dwЊE7{ U9"T0 =N`Mݸ.iaL!1ZH`&)Po#şHF]." 4M *$UzWͪVAm"a XJֲp]=kֶuH}jHx )]󊭽 `KX-6kbαlb*ʒͬf7Ξ hGKMjWAulgK"nwgo[Rmr:דlt-;Z=iuzW$.^+Ǒ7MVꍯ| o|_ w_LN`u685߇D(sA1Ġ3pT &IX"Q x"#qF`6^F(x&H!@LȓD*E9J%*U.k? FF$쀷\Խjw(3o;- ʉAEtĉ7Y8(;ge\$>3,I9`! 'k$ȁ82 Ve-֝.%pdA@#XJ<ט^'񘫵+}[S[(q#!Hm.ȖPBܙ&^Ip#!jff}`Ñ~(8o>;\8Vߒ#&ĶkyZVo-$PbzȲ\p#c&8%0MǝWnMy!_I<$1úLd)xđ;Ob`i5v}KǙ7sALZ6nãl%!hdV- :k},U#a' 0.:I!l~c+@ܼ~{BB7=Nۏ}`M|hT~iH8ŵ\-$Ff ۜS'}njvR߆7^}Hќ.kb'H|e#'}wa?f0Җi9uBƲ}}(B9qgm`$q~43}C:oy;WiA W>&|@h?B-ExE7!v ;GJh~Lx o&xK`;{_S8tEYjpmP[|otd#Uug0 p7A ywH4f6a)(!7 C=Ry;1~XyZ탆ŁekG hq[XhZX:avH}8 d'G_'aԋ'3FD[& 0 oz2Qp0 uW%5]ԸhhP"AmxsX7s 2([=2'x]aV >e%XwdA p[5G`)'KI'҉jqƌ*wwhTXJrg=hr.A %y])/Ig5iaET{{6 ! `æg@!G~F/8j'}gf)U!p x@y@piuQ 8   hDck+wكXXva&fdThVptfV @doP ~؛Q"!U@ N R YyؙڹXٝ& i]yrK:ƞKRnYvyǟ |:Zu*q*߹[hUnJ\NWɠ&*e0 ~1:4!Z8:V7=xg%V2J?FzU LڤNڤ0TZ6GJsE"dp`b:d*0 Pj@ZxɥE)יvzI xv:xA c:ZzJ9Yڨ:J2:r;;*S**y:Zz~IJit*$j`|_ա%6źN^):˺zv:'Y[(ڑڐJ*Zjʨhg뺎*:uia`zѯ ˎtʬjhɒ*4ڞނ@e'{C^ 0ii!!Xr+6ƘOn6)yWGC$h{,gxٲ!# e8@D[Bxs08,Tq86lc汣~DH3d犷BAȵIb_Wi HE[U;kY"tc*B깂g57Z?ɺ~Wk,TAkSi,U1{icrqgI٘J:"H;gd[4[5S-8pԾԖt[k2kf'cEY˓6\sb::5+=;k<+oY׽{aGiÑ&'Ի 9_ M3_t1š$/ܢn&B{+7g-|;6J QSi`[9ʭcXƳUNQ\WeWnŒDZ;z{Jjlj ۛZzȽspɘɚɜɞL8+kʮ\lʦDʲlzLȧ˼|dhW:ük̋Jjg¼-̪U35̧éEܬRGP|˹#,ΝCJ٠C @=xEmzCԒ֜gI;u+ 4nW-6; y]d]f=ZQ}j roVk_?Չi}P   RZ P)Yv 9͛,Id_`@?aVRD|W}&mr, }^! 0aʭR@܆atr(%c +4i]T=lW-Q0f`S,Pm< ZVi8Y  Q `LUSp#?h^lRP6N```+%f=⛑P0&?S.YED<^(MᚥP@D>p)ROnf0TMۭiÖU@ MQčfyN6re>ҋ>~N 0N&e^N癥>-@Rba=U{ߐq>V`q:˾]U=@z;\D^!Tf0SYnY|]k9XP . 8XV@c UN573"z׶-^T$zL`+ Vun.}"NkkQU.ʎ Q& G-GPn`8'c,%cTRy]PlCo@/Q/EpPݗ͋n4QP{_f%poQ a/QxE-^?Kqَj qd)rEHc?N@:t! 6ߵ,UvN<\PJ/Ԙ(a>n/X7q{}?Lͯ:ze_f^ $XB  .dC I@"E'G 9XÑ!UtȒ$'[T 3bJ67eM04'Ϡ5s8eQI,Q|4j Nykժ[z45-Lk }8\>ׯQ=;x-ԦF=8%AИn  EZá]Zi7V:2klGKgٱ+^å0jwm&@aWA{jŏ'_Uz/g;{^7s=p@P*#@Kp*4@[" 7ݔ0)lMCꐳKc IT,:^cCF2""1d03­fDGR-J43 3FvQ0D034JD4tL5-0$˿r PM0dN*q!+k'B䳴LAԔOCuT%uJUScE  P`RV^{%V|vX75X^eu@Z}ZVVg [w\r5(2@";a\zǛ_w_L_.{&a#fS+b. $8bx\dSVYӒWv9-H/e!yg{޷e ::zmH^Mzөj-ޥ<@:P>g[[tŷ{o޶5uыֿW|q#e'sC}t:7|595Dtޮ)&Vf͜5%s #@rՁs@{$`m/Wh$,<-i+(*PP[qA}M@[&M$`IF6IX*6QdVx@,  Ђ Pv4ɚۿ_mʅW%jC}K*FL@H  <[>7%;Aj5eo{:W} _NWoQ_%p ls9;X̯-|(b=ay*0M|bUTq]bKg1qmlߘ#qc<rd#'Y%rI%G-UdbYzG2LR23zjf3^,6r|g8{&RYSHs:{YλLkVHGZҐN0iLg5 YAiC_,Pj]um}k\ Q B}k @{2H@`'@h&&NZg+[ &y[l ٞ[랖۵nx? Ww:%\'x ~p ;=؅T`qx^}Bu1@R.s*?)Q$%~_ }_?ܓ gውʸ ;Aj! 
C>xa*+C h١ :<#iR )09Aa?9 6yI<<>(|9(9=аai@+Dػ #6B0@9#;Cْ6y'`BB¿E F ɬbh?:-N\ 0OOACb1O,B=%X V?Ea3Z@9vZTB_I9 A6xaڕa`8,޹(@VCPb@# jGTJIB$ -f2[*6V.v bӨBV6a|1dd8SJ2,%JHgH h~YeNrV%I8\@exk=f}}3h@@hIz 6#, EVKyj>` DXNx%{yg>=J}FE3=Q=`Mf)^38h]IEV&R!TMI6ciu6DZ6xɪCح6SE`jpj=ibYcLekꦆeAЂi뺖 IٽF1FD鋈nh0l~,[7ΫBڝFgY%i}&)/miXIj`b I hXj@n~n]ͦl*X֍J 6^n/$F.6ooiBSNFkUn8d=o*k15 p 끸v /q:H@@k_oopp2ᨄ=3>u)epÌ0PqàqƇo*'l[Cn-tXio!*2Vs*grCqU3q?S=(C^E@IXsx8\rJSTrzSOj/TcglNw!0*3kW}Jp$tpK`_ù(^#"~_aǏ$D[ tWUmrt0Kreg.AUSٙ>3າ􄌂w[G8]G Rgl.wyEPwcU,{wx2GAts6e,hw gQ<ICyy^=f]b).J^:7we^/Qzi?zy CHCEڮߢn?hsWo1ߙ7s3usio,X{kqU7s~GRAx(ov J||*a8}HkU__{j)ńkEl CHdOYC@Dv{fXFXiL=2)dr/?utoix ,h „ 2l!Ĉ'Rh"ƌ7r#Ȑ"G,ِ$ALZ\4iR'2ifGCT EkȠ(ҤJ2m)TDR:PՊGBt9)QVdVr-T(NAk.޼zBL/` ]0x%28IZ$K'O S 1UJzаZ̦^5زgiҡT-DT.yMUK—3i9ҧغCDo"@WD:K.}<[ǯoCf=uFrѧMlhS^a J8!sRH{|YP"۱U% ޅ\mxӉ=6$*EzuXX~5#=$(_IG~?$N(n)VSZy%aٔ2uY֗!b )!.nQ!$265gfFfTxl|'jH6LQ  z"mM hUr)kj:$~Vm`NH1dy*NF:,B"jI[+d0RE ,zmRId8j&]2c~Z_'EO@ɹmR;Cg^&)ݼ Jb$`\" 9q;, GBmIXrD\H(oGK[}5Y5j`lvo(B)GZG2dGH3ajރ<&g$I"71\ȋ(IIgq}V:y;߉d"p_T'mOz*QFr0 ~ݦ*+<|!T 'ۭIR%dL%0q *D" ?БX0`%20yJYr0l`&!G:gwB+bw1@@Qo~*%Np"l|qW 8GyҀl-c9KX D!=^JOXI./Tf!R( K"{I&K@b6Lf9uC 4b0Q&&)> tOs@BJb)za EtLR2yCSp(?Qpt!K ʣʴ6TKsS,a)PzEF6ԥFL}*T*թ>ΩT*Vի!]*X*ֱZ.%+ZӪֵZͬakQ*׹UInc]Spiw嫶 6kawvX,6},c)TO,f3 %gC+5y֐${ղGMekc+Fz_m۪+jW~UJOEs+]@wlu Zna0ki{]=/z "}2?[җ?wլx)Bt"p80##,a8?* sHECQD 0̑S8a!91s1y*F&dq%OB(e2-s^`BCD'@2Ӭ+R~3X#x:26DPVyrg16.~~^Cc҅kuːD#eynh7KKȩ/ ^q(m2j'Wz2S5ꈴ'ntEZuՊ'm2kAOZBN6Y}չ#Ϟm}2RVS A׳6tn_ENdJ(872wt2]SDO 9j6B}w{̫LƵJёBA԰mSߚv$#m$r48r&G@in^OϣCy񜧃I[y$rLUubWm_g^nmI靬 [^DDD%%u4 K!JB!`"!`ƠY^_F!!JϧV(MNjuRL z!".]m0%#VD"mYUM"^)8. HumJ%"**b*~*",Ƣn,b*/"/ZqɵK'[" c')#3e5#4F4M5^#3"#6n#7 6v߀8f3#]$9:H;#U}#<#=<#>lݣ>?n?@䠱#A$B&A&$C6$XCF$5J$PA$E^$FfFn$GJ:v$e((iI$JG$K;>JdĤLRMs!}^e Md$kaZD ND<ߧMܹUD#VEԡ^lMSfՆHaM!\嚰XEY.%S`A%7&OJXZ UiJX#[zId.&De6W^\jfeFh^yiFJN&kV#eό#ibf4zeI&MlVe^:Ҧo^5&˸F#s&Mq1FFl cpvJg||'Uxyb[yg3'egfQSz|BU|:YL}d)MZ'Vt(&h h`u&8FuNE@cU^:(~hfh](u$艦Zjdϋ2f:iE^]9(Z)i &ituՄ *im.m&Xn J6_F-J~nṶnl㝦.*ήs|Jvn"b.x2n-V.IlZ/)mEj.f&zݿ^6.i:$ҫLzA֭Ơj&^! Uaxo*#Ɗzh{DFǞ6݀+"ooE 놬ZG<̢Gi6Jk^m}XjM,QjvRXw/x( iv+)t=‚_0ԧK0AplaĠqMr1~vp EZ~/0B$qFž$j/Jcʾ.jl|:0J%m&xo(/fí*LmJk}ɺ+2#&porq+{N0rP1 VR@+4[-&2r k7n1N+Bʢ.hض;D(0uHItW$3('(`}>߀G~z'`f`균~ v`6a w}ra(rC:"n#Wbv'hb+ިcp17#v5($^9ik=uAZE>)o$uMNVQnecIwetY~i&T]f_a7se)Pii'[mfsqgNu)(Wy's}v6W\VZR9Ui zIEC.U*mpJSSBէSJۨT*j+Jk&6F+Vkfv+k覫+k,l' 7G,Wlgw ,$l(,0BGvA8<@-DmH'L7PG-TWmXg\w`-dmhlp-tmx|߀.n'7N <.Wngw砇6n騧ꬷ.n/o'7G/Wogw/o觯ϵ/o H:Ul42z GHbo &L WP$| tpG1w@ H"HL&:PH*ZX̢.z` H2hL6x!+,@8@-< ` x{>ł )@Z xRR0C0I P@x@ vt ,e,@կ/)X X50Mt%|T 5x^A / Jؔhu,X:O,`I $ >XQ#-O*U%me{'I @&I@hy˒|Rj o4si2`4A--] '.LJ3qלWjNے5!ߔh fk><>K,&b#RAma;qoN;0&>ɄG\`sqpX(X*(y{"[^I2PT(D>( goHd.GdC&JPmy'12 ެ>y!h"b#\ӝ%L 1- ~dI8IǏgߩ]62nqQ%}IR3В|_+2{ƛ/|i]J3?0V>O2pcT#`}o>]-$GHPu3"ۮQI!sD}@4< \}11p| ,'KQIDT@7ZNpc߇(x@B:DZFzHJ!LPR:TZVzXZ\ڥ^`b:dZfzhjlڦnpr:9jtzxz|ڧ~:Rz#ڨLd:ZzB h9t㤜9Zzڪ:Zzګ:7 zȚʺڬ:Zzؚںڭ:Z,z꺮ڮꦌ:zگ;[?z C;[{۱ ";$[&{(*,۲.02;4[6{8:<۳>B;BKD{E+H;@k&;M'5/vQ%X!Pn\{}Y^Orh+%jNҶn;$g{`Qs1vEP(}/u;k#." ![r !ҹK !0K˺;[F!,-V3ȯRׂ̮3ǭÜ ˧W#J4pŋ3BdÎ C&(ɓ(qL#˗0I͛Vܴ_3 J΢\"]ʔ6MJ:TTj%tׯ`{Y5Y]Ϫ]بeʅgxz4޿L+_~ +V(xB/M Ɩ3k>))͠aMtE?^m4װ&:domڼ-qc?|УJs렞cݭ }Yw?I;Q)>{DOgߟl\*W6h(h' fܧᇶ! h(,0(4h8<@)DiH&L6PF)TViXf\v`)dihlp)tix|矀*蠄j衈&袌6裐F*餔Vj饘f馜v駠*ꨤjꩨꪬ꫰*무j뭸뮼+k&6F+Vkfv+@覫+k e,l'¿ýWlgw ,$l(,0,4l8<@-DmH'L7PG-TWmXg\w`-dmhlp-tmx|߀.n,q7G.Wngw砇.褗n騧ꬷn~oQA xa83lEW4@@tЂ B8" < :. 
\X@?"( yQTNP.hdDzqU`t {8x a(>KMJ4 +2G4 H<><%SLG1 ``>S@J$攒639%X@6c H(r t@眅: ts;'(GL},g*>y@wE (0M"m8wVYl#J QLMR:k/H`<ԦƩ+ v>jdOm!EOEf+`BMK@)2`x_G{ނ`}(%L u8W ŵOH5+1 ɭZ,ZOX](6eU^]4ծ2  3-PQ#AV:kG9dUҚC@Y^t k_M# V@H+(+(QAYk.wFxsO1Hby,|ccWp¹e ar%`Q]8 7@Hqzg%ñRm^|41.>73Mͩ[/~Mqa9uƶ`qc”x=v~Y"0H_8t~4xVWK /j24(@;0f.eǺ {T0A-(-l T2z01p:yFeq)b\ Ľ!e7i@|&0! znO<@_FQEZrny#{Q$pߡ},{H[mt~Mbxl]4%7 cfm[|pnK{Qp_wp`Iv}UF"T$@﵅{W.%.٫_qAP'qoE9*~({)T:$F(1 ̀ ƒt JpNSOHEBYmtRu̮ [s T~l:[!xr7+ǂ|wp* /).3* V9 ϒHIq·H'e o#%#u%B HD]J/Pyr h PY= a$umϚAP8\h O J/7R0ޔ5\!}B>|)0 p 0J> `&>3P~ELyF{ 48DW@-"K#JW0z; 8?W,Hy"6A&",)0Cxpp-_ meyysztXvxxz|؇~8Xx؈8XxQ< 8X("~@HHRXȀ‚y_2 Hx%&1ø.Ǹ&Ɉ1˨.ͨ&x1ј.Ә&h1׈.ٸAEӍg#Bfb㢎eŽd"Cbc;Rmb Bab 9P;` Sy8AH}b ‘H4$-&%"0)-+%-y0/-1%3i05-7%9Y0;i* $5hAL) q27T#Bx2[9dYfyhjlٖnpr9tYvyxz|ٗ~9YyrYyٙ9YyٚYy/ٛ9YyșʹٜI. Yyعٝ9Yyiڙٞ9Yٟ5 0z ڠ :Zzڡ ":$Z&z(*,ڢ.02:4Z6z8:<ڣ>@ 2Dzo2HLڤNPR:2TzXZ\ڥ^`b:dZfzhjlڦnpr:tZvzxz|ڧ~*d0zڨ:Zzک:Zzڪ:Zz09:ZzȚʺڬ:Zzؚںڭ`Zz蚮꺮ڮ:s02j&c ';kS c ۰;< bY I %$[&{(*,۲.b0;"6{8:;:!,$-M`HA*\ȰaB #JHŋ3j`Ǐ CIIS\ɒɗ0cʜI͛8s @U!QB*]A ?JzիX#ʵׯ8KٳhӪb۲ʭt]*˷/ 0NL!h ^\.c(~Lm[+k a“9M鈎OcsFc,8ۛ=y>͚a_>_̣}|`Fҳk^z1*y_{^c>h ]Wx^5(aH5EM6 DƵ_=W$Q{]0Gi `~x8jȢ^.(8ZH]:xWPVihFJ^Vs=e|REhWmbj\b]si|E_mE՜oi(sxʥ碐F $V[f[o=騤DfNi9mV[뭷ZYf+M믢ͺ&K񚧯fذRfZVm>欣n;RiQ+aצz-c߆n.Zo./{gk]s0b[p!lհYw,2V1qY|el4o3=U[\X ?Ǽ- VDaRo2"`oOmhlp-tmx|߀.n'7G.Wngw砇.褗n騧ꬷ.n/o'7G/Woߤ@w/o觯/o HL:'H Z̠7z GC(L W0 gH8̡w@ H"HL&>oNDHbX̢.z` H2hL6pH:x̣> IBL"F:򑐌$'IJZ̤&7Nz2@(GIRvNVf퓰Ԍ&P̥.wh &Xsm2f:әWx4P[2]pnz 8IoB̦:LE\$ΰb8'~a@+f]n|r'7LbQ\JO+փ aݪ۞pCC么CLB\A0qH,D$bő5DkW,p',x0!}?p[8XQد'}ă-1Iߛn2ޞ9WC$&ot) A bdx"<$v=q 뛔PnTaA&qOAo?ħ=$֟+&8y8{KݽW_K oRp|Z$? ϯ\A?ԏ R׾[$d-GCya+2}4}uo ~jA~W?Hy787njxdsW!|c$ v1xP?(X="VfE vv[݇2sHzoy ? ؃s{Ptrq{5Dcm>XI~vA~D>I8XHl~!2y CvQvutoQnpHVYGtv8 x?a(=bH%k(y@&mׄv>x9VxI^hFx2T{cTI`iC3d0%G`s~`Uo#% Ur! |s\7gr3r!pr p؍fዤ(ȃ瓊x8hyj8 &d?(B I7 36aI m64X;%)GYfu1$8:Q`qwi?'1=N1{.974j,7ZdIzցwfA4=7NA^HY7 "HnאNW8Wu$ a`ud(є`яT?fY7)症y0Ƈ0xoS-!SD3Иr$ uY h$]i;/@$ I?YH`k6`0"I8vHn7id㶟񝒓@9Oilp1Q yg)_:H|TmMm`0xIAJ;i&vUg A Pux @J-: ~EQ6Tmφc`V gp)A&q^9"| "6@zE g0iP :P@wvsp dvoV#61` yD ` م]`s6{AP=`l6 e a $'p`f̸@B=D]F}HJLNPR=T]V}XZ\^`b=d]f}hjlnpr=t]v}xz|~׀؂=؄]z9؊،؎ْؐ=ٔ]ٖ}٘ٚr٠ڢ=ڤ]ڦ}ڨڪڬڮڰ۲=۴]3t)NP঺]A,Z^* &=U`?` L_D'g8T`L 0M3-4(^} '۽LS!.L  #4F-@d 3'X@06L. 6E&Z(*>>`$KN .Y=0=^d^̣h|Ur r.>(0Y}`YknS)Lrzj=un Ù; /Wp>#$="f ><p:"ss^jP]] Y.mʎ<Ɏ.;qL>S*Nka۽N<(*:.~;L>*8ϸv!aovqN0l:_8r?ߥߐ3U:_hخA/;nݞd/831/`9k/R:MON_:b35a4_#mAJf1o#:>Lد9,/X?;TVQOUv_O; (En Ѩ-VN=1װCV~ ղҰW7oߝ 0\V)|FẎN='nË|]ϝ7ׅ)@C46M&?PcT`׾?`ֽK[cy(*:t  ܛJXv   p`vG{!)DToFL ceW_fUƒ$+.F `xv*mSS7A} -sܬL/wV%eMM[`8*SʚCEY^i I#io0uMi)jTە@ML-O .0Akܟwk&GugS[?$IH1@-AL >E+Z^q:7~Ok /k+M-/qۺ;TEP"BٔNPIZ1TYVyXZȕ5^I$}GfS PSupǖ]Fc`{pYv9Nw Ysd|{9Y#gwA꧘Y~Pz4DTvXٙ'K֒`ٚ9yɒɉYE=℃1 ʹ̹̠ҙ= j9؉+ى0IQ07kc6ٞ6pQYɞ/PDQ M jl)Ifz1ju$ڡ 7$>%J(,ʏ002:4Z/Z}a:!@J\ڥCJ@.SdZ ShRl&pDJ!m_<u2:{`hDt{}*k*"1d7XjxzʔK,~-:ꙛ*ʧ姊5)Ѫ(!hQSx@jR9zĊ *Z:cW"ygѬgIP5m9YR-Q1fg4VWn::ݪ*+u"pafֈq®2+l2w0$ s$Q K.y|:* s(uu1[vs mDxӓ킱Am1؈=Ke72"8J.}OheUV<Ȉg1FK q.*3IFU(51(T+qYK\mkFJda Kt:1>o xyRi!6lZ dzG5+鈳eNedsc=3ksz`)zg[˼ ksл V+ֲ,y\:W]k8{C1S[|X{⛯ZkW+ ૯ Ag\ѷ`+<+I q}$G;њ"?B=Ԁ ӦXbGJL]Mji7m< V~WY\}]^Ix`=de}֟tlS;^'q-G[Yx:ssdה ؂]؆}Ƅ؊R@ P pu ْ}¨_ A l@ 0 @p gGfsX p P {p z @l@ A񀝭thi|Q p=ހ@p-{! p0ޓ@}PyȿQU=! d@pp  g  N@ُr@p@߂0lp 0M~ }v$EtQE[~<ދ0݂P  3J -|؍:E ݑ` 0J^Ml=~ïTp`zkWY>_I.z` !`. ڇKw8sT odP4݀ۘ_~ m\Nߛ.9nR> ppV~삀 >4L! 摐8A g` VI=RE! ` G 0߇` p` n#tp~0pp5q]Q a݁? .>6imlM@} `V> ~?Q> `%u ˗Ep:o 0oS2݆Kq߽6B](OT_햰/j0gD~_$Z\`o>߃]VߝfjOG7!5$/a]! 
P $-,RE O==`kA .vv,q5+7Jx d/CTo]z%S9 ɘw- &,d+Ij)jc8k܉a8pǁCɠdL՝ ހTVjfD}KNIy(b`JYv@,jK@Zf%>ct&i{5tkbˣc:(U&Js>& ]n| tv2 *:͆[wzmr<ٗ*~dBh?á-5hu $<"wBTDӵ5_bi[ݛ^5r="7pEID/$֓6T)mZԦ@!0ۗ@!wߤ j,y :fUO xЅ9aN''Ӫ(Yp ^D:WA09EX s8w:S)9 RòI]:RC EA&{(G4.ǭvCy"H(/ ҘHk:Ӷn˺3KNm hzIό|ߣɩ%zdS"Zv,(V?bVc RˌR(H@ 0-y3g1O*=/oxMk:ƙ$kK$MT-B \կk ci`yO|ki̧:I`/N\a{DbO*k2+[M }U@B4tLQT05KyInv Rxϑ{1RՏB]PybsO}"*q5 yPWSiUf0Hx<:0BFE+KKj[ZdLtM79$YMs- 2T| azVFVQ(OTŚ&!JjS0UȂ0\:z%V,RK,i]ۜ>6@ *AeJ( mq{cDuzPduH%K:k5!WZjX̴ hu+K֨?n{^:-A "ӱn&tFH!. + et&ѓ_3! J-ͯE7*pL( 3ZbP=d쑯Qob2\e]qB 60I*4e6ěSR-y{hn*ђ)Zf !/Hb@ LјUε%BoH%(+q8AAJF$~ȒFz!V>JXP6Ds׃%^I "5R3?;^VMЉ#`Cp[%Vub!AuʇQw'O;wB$BpAgC"*q3IPԉJ>D'PA@~%@$Ԝ( esA׃H@JOs !0 (D%_BHL`B ا08'lN`2 6D 3`0< Cp ģ9";s63K9"AUIE A@’p 08;LB!,B?PSxZB`HDBAGH@0EHp+_JXK2XR <= KH[&4k[+.D~ IJEPOL:?>In6LDDHEFXQTAT$ KqDE]E1E`HEcA:Fe\ka|D]zFfFj2sFd@\FnFo G7iFr4ˍtlTGxGyDkG{zF| GG|/r H,H$[P? U?eb4OJPUQ UWGmۉ}(V)TOE'ԘVVH›QUYV=}eեVk1--J!EB)BAӈp͢ML!f(UU}t՝Xɮ/oMEYX %!ԁXze*J)%&uUi~ lZEۡYY ՛V(WH|SAA;Yjy Ӥ%TA׈7YؐN eۂ%Q [duβVQS5XnE;KQۢ\}Wőu%;=G\= G% ͟ 5%;Ս]Zս5 ؍[FM]QPM%͍߅U&ōB]_m_}_ |i8>__J G0G-Ox8X`n`~`` f` _ul͘` `^` `y,AA$݉ԔRPtraC&M N.4H)B#ۣ3".b#>b$>\8$nb'huT&sb+FSȅ+b"6,~1.1-[3Nc5Fϙ,]c6c }c4^f[8c?F=f:cB.dlcCNdEƑ;#:kEdcJ䅔d|d5QdLD6P.e8KA6eUPHoUnYVWNXe][[e`fm^]cfQ`F"ugΟ fifm>.mV,eg٪e%gt>t^gv.,GXhJ/*axesJѸ:L^=0ҋXc!1<>fgދZ épiJ2U4WBi<;NJ޿5wuޙ]Zop&[XpSq&xl>nitujGn#* tg(v罍d>ev̒j 6P_3*.``(6kffN2.j4R Aln+wkQl4¦la>!m m}l۝쫦mS^MJ~dlm׆._>in7FΓg*3FI랾e -j1_+n\Fﶋ|vj])c6Bnu8\ono/ݴp۞g/h0>0N{poko[ ^į5ǧ+a '%/n\eqq`. q,q r&r$(6NrkU;Nir0hW bsFj5ϷV3TO:;T )V :%5Z!č//D+yUuoVvcE?cBH t304 5jbIH9/fFm_ݬhT^?ou&wu)Y$Bj۝v*/"7=!DU 䖙m[r93&^Vx#(qhNwzo5QѸ s ~tFOJBv%xqLwhp7wLc'/NzGxFk-g6ÒqlXv*qy5ydvncyn{ic[ nf{?~zyqcwzOf(7-=vw6ezgo{'o/{*p_\{78}7 c)V|31W)W߲|g~Kn ( ϰcn'#/m}}~GqW~jw~}yO7 w+'=Ѱ !ui߈8',H@ 2l!ĈHh"ƌ7r"G,i$ʔ*Wl%̘C )&Μ'AHiS& itdQC2miˤHRjO+*ذbjXv,CIhEEu-ܱoD.^mUoĽw.\i&nX1ǒن3hr١z*`pПl tZh ֎ {,}ِ4;ZEU9t:'ձOs;%/o޺dϳ7/?Ds% `gLfc ؤ`aE8 z䠅.!!8"%X"ї"v+"1ʘc.x#92#=֖H]EꨟI*pJ0iސME$UZy%Yjb[z%a9&aMyԙe&miv'|ty'yS{ffU*(ӣJ*P1U:)j]zc:* hjڪpB+z+dkT+V(,j,BFǬE;mR'Vz{ǭ{誻$ˮ[>4o^ /@K*0 H<1[<1s\q1`A*!E#d(sD,zL%,PNqU)3C3̩wBسA@si)0sil.I˼բ ]{5_6ew]i؅mA6Xu}7yw[ہ >8}BS"p x፧˰K/>:~c^xL)ybߧ魳Q3t{ Q6>< @gD4qטlXP].$tdĆ/r+* /QXb!F1%H_יPHBIQY<ү) EHClr5XNA0\VJ[Fz >*2EoKx'g.ҼUcb-zv&7Ml39qpA:˧0xWt;,z 4=[$h: }(D#*щR+P0mP6я2$G`P -R\ژ!K1@8jx :E(=F=*Rԥ2GFW{ϬʴrSaDZT&_DڞdXiVlEbX*ѵ"}m&د =lO~z},d# ,f3Y̦,h-yum}-lCϕVHfѭo+.31%.rX*Y} ]_Nu?sr[.x+^Jr=xm/|{ʷ+}3/ . >0>e hA05 sIpaFIb $AS6&3#_,*IT"8'0=\" ":A S DXd 7h4eT$t$(6a^&1G-~ZQI1$D߆I"~8Ę'=\b~s <Á rdlC*F z2 @L C.H K_D"GYm ^ WP@LzQ"?@`r>v{C)gз zw>L~&x&U$!C,uQdUtx`j uYX! 8`AG^|NBAđYi ڗyG 1U$ %xY"xס8V $_%t &, 6`." ˉAz؉FQN?NB|uN:YG#DA{`$CB}hrbI+#\H"!J ISm4J*IJIJN"Mj|^L_Ud5dOD`Ch䄠QK|c=dT8VBXYI%&)eNX0SZQF$A\OT"JFQ뽒dڛ_I0:X@4ZEVTE$DpBCc@/$JlNy-J]>, jj &f&Nk*vDR'{\ˡ\kWqVxb'S!LgNHvx`'yH0~&cE{I;.L~) Z|6N K'A$X4g.fW*(&X\h4f,Uh%rsJFs~UPb$'"^ MgRE"[E#6WHfa1((hZ0V"_a*)`fz J΄A=\ra\bD'Ttd'%kb |i:h\٩N$Aixۢ'"6E%tj |%rGG iN% uij,)nȤ:]A˖:gNН%rDŽNE)Bf<6J! 
9&\BN`w*Ux b%#B445<򁪒*ķ "\$+J (bVgm+E%0HHy^m"yʼknzH]ڽ\vDyXµfgDd& YfD!x}n,m^I~-Wi>!Qd+aԡEHB'Ĩi&~a&l6@b>*G|h%PF* iGt%|)B!$ ivJITǪr ά% ',B'Ih:ƲfKMXJEv-"ҋ1^5LI!I o--2'EH/I0/0{ k'7{.a„U3GpVE,4D:?d :p7O$%m$]:/ r s6!6{22QP6FB .KAw@?4A NDc7F#y\4Gg4=M/p.עI%{BD?218[MߋCKs,²OJm1WuGT>]>u0UtIs(!Jsu5Ys6#Ѯ&Zߵ\x5!>hCV.ɉT3^/2T;YAtE8)1fHsrbcAf(ieSDPvLi73i|l6'Em6o6pp7qq7r'r/7s7s?7tGtOw@,\7vgvo7wg7ux SyI@!,F-%,@*\Ȱa 9Hŋ3jȱǏ CIɓ(S\2Ė0cʜ%Moɳ'Ŝ> &ѣH*itӧP1N:q*իX6X4ׯ`Ê+ٳhӪU`- SUbnc*j߿ LK"F[W,ޅu˘-Ό/Ϡe*}q Q3װcc,[ڸsWe{iȓ+I{Mw/xʛ}<oi+vn'X~_%U|E5dǠC >(RMؑXW^GD#Z`hUTsaxyqhP0RvR`"iHd !h7-?V$#U4$A^yaZ]AccQj5eTUJdr:\1`l'Օ-IhRjcI U5HA nbJbi.l:_:$FRQ\ W>ccKmƊjfƎ U{YgqwkZ-O9Ebt%v(٧}DzTe;֋b=!%;»^V UPAH x'/Vy+;,pH|U0Ljw2KQ6HGɳ>'\MT.BCQ}\YI{(BlmBl7647E]sx[5 yw˗˕yJOU#P`za8G.{nK.o'|/|Wogw/MO~积/o?h~/ ' :0* |'H&,̠At[TCHo&L WBzA! gHäɰ* qCHD@̐@D PK3v T3T Kc3D` 6z /Př`0":#IP>я @ 0vx̠$D.!M+ G@ (H Pxqe@lQ b * @?F | @H2JL%OpI: S-"xAGY <eTA% iN0r^R(pLiN2`@e=ۉς&G/1L<7Y#hjЊtFjde,GLL\Q{לkѳ;9YF*gQThFVM d[#zV5c*4SߏX+lMB}qnd"4/GDWfТHK_z&j2NHKzABiZv)g PƫS]{Yƌ2~ʘk8O`6QIN "0C.O0d'Gگ!Oe&phl ahp?J|qi7A>{e%DL@dezW߽a ~3u`YAA{`G|^ pZZ\!. IT `WBmk ʢ6t Y) 2x Ms[tыΓ GlWpEpNX۝eO={.CƷ񐏼'oA[+<7߻j8O=n>h~Tdd/V{Ͻwާ~uHP?;7>_F ̧ڣO}`=#L妫S%~"Osw/;$J_߸_iI&?%w@&A4 )>;$C=a82 "y%(A'()2$+D1358gxfhDY 0-\a/&(0҄=B.I^"r?H)XQtYE@q\sw++QH+JhbȇPH`Had~r!0Ra? lρV'n0r joN8V`U #r9DDnA(A{x|-7&؇`Xh_^2adXfh3q."΁3ьrZ3ظxQtq^.X{җ$]؍AYTs_x*$x ;Wȑ:ᑁ 4UyĒ(9? )A#29z>3 Iň@ ?؜v#D1t5n6\CȆA TʙJKI1~bhdGy`HBkvasg7:H'zj|(֞ ?XHEvt '$Zѡvc!jjjTJ,Z6ZCI7<ڣ8:{A {>xC'GLڛפP*Qjx3SIhxH \t3djXg6ֹnzA}3X8\g-R;Ktvmks;uVx[r| 0A1g˷;{Jgへj[긗w0 2y`ʹ}I;N1{iV۳mkHYq˺j-&9[.*L[`mDf뛸=(dky۽8HR[[k{3Kȳ[kwzۿ̸<|x',<  <{lCLz;ZC"<$Lo[.|*)Z-<1L3̶,75>*+?,=,;9 Z'9y¹\˘UîKRp:/sŋBsj\$ &S"ʯNZ;蕹 s̙J;h{ĺa|rILe}]B#'٩Uiܬ 3M,>J(8ʂlGªl ܯ99ʻ$92g[[\B,Lɜ>G,Ēd^8̧1z\w]\3 q>v ɒ]ڔ~K&(Ԣ^zo98 ^<ح],dP^o-e{er>-B?19RĎQX*haR? |qGc/wPOC\AEFPx[z|a<؀QBta, ۠;Ƒ߹2)5B̞kN#儍ߌ@ov~o\Ֆ^ԡofcѹK0{kf<ܳO@3PB{rP+MQM -)I'R>%SN;>tQ?5:(TA@UYݐ֏ju׈tU(_H]a36dlRX Yllhv\rwsUiם R^w^{)w}=_.n*6XJf>axo)7ӌ+c;Q:Սd5e[ ec& fCLorgNg6a>zi?t4ڦz2j3mM꬝;l8&&V{mbv[F3n붛ȕn|v9 WGj|åo[r+/*|VC\BtEuȭM%0DgcJ-s8sX~A/^%8.m>mi缮7PkPdpf߭^a{Ԝ}Ơ ~襟l&S5@nl$}k=qO^@ aM~&'da ]Xϭ3ċ ixzB:aYC$q@ bxD$&a(wD7":QSDņ*+*GtEQc$#{XF4шzPθCWV{o:Q{S8#?2- IpTdsFF2l${$-J^7Y&5).S2 j7ITZ OJB% cY8eғ,nI ^'ȩw>whüP*L\R৞ #<2nb} ',TR*iY#Cvdģx wɢؙSM !>Q\hCPFm eEG(&ǢhGQ7 GDRK(Ui&R b2m#McjRT˩Ma6tJP?:9)D)/gԦ^jTaSՋa<ԪU,U B܉VF]m|"$S[]Uf~iUVK'CZNq)v$L!v䠓M/ 4,WH2t/ .9.(quDFHKZI>}t@ᠶPgvg&WA sW湦kZȹJ; nms[d?mcCow2י {ߠוONZ/7AIl#Ml~wDp_g;776qǍdsϝC]gu5c}L7(~iI=r~垷:س>tx܁ 6F(Vhfv ($h(,0(4h8<(ZQ>Cg I隒. HFYR`=Q^`)dihlp)tixe|gJ{)蠄j衈&袌6裐F*餔2hp]馜vj %Jꩨꪬ꫰*무rj`֪뮼+찥R5A"Ġ3+PAB*9I 1Ѐ  -{ 4;LKhy` B "$`BȚA :1&|0@`oB&!E6,$h-DZ¹ ;EP46@mt% @T،-: &KC44GHiTRw 5dy2 vD me-w~ {U0@- @ 'l&k3ߋWnhN{v)np@%`Ӣd&zG@ W V`?F+o@+~on N%pqhJQ¿>V/gdOV?H'1$9 f$@ 5@RY*3C$ĀL RÁdz[a~&>cH3!T)"&Vy.lPB'ZKi 8qXl'/#H6(4"8/#+݈86яcETcv"+AR\HE%4} %ǶoTH;e lt >$.!nM,H,nrby)X9,L*YbpKdBݴ3SWD  þLo!㌛^U3^tiςt$Š٭&!xA0JѸX(@܉ L0Pː"\ڠIW0LgJӚ8%Js@ j|8 HMRԦ:2*T.RVSVծz` XJVdnhMZֶ=oPt}J$r(~`=)lJ'c'k'ɖIJ,0;jjlHD Ғ3jZ®.9kWd+vĶym7R^ p۲7JHr}\-Wz q;6$s-]f0Hx!]my%DE{ܙė}BeHK X!.`4x0* [8Ual kcwD (NqnUKľ{e8rҸ@L"HNduy2l)SXβ.{^IYyhNpL:xγ>πMBЈNF;ѐ'MJ[Ҙδ7N{ӠGMRԨKSVհsM)._,$Le׺vA,:m5/ۭϾ O݌hGGn訔 @Ax螵[Mzη~NO;'N[ϸ7{ GN,.#c. 8Sss@ЅHOҗ;P?hS8QWQճ{[/k]N ٿvpNxϻ]oGq&k8EOiQɖR<2yTG<5ϗF힯Ec:2qE``v=gOϽwOL^Ǭ|;rOcϾ{gUOEm O}#68Xx ؀8Xx؁ "8$X&xÆ~*h#,Xeq\6b)^>x~%111fnaPH]4l&{ Rb]?[Å-X>Ffjl؆npr8tXvxxz|8Xx؈oZ8)hxҗ&ʼn扚&m2&xix&؊e8&8f?F[ҋX%HoXȘʸ7u". 
6( pSW8XXsX9긎ӎ8~3(7xeVИ 4x(#2n:$k~##xe-Vx )%U)&)vyRInGj#It;Rq+,)/Y)y'4 3:/@B:DZFzHJLڤNPR`9XBVyy^:d:V:sjP x}RUvHgiyzY|ڛZ駂ZF$ s@ꨒ *Z|','DxhlZtBtE`0s鉮*|zja$56&Cҫ a꧆tEG(lz)/:9>@Jc%Z"J1ns`e""+jqjѬک![Xq+ڰY _;+ k "۱[ ";H& (!.02;3n! ,M-HHA*\ȰaH8Q ŋ3jȱǏ CIS\ɲ[ʜ)#8s~,ϟ DXIB*]:FL5Jԫ=nʵǮ`[v<lJ /7:p?.K]-˷߿ 8ÈUUEKL2_0kϠC3S^@װc~}ہb7ֵK&0xq眫У3t.Nn s 07ugKFd]8iad ej.$p]zyf!yAz!}}]_р./6 xfQZG&(䐣q8az(AJ${(_rH$DfIؠvNX!ZifFd $OSjhXgb\ffcr瞈&Si)݊Tb@y*)V}*%`x^~!1ꡣJMQaZv:zџ:? Jjs&I ڊ!ltnmnRDw9bɦScQTUT&'}Ѳn}ւvRSi Rஐy>$!nS ȗNŠ;j|{ѹ Oī룅G1P/Żn$N ,2w  s)"d9)ZWi5=ѝbKE5ؕZ̶bk8]j}0Ie7E;]]6ww?ɗ7]BRV}Gu.Y~-R9fuuEvEofx߹ך epgt"SD2gw$wK|Yonx˖9%>9=|kECN;w@e?Ww_$5 z l-' 9eQ6x W.q$yY>B5 Ry鵓]'F{W7D|Lъ}>1Db;؏8)En0#ѥWYH: rKl^'. 585ƀP""$\] IŒt'#&E='{i"P) 1Ò@Rg 1y>+pMꐸ l<P̢HR&"AMF [L@$aFɪK7 76 \|P;*#:RȺQ)̉(yDvmaJⱀ[ť$a SMQƄx3@r6愨z> OǜQu@c(V87鳐Tb3Ѵ-%Ay TÈ5_Ӊ<FN4@I;NtMŲk:3K5}_{ 27CM -f⨁cz xK,@p K/OC0xf- 2D v:fuKĐ;#Xi ӈ \ah}mz ɽ}MQӸ/-P] D聍QOC lT,@@ R-o oU8%XZ]N$sv[LgfxnAf9yZNtgE;ѐ.#FSҘ63N{Ҟ.c QԨgWVͮRYу5v7^MlE׺N;͎"^Sy>nTOKo=%޹׭fqG쎷=jwѰwE~߁7=1$ppp4<}\ [<7qVe܈Og&7ʕȑ忆M.sҼ7넝k)>\Q^lX41:|Dt(X0t51guX(#Hճum§k8t&;޿SD(bV/Ddw<]3M %q Ab$&7`w>+?TCÛz'Q? :~Ŧ* A|$yo280|@p%W g3@DBŗw"8f6,D% Kb"yW NJt4d~x7a {56#"hV'zW!` 7yp'0{h!G 7HГQ 0{p{A |opgw7@7 DŽw0LR ~j<8{a!Dh G($hp%`p po0yGKHQ0yk0Gak'z |PO~H$8q|0y]J!ЊЇ$Q /8 aqI%v!I(:n~$Q`hI2Rwh@l CCE 8 jyBb1~ƈg{p9э|.hT>$[Ѓ= @g:weCh P q9g=Q  8l i x/IR!#鏋!aЀpВJ!цP  {V9nXЀ>Q7xO#<(ߴUw [0yap6uKJAl@arfo& g0yy긔KA8y&  )l~18 ZXuďC4X_IsЊapw֙9umLQ wiFmpnF)J 禗:A`pBa QA9TPК G 9 )` 疠a s{ 0yj9AV{8Kis&W{xT9 ~tU&t5GLQ.zm: bIh̹09 d>*;Ť ιgFJ`>V`@ݩq F5*ᥬ(>!uaaJYQ 9P's  XV?3ramp7JSqȖaФ)y&!:iEJTrA9*\/tY쑫xY~vB#rI驩aoԣJJtA.%A a~"ᗈ҃z5*]Ie 1ժ7~kWf"5(:L҄&E!A)DV$MCLA 386)+dLѫ3u2#mv3 Y8Qz/j% bʰQߪ%/vA*E4ʲH\0fIn9 s[&AQ~to,Ja#AKApi6+A{ʡѡ zx2ml{=QhjK)9 9 9qV Yd.2LA )}nd% = [V\ȩxfR1& QT{rᜊːŏ?&yi澛A jAX;W9ڭ6!G wKQ,#AsiJ!  5k Ή:! K 8[J7ıġ!A VH@ !ł-(Dj8M+:G {?dŌ8qg[s!n̋'8 @9o{Ll&|>{H4xh~꨻șK[[{F91PhIߨH[%|8yH,8ʳ I |֎9[mA =`Ѐpm4hJ\*iA |ѼskA Z@,B;X'f8' P Qڦ!{ .Ⱀ @ -N˯ pat8+ 0NBq3>r:H>mLNP-T^妶)tlV^γ6s&_clg|vEnolv~qx~}>N\芾茞ht銁{z.阞aWgXpV頞Y;Ȼ>ꪮ肾c&UMg>Ҍ~֮-i^.o~r2Ҏ"whĮqNJ vQ}~صʰk~pv7^VS/ pp 0 ^-J? @(|k;>$- )h 5p)@? D{yEގ0KO]׶ w\^`b6fhwƐpwpr?t_vs9.i~jWap4/F4SܷqGϏ_uD&=ۈ+ݮ2zD%okj !Ovi=N]{5R_7Rԧڂ V!I)#@4K'38鿉jKL=3Sy4]`؝VrU {Va؅EQJ6yuUp<"3`4)Bg2Ё\Y8ST7&(wfhF8[ߛEQhŋ*x> d:hk=j4JKp:!&#Iamam8(c6ùn^[$lU[Р,ا[F"Ք>j);^#?h-2<E U nQ;z̋0N =T yP!6S0  KYjտV^ {X&UlcD>V"+ AYІVI{ҢVS~lmVmo [Wp{\&W2٬i\FWӥnu{]nwGZ/y{^Wze/^W,nl Wo ַ&p `0 p`. 
მR px, {X|A&l#)fLbP#!dLݭ}|YC&r|ޮȴY2y&O$ r\*K\r{Pp75V5u6yEڬe=)]1ԛbg2 sBOW0Ș :Xho: Wf2 HR6ÓNI _WѥԇfVTwϭ>Kj?W9.yyp~n&DմHT͋ϼ^/E ϖg+^_3o6׳>|}ハD(̦U?Vײ(pdV/ۨo$8fn2=KJA{#)Ƚ rUjȃRqZ'涭b6bLd7za^ yqʽ~ Oa8/]Oݧ{u4=n 6;\>ƺgw6܅ww<MD}/<eнfryh}MG=b,GLuzO$8}+mج HҀu%f[nc;jgo|cg|G <:"|IUh7k[s?"y3>\>Uۿ[@.@g |  >+>XA;@+6"J@ 4@*B$4>L`A2/Aٗ@$t 1"L#;5Y{B)d%B„;,@H"*L;T, B+j"F4 CE㣚EO聎Cd?EݩeDtFRn䈒[ǚ` yGi{ ) a2F` (p8H2lBjXȘxH$ UY/Hq6YIc-puyG3kl:/GLɌG Fi;GǣɂD͐àİ6ER9dƍE sfʄlK:FJNӚ|JF]IAJ*T0H50i Lv̌4M&Aќ$g$yL̒GlA5P/!9K5!Hz1zKĈ1ǧOb#\l>hΉ 1NI작N׌Fk˦Ű PtN;۸m^,{HH> 4|PHMl ¸PPE(IΩ&O} K烐hM3HQAQܲyY+q%8<L+4<,φRU +;ϲ1QMr C\/W 0SF!#|,Y AP 3G'"Lh< b>UK҂CJFLH 3Ԡ8͜H SS%,N8ĮQuDM7L}ZWݫ[&U M=]2O @ VceWZ؍S0sXVMXXnX="%`MՌ5CX]YmYպ_ UQaS#}YB7=ט@Y7scT%1X[5c{$z֨P학=Tܫ7-q,S fŋU;%2ck۰:$u𻭰u ۺ--s[}͜(D/ [ֽR)eCL\m2Wz\+:\+1tYSR 8pW:RSIݼ>҅ U_]:Tm%0=(9ݼݱ i酬 Jަ#Dآ]r]gިCZԃ0`T rvE/7-$;_==|c՚HS]i" KDž >QZy-C4Eղߋ_`~3a)Vf3ª"uQ ?3@YSYYQa|8=Q0^ E(0OMDJxBEabc:MM&5-ЍTbĝ&3nkܓ`.]?{=D,_T>P\(F(-,>-YВ^َ̰Ee-cUuT[LN,SZMafifɟӂfjf 3bdn.g!goVE5A_wpV-rg{Hf|_gag~.h>H^hίnhi~h-h.mlggiFV#X+i^# x{Uf`v~[Ԓ})+ECi0^%me 6jfj<3\jڋ"- ŬA2^.,cSYSze"ۂ#ᩲ.g &,kG3l.jnrU$(leͦjvFvh}&-̶lZ3T,VFm)zݱ>>VXϖ$~+ўݶR&.㖯^f#n P8F*ctLO};KǶ ali\V, I1:JeMΠ9̂k$=$φ ΂?%'6Z*5oXY fpᖉ 7>P *ޕ>Dp@eU܌qJ}M@y܉eø}PyDq6~QHHH$B'p]G5:MOܠ}o8"nۤjU B/`,!WE'G]҈M>Z`qv +<FbVst_MwV~p>m@yoYr^Md`ui:ɕ Rq6 rTզjG;юn pA(A\;o$X^#DyG06J QTOICQ vS2  Rq[?@&C8 riS/ D ޡQnUOp-CL"9IEMGqH Cg#, Ooծ.ؙq&M 匋s?~gwN٭fknE).Md† ONֽ: oԺn>v"E|\7'Ju~:zwY`Ϭϯ~,7mG}ϰ'auߥ0 2shЀ"Lp!ÆB(qAg/b̈ #ǎ #ɒ)r%˖._Œ)s&M#G̩䱃v,`EDq@N`P)gXZ ՅYJ8BET׍2kZO\=6a;k F_o?{Jć>ΒdʟOd>D @%-Dpq3 1&60, OnP:|ʝ3T_ /_`'CT)"a'HB&LpZAL`Bf@pxЂ Ɇ Zk46\ 4ňXkI[?PHRfWBXSb%eUKzߔЙpfE@I4  f9!9&E]tA ȧC[5AC$G 8 ) S*ޥJV)!:V1P'F& g,SV~ZkNꑧ/꤭/W_kL)F,Ӓd MCQ)VWK-qYI BґnMdꫯpf0nIW^ ˺ XmYY۟^rl[4kY0AABqsTlpP-ȳ dh+IkQ*=jcS̒lۀ(:w,L YEEfs!`vz؃mE6Soq ପ)}!6 =V}WX< !*cAVV ĮqYsB刃,`)yHS.qD6fP&[Xd7q>K׫XdIR䃐(8(0$ʼHݜ e/rB%!g>Nb6^q$8=JC|e c&8.9?yɊZe*(IM R"5 -%?|GQf`gC J%?pֱ",RFKH^gԅ1uaIʖȔbzԴB1(A"\9dEg۽1N*B;0P#D^7=H+gY:05fwjo3W]j& /5bMh;'n89e%7[*<$,Կn* };^lN%q~VKftɚ`Cˣ;R*]%`6qNm+0Q\"u[&fzN2gXBDs6'Mʱ0_^$~&Șg4;+G<*Ij<7" (%=~eqlB֞yaK)yKͷz 8ur {߇oKpJ6W"R`lz•c>\a~6#~KO\֤_&;MƷ@E A-`ߴ_|KeMFZ@E`S^]_UxԖ=U ~[M MLrIF`MEJ  a*l9a9aRŖIaH 6B ؟!D@VaL@BjmLaW6!.ecLdG  M#!-b*=h aNaR!|=$2"E0PT F cDtARjbk"KLP@E \"!/%4#0F0v8NO\TfBueC\aL#c,KKu(#Z ead >K9#qRɒ.zN -0.D%"%JaM$z5ڄ8|55 GJEBEy]$GC& ($ФKQb mUNZPDLP6E)VƄ("(e~WSLK dHT}0lKMQ]*$,S\6fC D/WF%A]MHɅ9eBdc%B X``*DZd,$L>jaS3 [\dvu @@戈C O飝XJTy0f6D?jHJdDL`n` Yl7-Hj"ubD}ZlneG@U{V @P@xrF,H80$^xD$ꌛRz=ra|c]'%I0Vf <@L@h g X,MlC EjeBh5.,0Lsuhmɔ b5) eiFHff  g)X P(N[b$e&(1eDm৥Xͥaj$@ @ BnyS(3>ZA%tPLބ ct$eS Da CV |HqE*b,@uA:k(-+)uTU+x DAVio}q׀+$}eƸ+H^埻Gd+l ,t+k*2,%lJRl< Eljr\, elkǒ~PȖ4l+bʮl,ʑJml_(k*mӒ:Mm~Zzm(a-hגm:"Ӷ\gf lغ-Emܪҥ.:snn*nk-3um)Bn65kDSIx.nF@_Ӏ+Lecv-6ʄ bn 7B"o*2SJRoDToj"}loRoo2ܘo&Ko&nPn!*^۶~Do/Bp/KD:oLnby^#ppK/=)n@D[p-?p -0ېp $ , wpJpp0/ZdDìj_U\<ƕR1BtYx!BK$1]pa*ofa,Yx1;ilo<{j!k'!+ {qeKpS+D>9 FG3hqAנ,8VD"VtWn0rFY;!^EF.q$f2Ir1DsđHi#Պ#/ladL,7(Յ lj&2 h)$ L os(Xk[ۀVo2EU]/KAl\@Rg= *BIP3KaTc.YH{)[f"k$I0Zx2ұ+_D? 
@3R)ƻB5I\B?ĥpd5 R甌j4BkTQYrKgʄCkVg ]ߚ ,^?NYl7!1j g 54#vb ua06c#WBOvMVb vu,~\tvh˒b`fp6F8sXs%Ǒ8@\Gؤ춱opl+f Pwu[ucwvkvWwd*F lwD(GwCyDy{n{D}~wmgw 7wJ}+PI xN8Fi˘rs'8mSX~AcӉ\x˸}xN8SrܸxKy=ay&3Ə439Dㅓ%SykyXysJ͗ۘyǚ?y۹+)r :zU9ٴyyYJ|WOƣOK/_z`:\nz@:etk+kn[9,p3z뺞쮓DraŬǕ{H@7#cOv֔QYOqG_kfgcs0B́(W|{x?2^)JD< ЪkO XخPл=KB;Qw@I@}8IWdI{U4J/GJ{,o}F[|4k.),o=9 k>hO M?%IJ|f"F"XÂ?Lasj:3 PP}MXܿCMvKODXЦ?Go_>+cJ'y 7DVGD?NDaCgo~$?CDWcSqGRESc+y 5kx7!ޒpLϿlϿ~kCsϿ?-c;(Dj_'8p3sr977>f{4X?P;ħ٣7o:fyCG]O qD xp@ JbA5 "Р9v"+n1I#!pcJ3A|C9y ʜzM wRDjԨFVU fblaU""{t׳ mkUY!G߮{7(޻uؗ' >{0~;8.[e\x`ZMKhv_ ^ꙡed5fF۴GGa=26mhgj|8Wə7wNN_:$~,I?SL:yH?bhWϑ%R;?׺sʻn J* Pخ6 (3ĢӲ0D'5 *G.+să$ƪ.iǞzT1S ƫ*Dz(CӐ(c(+FɤU+&ZL@B8-K@L$2N=ԃ(Nz;Xe֪^5Tqq^dW ;ҵ[}cSD5fjiclVnXog74Sb]w]n}wVlIuzC|S}l3ށ#B`MZX΂bXL5ܻ:}CMdeNxmڊeyYa9?Th*9ţ-fKiꓲN8iׯKzh.ܮQ-91+餶1kWmM5o|X3;Ry%rIA-&=YE˵zrr, eW6t8]r`ˆjo=cQ',r{CF=^fkp[Ԟ WwYAʐo.o5ߧ" N%6} ~k"  f0{"렧'e! TF%6ڌ3yaFCd d']8  !OlWEz T8*ROdJF}(Ie?%yP ĤP KWЬ>AE.@Ϳj1 8E۩/¹t"*ɦt,(qp#drɈ MoH?>j g9Zl~#ZI!4!uZL~b&͉Hi"ĉȌ : NLҟJ ^$,4B&ܔBlu7R4c OIt8o!0:3PRY*M*7s}U@j,Oq &&#9 RR0ob7{ʭ WnOOMyDYP aHřPsYB NBBzVf\8Z׾o"hcK"qTD\kv,ޮg1%(@mr2+;ϕt[ʝo2h5{ۃdy mv+.nc m+Y|iUv2fN*m"ᕓ٥rwM[-迪Tu4`u~.a 5x!DP yj[f01ԏg1 .X'I^#bs6l13rBdmyE?3^eYlq疷l0we3MT$3ws=5!u<Tth:d~+$C#M+/ K9ԁM-i8V 4Y=* ա;f@.֍A>|J}ZAgdJݓU,Ap W 1"BUXt=8^:Z5<-c+WF2vݖs3f:q"^.gjn:yuSSlH- (^q@4^$ nZUTOԇ,)b3d5@߳Hi}`4CT9ʉW}En_ڸP $"W޷r_WmRËì79(ԩn:ftl_ާ d ˵a> Usl;$귤M⥲x=}r?ygP j F5Aim@y ۵B|֫*{|7z :ʩ41y͑>Nٟ? ]w_X긥0JOffi"P!&@GK-3P94¦Hdh/,O`d7_PRR-*Fߪ"@T PP%;C# l@< v U  ЌA@P א "ް pp(g' 1a'Q[Q'+Z q&07;N4$@GK1D#PW[*.bQgkgQw&{(<,tq!Z^@Yq !,3+eH*\xÇ#JHŋ3jȱG CIr Ȓ(SȲ˗0c|Mfɳϟo  ϡHm^ԙʋN4JիXjuή,1>L]w ,$l(,0,4l8<@-DmH'L7PG-TWmXg\w`-dmhlp-tmx|߀.eA' ]<.F吭`n}褗nz ꬷ.}NTi/o'7ک?/WoGw/~Do觯~u/o-ێ HL:g Z̠7fɩ ! u(L W0 gH8̡w@ H"HL&:PH*ZX̢.z` H2hL6pH:/4 IBLdFLO~t$ێ JAԓ%72Mr#!(GIRL*WV򕰌,gIZ̥.w^ 0IbL2f:Ќ4IjZ̦6nz 8Ir3 :Kxuf}v @JЂ$B~24v|D'JъZ4F7юzHGJҒ(Mp0Җ0iB*Ӛd)NwӞ@ P%DӡE[RԦFNT Щҳ|VͪVTխz` XJֲhMZֶ<:}*WlKuͫ^׾R wI\:P|d'KY^:4me+z hGKҚMjWֺlgKͭnw=隂KMr:,tKZͮvz׭xǫQMz|Kͯ~LN;'L [D oq0Ht\dD<$m %< b<1 cԘ/1QOe1(!ϯܬ+U;"yL*[YS x=R2*1h6Ӽ29n~,g#9 ;Fq_ BzPGI.А'Mi! J{X7<Â:9O<6լg}6Xz{_!X|Gx bf˯jcK+v-jζo;fvj`mqv fTg6|j'y[ 8ťq[wgL 8-P_+{  (OW0gN8Ϲ0Dy _ C7a޹~AR׈ճs=y^.0 /ή౽{;w'v>=v~]"%t_!O򭴼d1߼SOqFq~# p@&J1f"?F.yj%@<_䮺Hc%X?~/|\|cP* Ͼ{OOOw  U ) f3p70f0XD`, ؀@|afpH.@ ,(@.@x-` (#ig!3 2H0*( (*u73 @'Lh*QH6@Z2p`-P{n2,2 {!M(P81S+["CYS@?1'0pPO+`+(QS*@1&*Њ4H YazlBh[a{gL\aH(H3g ȍ3{hX"0x5hQ% p `1؏ҋ0pik2(Ũ93U9 I)2nHui2  YR0=a .3@@72(@f1 C" & ֔2Sȓx =(cUhya0b2"@o,jIȏt2']$S)a0)@+0W_1@)T|7õ;dSx|Wpza/Y2g间Y#}陼Y2)VA`9)٘Y2yQ2f-&@#ӖЎpIL)2uwYɕ!xCi p923x*9rQ&٠ c`mAzi cg j1+IsE)*2"ٜkQ12}9ۘq91&ԉ&@"8TыpJ2h"7O0Ux-" S1((eqg21@P o:2 @@po3@СwJ2Qz>0DZ:WJJSNCR)HP.Z*Wsd{hjl۶npr7.Pv{xz{!,SH*\xB #JHŋ3jȱǏ 1Z`Hɓ5Dɲ%ʐ0cʜI͈+]ϟ@ ȳѡHni)ˍN!ԫ$-$ʵׯ`ÊKֲ֣hӪ]˶۷pʝ;,ݻx~߼ LÈ+^̸c|K1peʘ3k̹ϠC,^Q^ͺװc˞-4]UWԍ N;<УKNЛGԎË~}|pЛ_Ͼ㖏/z3Ͽg}h&\ rF`F( A\ ^؃vQJBqA"[*5b0ؘ2b<>vuctDEL&gK&8doIXb\5ehY di\_jp'[lixΙf>矀䞂g&(.ѡF*5Je^馜e*ꨤjꩨ꫰JjMު뮧f@G+&++G)(lűVk-hW\ewk覫+k,l' 7G,Wlgw ,$l(,0nj12l8<@-D\HH'MJ7PG-TWmXg\w`-dmh=llpǍrmx|߀.n'7G.WngwMw褗ڦꬷ.n/o'7dWogw/o觯/o H#:P2x'H Z̠7z GH(L W›myCh gH8̡w@ H"HL&:PH*ZX̢.z`H2hL6pH:x̣> BL"1J "IJZ̤&-BMz e&RJ$L*WV{,gIZ̥.w^ 0IbN2f:%49 a/,F2p 8IrL:vO" @JЂMBІ: ъZͨF7юz HGJҒ(MJWҖ0LgJӚ8ͩNw-@ PJԢHMRԦ:PTJժZzV` XZ0iU2Y=Zֶp\J׺xͫ^׾ `KMb:d'KZTAjوFQf1& '*vw}-N+NZ9JܶjmemqF5};g D>A[]~u[u :~vK_%.|n}wYsXNV7[0 U1# Ҭm>f=(x~1JVL`1h,"1k)-2ҙts`O\CsE,+;qπ^  9с{U ]6wДFj8DgӠGMRԨNWVհgMZָεwzk&vXw==V؆3ΐЎMj[v(6CcqiN]{Mzx+d}[$w'p~{?לǔpGdߚ A!o@ cC$xZA_X+{\"(WKcn';s׊U,}HOҗ;PԧN[XϺַ{`XGiT4! ,PA 6 ]`E @//4ck[+T@E0lB&0,x f}(x Ā~ RV$^ @? 
'l?0Os}4@7PLϧ@ jV(0&g'i7ag8v2wm"~~6gai" 8*PHaP"GBQ' J-@6t08/3pz0d5E0phCR'8֕x:fjQH01~[0gȅYb H pz.prH)*H@M~ׂ U}x)gMm8-XpqV/'(r1b H$2xh=H1Tkl&Zw0'A"xf...qHWB|G͈(xQo8)qHy򌵨e6 HJ'뒆0x8; FP1#tÔc8!hXS.< q1RA㐠8Hf> f@Y1^% (ؒER2eysAh$_ 'wwqzdyrܘlFxp'&x8o.X)ppHihf&3'tA.0㖔2Cመ2'xqt)ؙqIynQd4jKw2dQ  I-" Зaa'@t'~dඋd-&~   3!ǖv2ix#pUYp871hB3`n< i@9 sg\1 41I ӝ݃] }  K7f%P)& *j@?00iqۂy3x^;f@^lj."[Zw30q{j:C-3p/WGJ:C)~@u00(*BasR3 p*0'"0٬:v/:֚3Bzںڭ:ZC}纮ڮBc0+Dԯ21G! ,'-H:HȰÇ#JHŋ3jȱǏ  IɏN\ɲ˗0c2%͏+nɳϟ@&L ѣ&c!]ʴD)NҴ)GV̩ם3RJٳ=umAn>l+]\k3{ U-at8l5Anjl0/N!> y镥>sgˬc۸G/:o {^39.< tc>{{Oo[OORe컋o0? GF_G|&7_}& _ vmw-Vi~ؐQ(h6Й\p"(W22ǚ5F9t?F$cLcZ c \JfxOmӓ^F%eTfegehfDl#ԅzm5gIS@uff* u|gU W"JiO^^*KZJ*M^ZRc'zWk.Hk'*vJN.l|> "[bmQۡraKJ͢e^Nrn\kݫ/nP-8DoFWo C/SϥhrLS!yW 7\ `1KrEJ@-DmH'L7PG-TWmXg\w`-dmhlp-tmx|߀.n'7G.@gw砇.褗n^ꬷ.n`%ܮn/o[ ̗~p4Jog 7|O[/oCT `'{M: v AUS7p= G(Lap4,| gO!w8@\ H"HL& z8Rlb0-z` w|(Fao+Yc5/nd8x̣> IBL"F:򑐌$'IJZ̤&7Nz (GIRL*Wʹ-E(Z: -/iM{إ0IbL2f:,I͸Ѳ̦6I58r\ViÛ&:س'#(zJĜ3o'@JЂMBІ:Jt)QͨF7юz HGJҒ(RҖ0LgJӚ8ͩNwӞ@MJԢHMRA:xKEJ.%rCU?~r` +O)ֲ%fM^ vN\J׺xͫ^_4׽~5 [sMb؋Eml (Z-Y3z hG;ǒMjWZ*eW[nmnw[lD Ml:vەBwҽv+r `!۟7.zW||K/o_pvKNw'LaERWDͭewvȅ;Lb4 >Zm 0a2,N@w9 /qք"HN&;PL*[Xβ.{`L2hNsHL6ps@8q6π TЈNtGu=3Ҙδ7pӠt,LK@HWVհV5`Q3[5d]$.wk[vLR^Kl<;ڲMmi3ҾJmdVct$Ċ\7LnS%7)|ȴI;@pi% +ID#t18IߤMw6]ʕ4IS+0ЇNHOE`dč55 @[XϺַ^uZ;U+P0P]P8ڿvQg0 Nv`mOO;񐏼'O[ϼ7{e@?A˂H%^!Q@"!` E,ԍ2 Jg &|g͏$ LT"+ ߎ։! a`gpZd ==燶Fp7Q :듊0op>،2 b@PB_u" (`TF+ȉD@p=Ph3؏0EeH!XG$ z0 eFtALa yCM'^q^L&S4PAQ^H^ƌ$ K@)ID$xnG): ~8 (-䒓 K6̗M/I;t8)@4O'"lyCv9 xɇz3A!U).t̗@!ṫK ki/ęFaQ mh B|D 9 ˓Y>RɎYYɉI\)AgY yDP(ZDH0B >q^(Ey ĚyBI9 B dAؕE#/dp+a P[ɓB چy9戛z|7d ⧢;A M!*$ +P)8dh GLAЄi00W*B~QdSGT dx I7 `a:Bnȥ A !LԢ= ¨n$ Ejud f=YF@+Щ |a0G5Ȃ.xBHʩ\5UNh .xl`th hMTlp{P gzP {`^*H:~p'}G~͚d2" a@|hP `jXjܷZHZg` g8 ";$[&{(*,۲.02;4[6{8:<۳WBB;B AHg0NPR  0X`7M^b;[kpjl۶j+ @ n;t <9.ȷQ\wUwQR/mRиZ@W*G; ԰.2'1kv3M*PF}HJLm&űT]Vm1Π<^`b=dՔ:Cޣ&}n=qR 1Wz{|AشykC֊|i=ٔ]ٖ}٘ٓmew||iڢ=ړu<JJ= h@ G+8uiWi ۛ­ih~} ]~g i]3&ݭ=@>KBw }>^~ >^$eE}B=8[H`"`,s ,jft@]@P{p9 a0 a g|"5+ RPviR +@L rX@*}a f>3Bh) ѧU"Mi  0Z~i0"p}Z 0> ׀8u > ph+}o?XQi`Yi`{?`  n+ap{@zgPPi+@a+&1@S l~p 0q|gE 7((jQ @%BAF-afpZ> ! 1ʀQ qW$1f@mKBϱR h-:F7~0HEEe z&p&o?8((wZ*7}:nOP!1lP7灐 * _N(uop5B" }_v|y71 ? M0p@k0c5rc O6Fx !0ft.`$A dXv#o $XA 4 N!%NXE5nG!E$YI EJ1 RM9OLSQ,gқEO15$SiWaŎ%[l , xPL8[]>)K [ uAO\e(% -'9iԩaVbJz&y퍒^Mdi!lp'^q+(HAWj c ^=p}SDHTIm{SMl(&²$@ $8J<#pB 7 #4 BC.?6Si7@ K QqFkpTX dqȠv{9d?#I! E(rK|ȱp 4BzI$a*DΔӠJJ&sO>`ʲ> IN,0tK<+ˏ+71PLC5%_1B[uVZ3A1 V`8Js!û$X(AYj8[.F{ZpS#جr@ ?ADrq]z=+:)3ktߎ$쬽)*a#:_%8y,Jt*.#%ye3AۻP啭 s!Ù5.Xzg6h3Lb ".hJJ.=6Z7I|k@ױK_ zZ+#c@;؄k3;\]ŋkG쏓f.38xr`dA$Q uKw]fK}}VI(/I |^B_P 0ZHK77kCw[h36+.?d35]_hSLd.[Z>anP>p=9GA Cigh0s &1HO`dW mX ~|39 GN)oXD, lh(/OʞibC;杏3Уε+Y/tamvcq̎3"#rW.J431R\zxp)r` " q9] :%RwW?0z23j !K`%:@Y llښhxM"̴\ (K 87,K92 AxDɥ;n.ʳ=#Ա.6? "lx۠=րb i ˖aa0,?zR rꜮgPahSa\ynӑ`ujTX&mb.ӓc"Rڑ DƌN,Xz]gYL%=쓋TV\,,K7ўyrȮu )YQ&6!W٢N)V5:B)we $Ylvdr@Ӱfwb9†%SC-hj1@dkh,[I5V60o3yɯd= %&Ͽf$:~zC& | ,b"xOC5}{d;%nu/PQ0|U'm ]{6f*0* O* LQgN &6lO`HSݍ/TJlzJ8zkRi/FSk"pgt|_N2f⍮\q/%5 CJMY>1gҀ*cF)"e7 *mJlJ rpHļ86͜@1S2^Mter$2,$0!,Qh"bHM4VAW0$TkT\i.:Iְ3Z#`!)ėkɚ5Ó9RpFR]JEX(H%|W Qޠa[ Q?[ ą9H^8vQBe8`""?lB,E0X#̬Wk7b ,G?1Ƿ%17$C1^xcgG]ćPr*MKT 7'JkAѯwL E꫰Lz t nT , XF9镚ϯD̽ HDډB3:шB8X\:IS0*fXA& T`f9T<(Qz@D%$J "p'aNE*C!Bt(p@--%{BW% ԰9BC8뺮09,B=H@@3 $z3п[ @ˠ<6A@ <8WS%Qһ2P? 
[binary image data omitted (tail of the preceding archive entry; its header appears before this section)]
dask-0.16.0/docs/source/images/collections-schedulers.png
[binary PNG image data omitted]
dask-0.16.0/docs/source/images/collections-schedulers.svg
[SVG image; recoverable text labels: Collections (array, bag, dataframe), Task Graph, Schedulers (synchronous, threaded, multiprocessing, distributed)]
dask-0.16.0/docs/source/images/crosstalk.png
[binary PNG image data omitted]
T*"##EP(T*YTTOM)XWɒ%QdIūWJB\\={&4DEEL29ksJSxCT} KLL%jժ}bڵx9 ==;vҥKQv|o%߿xo~1aH8fX~5Y~*}NRR׏LLL${OSSS.o*)zdx=c055ը#11_Ɣ)SPbE8}4tuuѠAOƍAD X]Xnnn8wd=Ij֬D(9'11!!!)jNѕ(5jǏx1-[>}`ڴi۷u֨Z*֮]yaUdɒoJ|1vXիWXpa/;;;cŏdg~ R9P_?5qsz˝דy$c1 J~&@ԫWOM8cccԮ]AAAhٲ}+U@@ڴi#:aÆ "={@BB.]ٳg/1|$''DtN>-ԁk:wJi)x=;^O2cN>M޽{P8B1l0L8QgΜ ___DFF޷ΝCzDWE 6`ر8HIIAٲerJKtOòs?$S%s'''DEE(sT*(Lpmdee(QQQprrC* "22R#\ӹSJMsN)z1cѸqcT\KHGtt4Ξ=+%ٴiVPׯo߮1缪ӦM7(b3x ݻcƍ+ܹ?)rN baaacJjj*222~DD^~ ggg435`u}2Rpuh "\~]#\ӹSJMsOIz1cգGlݺUt m777-[VH˗GFx,f,z!:;:v숬,9rDt?~`W^ӧOEGСCֆ %4To|'c1VX}͛7,:0qqqزe   4ǀ닄9D:~8n߾-엺y155En48uXr%ѨQ#QѴiS~Ee˖o ֔)SKtcҥ0aPxbzJhqyL4It\T*tgEKdɒ\[VZ%:gF=o-:?ŋcܸq'Wzzz=z4ϟ8qdu a„ 9E97D /// ""Btl4%ڵk~:._>}"֭[~~~c|ghٲ(kJi(x=^O2c(-ZӧOc֭&22sŋahh(: ,̙3qqdqF\| .%_u1tPddd# &Ç%_~-ʖ-~At٤cذa1b>sqr"ݜD5PZ5l۶MtpAStAzP۷?(E׮]?EE^n݊ݻ QQqMĚzדoz1cE舱cGJJ8?~<7n+@Ϟ=QNL8QtY$%%aʔ)0a*V(:͜9DGEHH|}}pBE̍.|||ʕ+bٲeǴiDGy) bdРAߐ&:0W+Qi߾=LMMvZQIIIի1h Q4h?^n.^3g`״kMI^O2cLSL;Qn8~8-[&:{b 8p8j SSSٳ1ydDFFVׯzMS ҥ Tqƍ1oQdGt-Q$5l\oIc1餧;UR^x!:NeeeQn֖>|H7n%KR5($$Dt|=x*VH| eeeSd7UTZnM_:DtRQ$_E'O&333t(Xx1P@@(RG!Z effR:uKtY|8ShhhοR׮]IWW&MDS/$$LMMiԨQ)7/^ Pʕ)>>^t.Ihh(Y[[SN(;;[tIdeeW_}Eeʔk׮S$6m> oW.\ ===ڵk(jF5k֤.]Š;Rݺu/DmF[]RIHH+ѣEGQÇ%%%V\ld1֣GjժTF z8Fݺu#ccc2eJٴi,YWNO9aK#YZ*999.]Vt$I<{j֬I5j ݬ $ SYYYŋEGą ҒjժE̟?tttFTvmzHѣdiiIիW9Ғ/9 ldjjJСCI>\dG-:ڄ.,… F͛А^*:qM+כz1cLZϟ?/(,,Lt{5k֌ʔ)C3f ZjU})yyy6׏>}*sڂt*UZnM/_BCCƆ6lHqqqcӣ V$>$'''j֬z֯_O:::Sffxe244#GRff&}wdbbqe'N 3222hdllL;w222h̘1G[lW^T*;.… daaA9EJxC8ʢ6mP }Gzzzr}{}bIOO+:RRR^zQl.١gϦ%Kҝ;wDGܭ[Ғ~WQd5l\oIc1%&&8t9X"UZnݺEDD֭#]]]ڴiSw%jРԩSy՞prvv&{{{NFFFԹsgzUο8p h};wlْs}޽daaA-ZP[M:::4swn2e tEs12661߲&3fiKM6%KKK:tPοz7oN+WLXp/^%JА!C(;;X)2 &GGG54LFFFdQX1`2663gΈ" С999i 䒕E:uʕ++~]H]vͳ5\\oIc1eggI__3 322LJilTYtuui˖-d*Qծ]Ξ=؅D^^^dhhH/V˗ԯ_?200%KСCdhhHSN?`ܾ}lllUVTn]P;vL@ 'WWW*W\ȈO;} ձcǨtԨQ#7\:tʖ-Kקww{rr2-EGG HXpaaag;g 9E!)xUXzuE233Yt ݸqgϞy~qC5 OEHHҰa-x"Y[[boo7J.M/_effҐ!C\/^*UP5zٳgJժU8qV67e$c1Ƙz~͛G԰aC:r䈰,W^ݻ 6@ ۷ nݺ6u-_k38p P޽Brdgg^zdnnN ,mPP՜h_NeʔΝ;raaaԤI255ɓ'改KMM˗S K~ܹsI__vޭƄEk.//#;;;UVQZZɓ'4qD266͛͛7 wԩS'*S ݸqC) '<[q]9s&U\P˖-ޏ?Ht?++lBTL_f͚RT/%'$$dɒ9ӧO9 2DQ^\rJ*E})ҕg233iƍTreҢ&M)!!A´o633<==ښ^*a»sW_}ћ:o~nݺE9/_^iOIOOP?gL]~եC CA>Ν;G+mۖLLLH__:uD~~~~vzjեKfQ狼Kk֬TڱcoߞԔڵkGϧz;66;F'O듮.YZZСCԩS}533KtI,w-5o\+4i"ZDD`4;k׮? &@tw\t  B\\n݊ իfԪUKtwaܹ5k շ0ft .DҥEGzGll,[:tK,EGR4iezS.^O2c'&& ɓ'q}* 077G%^Bbb"(M4CKKKҬ&M²ep4iҤ[bΜ9}6z &E|EAD ɓ'SN!>>PTppp9`jj Gbb"{!++ 9ЬY3ڪ%shh(ZjVZaƍUK?r9mz²e$oޣ_9sDPTX"anndee!11qqqx"""hkkz9ӴiSXZZJ֭[!C`9rZ(e˖aرذaz꥖>^|ׯP\9T*|g9ttt$$&&DBB 3> 4P{l <wѣG*yyy1P|y:tƒIs'n.]VZ8L2WFϞ=dM8}/^`ԨQرc3fL"9rGFJJ VX;Tla}6f̘ÇCGGGhL,[ SN3֭[U ͤ)Mx=c1&/^y&"""@RAKK &&&033PZ5TR666jFD5j|}}qرBo"dggc޽={6._-[bȑ> 7͛7q ܹsIIIHJJBXXLMMQre...R V%KʖիpwwGͱyf7Eо}{/m$##oƍ7pMVVV9c"T~mۆ>}G߷/^ &=zd˗/G!::uͩ *OJd{F ___9rD/>ynnn(]4> 9m$$$9EBhذaMmڴSN 1c *Y$UT`09BTT)5k.ݝiȑrI-"SSSZ*m޼Y "##6mDdnnNK.Uĥp4״rq)'c1:|0#;; FE'믿&]]]9sгg$L>M6crQ׮]ײ{ 211N˙G۷o'===ZpΚ5hǎ#Gvv6}dbbRˁgϞQթ^zY QSvr޳g,ctt4daaAK_Ukb3&d3gY[[S%hɲNw94Ã.^~[n*Y=zƌCdhhH={ÇKږL=z!(QƎK?״q)'c1ᅮ$OOO*U4ڼy3կ_+[%H+-%nĐ=mۖRSSN>Cڟ;&M"CCC:|ZJ%"駟ؘOI}%ժUj׮-˗?Q QgϞa۶mؼy3.\+++4m͛7GҥKtܺu 8s p5(Qݺu'7nv+"©S ???$&&Fh޼96lgggT\lɓ'@XXpIš~D=gXaѣG y[.T*lmmo;ٸ{.'Nܹs "mL5l\oIc1y,^֭իWEGyOVVp J*{%^[n60dT^] ͚5ԩSEGyϽ{ТE 8::b޽022{G>}:&M$i۟G믿ѣ1g۟4i.]={u֒_ThѢEGܹs1uTر:t(r{IIIhݺ5RJIoy1qqѣG%J077HHH˗/q]deeuԁ;*ތ-##Ν˗/ngg+++XXXDBJJ +Tf͚-[Dٲe?2VPIII qqDEE!++ TsĜݾ};RвeK ٧kZٸޔד1c3k,9rO%W֭Ν; 8;;Kvzz:ߏիWĉpqqA>}0dXZZJOa)yC͓~áCФIڕ7D`޼y駟m6tyڴi/_"((d$&&"** nƒ$ٳ...Y&`aa'''899惿`ƽ{p-DEE!!!IIIr ѹsg +V*W=|鈎Fdd$bbbr^? g'''TT@>dV-(((k= $c1X}r =*:J^~]"44'O}ܸqk׮/гgOחQ(*F-Z> 33"yfsرc%J:u+ <>>>E"aĈزe >ƍKTZ! 
ɓK.dm?ɓ'Q|y5dLZ:u*': ceÆ 6mbbbDGaIkZX~$1cIO>Ν;EGׯ_#,, 'OZIOO޽{~z }􁗗*T>K6Dӧhٲ% ٸq# e˖aСt9sڵCϞ=bŊhvv6m۶ȑ#hذJGS6D`ժU5j֭[//]ZZ:t۷oɓXS׈^c1c1c,))ȿ>v܉J*=G8z(ݻOOO_hժ6oތdiJ.'NpwwNj/ 1h X7C%֨Q#`Ν:th7 1dڵ  4Æ 1`lذ@Ν;֭[ P(c1c1ci$΀T7###8p*T@f*T)S ** ׮]Cڵ1f,Y۷ǦM+fP:kkkAGG-[ ˗/СCqF cƌǜQ"""""""""" Bvvvػw/ѣG<{LXj֬ & !!>Ddd$^xP8::"$$k׮ǏQ g֭[]t͛71o}8x ܄Ѩ۷oGDD֯_ݻwsW=( HLLѻwodee BBBn:<|ҥ ֯_OOO;6olPG-,,sNimڴ%Kw^S@zׯaÆsPn]C#LLL-t8DD+77JRK澫VZÃvJRxyyap0edddO>AZЪU+իx!WL&annCw١FɁL2Vo>XSLAAAС5DDp&grqqARRRSS1`'NDll,^x7n>Cnn.akk ?[ltԊRTO$Ldff9998}4޼y^z/vX|9兏>{ů &(* DDpMC)+pU"77Wʭ~7n5k)S` ӧ1x`Ԯ]ݻwǜ9s~<\KR0~xlݺ֮]^z!OGP*HLLD͑,x{{ӧBhΝ "&&;wFbb"6oތCR bAH dիx?C R: d… q)^:6m#Gb8s ;//aaaرcЮ];o зo_t q'OHJJ#f͚8x LLLУGST*,[ ۷7oÇcʕxfC6m9_X:t(pqEoݿ}ͱ~9ooo8rt<(R?`ƌذaz]um۶EBB&41:""""""""""W^A.LPQFؿ?1tPlذRT4B&]vp-8qBغu+QfMlhѢ<==ѬY3XZZj,RP;v B&MJN"`ɒ%Jѣv܉nݺi,*ٽ{ڵkcΝ ѻwotpvvqco^ߺuk:t>>>:t(~wd2DJ,ԫW nܿjٲ%ヱcbժU011憏?"qY\x)))ظq#߿TUVhѢ`Z~ $$ΝÁиq2H$XhLLLЯ_?ۻϛʖ 66VVVe^okkxS5?3&M~ !!!M"))  ֭[ }‚(Heee|AZjx+$ai]IH 33)))tRRRŋ#33@ nݺf ((W\#GV$ "##amm ر=zSooo4j;v(`+++ڵ 8www-Gk<~WL8 ^oܸ1ム l۶ Z JDDDDDDDDD$R^2CKҦMٳ~~~ŋI0իWG׮]ѵk"߿K.!55q,r)_O>>G˖-5Ҷ1صk y/x8|0|}}ѻwoٳժUx?ƌQ""""""""""2%sգGر)[C{O>򐘘GGrL&[{Ç{.֬Y+WuPT8r222\o tpqq;uss+LMq)! VX1L&ƍ1j( !!ٳ>}:N~jժDqqq^3 IDAT6jBDDDDDDDDDd@^z5k `z쉍7bРAJ>}!'OGdHHHFۗJpvv3:vv;V\QF{ݻw"==}6fܻw055-R,-,zxxaÆ.={"00+LLLҏT*ի1vXt Gҗ!HJJBHHNٳgk5k"99~~~E||ǎ KKK9_ĉ-ԬY[.J%qܾ}n۷q![o߆R-4h ppӤݻ7°dH$'Joaĉ޽{jO1JNNF~0i$|:ٳ'|}}gp7Q""""""""""}6T*`eeW^Z 6 44VVV0a!{իWǞ={`kkCCCabbÇ#??'O~d2ݽJnիWq5\v GիSJ6m-Zzc"22RgI$,]Rzž={ЩS'-'N@@@F;;;šw裏gggaHX%"""""""""7o~EI$={/9ѯ_?(J 8o޼1c J1~b<|jhϞ=+^~]xrr2?w Gh߅n޼ bɦ...V/!J1tPT*|Ůp5kVebe2z&_eeeڵk~:]Wbǎ7orrr`mm͛-[D˖-}K{~O?X+HR~~~wkT*.]-Zh%3g""" IR۷ɓ'/Wɓ'ѳgO :/nkkxSNk^z'OzDD 5jӧO yW Qp#FJ¸q`iiaÆ͚5 sʼn'о}{igg{{bwd24kLc}+;;;v33@o޼ ooo4h111J1p@XXX $$YYY1cܚ5k0zh,Y}YF۶mѶm"󑚚\p.\q N:hѢZj-[m۶hҤF}:`Æ 3f 0{ljoTF!88f–-[pedee̙3X`Zn/oEaoooooL>۷oݻw+3`bbkb„ (((@ll,1m4A;w.N~aΝ(((n:`̙Z}i&&&ܹ)VBVVЧOdgg… ի+V^ -diiX iiix%u+W 33V:L')(((:25kF)t(Df̞=7o:"i6j(իv܉@;gbb:Dϟp"..yyy0SRRмysu2g999޽{/y...x nnnAÆ ZX 4ؾ}zf\.?W^?ӧqQ$''ѣGVZlvڡ]vҥKK5kGCزe d2Ӕׯ_# 7oބ\.ǭ[7T^鰰8Jՙ3DD%fbb:H}WСCb(j|ƍZ`533ðaô2lʕ 777oDvv66lؠwPIm6;_KP`Μ90lllɓ'c͸>.\H4o=z4<<<иqcaHII)Ë֔J%Ѻuk,oo$h֬8.oͻKkRXXJR֯_ϟV(t ?e1x|nTTp"3믿  Q"k׮4iR('''|%қ7o۶mŋ>fffVll"O-33?t|bggի<6""""Cbbb=24*JdR,v^*"-- uX?#}=s ڴi{ 339333n hUi&XL.㧟~ĉ^~SNĉ8~8?]vX355g}E y&O˗8[S[DJ%"ǥR)zݻwk/Jܺu93334jIIIQԔ,x{{%ԨQw,ђufA*z0`uxbǎhٲ""ovvvx{)\bH-__~e/0i$|#Jqaڵ%u? .XwEz<<

Lщ̥kڴ)lmm˼F.[n HDv޻5mmmѴiSEDDU1ߺtTZyT.]0"""""/VS(?~@XZԢkҥy"\]]ѱc"K㚙aPnn.͛Wj1d,[LWF^^BCC,˗/n:F9k1i$deeym^^~'>>8tpEL:Ϟ=øqPfMt3fÇ˼}֯_GFNNN611ѣXaWTb֭HIIT?e|i 2 Xp!J2d뚵5z쩱Ϟ="Ef\GGG,] m۶T[5kք&S{n1YYYX~=V\ϟ Ϳ&MݻwC.uqqp;T*N:ܹ)))W/z$ $ My)ݻLiݻ_yf(J 22姟~˱n:tI#m޻wݺuJBj0aVz6L&knU{N>e˖!99pBh23%r9>,]TȈHuǎ: Qdx,,,4ׯaooOUÇkݸ8.uj;DDDx͛Ν;g9d2x{{#** oj >>o=UW{Og@1bDDDׯQ?r3339Rؠ8[hh(VXYT"44TH~EΟ?nݺAPh P(T*q!hBm+Vh|jׯ_iiiZi :_.;&O'ѱjHJ%<֭[cΝě7oзo_[N[<772S|署{X%2df̘m=((}E(sssP(\!""""""Җ͛7kռ<(J=GDbǂ(ḭa6668"񁕕GD$^mРAEnn. $t8DDDDDDDJ\^abRSPPSNq9c2J\2WD o޼:9;;C" BS-ZI0H*d\ wPgϞ#1c://т?O>yHX%"""""""""""""ł(,D` JDDDDDDDDDDDDDQ"""""""""""""2X,bA  DDDDDDDDDDDDDdX%"""""""""Ґ8ddd 338u{"׾+//χ;lllзo_ܾ}°`ϧN±cǪԦ>U~4 6 ,, sqp%9GՎx/V[(J̙3 4= 6?b}|2 Ǐ9 JDDDDDDDDD .B@:u`ffཿWk5c 8;;ضmN81cT6|l۶ 6l(r}8y$vQv.ܬ__}-[3gbܸqѣ0?)C qssП8q"cȑعs'ʏ{J%lقɓ'cƍ+r^aA-[\`eezq݊\[Ĉ#PV-a„ EfVÇq8;;;7ydl޼gϞT@Tn_cذaHPn]aSv !7@T[ܼy?>>cDFFbv<0#Lbʔ)^ JDDDDDDDDDT7oٳ1mڴ"ǽQZrQkC )r,++ AAAnPAAfΜٳgz_!C#Ώrs9ܽ{^^^E{yyaݺuQc~t#%秢WB/rlG9?bTZyMQ"""""""""*7XXX9>g3gDO?ٳ'|}}q"Vօ 3,Z¿vZ^ӢE dee!***a BT67W\bvkܹsG}PS^[YU;iG̹JOEλWFeήȱ7o:t>&hnFaAHm3j mc,\ͨipۘ0,Xi1~^˗/cȐ!8~x㆖""""*YAAѮ]b\\\m۶Ŏ;PV-A*"??wV1_xmE=|ӦMCǎqF,YB>}/:D>۶mTB2?UMaAEŸ-j~ʓk+)o;b Pz~*2v޽"4w۷GĚ&ÂfemnHfBnF fD!1m۶aÆ iLRĖ-[0ydlܸ-""""*]ff&QF2sppҥK~Ywpp_|ݻwYf/}cJ̙3=COJ:'d~055 Ÿsss7ةh;b `)#lٲV*vN{OEi*?,81nF]  IDATac,\ͨep~ÇqbK%˜d8p LR5""""*۳g\{_~~:5jR)ѭ[7۷HHH(._}ӧO 4hxEϟ?8999nѷSvĘ0M9aDFFX@c~SQ LQwp1ovL 1lNTbӅ 0sL̞=bb.TZy1燈k $ ?~3gj<GGGo~ڵ 3gIIIEΆ&}OEs@}!Ѳe" 9?6v*ڎsN~/^aÆ9c~ )ÂzC]mo.͎IUup66‰*CcڵkѧO888x^cZW*B!"""spp@jԳu)S믿ƢEԟi5ɓ'֭[ݾ};Ν;WI&ܹsرc333Q~}ǬMxj^Vp"Ǐ= " 9?6v*ڎsF~6l؀5k'ٳgHOOW,kSMQe uc,\66‰*CcΝ;8}t%1-+!QHR.\(??F׮]1}t4hcƌ)qvϨQ0p@M%Č3> m۶\AE2 hР,--\ <)OnӎD"3n:deex^JJJ 5? hj cnGWmvڅ-[@&a֭غu+`̘1pttT_']+TRT~X5PƲY7;&‰ ǴJ̙35RcZ̟W*J!""":u*QPPPDUV~oݺ5RRR[n-((իWuV߿>paxyya̘1XhQvӧHIIܖ:?Mycǂ ^N"4MBb ĉ8p v؁#F{1G=۶mŋ+Wݻ]VKPٳ,탛ЩSr~E6 fǤ*YP.C,2acc~ ӧOھJةׯ_k}T*V{OÂ(_XYY!""B"Z2 r\rd2Om///kee/^C+?P+ZZZ".."+++)Q"""""""""HHH@NNaj…8vbbbTxVVVx)4޶ _}СJ???dffjmMoq-_^PʤsiӴҶ\~;vĕ+WPfM)V/_ IIIhժᔪDBk#M077־wTnƚ}~&&&I Tj>ބ(ʼ.L0 7[.~wHRC"""""""#Td:Ƣp(\2k6hnn.T* C"""""""#U8CT~ \\zDQS """2FU-((ȑ#qe;v 5j:$""""""2b\2W\lllX# իW:JcAHG<<<&t:7`׮]HNNcAT\dDDDDT.4Ct׿mVp{r\q Q%s\ m6L2k׮ Q Q=P(D}3gH#ȑ# Cxx8BCCHMP@&A" A"6dBQi,鈇>W\'o:"""""""ľ Q"\2HG`kk4jJp4*===z@׮])t8DDDDD֭[q紴4<{ chݺgӧ꟟={"iذ!ի'DxF+557oHR\|R'lll]>E0* /^/4ũ!$$D0 ^~~>RVVVx>|k׮myVZ2rwww̚5 #G:"QYf fϞFDm۶ ԩSEc^x.]Zj!͛ҥKe^#H3(*?kԨ}޲eK?^GQ"**С"4n׮]+\.J\.ǫW`bPhԨ{իݫ3鐇RSSCcrssRC$ ̥ % ڵkb;3: {Z[[e˖:驞yW * >>>,@Æ ;sZ&[n HH ZPPQFسgjԨ!tHDDDDDkȐ!-L&m9rd뜜 :T߿fff9r$uK=ojj Fd|]ST5j#DtiiiB}ك8 ADÆ K=A0"zT*K=ߤI4mwqQ]@-*U"XۂDA[~QcbM=P *(FYG#Rdم=3x3眑a"prrz}S۷o|2LԸ1'B-[qGB!2GU"زe ֮]_n-D!B*D%%%899ERhٲ%[ۛ ^^^PVVt[׮]aii)D*SNJۺuF9ٳ'*.DuS͘ #??\\x̌GDBVBB+~z*rssk׮ǚ6mQFU!DiiihҤ RRRp}XYY)¿+fϞ}ٙwB!B ,ZǕ!7\ro=^\\ oooNСC+QUUŤI8$"Uy\MMn+cG߹RTKK }cASBB>V@ xD"СC믿I\>|8~wC" !!N( D#"")))HMMEbb"bbb D3fM8&+W -‚ x!B!ٳ'nܸc***ȀTrrrТEիbbb8&#}ETT=" ]]]HeƏC3 wMW"##VbԩXndfGWfnn:ѣG}P( S<<}ر#8B!4:eeexd"77ZZZ¦Mghhhcǎ022 *HD"AJJ b1#"''BZZZ455add###CI=fPe˖QȡzJJJ‹/*wssjE=n&&&59ήXt 7774i+>{jՊs HH-!::.\dȷ/^׎-8'#ԇ(b19$z[bܹ8{,x!B!Ѹ>BCCdffBYY;v RdggUVSSSD555nnn@ c 7n@XXBCCqH$Ԅӡ eee ++ b=BYYtuu 777IuPӚi޼9 HN5#^|p",, >i&&&裏3XXXiӦ(**8Ά,,,*ޞ&k||<ch޼9LLLо}{hjjBSSB7oބD<{ ЩSڜ;hBHŮ]0}ZlgϞYNTzzzg;`֭9'#GGG\z%%%^&77Y8q̙woQFat:!B!2 9rAAAA֭ gggX[[а\!C,>>>Q=E,#((AAAx!t899ҲVW{J$$''ڵk@DD?޽{^^^ {;JYQ\EEE8uq9o߾pqq=:w\=ӑ/"""ׯ_F___0 =zyyy3`nn^=HDDD())A wwwy#D ^zńB!TUUټyxG"Da̝;2L(W^DGW^e*s5 IDAT!CcW\aM4a+W䖋B!޽{ϏD"G3g7nH˗3###9;;ё`&&&~`?z?lڴiLWWqƱ$P( 2mmmƼXHH+))j?YYY, 3@ٞ={XqqTQ40CCC&X߾}ٞ={XvvT)..fOfLUUNy҄(~1%%%C? 
;!D\]]H$b믿ŋL$1L$={f̘%!B!X,fFbJJJGѣL"ȤH6d&MT?`LII >]rE&J$vafaa7KIIIjFqeffyf͚m۲lr,99M>5i҄uؑϬL&}ˋRO??YfTKfֺukɾOւ-ϔHQ^^ٓwBF^r7wB ,Y())AݹdSII n߾ GGGaݺu\2B!I$,]| o9sׯ_TTTd'OD||<ڶm WWWW,Ә=0` qm믰I***͛7qI+WXvQlڵ+vZ⫯ؼy3sbɒ% r-† 0|p_ڵ Xl6l؀b׮]hٲ QR/_ٳx".]&&&ر#4554kxb1Mpqq\|RcAXX.]W[FΝall hiiAUUO@;wptt3all\qƕ+Wܺu :u|}}rHȇHJJBPP#XXXTԎm+++CRRRE\xyyy4h|}}A"CTTpQֶb_`iiY뺕H$y&h8|8$ ==zTݻwcڴi0`ѴiSޑjFq1cŊ9s&~G2==G۷ATg'O?zꅃE#Wqq1̙۷cɒ%Xpa}wi>kyK_3MMM֬Y36i$Y/ ?{mܸYZZ2ΎJBdgvvv ޽;۹s'{)++clҤIYfLWW}w2[~lΝL__ B΂Yaa*,,dݝ BvJJJ|]ZZSRRb*'4h{sB!Xddd0+++xǩ/^0'''nݺ;NqkѢ߿BSc1U5A5JKK٤I:;v85VRR¾+&XPP8ug& ?VZZ;N8pS<Կ҄(a1&Hʕ+.eK.e2黼3߿~q\]]"""dwFF裏իYqq'ѣĄ &0X,H$b]ve{`D@ `JJJښ2N!BHCSfjj>Sqjyzz2mmmvUqLKK(q4ֵkW֭[7;TP(bŴYTT8dݺuLYYm۶wqF̶l;pb՞_4!J^еkW֮]rssefc"},??[B̙ÄB!8p %''Zikk3333vnYil>|XqԩSǏf/P(dC}'D"aڕ^ھ}{sJN!BH ֥Kfoo233y`eee/`H͵kXfjzKfcceD5ʘkݺ5}68u~&Ν;yG[2H<;JűVZ1__7nHҠbʔ)prrB޽!1g4k֌[&?ơC`nn+WpC\|fff8~8;XYYqˣ#11={D߾}1c qDHc}v"''7o֭[Ѿ}{ny:t۷Ȁ)~*~VUUEfͰfbS\rjjjH$?~<-[֭[K!B+++àADEEe˖#IEii)F۷o#66-Z郤W^Bpp0yGgϞ]vɓ')Ռb[|9~‡Zj;ޞwj*̚5wt Kb޼ylڎ&DǏؿ?tttxGÇ矣O>z+irrrၘ/JիW~ 6w$Bwb 8vܟlذAB!H/_uƍG akkmԩSrwBhu1`deePSSIгgO|Ҟ@WT3ҥKpqqApp0F;} A||\Οxݺu'6m;:t~~~|2z-fiB_0yd,Z}޽ wwwhgϞ˝i2220p@dee̙3 seyy9-Z5k #Ю_&H5WWWܾ}ׯǴixG"B!A@~cРAԋ{XpBM'6n܈8˜ԧǏcѸtB\G5aaa[wz!H`kkVZ̙3r5R^^H$\|Bwz1m4}SN)Ç;v쐫/qB͛7> 6l3kf¾}p%XXXD!B[p!p=SR-===;vwzΝ;cXx18E5BCC1h 6c ֭Ï?X,w?tK.mwE޽6GѣG`AA\]]QVV4mڔw$@" Çcر8|0F;! !11;v,6l;T̜9DTTLLLx!B!Da%$${8v;LK.駟;N~g̝;bzzzDpp0Ǝ۷o{T3]3U)** #eeeѣz_~w"99PRRG&NP$$$@UU.MфhCwu˗/ٳyǑgϞ6668p8͛7}իWcҥjgrRYYY);eeeޑ#F@rr2bbbf-TB!BQTTPQdjؽ{7RRRӱpBqd1GGGhB.ߚκuj*bhkk#3ׯn޼nݺqquX[[ҥKj `ɒ%u*&Dlرc8R֭ɓy! ֭[`{/ & 227n܀8-OOO4u1 `mm 333:twB!Baee8y(((@Nh"L>wJ[ׯzU‰5nܸKKKq*PwTM `̙ܧ~&Mp=`D_ee͚5X~=ՌLulق˗ bYںvpmʼ8111ի-??;vrqM6T/]S|}}q-ܼy"wѭ[7`Ϟ=ԫXYYaϞ==z48ȕd|'رcyǩW1cܹ}}}q!BQ߇1"""зo_qDcx{{}a֬Yx!x",, DJJ :t; fR^^###cżpӧOtvyǏǣG!ŷ~`$&&B |HvcF&##f’%Kd(Ox%֮]; i~G`ͼ;sss,^3gDVV8 &NWW? 'N# Q!BQ탩immmbݼck ###jʥKL>FAALѣGV_"55W\6hBZhڶmYf"Xr%/_C?bՍf]9s@[[~-(ȍ#G?ƍyG͛78~8(B!Ƚr۷Ǐq!""Bɥ"22g޽}HcTg޽OѲeKQ6lTTTdgpp05k˴_yӦM{6hBի! yǑ???tK,(ݺuS[Vԕ ֬Y;v 55wB+))7|3g6uC+chhSx!B!D]z7xGW^044Dpp0(:.]‚wƌ 66kcT'NhTKGEEÇȑ#|x{{_D"ӄhb  E? 00)))C~{++W "s .qQ!BkOF>}кukQӧyGpixzz!ڷoC5y\t pwwE.xzz"""Bfŋi :ӄh:tQ^ښ%d͚5ݻ7yGb ģGxG!2\SL&82/?y!B!Dnٙw h򎂬,\~... q@56yꄇM4E.).]ʤ?y׬Y3X[[#<<^O ȶm`nnWWWQ3gߏ/_Bȋ/s̀ХKl۶wB 'O"3găp9Q!BK(rBؑ˗/CMM ּ '''"''KT3΅ $iҤ qw***2O899!""^K DII ߏ)S4}۰aàCBȁ!C@ )So>CwxGM61bv; !B!r?P(D^xGjjjѣyGAtt4zEbmm @mQwST%??n݂=(rO>zLzjU5{{{ܼy ܹsèQxGJII GF`` (Dbэd5j; !23gLJw|||p)!B!{.;\177ǽ{x{`nn;\QWW񡚩LUPVVF5fff2{L&}) sss 99֯ СC0`@\ƌXBX,͛71zhQF~ k(֦ufACC'NB! ccc1䎱1x@RRO%?h@f*'/5S$BWWwbbb z^y9abbR(===4o&Dr?#FE.t:tH;w֭(raȑ8wc"SgϞŐ!C xGNEEQB!B*D+f­[R[%LLL M5S9yU' ##zGфhcL"7;QpuuCnŋHHH)++Å quuEDD\aF!BϞ=Cv˗1a۰NZqrqq1-[e˖aA IDATܹׯ;VFuۯ])SΝCϞ= ___H}@QXX,Y[^BqqTƧ&c(kϞ=Z{A5S9yUY]ӦMѼyzgϞAWWW*>MV?P}# oݺuk׮cȕ2VVV; cLKK9rwbbb6n;!2s &yGO>eB!B\i׮ J[ٙM6W^k2,**1بQ؂ *^˔Ν;k=110---yf}v̼>c {ڬչ c3>{a;vZ{A5S9y &H-i]uϑ1X޽{0%},?6nܸھ̖&DQFI&!W?~DQ믿Qʄ ؘ1cx Dfmƌyǐ;lǎcB!"W7o~77i$֡Coܹ0Ν;ofLKKEGGW1ؐ!CXϞ=z/,''G*###` RiC1,33S*U561cǎ1]]]U[T3ΰaجY^]ƮFϑ1Ƙ۲eګ̆ ړ>MViȑ#k2[enp]!Wڷo͛޽{9v=M6SSSܽ{w Bd޽{=Z SSS%B!?ѬY3'YKK ; r{JJJEo=1F5S9yI]JJJ#{.??_f? 
}4!ʐLWȈbD1bJ#))$X, rĄG !BHTq))>t-ZW]SSmֶm[d-))H!Ri|+>Y66T3Nf*rַNOOFϩMMihh~ Qs-?/_ &o߆0uT0*W\\e˖aٲe;w.cǎxk0eܹsٳ'닜:7ڴitGϟKv5jȪvޜfBHC'}ACm۶-}B!>@]XXN_\\ KKK# D`lٲݻwrocdeex=aw^]VjC&w-jl(CTGCCCu;@P?v&Deۧ/j "s hҤI۲’%K/ؽ{7Ν oooƎ CCCXpuয়~¤Iݮ8vΝ;iӦ1/ŖDJm OuT]_2uuu(++KubyUZZBG PVVB!4V͛7GffT CXX>}[K,9rG@@V^ &999ؼy3ocPWWG^^6lPq4z bM:-ؘ*4o\jLf%t1v@ƯǮYYY^SZZZ2i^OjBFoƚ7o.&M:tP;w2a؝;wzÙr{ii)c!C={___&XNNTˆ T" Snƍ^US]픖9b'NZ{ȫLK=,G!BHC&N;F :Y[[k{aڵ>jUV,((wZ3__z}f*'/5S۷3CCC1jEck?c]H,>С l閹 .??M6Z{-ϙUo.wݻ7R~{ cxTKӦMQPP HTPP vRRRjY~!7 UC}оB!H튩D,Ąw 9>4& jƧr<LJƤrR3U122BYY߿;Jj}y!tQGJJ addT҄ձ'O_q/m{o۩033T\,(f͚IuaTW;zzz5zNe];is䠸 .DGbb">} WWW][xRSSyGYODD:vAWSI̻f"쌈QjDcCXX\\\ve\\\v#< '/\իW1&LDѴiSprrƆ znnn/^K.ҥKԘ,j',,  BAA]F}ɩbRb_h"\r.\>!BQD [r`ذa/; `رwiJJJжm[[cǎ喃jmV3UIMM!/tڕw6n܈-[*?0e)ѽ{w<|qm_nGUpożX 1311کX,>MFQB!B*ၬ,;ww?3gۛw 8yTΜ9| 6kcTػw/(ra߾}2;ooo}C&C -,,DIIIKdddظ ϟ?XOw'AvHcBߣ}!B!; wGAVunnn֦+DbjX3PVV; WoF||<|||dگbccq+oJJJp:}4!X[[#**wXiiiR#** M6& '|&M(LȢv 226! sr,/__EB!Bc8u^z; W0P(ȑ#~QzBBB0zhQPͼ!5SOOO<aaapo>XXXs28pL7ΝCff&Fm @dd$͛#+ܽ{ϟ9WWWXXX`ڵȅ/_e˖B޽y!D&Çݝwp ;^H$B!SRRݽbyΝ;#44Tē {\I5#5Sc"-- /^ t?d@@̙G59 kkkb!X[[CMM ȍ .4_vH8995/DFFYfի(Ȍ zMx"i2B!D?>lق D; wܹ3z; Q666000Eȑ#t"'ooob矼pTx{{B!B\kҤ /^KLyy9O___^јV{)/_***㼥׌\Lu.]ӧO?E,Y---|g\sL<***Xbv)cҥun&DÇgΜ@Y,(1c4E`aȐ!"s۷G>}h_pttD6mxG!B!DM<FFFꫯxG["99Y!۸q#k.Qdf鰴yGTc 8tNЛcƌ4iyǑ8lܸ?TUUfiҤ 6oތիW֭[\J^^N9sĤ Vyǘ1cPXX'NaeeX ###qHLLD.]pMXZZ QbΝCEaa!:t耕+WbĉB!7%Ĕ)Sp ǣe˖ԋ2"??W\H$F$ lllТE ;wN <{ 8p ;Nkͼz WƦM`aaUVo߾Jpp0qͷ~["..;vBJJ zyaѢEԛP 0Ǐ#mh]]]:twKHH@np5>ȧk׮n݂82|BW+VΝ;"wk7DL>_58B!(jjj 2HRpB]6l)Sxǩ{={6/^;ԕ 999HHH_~+WBSSwj 5h"رh׮[?~+V X~=9%3fȑ#uxǑӧOcС8u ;;?///?...HӧOaaaiޞݮaӈ͛7ǎX,EV\}d( 666իyG ϟ; !}B`` (2gaɼB!pD"݋ׯcҥHӧzjcpssSضm/_?w[p!n߾Ǐŋ@Nk.]1̪Ucǎw&C?Ν;q-~~~x)j*żHÇ/P9r$>3#--wH$3f ڷo˗Kmm`iӦ"S.\@pp0~QqDFF"31L6bAHccGf222h"|7h޼98B!($Xj6m;DEEK.Ǒ֭[ 3gDNN5 `ذaz*8R~zlڴ 'O[na1cv1ԌG533ӧqyܹs;9voV" -"هbZ٦4d%2![ ]=?<4R>1ӽ9Kxzz"//֎˗/COOHur?w}`ٸxÿq{/HÇXt)!''WOiƍ*77 :u:Yp!Pђ͑,Xdeeqޭ[^jK+zj|7n:J{%h"*C l޼{"fشiB!HiӰrJ899_~:N%%%]tv1qDaȑsI?]YYrssQRRsss$''s ggg8)F__gΜO?۷w޸tR'8;;;ѱY\3'''`ŊBBB ݻ#"";vy T+999FVVN:$PPSS3n߾ &Yee%f͚P;v>Pdn?BSSSLiKB||UVVrG***ؐ!CX^XII q!BiJ$M61@H$bOfrrrl}իW ?>{u,֫W/֭[7 h"&++˂8Ni ɓ<;zhqyfllth< IDATtt޽{%sttd?POڵk,--Yaaa5SܹÜn/SWWui***?ْ%KXyy9cƍ9Na˖-cW3iiikDŽ3UUUfccPx{{3yyyvEHDEEg۷gϞ=:iƞ>}tuuSʕ+BHdeeY@@Q$f˖-LVV|bB!.??7 6 .D"YnXZZZ}N)c,880]]]Ϝ?<>Ymߟ۷o3fmm],pԗW2;;;&%%͛7tDlݺuLZZٱ'Op/.]0mmmg,$$txb&''N8Q1ǏgԔ]t}\FFc4-_H\\\>qD$ 2UUFBOŋsRRR6:uj=!MAhh(ȑ#\GÇ3>¸B!BE$P֢E fbbΟ?u$lmm [|G(R8\ijj2Pȶo޼ySƄB!6l'-ZXX(ueoo=zT?ܹsȈikk{9Cu:cogjkk3ᚩO/^d1''/f?&>>^APg+~-XIKKEݻg[&0Hf͚XTTG`Gf|>-]5@ʚ;w2555ֽ{w4t*Deeel̘1e˖\ǩ 6m4.\ueJJJlƌ][W^eZZZ믿/A<ׯgٳ(ݼy3Q!Bb䰙3g2iii6dv̙p6i$󙍍gVRK9v1MMM]tttgotfiisuudӧO3sssܹsd)߂deeu,U{MFY/MogϞe{f͘1QX^^6mTUU> m|rʌӧ?qL zNqlLIIW:qclŊ,;;~B֠;2uuu{T~IԩSJSb6z&]蒦իE믿nO1JRttt/t i,v|>[~=Q>ի|K!BHsƦNݻ7۹sg_Ύx<6l0Psնe"ڴiZh%Z\rihh0WW:ǬcFbLb9+''m۶уȰӧx5y)Ͽ]|M20---6w\v1[s?~ٳ9::2YYY& ŞѬK.LEE]N.]ĉ%az4JvAϔG0T,00ՋIII\箢?X2@Çzl2P2.˙3SQQau>_ee% g#F`|>3ooo_登"ݼym޼3)))֧OuV}\c iӦaػw/Zluj۷s΅% u$Ɂ sNL8Hz&O?{رcDH '''0={:RO8;;CFF ב!B!5Dog"==۷/ahh###(**BQQ222塼>Ľ{p}_HJJ:,--{{{I,stt4ƌٳgc͵>@]]nnn;w.>gϞرc1c lڴ RRRS7oرc8|0bcc^zG044DNбcG/++CQQ #%%/_ƃ8;;\1Caҥ(**?ҟ|?#Fѣ{z4kcϞ=Û7ol2̛7 \G{.~WDEEƍhѢ`ll cccE;??x RSS[nҥK` F?5&1̙3bbbЫW:ScҤI8wѻwooǏŋFѵkWڵχq(--EVVRRR$&&"''={-&N###fT~RSSϟcÆ oUVaٲe&!1Y9r$n ]]]cx-44 >ױi1m4=z-wEQQ~G!!!PVV:!B!3m۶000!z聡Ccǎzjk֬GQFaܸqصkD4k!"007n*~B p,)##5>F^^0001 333K<c nnną йsgo0ap̙z 7o .߿Ѿ}{+++hjjk:BKWVV7bӧн{wc۶mC=}vrq̙[ns̩?p?URR_aŊXhQ !ÇpB|l޼GC<19r/`̘1d!B!H^qq1 gl۶ ǎ"\eOCd) -(hkk&LxpL4 ~~~u*i,t,_9VZ33j=wF)S`֭;5SPP 6 )) ih_ƦM xyyaʔ)YQYY ~:矙@ `;v=zf̘ـXLLL###,?~zۛYXXpS, 0---n:z3#F0]]]!ÇVZwJ%vyŀ1ԙP(ġC_AQQfff߿?Wo^t 
f͂.デՕ%MMGaŊؾ};tuu1g\rO?~aPSS͛7 ̄]v6.!zGzz: ӧȨq=zc֬YCzz:V^?yL!BYYYY5l0=z;vZ# prrǑ~ׯ_G޽;;;Hl:~~~Xr%7n 33VVVpvvѣF Xbbbb %%777s=>>>Gll,Q";w..\UUU5jFII Ο?_ל9鈉SScx8<uڢ=DɇԩScm]v1о}{@EEⵥ+"%%qѿX[[cҤIiR/222p;wW\H$Bn`ll cccBCC5kts-<{ :t[[[|&!#\í[=zFFF044JKϐ#55GRR ѭ[7XXX}B!BիDT>KQOOOСC9r$D".\ ""_1c@[[[8pǏ1j(,__}ĨYii) ___ uF+77ٳg\cĈDll,B䋔DܻwGZZoʱPUU! a``^z_~\|IIIkCAAAcѮ];өS'7? !իWHHH۷Egzz: ͛*CYY;v!ХKC]]g@!Bi ū5V.EE"͛#22C ?'NÇtNNNpvvqs۷SNիdlܸ+q|.WVV0gφQ&?{uW^mǨ%?JJJ`dde˖aܹ\!Iپ};֯_Tq K3 !B!57uzWΝ;7ny***0uTDFFĉ033ݹsߑ".GѫWZ!oϞ=>}:֮]KVÇX~=كN:aŘ8qb+*D?ݻbtʕÜ9sӧO1h <,''666(((@LL ڵkWIk<CNN<N{$RTTǣ2/χ ddd %B!|PYYdddQÆ ñcǰ}v,YV(//ɓO.C>>>{..]#Gbݻ7z ooo\v 4gvo777W!C`֬Y022BHH{ EFF3fÇm6:tBxmٳsQQQdggeeeHHHhehsB(!B!B!4!MZ%%%ppp@||<ѳgZeB~~z<|IIIW_m۶9s&?v&zAAA?>v؁9s|XP@ ,@׮]JLjCFF...wmۆÇC(/_:^h$$$`ԩD5A111qBB!B!BO^1M)ZXX[[[ܹsԩHJJBVVL<033?RRR$6fse,Z9s'?NWW[nEzz:ƎKB(bժUɩĤ\\\p]l߾QQQׇ;^xuFK.8wN8yU;ٳg".\-[rE(!B!B!4!MeR4//xΟ?:[.MMM899a߾}x99Ν;#00;wF>}K.r4V²e˰o>Xv-^x_E֭;wH81w[t x58׽{w .pz ___Gxx8;BGGƍCHHH/<==f?~D`ooDhժƎ={b߾}xDrddd0sLΜ9$fPu 8y$^zh 8gϞŰaà333|Faa!ב?c sm;;OȀ+6l]]]ӧ |>333;w.\v Y33/n۷ocڵ011pQ#}%B!B!&!_666ݻ!##Ǐ7T2225kpEԩSŋ1fo߾Xt)?W^qD"fϞ0=z҂={["::zzzGIq:v `q^%''ø-q IDAT }ױyf <}~H+++(,,č70m4<~SN&Zn {{{4T$a̙_pi 6HKy!sԠcǎeG9sf+W8x`+V`ѢEpttD||<)\TB!B!BHRZZ YYYcڙ3g`ccohs,EG={6݋ 66'OFaa!|}}abb9998~8rrr%WEE&M'N033q$cРA5k \hkkcݺuaо}{ //xv% 6 !;fڵ9s&q5R~٨%B!B!&)=rd@JJJ|ߗT"}ѣGHIIaoo/_b5jttt`jj l޼111xe///DŽ p9bzfO__{.ƌUV>>>uu!EǏ|rBWWVח8 :cƌ={ --]㱛6mlmmqΝLI%B!B!&__jw)lٲNxHIIXnK<}GQ\\;v`Сс&ٳgc۶mBcǎE\\Ξ=^z53# O<… PS͛\#5PRR;=z["""Bx)>*&&vvv?~;PWW<<>۷߰ QB!B!BiJ -[`ҥعs'OIR)**W^쬨Gp]EHH={0h B(]vh۶-ڵkO.y{{{㯿_A(bƌ7o455Iۈ#u&LҥKMiqaؼyg_'9r$xjժ QB!B!BiRR!5kpvvR)Z7|>044|ᆲ,"==nnn۷'OY{m۶E۶m! ѦMe˖hݺ5Zl)y٘vvv=n 899ӳєlIKKǏGTTѥK,[DDtRZ瑗lj'0l0T#*D !B!B! i*Kzzz"00}A^׫]3''O>œ'O<}O>˗ldeeA$вeKiRTT"TTT .ıcǰi&tVVV?>JJJ #F#7bȐ!011E0a Wu>|| j s(((ĉ5 **D !B!B!`QD >NRTrakk/_"..B455nݺx.HT}ldff͛Avv6rrrPVVyZ<}w %%>eeeogx{###'''899!11AAAppp@ǎ1gL>]<ia˖-={6||| gǔ)SzjxxxH켪8}4 #G"::JJJ;?y QB!B!Bi"khEELHDGG\"Rrrr0l0!..ڵx<ɏGvv6QTT磨EEECaa\dffc߼y(,,Dyyg <HKKux/^%Ke˖VWW?FII rNAAA|wcIIIAMM rrrdeeYKվ}{bŊضmV\իWcΜ9?>444$>޽{1m4Y˖-pcر8~'A(!B!B!4f5%s1i$ 66zߕcƌ%z,;;666(++CBBttt8ɡRo~۟B7oPRRغu+n޼ J7oDBBO>?@<ƣGvYwۼ<25QQQx6TUUų]Ņ 444^m}-ZK.Ehh(6oތM6aԩXdIKwv؁yaXxDY6mٳ4hqС]_IB!B!Bi"JKK} .Ĥ^ƱѣG1fRdff |>1115) Ζ-[w|D"bccgϞ?DD_())Aaa! QRR|-))7o<׻ׇwԠ ---hBղe*kkkUVUJߦDQQ7oN< ???uz=ںu+,X۷c֬YL]====aРA2e W~ %B!B!<>D\\u<*E?aeeYYY={ZZZ\Gj2x<ammTرCg̙31cƌ*%lNI*))A^^^4++KT$&&"33xamںukjժuttкuko[DsK ǃ=푘bĈ?|Xd ~'L>RgΜ?7پ QB!B!Bi"[!+ >_Fbb"BaK=}VVVPSSӧ%^}I~w۱zj|7XhQSANNf"33/^gϪ?n9`@mۢ} h߾K(m۶bW333!)) eeeaذa(//d_J*EkKKK ** jjj\Gjp nݺ%ƍx5<==ŷaҥ8 ٳNNNX`#Qߔ```W\\ <~ϟVVV@P}}}A___աCgϞطo,_}fΜ 99燕+W_~s~ѣ9r$ >{ ;wDEE6!**J|رcѷo$ R1u|toBH___s¡)SqB!BHcc̝;222`1x<*++QVVݻwM!I׮]Cjj*&NXgϞrrrn2Μ91c`޼ywExx89JWzjڵ ***>|KKKmN*)ӳgOܼyƲS$իWPRR1 00NB>}0o<8995Y 1/^ ##ƥz+**իWKlz6 Q"QTR;TB*D !B!{%Zn HT1xċ/t邻w"883fpFVpԩF1R޽{0`rss!CpR†hqqqPRRƒ`ii DFFBQQ똜 xCDDDDpp0v؁2bСDԽ~Z\>x{.ݻ7o@SS]tAN>>lquRRRPRR<Νl <<.9#((g?!C %%wrss?x\rr2v*1@Z2B!B!B)mmmW^~@1c,f͚|W!;h .bχ4~GsRr\3gn޼cΝ=z4tcǎAAArvvŋk1I&IlVZ^^^Ė-[KKK̟?vvv+"uuue+++۷o޽{}6BCCq=E֭wnݺ[nԩAxxx磢EEE8q"KKKtرOWC$a͚5c iiiˆ#$27|ݻwcR %F3D !Jh(!B!z[lWoLx?<ʛ"wiԅP(cJ - |}}9L&9W^ELL 2<K{ 8::k zBpp02220uTlܸmڴ̙3qz닣G"==_Ftt4\\\޽{1l0cǎpvvƖ-[pEyX⽟ HH={vZjrqƍؾ}; e(}˾ ??ZZZ}:th)++ñcXXZZbƌppp%wERR\+Wې)틾}B^^Ǐv#vأGbȑt,X777Nf`Aiii׾ oIlL///T逤>{#6 QRk?Fj|^... 
k`4ؿGtLFiHmڴ?zӧO(!B!Zv-|}},+%%Ck:ugD۷DǮ_~˫ 5j:$͛ѣS9r$=ڀѣGԩSU~544?pZB&%%!88G֭1}tL>ejN˗qʕOzEVVꫯķ!,, ׯGaa![,[ mڴ*++qmt֭N%;;:txo|<~[x$'']FF7oܹs%2F%sI B|W]!JN߾} %6mdeek_VVӦMkDB!ѱJп?XJII6luqmBFFc7`2k_ ϟ?Geee&k&MT@ĉ9ٳgOǘ>}:v P3g7Ct*077ǒ%Kp!޽{ѣG鹨cŊPVV|x<i355A***0n8ѐ%u @P2dH"৽\]]0! &LoiDB!便1sL?˗,ٳ'"##qA_[x9<<>>> NB]j 0YYYx<8::ڵkz*_caoo_ȥ0a҂=z_=Bn`ff~8!//-[w&n0e䠬̛7Gbbxe֭[8t233ѵkWܹsU+/^\iii,[ ϟ?ڵk^?ʰuVzruu_?<ULlJ%uT'dee1ydějx_D_ZZ;k&! (QqCQp((EDETZ[jն(\m{o]rܺoKw֪^\Z@Pd'?gd 0~pLr3gN:q ?HHHŋQ^bؼy3ORV^ddd)x뭷pulذnnn%'sssk^;GGGtMTIRv1vX4mxQYf 233pB7Kqy/Z7d2d2l=>~!֭[ƍcѸtl?_}Ezz'**JӇQ^ꊝ;w=z4 $ ڶm &`}6ݻ˗~Xd ڶm Cݺun:\x;v[oWWW,^X+C~~>~9W֘5k Mhnn.>SMܢE uDv Q2~G;i5 IDAT ֬Y3ܾ}[TDơYfs6333]7CӦMdR݃J(Q&Mp}:u Ά#>|sss͔'OޞG믱xb!//fffs`ܹJ2 >&O (~HMMŵk׌n6<ر+VSпsP(ЦMz_]R]իWo^)/++ {BRm>34jH)w>%.]Vk֬-[ 2+Ю];yyy GaǎHKKC:uƍpqq)\ZjpuuEjj*ի*u'O@VƦJ!;;۷4D"A6mpI?~\מ'O ==7K{Emw\ IkO Dkȑرcvڱ3H...h۶-\/#G8UC|_XXC 沴W_}%t 5h ꫯ>U`ɓ'9LՅ >c/˿o( Y_'O{ѩS'@DDz%K: nBXH$rGj\znnn?>=U˵WsgRĘ1c DdDƌR ]b ^z۷/d2d2Qn]cdeeo-3HAArrrTS:дw;#B"'rC L&Cnn.GUСC LJAD-,, RRaaaB!""""""":t!ɠT*annR 3jsj\E$fL:Z)sIoF~=z0ݻwӧ1zh@1~x@PPiH|||pQɓ'xiiix?~<}rGRթjjժ\+++XXXuԁT*լiccWbΝBƮ!jD?~ ͛بj( ܿ_3'==qf̘]Sd2|ի^|9֭[vѣGP(6mQJeee;vԸ+Jǚ10YW7mk׮:QP(UIMMEz4vL4 &Mܹ{1P"*Q#r=8qs΅qJ4x`4$\wѣG !{q*Xlbcc pQ# @mZF~~hBTTݻrX;Ck?3жm[E7d2ʕ+矅ADDDDDDDDT!5BSL1҃ `̙kmڴɠwܙ4DEE hX8ւ0h >ڵkv-t """""""" P!JDDDDDDDDDDDDD&DDDDDDDDDDDDDd!JDDDDDDDDDDDDD&DDDDDDDDDDDDDd!JDDDDDDDDDDDDD&DDDDDDDDDDDDDd!JDDDDDDDDDTEшGJJ T*Ο?_Td"yyy7oPn]Ν;[V;ϟӧ+ܦEGGʕ+zlذ#Gļy0~x\zէ@?ǢEмysԪU ~~~Zm>Ÿ;/9r$/^G}]P9sFеa(Q|EFT*Z]Ud"&MBVV.\1c`߿?={VeөS';wvPbUTf͚U>?#fΜ+WbΜ9?~<[nT}ST*9s۷oqig}=w^}vlذAk#l}uVL<6mB~~1C +WDNNVVVhڴ)ي o߆-Z6l5j3fرc0`ڙyO~~>fϞQFN:w8::?իM>yOZZ:tP@1qD|Z>œ;/JJJ‘#GR=W\Ԯ],!kDDDDDDDDDDpmDEEaƌZڵkFE}!"## 4Sig cLJOE^\x݃v_>}3HFFTSDVcΜ9*uGL&+1CՆDDDDDDDDDDdtZ͛Xxxx@"FJJ |||si.]Fk[t;w{;ڵCFF6nܨsbRR}^o1?b#T*233q]]@T6˗/#55K.-SNkb+uSteڰCj5v wwb5nnnnصk4h[nA&{qUʱ~ 8m۶{;]t۫ԶJO-uQ}Q֧sЭ>U P')) 3f@׮]i&|a}yo{.bbb0p23MW ;Dkh#%%* ϟ/g*o<̛7NNN[.pΝ ǢEмysԪU ~~~vΟ?ӧOWM1ƕ+W*z YذaFyaz*\mէvL>؎"׮]Chh(Μ9fjǚ]G_6rH,^XՇH)))>lmmK+V1h |P*zː[3X;GLLL1 N}fѥ6xSmԩػw/^y|%TNekSXX9secj1%xa֭WڬZ ÇGݺuuߔ#sGW ;DMʕ+6m ggr"򵰰E0l0,Y-7p13CG пL8Q3&O-[… :+F/֧Pٳ1j(ԩS`}*Z@ѥS)1c r9BBB0mڴ7c/JJJ‘#G;F}TV-@VVVB"Ç3gޞ5j<<< NVVB0III077G5ی>g}^V~}t c}**Yv- kkkW||<̙kkk=zTkS1;e1Dm!jn߾(̘1Ck{>}t>|Z j] 44Tk[FF͞=Z#II]U/޽{<<<~zdgg`}*ZK}Gv>XdR3cXEj5̙R1 k֌)1m4̞=K.Źs6lyIIIA-s}:uꄎ;ԩSZO: ( 6cYSر/^RTxqEtUkS;1Dm!j–,YݻBk{ٱD"o[sr:6tҥBESXoE]|Xtv!##7ntB*>͋B_)ZF* Ekz}*R@ѵc)1cMW|utڵ0`JǘCDDDD$4L\|}Ξ=f͂ƍWȞc"$$e3ك[B.c۶mضm֭[qơ~:K;En޼j룯Uv$ """~zdddx>H!..H/c.YDDD ..Nmƍ [YަK;M4֗\.`iiGמ"E K|aRعs'݋=ָqcvڅ ֭[d(((޽{5[YѶm lRRf̘]bӦM/H$ҥ o^B(>͋WVeS!EuT6@Ο1S;Jbǚ_G޽;Jb!""""ӧP;|0;x+[kZׯc۶m8p@#$$vBHH+<<-[B( ]Q*Ǐ#..&L+$,}G| bɈŋ,1קO~~>N< 7K.J<6Rsue۱|r_{j=nژ5ܿEmb `РA/4TUrr2n݊W0uTaڴiеkWk}ׯ}%ouҥ> P=ېH$Zۋlc}*.k}L)ke1c͘3g,YX"""""ҥ ck=ַo_k͛yےH$8}4VXGGڵkƕ׆իqR@)Zlis}ʪ رceQyN_me/w1S$88㪈j&hh]- ?OnZ/ϯV1qD,YK#ɠRڧN:x>"W+]cUɓ'Z 6lc}*G.k}Lk1c͘VQn]7?O۷ Un+66VVV%VSm p}L>JM_?;lc aUtWnn.$ >|XlZ|9FW~}t Z۳`eeNVi9^W}c*۹vXX[}62D١vښ.e?>M777̟?ÇZ6l{{{uRSSY!Ώ=ֶhѢm E6@өS'tNZԩS Blc}*N_.k}L)k0c͘;vК<==~*J1cU^AAd21L5ϟ_z{zuС\]]܎X>x]Վ)GL玾a}c} CL1o7I&/_.sgÇĬYqƕ8"fر gJmoϞ=غu+r9mۆm۶aݺu7nׯS;)))@\\fƍ777}o޼)g,tOEj>!H###;7⊍b}uS])b1%~|ʒf&MIK.NNNCDDDDz &L֭[S# Dbԣ^d*>eJX3 `Z14^%*qR8|0;xA+ذZױm68p~w`׮] |e˖P*:'OƍҥKR0|?~8L0ʪOEj> 6c…}+Vc׵krZӥ{{{;vv`ՈԎեT6S^/ Ehhh>R@ѥcJX+\51kz-71QYXX`Ν0`N8SNԩSggg 0ٳ'NKDDDTu!jb߾}HHHr[\pӫEhbjhӹN}L 5>1QYYY!::~~~Zۯ_%KUGI`377Dze˰j*wC&$A733C5H5sТE Y֧ ={f׬YSN9LUZZcƌ17n@*5C^{Fk;DTEC ݻPPP ** 4@~~www3F/mUHRBGlT? 
Ҷ>egg#::}􁵵q;i{t钛* IDATAևk׮ѣGٳQJTn]|Uۮ]6NLB- z3EUui[מt=z>>>P*B).vQ m&h 5z_}_rW7Q\t ǎɓR#GE'c"ܹuaڴi5䱶|j433׿oqdJQ͖c\7:T?`BG!""""888`زe =zK.a􄙙*L"BG)UřԈʖCE轋#DJacc={䚘D& $$%Z̓'+t"""""`ffooo,^ϟnhy!*"H$%*Gzz:DHR4h@(zc|;թSD@@plRXDTI9r$OF I.]M6~: EaB ""ڵk:u*222R&[[[C A޽agg't$rssw}Z D"tOP^z%*GFF;DE(11vvvBGvAXYYaϞ= D޽qajJXDTIAAApqq~˗ G"##.]L:wf̘!PTݻ# xW!֜ ݑ͛7:(Q.?CTtN+B &5].Q"2 KKK} ..NHDTI Bh"֜9s:H*JѣGZ@krg(FVWWW#*"%*D%"(B[b۷/<; *!""1c8~ɱcp\|Y(DDDDDz%tghz燀? -t$" .:(j|3f 5k&t"""""0tgD"C(9kgggԪUDE))): hq QBZZɍDTd2{XZZѧOcQDFF3gD֭#]vի矅BDDDDW\;CЧOח7T*Eǎ sppZƃдiSRFFGLvvD"+`ffO:`͚5BLaa!"""0~xFj&} H$pss?obccADv#D'11DD"HtRHRc֭:hΜ9x1{l8;; G7nݻw Zn]M6o,t `nn$$$H222!*2 W QD$(D%KC -[0p@c `BǩvyyyĤI!+W߿R[ƀ^z IWHJJB||<5j$tORi$9e$&&Lh%"Q?>d27"((HHDs_~裏ЦMTkBDDDDT%jccNHѦMXYY!&&"QR!;;#DE&11!JD" 6 k֬AXXБ{FϞ=1oD^^rqD%##2"FJJ ;Dۛo KKK͚SL:"** Ν;Ν;  .]zaBG!""""ڵkO>:ڷoQPTPHJJBƍC$*鰶D": ORRj5T*QN*t"10c ,\P8DTnݺ_ǂ b/^"""""=777 +eٖ"Fzz:X~~>2335Wq(@ L#Fs:`ܹ3Ξ=.]G>s4nBG!"""""*;Ν{"66gϞŕ+WvZSx&KVɓwp}4ho&222dee7w}'t\ٳ3xxxhիW6mkѣGСC%rss0X[[N:iӠT*[#Xv-,,,аaCX[[aÆ裏ФIF?Dd4  2Ϟ=ŋDD/qww͛={Go߿+VᅦL&:Q1ؿ?.\'NŋHOOG`` R)'5mW\'r9r9 $>fffh޼)k+WPZ5]6gFHNNFbb 6hPXX7|BDq7o\,ܾ} ɰzjHR!//W^E6mN[52v؁e˖a̙B!DEEa߾}8sQfѢEpvvưaÄBDDDDDT-[`Ȑ!pIx޹P NNNBEڵk777ٳg.6=nAA X;̛ !ZK},//j F^^^Z{Tvv6={t;CvO?/ӧO: :^ܺu | W'"""""04mڴ̵)͡R1U'9\.GΝ1Q3'È#1ӧ̚w߭Dԭ[2k"HQ Q߿?ߏN/HXɓ'Re ,(t"""""R) ,_̎8GGG_ 4>%XІ G՜f@޽KI&ӧO5ٔJ%\]]K}YfDQ"2Zz¾}aL0k`+ y J]uj@ӧ SH$|G%AT_~FYgJF 4hPB{ǚ%ޤ!LM$jJgϞiCTBPh9}8<|||0h |7F1~?sssZjiYZZСCh֬ ر#9OOOTÑG H'7oD6m T*1uT,ZHd5KNN5jǏkmH$@j}gB@j\aÆC˖-*Jmr!!! HISG+ L>T:Cp\YPPѣGWc*p!:taaa:RFR733?ԩS/22'N0sA~!t""""" @Z9ah58qbN۵kKKKL/P*0 2 fF۶mr%2 vR5ۃP\]]q >>ܹ38 p%5k7n+WO>.t<"""""J CN4#J%T*j)Shf GϞ=NTHR5J9\ѲeKSl;w 7 L4IDԳgO{Uaa!L"p"c(URDpppH"44TTT9;;ѣt%6#F@nnnrC )$''.> Z Z[h7n@T"??))):u*?GGGQTADDDDDTD˗ФI)tz4jÆ L&Z(8 :圕MR)|}}!H H8bWzBd21 4:ޱC,,,Zptt(tZ¯7n`:R1pss+HAAJE?;z޽{޽{PZ׋4 7ojCAAV^f͚aٲeZӸ.]`ܸqW^ysSSSq\|111x"nݺQ/Ad"""PXXVZ!55.\@LL ._;w 55eɨ={$ڢAG׮]O Fy) &&;vZjV5ɓ'hѢ G  $jR~HMMlS(/2 ݻ۷/T*#yWXӧkuԫWɐd&#RM3/Cv1Yq>>>.;}||o߾jNFDDDDDT5O>ݻ1vX4oqFᘙYfhݺ5ѹsgxyy=zϟ⯿*[FV=zCteZիWqq\t 'nܸ2^zW^A^йsgTTqyyy8{,Nyyyy믿p5ܸq.\@ll,,,,УG :!!!ӧOyflݺOFvv6ꫯuhӦ Zh\͌yDFF_7n?3gpM4l{FXX^u AI׀ݻ}pvvv~>}PՐJđ#GEoooWБ4z'NNB݅ETe /RKXp!N*@aaabIR( /pDDDDDD:*,,7hݺ5 }k׮PՈÇsNСƍcZ㒞~-\^zaӧڵkW5[srrp>|7o_~᭷ A ܹUVaHMME`` ЧO*wرcƖ-[!CwEΝ(5 IDAT?0gΜU}vXYYaС߿?<==QN*}]9r{ݻakkѣGcĉhҤCCVqHHH\.7|pciINNF~`ffh  fL0PT~~a&=[l)hffwy˗/(p-lٲvLKKK>|hĢE0F7obݺu)SՌ\˗cٲeop4o`y)]6l@ӦM1{l֨s7oĢE~ztƍacccŞ={?`߾}ׯ>xxxɱcǰ`=z;v, |)))ظq#;\r5kZha3THdƌjT633S D)))Ν;]]]>zvک=Z*?j}nꐟWd25~ rmllԱf#"""""*˞={ h~R&łVT *6D 5FQb<K"vc/b(BDi6إIgcDEݽgw~eYkgllľΝKO<\ڴiթSh͚5TXX(iЊ+Ԕ֭K[n}J_}Pѣgr 6tuuK.(yUpر#d21b%$$HԶm[եǗZu]WǸx"N85kB.8qy\pQN8/Jtttc|}W\[n…  ~i+ʏQR%,^)))֭>swܑ4۶m-N:0DDDM, 4@@@&M`Ζ</^̙3r!)) ;wYÇ#44vvvevU8qN*>h:-- * ++ (**BNN["Z[[^zhѢڷogggLj֭?___ 0 ""%%VVV +;;gΜqE$''#%%EEE^<#--x733Cz`oomۢ}%nɘ: "L:W\.'EEEq9$''ի-QU|Nׯz۷CM<ׯGAA`iiHԪUKYc1cm۶m񁋋 6n܈u늎TouVWigJ)??&L(~\ ,P#99cǎETT6oތ>>kq;ϟǀף{#+,,1g|gسgT":I Qn/^Ď;믿޽{Y&:vƍ~^iiiHJJBbb"pi;w/WwIKKàAvڏn;33?Μ9mmm899uְׯ_$$$$ ..sW}bȑppp茌2|,Z{.PTTÇc׮]8r233ѤIk 4-Pjշ}}$%%!)) 8q`bbwww|ҥKGBOO8q,--%c1+L>ׯܹs1{l=;288GFV+>|!C 66۷oG^DG*QQQ;}#G"""6m!CDGR]vaܸqڵ+mۦR8 8ؿsBMO?iiiQ۶miƍtU :r?ؘFE/_VHօ \rƹ2޽m^tFIdaaA&LGҋ/9996l@...E͚57RnnBgL*f"---@͞=H__ @G)RPP׏z4w\zY[[`c1+tСUV"""D)ׯ#Y[[YW\5j3S*G*UeffQO奰ӔI.ӊ+H&?#)\.S>]VtR̤Ç!믢(  Ӛ XVVQjըRJ4gvRͥS.]H[[K/^Tj]|LMMK,*N8EGGS޽I[[G8:{lX"YYYѪU(;;[}2HӧO'G 6uӧOӧOiڵԠAD4h@c1c4RSS7nLR߾}bŊtyq>ș3gܜwdooOCq>Ȇ HWWV^-: #5jIiTB:y8lٲeK[lP\e/޽VJիWUV MTTTqAiذa*yV6 Ѳ.!!:t@*fpTT5oޜLMMijS]=6mڐ *hԩ%QXXH~~~dbbB}Jl~UdקΝ;SRRH!$$llljժo>qH.Ӟ={ҒիGG1c1Ըqcj׮Zl>&M 
*P\\(C&&&4m4Q>YVVPf(==]twںu+EGdIIITrer/R;GJ!88tuu_tsR֖RrТE hiiҥK;w.Eǂ#o|w={6u'O4HJpIƢo߾с+((5kp׾#cXhΞ=uCll, ТE ޽[t,L#!!  ZZZ4hбcGt 3f@aah1c2ŋpww.:###ё>z;ի()) nnn6l/_.:'+WBBB ѣG抎T`;ի8~ Ł0sLq>FtO֯_?lܸ#Gį*:NG?t 7nبQTPAt;u:t;v`ذag~_TvΧdDEENtG˖-cʔ)(W^y&oV<(D˒۷offf8tEG ;w7-Z3f4سgϰa!++ hӦ Ο?m۶a#~{#G4EL9rǐ!Ca舎TjEEE;v,/Mt$c1ƘvZhӧcΜ9(ͱcеkW8p{Ð!CWWWqfܹXnbbb`mm-:ƍ"66+WG)йsgxOL& yyyū> mm} ԬY*3iݺu [+rer <gΜ3gxic1ƘBeee#FŋEǑ}… hҤGGG !!!eiӦ!88022+Wb…HHHr899X^SN~2շH[.aҤI X[[cΜ9o$[/^82yl2\7oބ8<H:QM/h۶8 'ѵkW<{ OHLCUVU~wԉ'ЩS'`0 _?@ll,*V(:=z߿?֬Y#:c1cLxj¼y0qDq$-Z ::͛7ߨ(j BV+Vƍ닕+Wƍe\GS,RFڵ1{l᯻<H:hɬe۷1a|Y mmml߾)))7o8L̙3>ĦM4 6m&Lw4͛7cY ʕ+c֭X~=BBBDa1ci۷CGGGEf͚ ˖-ߥK?/P k.Ŋ+0y2Y s|111?1g9D122¤I|r ssP{Fjj*N> ]]]q_~'ѴiSqpZj_=zG ڵkc05OOOXBt8q"~7$$$Lc1cհaC ,EGwEʕ_jj*jԨcǎCJOU͚5 !!!}aѸ{.LLL$Sn-[ĪUe?~<._'N Qzuӧ<H: QMtA`_ }gϞ=z4r8Lb(d2رġCDajlΜ9ǢEDGҥK狎c1Sc7%:PnnnB``$N:e ^^^tbbb$oΝ0`@.Q leb^^S_wзo_ܹSX^rôi0vX4lPt">>05c$%%IMH7ѣ1uTPBB6l؀˗KzHFFFXt)֬Ydqc1cj* mڴA͚5EGJKK  }]valllЪU+I8vTÇ СC(((@~J O<?/I9pATӧXp(]6L05TPPEaenZx1<˗/}}!SSS:{;o/,,={k6(##Caeԩᡰ[n4c qߘ>R{-Wa!(O$㞙I(::Z!ϫVH{RI\dllJ瀷S@Dy\ {{{1T!jժDQ KLLDڵ/:JGBBLE]~<=q Qc1c*2LaiiiSSS/@(ksvvFzz:BBBykנnԩ7oTcd(((PX{&帿o̯]Vh¸̸gUW)kU?:HםW(d(,,TH[s)3D^ZZ>|[[[QT-D`*,)) i$Zj5jȈc1{#++K_|Ν׾^R%;o711)W_RXL/_^a[%ISuwcccdff*RoSԸYTָ׫b\\dffQ T&NEDޭ[uD %%Et RRR`cc#:yzrmI*JIIA:uYYڵkc1{KV '''c^{PNt坷[ZZѨQ#XXX(,7斖OIqܥ*(gPY ~.ޤ"v\e---TPۊ:wK.?~<~Xp!.\iӦK..ΝøqÇfff6lBW!==]a1͓k(k}+Qzz:_7oc1c4] Ɂ\./u^mجY31[nœ'Oyyyطo֮]͛W}6,ܹ {z5s--RPqWOW1V&GQ8^wi ) < l̈́ʂQOѲeK,X8~8:۷cڴi2d\\\ÇGݺuxb@tt4ax{{v333pwwGpp0> ?҂EnP}UkGWWe.+QVV~1Ѥy1c1V:իWǏ]U Ehh(߿u/ {;6l[bٲeBf͐5k{;r[)SYYYXre"Eu+իWŋViƽ4c 㞑ϟz k߬w9TҌ9qu&([Ŋa``۷oAԖ;4υ~Ks)sSkAAATJMj*ݻw mڴΟ?O˯}_>}ԔΞ= gϞ} FZZZDzaWH[L3O?u),,,}v*WL{UX{Ls;<==֞&]7Za1c14ӓ'OƊRjzVZ)\No)`233SJ"ŸGEEJKKSJ>>>ԧO,R˩YfJ5jDVO%sA'OVjs@ɔ=Qk2Wr}g™QQQ\tk׮}:u"͛7X_ꗱ""ɮ];׮]+}vϿZ`)nc1cHNNE"++ Jiϟ?ǏҾJNNefkk$Β6S$I J93D՞zΝ;}RJPmWYYY)$kFF{3cll,kĤT)땕]R;|0c14\":F䠠@}\rzzz]Rڷ.\rvvvJkׯ_G^^P4)=>>^^;;;KRs!NsQgll/^H" pر׾~=ԩSxniiֶ_FB!Y> ) ;v,--Ku()**BNN_?D>]xa1c^v!:FlٲgϞE\\/^R%""SJx#""3 rOVZ$Ÿgffh׮~vٳɑߏ%sqIUV o]xx j| "xB!V\[6k #F֭[@^^ۇkעy[#޾}iii,ܹ y]B]--RP "뇕H$Д(c1cڷos)==e3f 222ٳgFJ'<<۷WJۯtJCQ DEE)u܍ժ-Ÿ:u pvvVxҦMx."""ЪU+(w9MRJm)]͚57oD&M>PXn ,Xw^aÆغu+-[///4k XfM'w+FFF2e +W0U7oZa1SV-X4Fi#ŵf͚qBҤxyc1c]Zn "ٳgѹsgqKOODž dӶm[X/^@rڗ:8u$i5"""вeKɋpFFFh޼9?ί;_xx8v*y<N9@H=0ƍ1`QJwxΝ;>1~x(֬Y͛7ˢNPP&LǏ+.]MѣGt:)jժCFc1cLsvؼy(ܹSLAjj*Ovv6V~ C QZ?bȑx!BBBϩS[nzJKlll0vX̘1C-Z$%%I޷y&lllp9lRy?.e_>EP)qlmmEGa*~vDGQ)ɨ_LEիWUܹsw^$$$@GGGXԯ_Fܹsuְۅ9@95j@ڵ.:J(((@dd$\]]EGaj|zq1+>}V*Sll,x(c1cڢ_~Xd(B]mʗ_~ SSSlܸQ~UŒ%K)i1fϞ9sF~UAZZ֮]ӧ -L&ôiӰzjdee "Bdd$Ο?3gssD5D~(:J믿=z@Ϟ=Çvލ~T\Z{n899Jtc1Ƙ1c=SN"'O`՘:u*d2}d2L8+V%[0?~ӧOo{{{tϗo`ll,t9r$tuuzjQ$ED;w.[[[q [jK.iӦHHH8Byzzo>Q۷/ {nQ|27nxۋTܚ5k??(RQQjԨ9s`ܸq0c1Ԕ7N>Xɋ9]YXX-Zٹ̬CFеkW]VHׯQFؾ};,$ԒФI[{nxyyʕ+eΝ0aQF q [j&MQF EL:tLL1 Lno{EӦMJexN<):PǏǓ'OпQc1cjlѢEHMMŦMDGDTTrJaՅ/mۆX!n:a2`ʔ)o-,M6mڨT1-Z`ƌH"33gU  .jѣGcӦME͛7(Lغu(dggc=z(LMXZZãm_VBϞ=QreQc1cjrXd f͚$qŋ/1tP ~aĈEٮ\yaٲe077eIŽ;u։---lذG9*Y9@eEڵ'NGrŏ,LLVX___ܸqH+V7/:Sh޼9Ѽysq$-["&&M61c1hDGG|('rʉ,8::y[JOO#/:G999a9r8JqEnk׮Ř1cDycڴi8wƾqFL:ϟGFD)Qvv68$믱rJ#]vW2f"00PtcժUb( СDGbpss?c1c[v-rss7߈[lo(1v܉`)*a -I&MpBL8(\FF .qÇkbNKl14447Fnn.JQ 3acH*##=Lcccq2117|N#'wYz51n8Q;w.pQ$uycܹ0c149݋ ,YDt:z(ƍ:uLt899aFDD8 `0115SLw0ׯrZѥ;v ;;DaaH s]xxxgϞ G=III8xǏǪUТE ! a?bܹw8={6*Tӧ?+WE2)))?>|}}ajj*:SC۷ǠA퍢"q$j7/mڴ1c1a乃ٳݻ7XYYYf*hӦ zꅋ/k֬ҥK1yd_=zÇE* ԯ_nnnxHL.cСz*QBёJǏ˗1|prё>ǏѡC4lahذ!=z .`̙d=̞=cǎ),,-ZEGD\\d2:t(L믤G HbРAԪU+***;w1m۶MtIlܸLLL0c14O?D2v-:'*UЈ#H.\.M6Sjj|="\2:uLUVK. 
I&~-j׮R``tʊڴiCϟ?_M&&&#:G9<ф §O35nܘDyMrr21mڴ㬩s@ odtQ*77_~0 ҫW/rtt>r:$&& ׉(??<==̌N:%$קZbІ ^zQQZhᔑ!(K999ԫW/?6lHM4Q ҀȈfϞ-:G),,/*UD&&&4dݹs6lHM6S.]JԹsVTTDm۶%mmm&#cccJJJEi+_>eff4ϩv#:$$$Prv2eG.S>}y攛+:RdggSFh0c1ʐ;v}TXX(:N?+rrrh̙C EDD9uЁ={m4f222KSЄ H[[֭[EEE-Y[[ɓ'%L222cǎdeeE/_&"'Og}Fuԡ8!>ǏCdiiI .$ڸqX$77<<<ʊ… TR%rwwPe111TfMjݺJ6p5k֌,,,h׮]])YTTD_|UXgJqATQ׮]ٙrrrDQ`dtQ$LFEᲳe˖!za?~LVVV4ydQLJj֬IO>1c1V}Zt)((={Rɓ'0 ti244+WP˗/'###:w(1c2HիWGSjժURʗ/A ޽K;vr͛xEUJ=G*Uk"URժUɉRRR(##8uܹؑwޥ Pƍz~\.+V3F΀$z/鑗lB*u^kIݝԩSb.33Nzj1<{Kק5k֨0jРYXX8.ˋŋݦNs@j(<<\thYpQ :TU&h*_<͛7OtV̚5LLL4͢"$oooRQTTDK.%===zdvvvA4gI}ӧd25kk<ݻG5k֤A@tmر#͛VPz*ծ]K AdiiI*S:y$5i҄*UDw~VZEg ӕ^ff&Rݺu֭[]ܜ7oNgϞ(rھ};UT4i2޸q<<}J˗/U-]rrr޸۷bŊ4y-ZDGS:uA!;fx-ZDdeeE~~~ZW^ &!Ӿ}=O֭+U7oޤN:)dq͙3ǧE;wѣI&Q-h\hX'O={y=&==\\\QFsQNH[[#qcyyyDG_}ݻwOȑ#dooO*U-[|NӦMrQdddGѲ&66TB۷We%ڶmĉʄ(**qƑNrKmRժUʄxUo^ۺGѸqH&61c1Jaa!ڵիGZZZԮ];ڱc+|:tߟܜ-ZDPե{~TqqqH+WyxxPffG6`233#TKKK[R6mHKKlmm˞={HWW,:r+WFicbLfj:mI񳎑ʩV+VaW]KCCNRXKDEr(:4\?ﶄꞦx?0sufws9s@ `ZZZM"Rի>?h^]~'&//BCC9LY/^`d}it}Ԕ`lڵ,;;Ņ}g5{fffr>OC0+++&++,XР/G\)((4x_ЌPASַo_֡CFo lܹL uI;!lڵLVV9;;K-ʇal,##Cq',,,iiiÇ7kِ!CX֭B!6bİy1 &++ˆ ~'^`,>>[;)++39996qDvz}.j*bر1ccclݺ2n!V^^N8@ `***lܸqlÆ ,!!E'O/˗!C0YYYɜYlllZ)))~LNNalɒ%.*2999tR<mٲ <`llݻD,==$  [nń "k׮Ś5k`bb-[{Ҏʕ+Xb!###XB!P"99Ǐk B)j ={D^`hh%%%N2ԨBӁdm~~~055a̙1fN7ԫWp˗(**BQQb1塪 555<ݺu,y1i$kB,ĉ_ \ ~aɒ%8z(&Oq7= 9993f270>ƌbccˋ}g[n,<<\*90֥KֺukfVXX(.]b p#.QF1999dÇ#c_|EۛB!B0ofJJJoXyyyl޼y糯_lРASN8#F4{5a,77L ÇsBCCcoa,11QsRU&&&S1>Hpp0ӧSUUe+V`u;'Np\JiGa|idihh`۶mv|>F 9sBPgdd`Յ<<<sssMHc3wҥK ]]]̔UUU8}4FSSS#11~~~ n=CHS000@TT8Wo]3f8|0"""Э[&B!B>U}A@@-[˗/7j,---͛HKKÀK.O>%ܹsΆbqohh`\v iiiՅ `l޼AAA_ii)TUU֭[1uTXZZ?<066Ξ= eee1b!00FFF9s&cZQ]]v֖čqiiHDIAܹsqZnSBGG/ƍ7.yyyػw/LLLЭ[7;v VBff&;(**r6!wwwdffGA׮]ajj#??777tvvv͛7r6!`gg۷o#00<СCѳgO ##>|kעG6l233[n5L !B!c5}t̝;SNEnnn￑AAFF8{,I?^ Exx8/_ޠ1>sɓ 6كgʊčSRRRSi.x<m+++;wttB__SSİa0k,?pwwotQСCX`0k,rH9dNyyy8v:Dj &&&033z聶m۾wJ;ӧO;w ** 111(((!C0c AKK ΊKIIÇq!deeAWWfff9r$z Z\TT4ܻw111Dvv60}t̘1Y]J!B!*PQF.]@Рqh"|wXf ***n:]?./_%֭[WWQ^^)S &&|>—_~l>Ş={pmiGyCuu5fΜHDGGgϞÇ033Cq s[F)~iGqY1c~r2ѣG[b޼yɥ|nwA޽GRSA|\#""x1BMM jjjPTTc (**˗/jc, iNB!pܼy.Zjuuuhhh㡬 %%%(..Fff&:::055͡-3#Dp\rо}{hkkC]]jjj (..FQQrss:FsssXXXp?B!Bׯcdl۶ [ncSSSpBbٲe?Ǐnj3'//u񁿿?zUVa̘1J֬Y0I;[ B">>.]B=<ֽ{_`С8v-ȐTbccaoo/ҿ~_nnn˥ (**ڵk2dH DI#-- <@NNJJJPRRӧO*ѽ{wt:uO]ɧM,ѣGxPTTܺu ߇ TUU;{ק=A'ٳgHMMŃB@(B @UUBa``6mH;6!B!wĘ1cpȑOSNŵk þ}MMM_ٵrm.֮] ///DEE}p1$//cǎEII .^Ν;Kl޼7nDΝb [nR>UUU2e /4heݻwannccc=z+ 8|06l؀`޼yбcGN -֮]%Kp:6pܹ:~ JեK/={Ң۷+WDffB!B!4իW 쓓+++ٳի{/,,`bb 6Ȉ ?~W^;Ɂ%p:Uyf^^^077DwrqqgsGUU&M;w &&^Z`aaѣGc޽\PH *pō+,Y"/՟;w&M˛JiժvڅI&I; zB!B!B&M:?1dT kÆ HOOGvЯ_?L:=> >|8Ǝ:KKKÈ#:ooo$''cǎn޼)SSII TUUtΆC`` u m5jƍ"P]]ǏcpvvƨQoooC`kke˖b(T1$ B!B!BaϞ=Dprrz15jFhii{8pEϞ=ၒƞGAFFGA֭1i$TTTq̭[0rH.\ݥKٳС cccp}oURR&ɓ'<>!!6mC(//ѣG̙3fffHOOq_t///! 
***C*B!B!B !$$ItR:t?Gll,ۇ'Nōc`<}8bԨQ8s =~Nc$%%K.9<7)))ٳh׮LMMӷw5=3g΄x<^'m?~ 777kW >}M6}˰‚ vZ# T%B!B!B$O>XD"8;;Ʈ]Ypuu#66[֭[#$$Xl ,, GԩSqa5jCCC>>Vlcpss/@?~0aҚ$C6m퍬,̜9?tooo{^XhJ2> ;;;]'>} K;Zv .ӧO===dJOOb( яNQB!B!B!MN(= wwZ-'/_KKKHm咜 btTHJEElmmq9DFF̬>>ÇM6BYYYf߿GիڵkmRW8pgƖ-[tR ::8qGj999ڵ+燇M333߿?N8@ ,\B!B!B 1n8 ** o{nTWWI )|}}}ÇGLLTsqǏGbb"bbb0hР7QTTDpp0 W_Ie,χ5_(} )) @Ɠ'O`jj:ٳPTTv$ QB!B!B!Gphjj~c1w\L:O<`122‰'|͕vwJOO1TUUmm~A~|8uS~?NBrr2 1m4Glڴ }:w.\K0}t2nJDFFSNXb&OLڵ ҎW˳g`ffMMMCIIIڑ$BUU- J!B!B!D<==CVVcl޼]vӛ݇Æ CTTBCCqMҎ;w`Ĉ֭"""U~mʔ)3p l=z Ϟ= pQB!;;[i@PPN322BDD"##X,QkVVmۆǏ˫^E XZZBUU.\#Is\*B!B!B*̙3֭C`` <@ ѣGq]O䎹9}v6mByyabbC"$$ kɒ%pvvƄ aSSSgڵkҥ xoXr%֬Y XYY}Exx8̛71 l `;v,4551`̝;ҎV/_9.^uuuiGm J!B!B!C~~>+WƦcvǎÆ M9s&RRR 6k׮ja4<<氱'8i#cǎųg8Hɭrm : l-f=<<ӧOcܸq  mOOOHKKÁ?~^^^Յ'$c , 6l°sN|w.1}EAAΞ=d899q R7/_DTTڴi#HMFII 2222B!B!Bybј1cN>-BݻQQQyId.޿;<(''']"sg}0\~ ,1N> \_{|}}ѽ{w١W^y&BBB`nn>唠***`cc\m۶ҎԤx<i(!B!B!R<<<0w\l޼e_sq߅_vHaؿ)pY:t6l|-s뢬 '''$''#00ݻ7iX ggg"** Çt3gvZYӱ:~#cNc7 L0鈎#I*D !B!B!)//mۆ8995FFFعs'/^& rrrprrBjj*{Gظq#d֥W)|X[[#..رc1p@8p"QYD"fΜӧO#""lxb8vV\ ooFwUX[[COO.]͛ ww^&MBrr2ѩS'iGjK!B!B!_ !!qqq=ztg#M&L,^?P(<0X[[K(uݬe8::JHccc|ׇ =bhxx8"""пzP'Nıcǰb [ޏD8paȑPTTĥKpm888@NNNH$=n߾htUڑVB!B!B!q} :B022Z___tvvvZPQQeː~ ֆ rL</^DTTLLL }7?>&L&ѣGDAABmt___bѢECΝ憜C(/_FLLTZN4 G˱uz˗/N:oÑ'NX‰%O$aڴiz*/HRRz ϥ3<vҥ ~̞=[QiQۇ+W"33SQ!B!B+::'O :eeeiGBNNGGG5bӦMsNcMM(ѣ_o˖-CV8_~%qEJ!qmbvvv?qUk׮澨(\p>>>͗ ==Zx<(**BYY PQQ~QVVcƍ1c }(--Eǎk_YY)S௿Bdd$ |~\ؿ?Ν 8;;qgP^^ڵ ?>-((@ǎ߻yذarJKuu5 UvX9s 44QQQR2TWWOb(333Ǔ'OPPPF1TVVÇRBQAB!B!BH-tkڵba…ؾ};<==񤜴6SSSX` j0(|>5VCCCo߆ Ν;K9mm 8s JKKaggfBuu5l\<'O~瞖b .l\|>'NDtt42 0K.œ'O0qD"..Yc3gmۆ;w455qȑ#b֬Yo|<s΅BLjD"8884z*++aeeP03g ""}tΖaѢEvĉMHe.%ae.!B!Bx >brrr߿?BCCH\1tرۿE~~>եa1d0 ##UUUox<)k~ióg0x` 09 o***󑞞^ v277yՕƍ P(@FF;wرcaaaH{mc !!!TUUa``HTS @CXAVyyy̜9bH$¯Zs]^fΜ9*6mZD CP\H?FFFF͏H$B^^^ʤf$//"m?vLB!B!|b8ZĪ*<~&MiTVVUj;v5^tt4k;?RUUlܸ7oބ&all, pcРAHHH@UUWUUaӦMͧ#GZ lmm9 <==!j:=y'N6&O# fxB{S>|ٜ={[[VVVbڴiXk]ׯǾ}8%?[_򰱱B"ɡ>ĉq&HDHacc3gμOvcoB!B!4"tҥfxضm8cĈ}oQ7oZjtE@@zŋc鐑Br`u{7n9s֭XdI6Xp!+o߾ynvclٲ3g\]]ѡCi&=zpqq9s'''߿׊-Zso<_]vÇ9郻wym|>ߏ3fp:oKU󕒒)% ghh~rrr011y044lDB!By{{իWu 0ՇQTTױ{n kQ7Ο?ׯY χ%D"달,L0 .D닊WQQ߁F.]9@ ֭[9oUUUWZ ˃+G !!!Ӄ8ﶸ{Frr2vލtM6ݻŁ*l;vkƕgs@DDYgax%sT...o.wc*f̘eD"7a*BZӧ?:^ίkB!B!.edd`Æ omoXzuboK(8ݳRcXl[ۿ&C^j9n۶-<== Daaa㹹A__霜%dgg~QCq2L2 5vqqllߺA @UUx B,ǃ9BBBb޽;|||PXX/bȑؾ};Ν;___aܹXnZ:;;}XnΝ,ر222={6CC÷~y ޜy:eʔZ}P}5ƾk5n8ܺu /_ȑ#9SQQYj^3BǏr* `4X,f۷gl޽ҎHHg&''OX,vDB!B!|Ν;|~&dee k׮[v-Ζvy%;w.x5(''[o]% IDAT#hTUU:ux<^5a|>u֍߿UTT4h={2yyy6sLcl5U|̮^6mڴsz}~zzz}vʕuL(r2.cݾ}uyziӦ ۴i+))iиϟ?gLGG)((0x<aC a5||>6l;qlмb3###k-߿AҒ0yyyaNǶfx<6qD\^+++*iG0Z!JuM4I ij;YfٮB!B!+ojV***BFFVVV8<?~ tQIFSSv©SЪU+C(رcҎN{Enn.c5[E oƞ={p-a(QVVSSSDDDpzn:::8r"##a``Ps~HHHdCEE<nKzj||BGG7oƣGۺukc٨c 1TWW#>>[b?9.]F[xFRR.\޽{לP(įέbUUUl{ٳDׯ/x-w,KHJJB߾}k&##?R*BZ/o>IJJ7iB!B!Drrr  C['h6m0m4L2۷cy<:vi5H:6Dqq1~WBFF7n܀gs #F@~~~Mq={8ཅlΝ;E!;;ǃ N< &^ZZϟ+TWWɓPXX_EpYDGGsVȀ%cׇ Ǝ z--ba۶mom6fff8}4knGqqq3qylܸ`a˖-7n\RC޽qIN˗x011Ej6m\~sIۛb53 ݻwǃj-++Gbʔ)RLEH{{Z߲GZZSB!B!/bҎO|˗cڵ ;Dԩ;VyqU~JPYf!00ޫ1п\px455e%166Fll,{73OҬYJr*B?kkk>>ԢU,ѱߊt}u/^`ɸw^VtVUU!++  NMMO${O4qӫG3f̐r"BZ3f@,===)'"B!B!$%%1yyy())AQQb|ϗBڏq=TUU}cگ O>㹊GGV̜9n݂*̤/21sLi!B!B!MMMٳgx)?Oٳgx9o<~Ϟ=C^^***?Fvvӷ\Gu% 999(**B^^zBuuu5y%PRRHgϞ5"Q>?~,,:Аqcv8 ۷ǃ1bbbN{Fv8 ^  %%%~)޽\1bBCCѾ}{DDDH;[)))aذaҎA!B!B8.]K.= Ϟ=Ø1c>N|?;n݊CSSЀ&TTT4W ###.Qx :rHZHZZZ܄1|pڹsp9iǨӔ)S{쁫+~j`uuuF\j޽{zv B!B!"5E=pPWWl H$z܂gKYhDO~~-_~<}gѢER?ǖSPP>B!B!BW8!B!B!B!-*B!B!B!B>ZT%B!B!B!| J!B!B!BhQAB!B!B!Ѣ(!B!B!B{wWcu:u*R%0v }M nLL#ƾg+Ȓ4Le#BNWOD^>ww>}-6DHo!JDDDDDDDDDDDDDz Q"""""""""8~8=z/^ GS}׭[K.&.`pvv7\\\ ?p…B?@?o)T*pBԮ]eʔAϞ=q=WM;zΟ,XhQ׬>4#m}222퍺umcC(/^tTV x1}_A͒={`ڴiعs'Je2o۶ 
sŪUtqqqhѢ^jlmUZj-e}`eeK.a߾}|2ƍzUM;e߾}ؾ}{ma}ϔ)S___=޾}+Im%"""""""""ʇUV!-- z.]5kքG[}aРA5kV3+JbĈIPF ,]0m4޽׮]+qק?oK|5j*W L4){/>Y̙c{Νann1 ~OR;@B+}X͟;|}c[~O>ަڰ!JDDDDDDDDD+V@۶majjc7OB&aĉxKtW^ͱݾ}rl›7op}~;vxFux_}~\uM$&&?v]χ 66mB=PBD_Zje[@˖-i6jkbZb.otowؾ}; x>/mU:!ߏ͛zzh֬8ʕ+#..rqzꚌ {c:묆L&CVo>Tdy ̙֭[cΝ駟rع>RG߾}?#ݹZ]ߎ;}>ަڨ!~hjڻ>ͅ޷mۆsbժUpww v트8/mU/^Ç(_|TP?3BBBЯ_?,]L~toY_eoX"5NMt>Y*T3f 885̙3s}]듟YJww|Ϯc}GWjSٳ^Tm\P5vy1d} +JbĈI]F ,]#BڸX5KLL6` Ѩ_&}TݺuIII9|PJmx©.'\.p1*U 'Nȱ.'>WСCaiiY!0i$X]MզH Q]]5 U\lWC{DD*9?y&&&hܸq.]ZA>+VD-`ffc.'>6maffѣGpwwN:cGs}6+WĈ#rh6Ejby-T b'/\@wzZw*v+++y&{݉^m\HUPٳuYf9HEhҤ Ο?cBŋO5t>yyrl6'009YYYaԩ@֭sh7}6fTRux5UB7Daݏ-T bct6n/ߦ[Y ђ^m:w^Hr|嗸yf\r ر#;ԭ[ƍsfϘ1c0hР2HT*UǐdpssÖ-[kvƍ\3bbb OPnPBԭ[Jʱ?:s,Zm@PP###ݻ{͛1n8TX1{?Mզ Q}Xl7? U.] ۋ!xuZZZ6GzڰX5>={6Μ9!DNm'G=/_ƠAp 4(ߨQPN{隬aaXv]U f-TmeecǎVZ8qڵkc?}^l.(bTurl%J*XiibDDDDDDDUVӧ1`ѣG_*U v®];L&Å ?F<rc|_~V= U\ZJ*=.C>LYXwMdhuZV) Q}[l7^lЍEkbYϱɓ'011Aƍ>ҖŪŲepE?Dҥ߻&k׮Ç={vƑ꣮+Q8ONi[}t6nb[vݩbm^ EhҤ Ο?cB6-VMDDDDDD/LLL?bչ>,^PUT1c$#""aaaXj }X+>k};ɍQ?m֦Gv P5Yfm^ Y& [lׯuƍ\$Yw4{m[H*U*8VVVE΢1>sL:rciuG?WuOѦsG]>>C꣫)RUWBՀ. ho}3۵tw1d}}}1m4xzzbѢE8~xt'zŪL&FM_h/} h;}^߇aQެwj@>r'''899:zŪT-qnNT} }ṓM?!""""""""""ʏ"7Dm1WuvsӖKmϝUm9wQ~鑹YskҶv6m4ӷSmϝQm9wǨ! [N&k8)}`}CDDDDDDDDDDxgDDDDDDDDDDDDD%"""""""""""""ņ(-6DHo!JDDDDDDDDDDDDDzPDDDDDDDDDDRPT:zMJ*?ңG>D2e8:>=ESkDDDDDDDDDT.]IIISQtZ6meRJO.]񁏏O_AP}\B###Kc$2.=QkDDDDDDDDDT888ŋRȓ˗Çacc#u< 4ooo̙3XfB18x`&jJӉ'Z V˘jtx9J6?֭[ԫWOcGm]{vލSbڴipuuUs2RkDDDDDDDDDT"WHаD~p!;=z@HH4i"u\LMM0ڷo\\\rJL0A (*h%KÇ})8qDC%''g} RG """""""*Qd2~'rtAAA:Vݺu ݺuÀ'uVVV8x ڷo/SN:R=}zBRR.^O?TH:/WCJ*"hРA[lY2~IQ\R )"dRJ]JY2~I`bbRb """""""L&?r9zꅃ^Xv=888M6Xv-d2ԑHO4ok׮Ř1cPn]CHvtUT P|y#Z[ݿǶm0`Çcuԩ#uҐL|gݻ7.]*uԯ_ "u"""""""1BرqH IO}ЦMڵ K:(|78u/r!7of͚aR! ۼy3&LXXYYIGL8/_ƟOQ!JEaӧq-99;wJ©S`nn.u$S* GTT.^SO Ɛ!C0|p^=-1:Aq?>رC(:&uQV-,[L(:'666l7DDDDDDDD6 ApKjj*ׯ_رclR200֭[P(0d(J#ڵkѯ_?aڵloVRSL'2223T*0fԬYS8$\777/xqt7Zh޽{Kܹs!C`߾}R R!C 66ǏGŊD%@2ep!DDD`Μ9RSLu0w\-o~oRGGtt4ϟ/uСCQ^=,YD(:#22۶mQHO͞=:t(l"uRT;v,.]|'RGVZ իn:㼗RرcO?!00cƌ:^+ cƌAZZqJƍNjT g``cڵxŇ IDATqt':t.]H_UVaرشiqr6m8 ԯ__8T_~)Spi[8::"88N5D4DwIMMڵk݋8|RG!-0x`4h-:ֻvۇ HJYƍի~qlR8T;...8p bccx):u۷oŋhժԑJBHBS,Ye˖!66eʔ:VRThܸ1rJ㐖طo (ԪUK8ZW^B 88X(DDDDDDD000ӧѱcG㐖۱cF???|7f1{lݻ}4 ѴݺuǏqEK%::ݺu%Qreɲ06%f(|7P(XjQ֮]p]J#5jŋKEk]xG… BDDDDDDDD%n݊Yf_;wٳf(i CCCٳ9r$T*$9Ѿ}{4hgΜa3TJTC}/^DhR L0UV:iL:V?\(DDDDDDDDT <۷oǷ~ ___ѣ9r$0rHC,--qa9snnn?#GбcGDҥ5+Q Q3---?JEܹ=ܹsBZ_~hڴ)g@Ghh(Ξ=+4 8ǂ 奱;w̙31uT k.,Y۶mqWgڵkahhc)q QBO>:P*¤I8M~ qqqRGBbذa_qٳ'ߏŋ+EDDW^pvvd zXhƏ˗/<==1yd]d~Lz?BHB233{ƒ%K6mڄoqqqPqHi}舨(ԪUK8DDDDDDD'000ӧѱcG:s zꅯ߹s۷v7t pT^]+JL0;wĎ;ЧO ĦD6D`ǎ;v,bbbPZ5HJTA4h?CuQ%~FJBӦMѮ];YF8DDDDDDDG%u CϞ=W_aٲejcڢ~8x  &*nڵ+RJmo /^DPPZjlJ5 kkk,ZH(ۼy3>}ٳgKt@Ѷm[RG\`` bbb.u""""""""\ڷoGb믡R2/Ub߾}l122޽{K5 ;N:!22/^d3T؆n:?RǑLzz:1uT/_^8#ܰuVDEEIE2Jĉaee%u""""""""Ahh(ك')krAAAjYGI*T@`` =gĠm۶DXXԩ.%! }Ef+ul޼IIIJ,X@(پ};?~yI胚7obĈP*'-- ӧOq-[VI4I&زe ݱ{Bۣ~8}4TƔ%! 
^^^pmh\zz:|||0uT^<</<==1i$YdHJņ(-*OOOhL@@޼yӧKt-:uT]v-233yβFXXoyl2,[ G6m4HLMMqܽ{'Ns?Rcɒ%سgƍTXl ,Cpe,Xf͂qHy{{c޽%burr2|}}1w\.]Z8DDDDDDDDDVvm:u QQQׯRRRr7nD׮]%HIYUV{n㏹^OIIq!:u } %_ѯ_?xxxHح_:uQHk]t)k055_-u"""""""""Y&N:Xt _~m޽0a~'899IH7okb֬YޞN:ƅ кuk SRA!_q 9sF(۷Xp!fΜ 333㐞!ub%K`011:ZԨQaaaxwd?~Ç;'Pٳgcذav!##aaa[dB!um2bܽ{aaaRG)Xp!OR=z:Jpwwq-I8}4:v(u*|^^^lQܹ3 100@n4J'Ok׮[.vڅCx4F_~k׮:ga8y$/~xɒ%Xr%bbbbտ#88X(j1e\x^'"""""""ӷo_#33󽯗+WO>凶I^| ;;;"444IIIѣ^x'OJDORaڴiѯ_6n܈ &GN$LIy?>ݻݻwKȒd̝;P*v>>>8v.]$u"úuf(#FylZHKKñcrOkaaǏ :uÇ%LJTRRR0`ܹ90fL0 BllD)8C͛{"22R) _~Att4C%AcǎIHƎX9sF(DDDDDDDT|xmd2.^֭[KR#_sΡz۷۷/bbbڵkk8)Q{zǎCÆ ߻_ff&zwŋpR*$oϞ=͛Rh^\]] %DHH¤Rho͛`Q abb(kիWg3FRa̘1r BBBl@RƍΎ3Hܻw:t@ZZ.]g3r9mR ''<qNڇ ([,f̘oooIPV\ sss?^(T|g4hRh^^^CBDDDDDDD%s5jDHM6 ÇQ^oll{Yfԩ5]v m۶E5pYTZﱴÇqEi %1c `RG)D{ʈN:ݻpBQ cooiii>|DH,XׯѲe|OP`׮]hٲ%ڷo7ocJ ̄Cԩ:33|vҥKm۶\ٳgKEĆGoogz vҚpBm+WDr0fRQIְaC 2^^^9? (UnǏ?c'zY\.s,հaC4h@T/]vSN~BݻѵkWƍŐm߾}YuV(ŋc|2@t>>>MEh>L<rWFJJ -[5j`Сv[vvvpb ̛7CI2 ӧq];!:ׯׯI&Ǹt+u<"""""""* tU:zhiN%k׮L&"c ޽ [[[R cccfo˺`c͚5´iӊ"""""""$%%Ν;FTT+ϟG˖-5JdX[[ɓ'3 \]]t9͛7˗/GZZ{_700\.qqq֭[bȑHTPAL6M=ݻwł DzѹsgǏ ;;;@X[[ 8W^SL˗ B=ZܹsPcUkff&#޾}9:tHTZ5{_yЈvlR_dC2,\矒fŋiӦmWPAl*꽹 hԨx䉤Hw)JzjQF QT)1}tc?\,]TTZU WWWT/)Ŝ9sVX|oVZ_y~ jbb"? ޼y֬*J= ccc1nܸ|%, DŞ={DZZZ3%,,LGd2accSǏ[9nZZعshڴbذa"&&@c,^FFFbŔ<7owwwahhdll,f͚=VhRĴiӄa%֤I\'y Dddxɶ-[j>dbd·}:Ν[?$##+.\2 [lË#{>|ƍCRR!qaZ! 6D aѢE?>T*d2-['&&ǎÙ3gp$$$hРV x"** QQQHJJBJбcGtݺuC:uP*WsΡf͚}LJJ zs!-- rDϞ=y9=ӧO̙31ׯkkkԪU 077)x% DEEۈGҥѮ];Z񟨀L2P(޽;v B!ŏtX\\ xlذ]v:R.oѣlݺ:u: #Pn]lܸՓ:R.GQfMܹ5kք;,Y4( (J;m!OOO\v 9 }Z+VFҥKaaaT*?b޼y1bV\Mx5&M={`Řghڭ[D Dڵ!u ܽ{>*Z& ajj*q-ҿs%QF !x- QL1badd$j֬)<<<sUTFFFSN]~HRSSE^ѲeKann.̄ +Tc?~,_|!acc#c Rc۸q044}N_ضm011'O.{1DT QT)w^[FF1cF-ZgJ+K.޽{ L& E䞿[jJ|ƍR̷ѩS'QJ|8 ZF'۸R9|0 ֭[3gV4C@PDӦMqchݺ5=gbZ GGGDFFGâEREDDDDDDDZ(44&MҥK74jشi.]*už}P:t(|}}1qD;wN8dffbСHMME``^4C:t@߾} u\ѧOtEoRJիT߯5=kv |BK.+ ˥'Rcٲe7o]ľ}0j(a(Wԑ>hΝꫯо}{޽[IHH@&M0h RQ;v`ȑ8{,ڶm+ubscptt:M8GADDV݋[`O?T8jvڡZj8|܋WT֭^|gDHjuhnnn3gqZ܏Ą sNر{:R#FիK' &LDFFW^X"=Uq$"""""""iT*#-- gϞԑŔ)SpADDD|RQx4mÇqEZZڵkʕ+#88X+ɅB=S,ЬY3̟?_ks ,?]O>D8b߾}:tcj?ZIц6 5S)B$;eʾoÐuƾuTchQmQ0~~;f~~ϧsyw>\t ]t F RZZ#G"$$ΝCNxGh4h'Iͳi&̝; &L;GKKKC>}PQQ@4i҄w$B!B!psN̟?n݂8),,DǎѡC#qÇX,Fddd-w>%n݊crRXXccc;wEh@LL ڶm5ݻwaii ___ 6k;W\;wkZ+d5D߇1QF!$$h߾=H,66pppe⯂H͵gL2;\\\xd0`rssMMMޑ!B!W`dd˗cʔ).** ;wFpp0u;_~)8۰aV^ Xh-֘20`kk ===;v[i˃<==dn9hy}'ĉÇy!5#Gѣ5aхGEEBBB;!B!B "8R1yd"66u ƍyǑ t:to%Cll,q1 V\) m6\ɵt5X[[ݻh߾:ZMYO?a׮]HLLTj㫯¬Y0w\Ԕ5hذ!<(VZA ѿ7n]8ݻwta„ BvxܹhC!B!3?FHH(,^GE\\xh000ѣdqE۶mg{ ]JB˖-k.Jf̘Z*((}Ebb"Zl)VZa];Z 񁯯onۇ_/_ȱt̘1˖-P322~zQ!B!"|Rݥ8q"pQ>ɥK &ԩSqaJڷo_k 777ݻWj5 wwZ {{{O*hxM^ j(++ԩS1f̘Zs+Ν;cԨQ2e y!rDf0c QA駟rJC!B!=z8p (\5m{yG$GѸqcQrttToXYY 3FZCHHT#&%%!<<;^C7G5yY+!S\w駟BX,7֮] eeeqڵòexG!B!B@:''Zu}\]]q Q q)ݮTbȐ!RyիWS 6LZmڴѣGu!mגu#F@rr2EkZQ駟0c |HUӦM1m4Xvj*ڢO>H"~g!11wB!B!HX^^;L>011EÇN:ՐG&&&?ԫW[Z6Y_+jmCGyG #FÇȸCO>hР(܍1QQQHJJB!B!K@$"SxQ%>>ED"#33S`hh(PйO kZnVچhll,={'''Qd·~'OVYY .oE&e˖B!B!%%%"s d"?S ynosIII}HX,شV,! ###4oޜw~-B^xGzBpp0B!B4J䖟/_Ƹq`oow7kݺD***b=z`:::l˖-,33_`Usqqa .wTTSRRbwqqqL$1MMMuVvɔD>Gf}D7nDƪy9{y[l/1h޼9X8z]Z=99`)))D~XbLz}߾}L___b'kEZmܸ1;p2ח5i҄w "\\\2ɓ' B!B!iӦMTbyxx-[Vӧ @ׯ3޽{oܐ!C&٠A[1+2e$f̘!<~W=:l۶mﴴɓ'%6ƘT{zz:bcc%20,++K"I;1cǎ1]]]OV ]j-s322+"S,Qzڷo;LiѢCQ!B! 
.&׷6([ܹ3rrrp|=11 PRRz={1d} Kl}ݻwׯ3޽{o̐!C&+//{ ,,,z͍)((\}3fHldӧOyG3+++lْy{{ ^Gvڴi;GyLNNfXbb 6?^"o>ּys/.Bq=z4sssdlZ+O׊.PWWx 2JKKsAbbbޣ%%^ٳ'cHNNgQWWGAA#54 ΋뉉Uu;(,,XB!BUVPVVšDTH$Brr2JJJxG)buԁ D"; ;tMNkŻZQ+jjj(..J-Eׇ855oԨ׷ڬ{7kLby$rKTRԭ[WjܩWy$5wPn]E!B!~TUUxQeeeHLLDee% #)GxG6ixj_$P nhh(tMNkŻZQ+R{_NogЪU+U=cccJ,,?%釢ȇś{$?whB!B!59];F믈Dll,VZ?ׯYf/_d~:("x5 26˗HII$I󸛙 26w LA;&kEmKl"TVVV;77[aFzz:GŶm۠P'O ~z};444PZZZuK_煹y ܡyC!B!={Dpp0#77XhZh!H` 2УGR-81gϞvڡYf4&..ϟ?tHJJ$I㸇@___НVʼ𠣣S@\AAA /˖-9r{7֮]q غu+uV5NuS^=̚5 C~~>6mTKNRӡ#1I"==M63oڷoyQy?whB!B!5<==֭[c Xv-(˗/CfիW6D{ øq#/BCCJh޼9;{֠mV(0޽{T#^x!b#$$_ %BJKK/ΎwjܱA^%X B!B!Hϗ_~ `ҤIpwm#%%_~%8Ֆ6mh׮8m޼۶m|oŋ#55UgGGG~ΨQWەYfذaF%h-Z+G Zy\Hrz4$%%"w"<(Z,"sb1B!B!quuyǐ >>> պukXYYy>>>puuСCkɺ/_ܹsR98}4222%Ν;8:: ^֊V6D6m uuujLb 4u244y?w222CCCjB!B!DF(ܻwwpA9wO2rH8ppu]ܾ}[*QSS}dݑ#G_Hn{z6燁~ע5yY+jeCTAA;vDDD(믿"22XjRSS+++A&5@:s'""066؄B!B۷ǁxG* YYYprr8;;˗ +H*g"33S*d/ EE0prrd_cRGkkVg+piܼywaff;;;CKKw0sL… WѴiSREEömې/d͚5puuZ3ڴigggYFj5eIvv6oߎK Z!OkEmZZZB]]/^E&\xС(D'(2ӧ=(B!B0̘1 yǑ5koE֭yG,m۶ŠAn:Q.//;v̙3 ^7TUU-պt͛'C`` bbb^={@SSnnnRKk! "|}}Q9r$>Q󃑑,,,xG!B!B<==UV"UgϞEpp0/_;D,_ns̲eˠcJ,Y˗˗RKee%N 777o^쌩S6=-Xr%.] UUUקB>֊Z Q C^S|p 4oȸ~-8\k#FŋyG!B!B@Ξ=o7o7|;Ѯ];᧟~GbfϞǏPWWGp111D@@UVVshݺ5<%mݺ^^^hܸ1 /^H$ڵk%999!==aaaPPP g]hٲ%̙3f͆ e@Qn&aĈ(**©SxG͛ر#b1 x!B!BPZZl޼y!Q~}q$&;;Ċ+xTyy9v ]]]={kt .\zLLLpB̙3kUVa͸w^ vy 8ر#,VȴcÆ HNNFݺuyǑ"|W?>fΜ;QQQ­[`ff;P„B!BHm:`ܹyL`` ǏW]I= WWW[nf_}}}q0i$:u okRhGAA\t%%%ԩ5jۜKKK)\]]i&qhm-((>V\Yk۱l2^z9]]]:tw7|k׮q#B!B!H1l0\p={G?SSS=֭G0ӦMÑ#Gp#q4gbHիQ\\L%W^޽;L;TѣyG!B!Bm߾ưëWxǑ :O<]hjjj8<쌊 ޑ$˗ٳ'4i3g`ѢE(++H$™3gflٲw36m~ #eȑXp!qUq$fƍزe N>-sP YE QذaVXX;T<|?36mMMMqڵk""";Tܹ38B!B2>|Ճ3 yGlsADDΞ=&M#͚5ӧ  MMMŋ񁙙L5}v̙3O%$$ nnn>annKw*^^^pvvxl'Ng}0 6mL}Y 'OFN0l0qkݺ5f̘㯿GpWFff&/_; !B!B8ŸgϞYYY#} L08}4yG*SSSz۷oM6ƍÊ+ɢakk kkkܹݻq!l۶ xx{{{.HÇǚ5k0j(q{Z!#!w^\r:͛7 q[d tuu1fG_|+WѨQ#q!B!Qft ϟ?飔`ذa8qBBBеkWޑѣpA9#}TڢӫzaÆ8uvڅEaȐ!,X-[֭[yh!!!CqQ(++W6tPĠ8rǤǏݺuCxx8Hmݺu?~Cj555wB!B!===AWW Z=zDEE!<<#qչsg\|֭?~;R͛7Ghh{7oǏall .H9M8ļy0~x(c 7nD>} oo﷚o| ҥKwwwpH6`رիm;R^^^8vƏ;-X+Wā0qD^+nݺUc j3F#F 55wz11f899Cj6m`˖-Xd .^;D7ڵky!B!B ą  {{{,_cױc`nnUUUEJڷo7oBMM fff2|ˊ ,^}ŨQpyhhhD1}t pcǎxH "O\v RSPP {n,XNNNbccaiiWʕ+^_f 6d۷gaaa#ַo_.\<|2S;x`aڬK.BDhl޼yLEE 8=zw$D"/\4*++ل XݺuٟZQPC<}2+++;'ydFFFZ9::ƍIٸqX٥Kx!B!Bg[flRX\\vZjŴ֭[a۷gNNNrt2aڴiüYIIT3z-\ijj2H^ejjj~Ϟ=˚5kƾk<==޽;x3ܿ1eee~ׯW{LYqqb=z06zhCg`LYYMJrr2Dr%򩬬;5hЀQ 庡K!B!H$bS]7 lŬYfaÆlʕy$6[|UVV[la-Z`^^^`=SWWgڵc~~~lRlٲnì,TTT}6vX,,,Ν;mǏ0EEEֻwo4ikҤ {Gc:::̌ݼyScK\hh(ٳ'STTd b'O|_~afffLUU},))IzVQQQ˟45@SY'''Ξ=:uѣG;ecXh֯_˗cPPP?@6mxG"B!Bĉ'o>\xmڴaгgOXYYAUUf!668y$"""`aawww|wШ8߿?0lذOSa߾}Ett4v!CG066^%%%vq$''w3f ΟcaȐ!HJJ7Ο?OOO`߾}077$صk8  BѣG4oFXX.^GC',--5Fyy9l\|իv/^`8}4L+V~q$ؽ{7?:u֭455?k'O $$Νٳg#Gb„ hѢ>1j(?ϟg'kh5UTT`Xv-fϞ+WBEEw)--?-[`…򂒒X?0n8X[[7|}}1yd6HB!Ba^zCٳr йsg@$A$EPVVFv! IDAT ~%%%믿X8DDD ##۷/F HIٶn݊ ҥKnV>'N۾B!B![nCcK۶mqa8s 82ܹs۷/ҢE ٳj(**qqqh׮:w wwwdddHJIIAVV5D`ǐ!CPRR㨫cҥEƍamm www|Ri?OZZ4i;Я_?$%%!44"w$薹#Gpy#++ FFFh׮D"ФIhjjAPQQAYYrssϟ#..x⠣[[[Æ pyD||<ݒ~Ԅ! a``:SN B!B!2 A׮]yy#887n@FxܹspqqAzz:xybXYYǎFQQ~'[ݺu͛zх Z͏u`ٲe5kD,,,իn:m[nDի,Y-P233lfjV5Dy{ڷo]v"!0o,--e|ՉǴiɓ' ZZZ{O>011u9N:gggΝ+x-eeeL>IIIӧ ^zÇs-4hW( ШQ#899 {111ظq#ۇmbϞ=x7M64={ vvvPWWǥKиqcޑQCB!B!Bߗ(|Xh\]]q}qܹsr; mmma8uTjb˖-q7`͕0557֭SN!99'N <<mmmޑB!B!BjXTTT]CЯ_? 
4HͫWpux~?b…;v,?~,tmۆcƕo4iOÇ}vhhh`ҥsD" ggg$&&JNZZ7n %%%+m= 6l .@KKw$"jB!B!BEGGUVry]AA{.Frޑ" 5( 5jQ'%%%xzz">>CرcѩS'\rǔ9|||0sL Z'N7H$,tP3?A=g~u]l^^GGG gYf͂'z职 'H7n`ڵضmԩ)S|z*:uf͚a׮]011;%lU\zUWpww?QFb1'N@vv6u&v*o!!!xhy8;;pi8::b޼y)Ο?-Zk֬*lk 6A7o+--M'̽|2:w 777$$$aC\z:B@1vXݻb{쁇*W,v*B7߈ tsaڴi7owbOJJҋvڅ/bҤI:L&Ð!C9s 22_|[B411:t_~xU 6D*HRRիի`̙vڡGxn:]*U7bZݱ\.Ǹqp]t ر#Ξ=[^}n}Î;|rZǗ2d_`GVgΜA׮]ѩS'lٲr\씨!JDDDDDDDDDTAi[lܸ2 AAA;%ܹso߆ة .رcq%TV HNNFÆ _/^<~iiiz/+Wȑ#qQQr^:ϟ|Wӧ6mx(B~^ҙӧѭ[7qqqdbD"`C صk&v:ٽ{76l  o޼;Zjaʕr ,,,@ݻhҤ) n9r$zK<,--1w\\r...Fvp=z:q#G /:,}:6D*۷oqeloߎhb#={Xo޼Nƍc˖-8q0d#==]4bpwwGϞ=Ez111HNNF6mгgOhBb4-- 5Ϗ9tc33dž(Qvrrrnyxxয়~ˆ#Ndddرczu~自VcՈ;"pvvFNN4hpdee  D`znݺƭ[жm[ZŽ;`ii CCCS,={bXz5 _QHJJBڵQF SѨAaСݻ7N055Fyxx`ڴi6l߿/v:EH$x1̙S"::M4ڵkq(sss$$$ 11SN;zaʕsܰddff"!!AԊk.C ŋ!HNDDDDDDDDDD ))IoТEЦMbIOOOd2SѸSG^^SlWq}xyyoE-41x`̛7&M%"""""""""ҰnuƎ"55UtJŋ;Qaƍ8z(/_.v:x9߿_cgg+Wʕ+U:w OOO$&&j0SͨVp! ͛7(!ZZ?F͛cݺu,%Kòe0~x~bCHJM-]ի޼y#v:P^=hBTDӤI,ZǏŋE… dh޼yqزe .] .\@ӨQ#aؼy<~J y&1rH`^1h";? VXy`CHqϾ!*ɰuV|RkcMOOoߪٳ^^^"f믿}y899c4k [lӧT*ѺukfYݺuٳ1x`QWA"Ʀ?cmmy#舟~ ea?~}M4A*U/##~-4hVZUVn7ݶm[tÇb mllvZokpss#E%Qjp>cGq=ىΨ_>{ 8PT|Xr%d2j塱1VZUV9Sƍ֨^^^ǩS*$UVaԨQɁD"T*E~~>7n`tZD"|ٳgၘ#;;ƍO?3gjjǏQfMHR۷dC=VZaÆJKTb۶mq} >'ұ IDATN,usO,Dff&;cرXd = 6ñrJ( HRd2B@nn.@.͛_~b+ l۶ #:: LHlذ:f߾}򂡡aٳgWpvZ"cCӰ!JDDDDDDDj…:uj `ddj"i}E~̚5 ǎCJJ zn+X <999TQfM\tVjٳgcw}{۷hڴ)\"X&&&*t].C" ''FBtt #663gDZZ'f͚ׄ'OM6h۶-VZ????~ }+..!!!!Dڵk޽{ܤV*ؽ{7~;wyyy000@\\z ^y=yxZRP`킍cmmuxժUѿAJLL˗Us_T?"##1~xxyy 2{50` H7!JDDDDDDDDD$ {{{֫\.ĉѠA Jܿ8~x Ǐĉܼ$ ~2ON<-[ɓE)77;wtFA.\.ܹs(]t3g~vi̝;>>>)T|^ZtR Z,MVJӅ A7oGwڵM믿t`OFtt4 wvJJJꏆFOgϞaܸqjn$%%N:jg7xyyaРAbHDDDDDDDOu"s ]p Q_ff&6lPRoD6DH+4lذȆhTTg Lddd@"ŦMxbTTIcBT&+Wظĉ6lnZY @^P3T*#Fdq ̛7޽;O6msAAAw}'X.666044T*Uk7nĤICFFF9r PT*-v |СC믿`X>>>?~<CWZ--ɸ]n en-v###UPFDگ^zpuu- 䤶Ť\.;DJ;Ԯ].]իQJb/|;T*SSS V͛7cӦM033L&+_U1B /_^v 8~8 8sL{;x3 ,,c޽{Asb傝[vB[*J;V/_ݻK<{RJH$xBBBpe>|YYYpwwWsyJ^^EH!Zѳgb644DϞ=bK\PZ5tС""""""""*{yyy\q'''cȐ!000(&Bڇ !v* wA )//;vPW^j;v$K$$$… P5>gԩJP(2e VZŁPn"?T"֩;wV5r9BBB`mm-H찰"{&dܹsS[xAҥKprrBPP6lhy1݋Ǘ؄v P;cǎjG̑:6DEЯ_bS*%>N (v[###߿OC/P)I䬴,YD4kL)P(pHhѢ{W.\BX.#((mA>g!!!EZn-BFDDDDDDDDE_>$ $ f͚ Sz kjYF㚘@T"..uXQFHLLĘ1c```l۶M 6R%ƍ'X\MsrrĉRVĄ Aؿ?bbbPZ5d2z $#W]׮]Þ={ki HLLD=> FFF իWGYfDBB"##Usssq m)))^i={J*"g$Y-stEm_u:t #33իWWTSJ0ydA'"""""""L2Em[Em\]S)DCSN>tVZ!n~ Ο?L`wATZUd2㏨U౗,YǏ ={?YYY.vwIJwΝ;wwwYZF ,YsO>رcj+ӢbԨQXp! ;AW(}W^_Zj]NBvv`CΟ?dԬY7F?9V1bĈb?ݻwCT[ehhUh֬꺶dggcpww!h ڱ!*Bkkk<{Lu ? 
D% Ď;>auM8::iڎ;J]~VVV;Hc3+v*h\Bd٩Y)\VbZv-l٢m/[l SSS4mTܵk$_ƭ["@v3l߾%»s5j80m4TV jx-lll "!|JUV-W+W ;;IIIw-r+aCCCc߾}/k˗/zu&_{ʠf^A п,_999%l_~ؾ}MJDDDDDD011ʕ+NCgh4~VXrh@=Zc .'YX( CT"''UyyyXx?m4 ~klٲsU򐙙N:aΝ =מbCTD}ETT ''"gD`bbW^;%""""""""""1 4G"557>}|T*1^5YY~}[VGX~=*(;46DEڵk#55uњDL&wXnrss(vJDDDDDDDDDDD~|LPɓ'HMMţGG!997oL?OK.U5KT*~mdF QI$bƌ ;"_~ڵC:uNDY&j֬vŋlVC")) Ϟ=?ϟŋ[Qjhh<,[ "eLBc0qRuŐ Dx$$$NR)ϟ_bBCffةITB.#55[;b`ԨQbADDDDDDDDD+W^!==]8u~ ̒4EIIIxV \.vŊFRRرcall,v:Eqm#G!JDDDDDDDDDannz]ݻEʊ Q?ɓ'rט1cPjUI?Ng vDDDDDDDDDDDDDD†(-6DHo!JDDDDDDDDDDDDDz Q"""""""""""""[lbCDDDDDDDDD8"==8wG,/66̙31d\~aΜ9_>W_;wN*sLmv\vLϷP~0w\W;pq]g}ğ;%a}ĭ۷o1sL888~~~zc(vDDDDDDDDDD2o<899VZģGT*?sFFFƍ1e\zUTsǏ޾qMF?ĥKзo_hݺ5?ǧ1UA}|Ym۶ @Ss>o߾Ν;?Lj#g}ĝ;[#GZj={6O,]׮]Å 6\!JDDDDDDDDDziٲeɁ7ՃG,VuR w͗ub9##͛7ǀ`mm]bj}F-[… ޯOYo'OðUȹWL?{XqNi>޽{066Ɯ9s(̙3oѣG~m%"""""""""s=̘1'NTީS'T\T1rŋxxx]둝]8UVEppڵׯ_OZXXWڦ.BէR1cƌ"TT*-q֧NY>_O"<<\ZJGuMkÆ(靨(X̙3H$ 6 ֭tD{K͛Ph-233IիW ,ZHzӦMklڴ⊭<ߺPtE>)iOYjhnS8ZmꊪU]{ M6k\6DH(Jر...]6sNX[[#99Rسgk.oJremϷ'%%ϟG^GW>e ֧NYk}oۇۇ^zIuMWk!ZڵkŹsJ3e@ll,뇙3gbȐ!jg|~aܹsԩSI1Ç4Ĝ7o߾̙3 yCDDDDDDӑի{%.]>>>?>>yLCCC,~_999egiicbϞ=hҤ ƍW} +++?s.GP`Z)MmOY>#TmG_+sA||<֬YS1] h͛\ѣGP*9##27nɓl2L>C '?5}l۶ [nDܹc}LUV悘fȑٳ1p@$$$k׮x z| 6x{{Ν;hذatpp|RzFFƦLR)lmmѡC۷&&&8x=UTˑ8t>˗/G>}`aa{u> ) ֧tMYs}}(J >QQQE6wu6EZlrrr SSSԫWٲޟ0UTyYn]̟? >\hѣGc˖-p'%*(\kܻwƘ3g9s8z("""""""]dnn*\H$<}ӧO/טk<~X'OPR%4kc[YYu033SSSO+]OLL z 333Ç1}tȑ#{u> ) ֧tMYs}},^Gbm6D wf̘'uԩT/^#;;:ӧOnj3|<,, +S\5o1ӧOv- pKKKT\YZ8?~<°h"$&&~[F-pI'Or'gϞCjaoo_bl߾/^TŨQpE6v֧,X6e5j lwwwH$ 6 ֭t4(8՜,111޽;,--|iӦx56mTD%)je;bWWBMy!""""""-RݺuիWٳx)ڷo)SŮ4hT|H"`ڴiX~=^~ ݇\Z7M+Wmڴ ~~~pvvVݻڵk)*T}ԩ/LKKK888Du.֧4> L(ba}oq k}vލxd2lݺ[nźu +++}X Q5J;v쀋ڵksNX[[#99RسgZ(h|ؔ), є?^s!DWWWl۶qJRܼ6wĚ7ڷoz'''5"""""""3aP*;t|}}ѢE &-[ĕ+WݯT*q-lݺ/vL̞=GFxx8Νѱ1pqxxx 44--"""{9\CYBէ+ L}JKSRm϶m۰xbUgϞB>M8>s`ΝP} 0 4ݮ htyHlKKK,]Add§04|WDvRQ(>}:>z[LVy;w7}z󆈈HgϞؾ};ܹ3>|qqq+2D"StRԭ[q1jǚIDATԨG8Xe6>e P&)6߿ȼ#6!hk[Φp{ u9Ɲ;wаarx M,_}􁅅GR ?^LVy7w7S*>|8lr `޽HKK+w$KVE . 55&L(W UWѶ#dG#BY`РAǸx"?e˖@>B}-֭[E8y!'Oryl߾6۷Gpp0 [[[tRD*;B͛Q/^@ffj? 
[image data omitted — the remainder of the preceding binary PNG and the payloads of the image entries below cannot be reproduced as text; only the archive paths and the readable SVG text labels are retained]

dask-0.16.0/docs/source/images/crosstalk.svg              (SVG task graph; node labels are chunk keys such as ('x', 1, 1) plus "noop" nodes)
dask-0.16.0/docs/source/images/dask-array-black-text.svg  (SVG; labels "NumPy Array", "Dask Array")
dask-0.16.0/docs/source/images/dask-dataframe.svg         (SVG; labels "January, 2016" through "May, 2016", "Pandas DataFrame", "Dask DataFrame")
dask-0.16.0/docs/source/images/dask_horizontal.svg        (SVG "dask" wordmark)
dask-0.16.0/docs/source/images/dask_horizontal_white.svg  (SVG "dask" wordmark, white variant)
dask-0.16.0/docs/source/images/dask_icon.svg              (SVG "dask" icon)
dask-0.16.0/docs/source/images/dask_stacked.svg           (SVG stacked "dask" logo)
dask-0.16.0/docs/source/images/dask_stacked_white.svg     (SVG stacked "dask" logo, white variant)
dask-0.16.0/docs/source/images/delayed-inc-double-add.svg (SVG task graph; node labels "inc", "double", "add", "sum")
dask-0.16.0/docs/source/images/distributed-layout.png     (binary PNG)
dask-0.16.0/docs/source/images/distributed-layout.svg     (SVG; labels "same network", "scheduler", five "worker" nodes, three "client" nodes)
dask-0.16.0/docs/source/images/distributed-network.png    (binary PNG)
dask-0.16.0/docs/source/images/distributed-network.svg    (SVG; labels "worker", "router", "dealer", "scheduler"; caption text: every worker has one router and a dealer for every other worker to which it has spoken; the scheduler has one router to which each worker's dealer connects)
dask-0.16.0/docs/source/images/frame-shuffle.png          (binary PNG)
dask-0.16.0/docs/source/images/frame-shuffle.svg          (SVG; labels "shuffle", "concatenate subblocks", "old blocks", "new blocks")
dask-0.16.0/docs/source/images/frame-sort.png             (binary PNG)
dask-0.16.0/docs/source/images/frame-sort.svg             (SVG; labels "sort" (×4), "merge", "new index", "index+location", "full result")
dask-0.16.0/docs/source/images/frame.png                  (binary PNG)
dask-0.16.0/docs/source/images/frame.svg                  (SVG; labels "January, 2014" through "April, 2014", partition keys ('f', 0)–('f', 3), division timestamps, "Divisions")
dask-0.16.0/docs/source/images/ghost.svg                  (SVG; chunk keys ('x', 0, 0)–('x', 3, 2) with chunk sizes 5 and 8)
dask-0.16.0/docs/source/images/ghosted-blocks.png         (binary PNG)
dask-0.16.0/docs/source/images/ghosted-blocks.svg         (SVG, no text labels)
dask-0.16.0/docs/source/images/ghosted-neighbors.png      (binary PNG)
dask-0.16.0/docs/source/images/ghosted-neighbors.svg      (SVG, no text labels)
dask-0.16.0/docs/source/images/inc-add.svg                (SVG task graph; node labels "inc", "add")
dask-0.16.0/docs/source/images/optimize_dask1.png         (binary PNG; data continues past this point)
qSoPN`]$)ԇ|5$FW_D.kvL`ΐ'O &c,$eӧO&PT}.X0J-,P=|n}֭,سg5dVZg4숇3)O0 Jg=c)L"J.mZ I^j4JY^2Sڵk'za$?nACkE.sy R%o];&E,X!aȯ=;S7bƌW:xkĈGdunf룏>WƖ~ALBg})Qwۥ dﰊfBbUuժU'-s=U^=~r`wUW%[AD$+Ǝ LXH pϛz.a("_X+W>kZz8LذۉQ[=,`Iv'L@^{5i$ J,Nvy׭2eX0IY` 2غ3[J:Om9MK1wOD0@pcЋ4f͚٧vzjnBo^[ҰaC]@~Kb~i~'T?˗WGD؛#zK׮]o!2 ěmܦ^5k֌k /X ߉^oR{XDD0C$C*vt&}>A6mjDC꓊EMZzu&2*(YF %͚5K[ǮP삜 Xq9 [qGGy:3 s*ňM6%;l?d+L~p|B&?:aP潈߶m[< c3%,QLD$.D®/Kb x]l:Hfb!{B6*(6 J[޵DJY`SOY+Vة©{}vkrmf .xkʕN5X=07袋B Y5v(`~'-02l"0MR`]U8^`!i~'ۗŪ?mx4xά[~+!P@1/wØ՜C'{n1b|FĘbiplR:w\5j"7ɔ)S8qv#pWJ~B0WŮ@&h'ؑ6䦛nnM،UVI&xSd7Mp1~]4h>C$;I J 5CI<(Ƒ4fM'(#9r{>)GVLqXH 0}5L"q2].p$TEk  ,w -mj &/wdwDNsk,~[o52 *(!x-JSk؍Hj_eڄ툏 6d>?]xw]Է>SW. n,(85l0fJZ HuʪU_燂~b$T?(,&pQV[nS$((P*Xl-X=zHkd)Ch\s N9;8TwƞX_ل sAu 5jG\r%m17V޽wyo ~όedvRk/! Ȑ!C5k*)ԩdɒVBF d#E bMt/ tapJ5$[ڵk &x]vs]Ν;ˢE /^6o& `N//lMDnL82Dׅ<a.lHϖ;|/B> B Ϟ=[`iRz~9)!Ov<C;-[L ('y| '*(b2 bO:UhBv H2ƂDjSzuA^z)hDad1N~~ǎ{ɔ{pÎ l@I}QJ$&}Î$uHk+,P|O$h0lڴIz)כ4iR&vx GT\RZa,$d5̂g^: wmagqO?hPD! (+TNE DS$l拈RaFj[ʕLnt̻cBX,5w۷ĵi2%`gv:-wfB۷oOSmq`wuvmSV7KLˬ`[u3|/]4 b~m5L2n6&hQ@H )4b_մiӔ_39bU`](Qa c-_LZߙQ+j]+A+PƔ;m Heʔ)1UiX?1ch-DCvvYH "OcE` U˖- &$sdf u]+'":~_>bG^zju7ɝ,;J_^QJF>@.5W# |zJ}^f_0CB#j^z&i0;)$,baub5 A+96~Ǥ{>aYC;(브Dx@e#ݥKYz-ˠ} ?/7|`LXGގɓ'˧~*5j@aOŻ 9s4IZ;% ,$5'ѣGt5$I68%J9XJYfit~!HCX"I9bBBG$$M4>>RZ5eDM[|t֍I-!  6yQN#TP=Bn۷Od2[s@&MPB.%X  /yS_,!@%xTP1Ϯw$L r"x„ RV-]{y*p8e: ,Vwa 7Q &]5i֬d k w\XA J8Q"ra[WQǵrDZbF@BdXCƵl,M0-B.ANYF;Qx7IdA&v!}ox5 M;gȐ!>ܹFkpdc #GLz5] o44o\0,#[A(ʕ+kfwD[v*(0?)^V^jU8hbe\Ox @׮],'@%sOQj\Eaoh'w֭ 06jH͍`hm,?eҠAmC.D=֖-[sV#; 3 .pK PA >:Dzi):" Ç5d*2`x/ rQ;OdGN5mT~2KJ{n:ula'1ON˨Qb Cڶmː0 WTP2 ǤI4wO?Mb5y4FP 0{ƍaDd)<Hg;vM6ŋQN;40`323H Z?|w5<oͩ#'V(5U̾am_Tc믾;HSc<|TSݑnIw~ "L${oX}饗  @ $h`>čYxmF<#bYH =HիWVY!f@06^s,ϛ4i"H\U={vg ;\ ae!p'ۧ~eZB WfɓgϞ>-jՒ+W/uQ? /}C9A @c8+0RUQ}>9SjεzG(ˁ\k@ qc֭_ӰC1V*(fg?\" DKpGib"_wu|,ؿFIЌ3$wܱW+9vIҷo_5B$Rӧ/_#IEdB %H0\tZʙ Y'PA4xp>}w{_lٲe[E%K鱳=?S6mYhT4sg"¢EtwnV;v6l^k)XCwB$uQbPfk9Daa'IhL2/F_n'w}ǚp0+LG._\wvء o@!v0B$ : D-TP<1 _Qq/_>ow<D]OW;v(iOvn65a={/MR#FW\5\&P`S$r M8S"7{/ͳMPAqjЪg y6^I'~f/X֮]녮y[2eL6MUE!;Wr@IDAT"BÄsRxq%ٹs 0@Tmu>y4ΓG =®%c]2Tx㍁2+q+b[1z訯 É=3F_qF+]FڠA44 ԯ&R[޼yo5)g }ݺ d,'@s`m\c so]yrj`ݎ]~[(H k#GZE:$| DB.<rӛs^a]|ڵ姟~ qЏxt3gj>%JODd g̘.a b7+^)k֬Q.3hժ8p@.8""12?[P{,H k`Q`AEZM:$GFb8D-'IsΑ/RWܣGj~Pvd  s…D\L:5Ʉ +m@\g)Eq'k 7, X³@v=w9sd)ׯ+WFsj\ٽ{?>.EHrFќs𠎝QXׯC XuBO>2L ;L*Tv"E잂Μ9<("{j$SL7{A>RW]]OJR;)VP7߬;HP,#ͬ[6{&l"ƒFSp>2A?"$cɘ]8_ad~Ø2\ߴiS 1g 3@wܡIz|9O[ '(ۈp/g&MСC3<')EXL?J~2gxwP<7%afKxWWwIl=:fGǽ[P6mD(u]19:tHe>$/VfF>;VGi. I` ;Vҥr%#b|"OX#ed֋gŋ>!%"֟H X˃pƿ01C4$RV9 ;WEΜ'%d_93l01*^"7 /] .@jժ%˖-g}Vn 3eNk. ~_sz^c*(3zwV 5|ڵk$Q󈯾J … w9wQ<Z f^qo f,# No:ږ.]:8HH Dlg۶m7xCz-KBDßCED/d.?珽(([l˗{cMd(' Bv!(I `VZ/4kL`qɒ%wDb|l d@շ.?oW:I 4}\VWs,$@#P@9rF\} iF&Ou $'ZGB<<{tČ$;:Kڷo/sb^9]M ;C]wF*\b{.SA '[" @ ػ(sw= B>m*(ds5\#EMvHHGFgq}Q7l9 Z2eJ@FaPA <9J]t)$/HH 1]ڵ+1`$0ZnM8@%({RBW^   . r0Wb`ˤXbEtRAd9#GرciH"+W.͋_ݻw;T+!p8餓Ԕ~(w*(+{:i$9|z뭎 IH!pwi&qB$9[+W֭[3W+*(qư"תU+uV  vQz)Ww^0_Ib pUWIe1\SE J'ˢE+s,$@$@&lC&J9H #"7~yɩ*{͛7ؓΓ'@}7d_3H R߶mT\Y=8 *$u֥ʉh ֪UKCvjѣG}JZHe˖ZGrPERYz4o\W'l߾]ݥKuu 1P`R0tP?`ˏ*l> )SƍդJ*:>_0 {܋ dܸqұcGÇ{塇J% јp/.) 
^˂s:,@2m4bU%XՌ,i>`!;UargtO%rY gyF_đ%.O{쵂[dx*O=UhQˬ86ƍ[\,]eP-o-cbu-ݻ[7pek~ע(p/~-`<=c*ۜ<7k׮S;leV}-BhobUPݬ|Z!]?(xYmo>d]|aV|6+zD82~(d J}QS/T<}(iy睤c?j~6VŊ'|2{{'[k׮㶢cV3Q̮eV=ƽd+(PjMCwI3x.5lذ|7іhdP~ -Ӷ'|%I4o1hРCQw*e?ev>쥬K Vŏ%}1Չt.Na)揷=u] 2v6k,Y`ro 0`r5f1;%ɮMy`J)%0w,&7|yC0a8ྱ 9ra?`;DFA/SS-\!SdY엍RK(mFa"(IL0]z־p.F8>R LBLaق =a3"0SAd//N: :w%|*O^)ԯ__q=v:ZhbpHߵk`Bz-AnQ=8b:{ј`he˖ٗ%{7(5/`+^x!ٹA?t:6>?9 Uy/Lo䵩1O?k]c XJH舱Kb/d)k#ujyap֜iTP2_lM6jb螛HbjzcOuuov=m6Bx@`lX+vL a_Q6m;\w]P7 imΫpraҤ\<yu%ȥ HQÎۂHdٳ~J7DcÌQBM&ڳjժBIDSvf%ه?LDǏ/[nUeD=!`=CBmko߾r-6`ATj=ؓV$`&%CyreFB'/DBbX!DCt'U䨰^S4,#: 80k0hXbkΝV֭\,ZYccz4$yu%XO@ZG!Rj_dFAEyIjZ71 yI,\fUsÄs'}URBN:)hˬpY=@j6=ylkt @jz)&9Ģ}c$@)@Aٰa)D(.'Ǐghaj *RJipmW~Wj]̈̅c=={> >*۷oKa!H@\+W-Ocԭ[O⃛ -[Vny\!y}bY׺PA ش'òl2!p6qV ʡCѣriy;rw%pWM]"TJ !IQJ( ReH)%!G$)eP22$ED⵿_t}p3=Ͻ {{^G#hL n?#]~'8B(Q"Pۄ~ $',S ~`GnH5wРA|J mT~ͫ^znWA+4X`\ғH 6j(zwrowb! BPڵŠdɒԳgO_q _PlY2du٩A;rǡV[oZjd}پI؁ΧXVd 80+z@]E׮]iΜ9g]ˇ \hP.]J9TRE`TZ5]z%!mڴ+Jh]q=G͛7M6gF95QOF 6˔A?[޴?n߾]̫+K/s&Nu8!0rHȭ_^c;`QVjߟ{=1_I~UE ,*xc5+b޾}{ jAx´cjv믿Ą&^m/%y1nԩCM4;')~D \rTjUJcD'PBxׯ_7^AqFz?)Wz!@χuhʔ)SB!Z/'[hQ&@FOD 4P㙠o[lӧSv-*#Y;jn=E|,[VZU[xq}#1fT) :P> 9n =[*\>Lb ;&Ls]ի%}V._ hiPPy^uZhQ loC5n  .w})/]V$gjoxꩧ'`c :% s )2S8M6O>EIE@ӿhr!Tre5Jw:Dm۶NȲI8dw!B,j_J9^x`1.6oLj HТ *±+):,$5󂿖7 ԊnѢ#1c[H=X\NlpS믿>?@amzW@K.WТKu}$ #{QF?6k#;๯~(vrP}]1_䌮¬+q@spq!йsg*S w}k6Hy͝;7"P%м=X^lh݊@EHuy0F~)r+sb..  U$ ˭:\E@(jPcG‡UW]EJ|0|)9"PfM٠?~%m J.yt[oE? (F(%K.H 1~ 7HYdB>ZJ&=xZOɳ$*Ak0^~eujE@5(:aF^z)s=kmCz^{т wP\+uM .pL-HPC@K}UVp))QF`vN:.\e vP unиqc:r+@RP!GA\r u]f5PG@4Uՠ8eBT@wlٲf͚]u֬j!r-ɂ(#p'_~2 i (9J`矟[z"8x9P`{ߪU+[(:aEQGyAMR (I@!w!9V˖-Dm۶Jdɒ@~ѼyV@"Pvm5*@變Rs^FQ2er]Q! lذAo;TP!$g|W /_<00Т+@$8hwV-˽R3$9w傞ޣ8%jiDiӦdSG>ZL[ (i$w5lP3z֪!&^qpE@(ӦMO>DP"t"*o֥c1.ޕ5tz" AqV-4hт9.f(@tI?szczT@IOB8?5zW %9.z4?!EʕB[Y IJe" Wm,))Xu/ pEQJ{Sʉ"P =X*Qy (.mXA^ͻENWC@ՒN;D}ի\]vم?xr[o (vP*53g$,ZjUE@ȄLv}w‚LIPv СC:qP)AO?47n!O]ɓ^ztH(@A.(ZY@u]W^4rHZ~}@V6gtR￝)PKC@88}v>}jOYH4(=W\A{7=") `KvP5Rx ڼyF7`tOE R%L_A ݚcO%_~)QڵkG^z;ZDpkx)y@n /e'Ì5y0=45j0C5pC~ ּfnJe]fXhU \Fww k expZm6jM!&IbŊ]as>?Vg fpaW,96ON%KȳMOS_grB.J"!bn6j60li/D'_~}glժU[n1l*f` @6m &C!ㄓ.]v-0lUV9̚s&1c&NJ$nvN)t>9r1nЫl#mܹۋŸ;-ʾ^{M4&)ɰkh4;0lnŰW+^wGc'}jb}G ;XG}; K kq1}Qx;]s5`%.EB@S*d?_~EA;\ʕ+{̅7c.e˖o4/&xXviFոi )v6W^yeH[M؟pD-MhPF0ݨ[뮻pxK~>|\T@Tԓ̓OOM2%g͚EwD1cg #{1 fw/ao߾ >߈7 :x zgIXhXE C:ћH["F˹jӧyl#_[j9̩_:b_>2f*Do6k֬1ȑ#5j$0ESJ qN0634mF>P_}xtꩧlVJ sKϸ/ [$OpYfԱcGba}eD… $~I&Xk[sm /s-"|@dRbÛg8FJIcI@$S#`Rٕ.[as6+YȗŽrI B$b_E$c^˜f/:~WCuIp&P& (QK\^ |ZdKhΧ@|sڵ2ΑF4Q38fU 00ٺukVHHND:\rI'@lMfApc A 9$_5P Nmf@z_ xό9 }șYz߃*@ܩAg5oܜwy\*gϞ-Q_C)l%-/$G6MlBf.Y^tEfƍnWtbaܰ)ox*#k̊+ Ue`a7y'@dOOФw~iP%-<ΟDbw]KΗ}pǂ ҝw)jUϞ=EHܹ'BL[9>I& X<&D0jժe?x}(B.RBTΧͧnp8b}rC@p.\o$Ee̎X)Ďǃ f„ ^PT'9;NJ40`s` /0wmSO=%F|'c #&gU@)pk4hРw}pI /B駟xĈ>2|!KgؽenGr٘A%K]?_U:LOb|IӒ%KDJ#J.hTxm cpJ䮠6z2ٛ裏S?. "/f QS#<Ҝධ,>âaD8@| 'hM/P7_](_u0!Ef RE֭kʗ/o;w+ˣp48]v(1Ո8koPωF|*b6t|\h&֭͝;x_PSN!~eԿZj L8|KӦMJ̳ -ZDsyG}DJ3f $Tځ/?86^{oE$O1`$| |KI}u't`?w%Xz^zX}N$:Qꪫf}<=U79Q.G rHwĹIwuq N⭷JӧOK&9C:Wy肐:RÇ2ԝ5uCmT@ U~~ᇴf͚P (l[O[&]T)p4|pb&aNuڕ،8sج}]v'&QzCSL~ly.|W! 
G@k9 FAOP8DVlP%CVV@®RrEN9ƀ\tAv8[˥z4tPbyYG +`߲e _\:u*m޼ l fȑAau>u>e8  ,fT?3]J&:s=סS *Tz)qỶsaRr7|`^ǞwԱcGS'%hO?M6vI_#e`q<J9^a>a#z"soQzUJ͒NA5l 9uAlC,|%7x8OGQjڴ)uYq_ÿűVo_U-ZD^馛hݺu4~$guH瓽|s#ybK^JZx9hK&NL>)eS۶m}‘ـp gyJPyo-] '|2~3Q '<UbOS]J*d<.rK(>F &H'`?=0 :ݛz-ZlY`q Rf7n,fps|qĮYl!bMkΧ88|Bx&z23*d(+ްw>`YW}{ (h8W^yE8\e J*yKPs*Ud'51/BXSDBO?PZ$4ev1,QcbŊnТ # ߂0EQ M~ (qq(Bnڴ)'_|qJ5^HJ!жyL4I2Y,?Qݺu@8V6rs?9fpE-Χc" 'hU3S%3F9_]GuU\92zc5k]J~pZ <|†Wר}Nr\>i cxa?S2e)8"|,Htɽ;N ?3$=2O2_ x8`tR,-モkw.6Hs??S#;XĢ/3lyq *ع4k .'6msaLaI00V/b:#sϺB_ظzb;I>pɌyOݹoпX9Sr!ITV>}C4h 1!Sے: f^aKҝg;簌+ ړ8ԩw<,?`kkL[m& |\:n{eė7|8KCXapBX_8f`Q={~xK/q|!",YmݺՉ"W.@hJYDb7U8cdX8aacW);.̅Q$ aw6 5O3!+X%;,v2rܣX%y$Lszqaރ?=xf~{lԷ_~KrxAORn%7ޅ] iB#mlD۬^?|2`󟌰XmOKH|r+  |G;&8#+%(X8_2OO,~yށ6__uUҿÇW;AOH3?c-*8X@CU8'#h bgkLN*N>dy'/^,1`{d &Q"j 4WnHzvsؿIGAw%L a…vhCNPyBXbhj <3#0]wA@AưΧ{62#hީ>ج7kUN-N vkfϞM> uܹ{G̢Ə_v_7T>)"^`ANenc2Lu(ǜJL)7Y ;R0ʇdE^zIƍӜ9sp-PZq{˖؉p֛LB;%ιsiٙvsXHAw*P<7!0N|I{7*OwZyU%~ B;:X?B#^,:'/4İB8tX'! .1<#De~w ">!J / DⅧamDKb9ĉ%BO!3pM~\M0'[6-[F'@ qb ͂aL)X٬ 9|b>nŭ]9~AIt9DB_^H> gp#x"U7㧹[%G-Da{n3fPðCMMe 9¡aeaAOP)oa rm$`y lxVv6eW8:Caٞ'b 1yH2gsx!" cX^ߨ Nj<0>]C(@ ʖپ!ȿq%K̍,BASw}E^&MrLs,$=îd*MSsuc0I*J@bVDCN:ٻ)`W|ʾÂ4$V5WNņM{CO@8IHlJ8A= AݵjJ*E8rR8rJ+L|nqTR(,!L6NS-DK%>겞!N&|N2(2u|?)^|Ƅo-*83f̐P 4pTPr`'rG/TQTAXbBarޓѹsA}9Lj_ܡ)N |\l*e (j(췐rg?; *f0H2 y1dV/Hg{.E: &Pv5LX3w_?0J < YΧ;H &N[dF)WP"&M̻,MĺF?w b~`2zlooDС/Kd Qs?}`A(s%l) |V+~*dY;(^8Pwo62λ0K]TgzK.h"¼ :{2.8~ׂ®|bc'Pr6iDB^do6_*C֕*XH 5ju̳ ݎ$jp"OL;F}дi7gX <8ЍY`h^9loqS,~1K.# %F:h6Tb+5̰D+W.C4T(/<`X<.ODއ  VS |BBisaz&)j╥@r"wG:u1}0OP3=ę ѧE?kIvGiH@׫W/֭q'K/%iԩ⺰'s P ߖ Y̖wN::ҀGM6ц ҴNOC آB%\=a76$)# ۞ǟ_!0~x٭\"fA߰^nVz٭[6,`^O+`3_ti-cƌ1׬^ڗ<8PVZ- yob5:RwiӪUd,p 3bG@V@uO?}am ;;Y 'g3sHyZHr:w,*tN d# 6N>*/ӧg4r99s#n}vQu]vU9M;I  LbIS,߃q4O}S#Jjl2dŸ`΄^xᎃvKt\x_.830bŊfѢE"G ?Ӝx≢U2dYxnṔsf@Nڵ9e\#Z'b6_}00.|ra<AoJW%U3QB8onma5z뭷$^ǾFhJ,``>rȂ,{c~1Q!"DL |AIȩUgG̪t3}9&6plF˳.!>ȇt>e,XŘzWXsKm! ;VO~ۆwi>S~zLz!#'2O=+;r`tG=xV{S5vk֬)A aϊ4cxOaԢE Y@HY,C1 &>D|Χg>(S (p*vb禐zŘ}0Q= c1?nf`]x<^cKQ$E"|^0Mn @g/R4V͚53p*;t brXaQ@g r l&m`B8Ki#4>F:#_޽{^@T%oӦiڴiwGYf >.%aGH `j-J03B>GaFt%h@@XAg,YOKb |ᇒB'r . 4-D|>w,h ' g9c·G$A#2`DB_`lRt>\ ?n+\}r}5}A"I`C]ehᅴ͛KXxM\X L=ڵkg%SM82# v!@ 'l!l>1ɦ C8QH|GƍhrCa2[!`V3g`sl"!766&>:6?ZH*)*89aӥg@h2]cW;حTRhNaKЪ.:Ep ϟ[~DY* GnhQ헽O䙀`ac|ܹ)Q~}yy ӤQZP 7uY([H !Q&$ /iyTS"A h!=ӧhF`x) ZAe :Y af>Ypa{{Iу РAY \}0!E1B$6b~ ^puHm؈DЊ%{ 31ni^+ B(ĸCDɰ͞=;qvƅRfT@ɌQ+3Q$@ [J D,!g!N-ߑ V3f0#< y.mҺj-*aֹ a4>9fLI)vI]t; \*LnV|A(KsOxPS7dpG&wUd?[cٕ4wěa'Ҙ {Si (Y? Lӧgyg/GCdG._L0k첓^(AQͅwEY*!c ;_u2G`ǎq})%G&50^{_JO^O;\D5$l߾ ,-^~,{&jX\eAt6Y/_pf@)>̴mVVg |zb)V vrǖQ,0ghC 'mkIKEXC=T4wRځ6v8A @fOQ0'X 5j0 +9zW{8]u&ML*<@b!߱C[`;m^4(oGxDkL,r!h~C@;]&:R VD >5_wd sW^);~XHBAąf xf4uԑ@@u"6z 7t !lu!̍5JL1J,)$ȉ>4Q'/Yb݁mԨ (Y \2eСC+\bbY[6lȪiA,V AP X7.6%^]"/P@Ս3x8#<ľ,q0Af~,1~cIi !C#M >ܖӹ^Z,)qꩧ5IjJF$l\]3a„dE4a/3c9JnݺAaS>U@~De$}k*X<l ^<@[G3^N]v8֦ɅX! B 9AHgz`ga1&) ΂NF0'dlp7{`8 "%bHADAohyS+f`'ldgyF4rX>sAKкDѧ' B{2 l4rɹ_J$_1SfT@ɢcnv] ;l7v C2zh6`PpXvS 3.ؙ# |IG"pF=l}]yW"jDo]Ղp NG4.3-IRj-?˖-sh3g9#$10a h!)"AB(E_y橧r,)|D! eӆ'- 1䏵7+˜lDI;,'Gj" #'Ҁ, Ŕdߟ5kF9XH˲<[0Vxו0ޒ/J$;cB\x?TNԋf͚U4YJ} ~rؤG> Ji&Ol)~Ŋċ B?Ƅ{^$kNy7lً'%\ڳ7z5{wY ٿGޡۗ}Y܁>kƈ}ʲ3}>0a"Y A#!҉v`#AcbnG&IC쳏ͻVZ0TA5c+Ilwέ9( L@XӾ,^|rMbVXt\v&q$5<UJ!d7Aa] ڐq"JG;v1,<[qںukb_~%}'EsBbY`sCʦ6/b>'@lvm5dSM X6 B"[E]$cB }G΋g21Fl+1ছn"( l>(۩S'67樣UGz&_ fC8J D'\pG\ 3Q"j"ziUDbDNb< g]zmY0\ @*U)]^(s"AKbm_ "갦!dIX|h/w{ `l }]f|ZA{LpCHX!ȇȯq M V9B#F׮]=xN!!'< \Zbi]v. mq jţ?@Ԋ8^l_zVL* AN! 
> iGO>k׮M]f|V: '5'ā+q_d1Cpk n=zb Ⅎbn/f)|M&csʔ)B/yTv !<_a.sM@Tc5Ȅ4 dlbyW%$Cn9ZM86r.i'(Mn?ɗٺ[Kǎ<6, ` -8-摟C.'$ի0 ԠAID /XypdA;PRc#gk׮pe0O#!"& ^ɗ{}ƹh${5d$pOI,-ᐭb3d&_|񅘆U^]LHZ M:-;'w9CJF-qd $bZ Ha=2\Ӝ?DTPR0P/w2+Wvp‰;'K` M{!8-6Mn 4at6#Rk 'g5AB`3 ZB)Fi)+tX "hDfAVu`";Ւ}^)"p ,vkN+ n"t7TfMbxZxfu >ϒ0S޽%2%7*YDmѢ$jZk*VH69hl_P0$tʳ>KOl+Z)JG %Kz;yQa.$ C9;G+Cj'C ҨQ#TD ttK6z1Q!dg49ijyҤIڼysڼysZBZjUx.4y2,'xCJ qdj۶-uE;?,Mv0YvKм曉kҦM9NOA,\u3;#hĪVJ3f̠G"Me>9pE-L&z#xP#k/ |bS)(*YP@ha! B1 FʯoɎ_)?j(3C2$|G~ބ2l۶8_ :t B 4&?,7s, Mȓ't1ЬY ͆aI_x @ҥ%0F 0~$W JQ% 6:ԨQCl~!]ȑ#iРASj/$Ӈ "k ʽ1 B"ma5kc^{,p_+[Rn]:쳅ohvBUTVZ6i;EډiӦoȥA0ZsիW7,$ 64k`wN(ʼn5c ?焝ن} j(C*ֆv5 =k65*ǘr^LZ;yc2ӥz>L+%J= 4 뮣+Vŋ7bŊ2b' W* R4|6gϞд#+mIdd?S >ŀHX1!3|D͛7|^pO?H)w)H(GyDv? H哏9PpҤI8H܅M(_"0N|AVĝG HڥKҲehĦTR9$… EY}]l@ W_e{~vᇑkBX쨀'T@J)~$jGT O9JBz7ޟPt|T82f`Z۩S'BaÆge4{` ->y_X5(_+<(2#̻SN!dD?~ ô#vK,EÔ(%&N~aѬ^|aڶj*IdYV-qh& R c|a])=JE;C0};W%]8[D 16h˖-`Jpw߄'|y"hP$'P6Tp)]#({AC?c9F?~+h۷o+ʗC`#V*&&^0ױVCU@&~ /<ؑ_{ęF y%Eo=ʗ8wZ3." _c=&X Wr 40Lg~gӮ+ |vCݽ{w1O_&B3.p#3^gOC79Nmʹh"ͻ ka?10|U@pNhNT8I>6n(~0+:%<;ȃ`-"- Qv !0鬳"!, J@ɗ']t#G#xX~mikriӦ97|#C lh"`={3/@*g￿uk"`ހI)In۴iL~!F8dXE,9".]j]Z"wxqj8a~𰆓޽-oꫯa̅6\wu{ߚFfРAfʕY+ ;LMv)૲eZNVkm۶u$0|@9:Y.!. /QF% @1hE ;|!D䦀-+ NpI-p=5kаaènݺR&LSI+ȭ ӘJERJ2^{5 ՠ$Aw∝4%_*a_8}q䏚5kj1c@ǀbc7ݜ|tƦE]k<ņ.2 YZAIDATœ.]Z|'ooФI\cN:29+)"`!yfBw:BC2?Yn("B k׺] ;>_ (K/eME{r("8{-t->nGDeÏ1E%zLPE@PE@PG@@9PE@PE@P!E@PE@PE@ *+E@PE@PE@PEǀ"("("P|ʈ"("("c@PE@PE@P| ( eDPE@PE@PT@1("("(A@t2("("(*PE@PE@P oBQE@PE@PPt ("("(oP7]("("( (:E@PE@PE7F@Yz51btM7Ok׮-v O?mrׯ;tWl2:t(6ի_ٳ믧W^y6ia![z" ^=׼imZihѢԅd8ֹC7?nlX̼Km6g֟avB@h޼y4h 5kV\G{ԣGڲe,B˗/O3f̈Sң>jrW?hdW {_}=ԫW/Zfj|'4i$z衇wMZX{>e_ޡxW5o[_{9Ffht뭷R:uhȐ!ֹC&,X֪U?|:.W +Nʞ{I\r |q}_S  Е+W^{% m8Cyw[^Gq]}R.hE^5kR׮]m;Ow*!sͻ9՜2e N?ӪU7ސt~l+s}xoy}:Y9c#̛bd|!X='AM٦M8-[ҥKKŊW?6BYNUeXV|p]ZNn$>r+ERE皷-/^{~Rcw駟.kd8%ֹC7?86,x|)X;ٮz"2e̙3.\He˖N:I_X`ө*FNZj`swC-&AӦMk~mzW _NpX3zV,A͇.^`Сc]{۷QGEcǎ Rڵc}Ǻ,Ph3| D(QڵkG_|!&/RxMsi#Ǐ(%c؜3%.v-l'ԗӍEܟOq..s2vN+//vxB94)EȄKlۜ;XxOߝ)xJ\KϿYsdv|ŞIsLʖa<?M7 flB/֙E: ;SN_~/_N_|q1mPdO~ q28s 3`xR& KR'{ / 7az|iݺa"m:$4ooܹr-wy;;~D5\c:wlT'/_+46`xmx,ePc Î77".*3w)YŨ r|a37Ks栃2?{{9 ᅩcߚc;^r XMjXp1,@vp\YxcƌX1wtR9߷o_ O$Ne˳]P_N7ՖO`."@y^oذ!s,/vO ovMgyLĶIӍlvt mރxwnГO>i{﬊HgVes:c8_p!zޠ̱~]r-û-E:xdAD h2xڃĻ:1ԤIsUWa}NU2 Rc̰fQYx@baoQʕ [? ;IL)06٢x8%u]>۷oox|E/ey]U]0ھn: (09묳FwU&P)/@]d̋k^㏧ &Ei L/i 0G]t_r &D01 j_8-b2/,Krb ~9grwEu Qa90AXhN4X8Ē`K3QFa 7:oj}C2G^\ H\5Lc8xql bB1:qfۨ2ǪW.\_@ G33L팍Ls':6}W'2uc}swLi-r]mg>(VXe_csۭje>}:;E|meA}Na,S–w# 6}X`"d0ȵ<(^sv`f;i B @NAQ%ޅIYYra\S>-:ظq㢼9Æ +g?)1;,}ZQA0࣑Hvb=b8F3(7JĻ66M- };Z+M6k]4ݗKef\yNզXuh͛>M9a 0'!0cCKV^&^1 A/v_^62g\F;Xw:ֲ)N_eZsdL6|(CbFk[sپmXxW[|f阀hH:~N9cG ӹ> ܎pRMGhADK}G"8Xg\g:8^EXT>%BB$A-V~ X,6hZT8AC+z"x6Je 3xXdLXGlyr=Ӛ#>iܸq@,Ϛ#>,x?ֱO'Ol~ilvڙvɮqL@  XHEA Koi.;xx ;R] gd[H"a 10kAԌG jǎEHAXs&+RLbfr&R*4Wfbz _ A@u:cS2^c+2K[04ǑZ x1"Cz @dpDHI SqȐȸ~գFvi=vwvw7&VM;NF#}H8p{6bvd9Msc1mͩ|jXYmu?5 YPBF3f_{][+%/nLƸW&߹VkMLs+8ě4ϟ JYO-Y]s}ܟ9f HO5N6i!?tӡJxk|ƊP c#.Xzxx|uclCo&1E_ovA}}vhrL(J`/7.$s _M z%I1F *14Lx"Ϟ=rT2h MK#$(xWJeFid0XOxejb/y0^W&&CS}Ks?GpH1n*/oc:ucD6H1yډ똆 G~C9ABxBŀJ3KӘ}k)4H<4#/8~:Kys=Eil{)/dK`/N\6;z(B1xƸNJ>P͹bSkCu +3_5MCjt$:+ yO(ޜ:G͵jEWxϩ!ߨ)hʁ0eu4ͭ_L}V1_q<9匳Գ:<59*^i{I|APO@! IUTB!͆ OCYؐϢ0ilsAVvE( ꬲRl(uDu%.;n+Y[0Vg!݌9Httk*^ XEegu*[oTQuH!k:ؑXsCB|}-!F*T.Sl9hb]e/$Yh·|Ƭ*c(¡ˑ8_ԫt(|1~NX4%FmvZWǹ@fWb`beg|:br!YYmLQ ib~̾5Vn0?ͱ֦."3T."I#l껕|Jc^%ag|uL+%_zoeS٥[SJmCPKeFݢ9>V%~|̶ ;ѭxF&~C}>;U;/{VE%_M{]c[Գ:=Ͼ!/ZUWҡ k]/]H+G~,QA) NTLBʱ8 dݎ*oVA/r+/Ize/_lf R$})ohr5õrmޒ/v. Q;䣑r&|}$ݿd_-rKO֦8FK2m@|goiއ³Tw߿;uUok;%.]-y|-=|K>fklrthUfŴTf5gs|ݖ(%(C~\>guPiX) `[ˡNKMy)Crp36%;`o? 
#)Koc0>}ujk F"~ke]SnSmT*Z_Mo!<'j˗/}&k⚎%.O>=w--jӡK6%lXkˬ^T?}v͹~oԖñ\s86o1bXϧ{ޮe{j}HKr':o߾-1>C;YۄܦZ˺ܦyxZs8-~H4HhZ7bԳ"-4_@ͽH !chG}{3 g|: sTH1XӕAkK۸F=/Sތ00r:޿E%}kʭ]6:)c\.rt|dulmU/C^$Yry >}C n665M(H)w]Tn}sͯƟPЈޯܹÑǩLQ񖲽uM֦V;|"DvhS;(o?eޮyS*mPDǏC6{MB OƆxQ1TVjJWW+ϱ'W-rkũ֦V;|Z]#[ENO{J}sgu *A6uwUD@D@D@D@D(=PJD@D@D@D@D`j]G!JX(pQE@D@D@D@D@zED@D@D@D@D@!:uTj@*uwUD@D@D@D@D(=PJD@D@D@D@D`䇽uׯ_|7@Ν;YAٿy;289O>]f;VcÇHEn2ׯ__ZŋwsmgX{3rǏW" " I۷UXq2}"R}tѣG,][" " " " " "AY)" " " " "O@ ~.Z+" " " " "5PVCQIENDB`dask-0.16.0/docs/source/images/optimize_dask1.svg000066400000000000000000000625321320364734500217000ustar00rootroot00000000000000 G print3 print3 ((<method 'count' of 'str' objects>, 'words', 'val2'), 'function') count count2 count2 ((<method 'count' of 'str' objects>, 'words', 'val2'), 'function')->count2 print2 print2 val3 val3='pear' ((<method 'count' of 'str' objects>, 'words', 'val3'), 'function') count val3->((<method 'count' of 'str' objects>, 'words', 'val3'), 'function') ((<built-in method format of str object at 0x1004d0c90>, 'count3', 'val3', 'nwords'), 'function') format val3->((<built-in method format of str object at 0x1004d0c90>, 'count3', 'val3', 'nwords'), 'function') val2 val2='apple' val2->((<method 'count' of 'str' objects>, 'words', 'val2'), 'function') ((<built-in method format of str object at 0x1004d0c90>, 'count2', 'val2', 'nwords'), 'function') format val2->((<built-in method format of str object at 0x1004d0c90>, 'count2', 'val2', 'nwords'), 'function') val1 val1='orange' ((<built-in method format of str object at 0x1004d0c90>, 'count1', 'val1', 'nwords'), 'function') format val1->((<built-in method format of str object at 0x1004d0c90>, 'count1', 'val1', 'nwords'), 'function') ((<method 'count' of 'str' objects>, 'words', 'val1'), 'function') count val1->((<method 'count' of 'str' objects>, 'words', 'val1'), 'function') ((<function print_and_return at 0x1071c2500>, 'out2'), 'function') print_and_return ((<function print_and_return at 0x1071c2500>, 'out2'), 'function')->print2 out1 out1 ((<built-in method format of str object at 0x1004d0c90>, 'count1', 'val1', 'nwords'), 'function')->out1 nwords nwords nwords->((<built-in method format of str object at 0x1004d0c90>, 'count1', 'val1', 'nwords'), 'function') nwords->((<built-in method format of str object at 0x1004d0c90>, 'count2', 'val2', 'nwords'), 'function') nwords->((<built-in method format of str object at 0x1004d0c90>, 'count3', 'val3', 'nwords'), 'function') print1 print1 out2 out2 ((<built-in method format of str object at 0x1004d0c90>, 'count2', 'val2', 'nwords'), 'function')->out2 count1 count1 ((<method 'count' of 'str' objects>, 'words', 'val1'), 'function')->count1 ((<function print_and_return at 0x1071c2500>, 'out1'), 'function') print_and_return out1->((<function print_and_return at 0x1071c2500>, 'out1'), 'function') out2->((<function print_and_return at 0x1071c2500>, 'out2'), 'function') ((<function print_and_return at 0x1071c2500>, 'out1'), 'function')->print1 words words='apple orange apple pear orange pear pear' words->((<method 'count' of 'str' objects>, 'words', 'val2'), 'function') words->((<method 'count' of 'str' objects>, 'words', 'val1'), 'function') ((<built-in function len>, (<method 'split' of 'str' objects>, 'words')), 'function') len words->((<built-in function len>, (<method 'split' of 'str' objects>, 'words')), 'function') words->((<method 'count' of 'str' objects>, 'words', 'val3'), 'function') count1->((<built-in method format of str object at 0x1004d0c90>, 'count1', 'val1', 'nwords'), 'function') count3 
count3 count3->((<built-in method format of str object at 0x1004d0c90>, 'count3', 'val3', 'nwords'), 'function') count2->((<built-in method format of str object at 0x1004d0c90>, 'count2', 'val2', 'nwords'), 'function') ((<built-in function len>, (<method 'split' of 'str' objects>, 'words')), 'function')->nwords ((<method 'count' of 'str' objects>, 'words', 'val3'), 'function')->count3 out3 out3 ((<built-in method format of str object at 0x1004d0c90>, 'count3', 'val3', 'nwords'), 'function')->out3 ((<function print_and_return at 0x1071c2500>, 'out3'), 'function') print_and_return out3->((<function print_and_return at 0x1071c2500>, 'out3'), 'function') ((<function print_and_return at 0x1071c2500>, 'out3'), 'function')->print3 dask-0.16.0/docs/source/images/optimize_dask2.png000066400000000000000000002220161320364734500216610ustar00rootroot00000000000000PNG  IHDRM_\sRGB@IDATx9gA@2Hɠ$ `0"J2d% 334_uaBwOgwfzUOS$L$@$@$@$@$`/Q%!    +HHHHhڲY( @J" 'ut]Ο?D-9uҳgOɚ5%p";wNK*%o%uK SEN p ɝ;TXX< -˗/ŋKZ9H@`РAJ͚5 $G/HtQQ51+0@iX7Hb@lٲɂ b[F{+ XN>#g$@$@$@$@%C$@$@$@$`9#g$@$@$@$@%C$@$@$@$`9#g$@$@$@$@%C$@$@$@$`9#g$@$@$@$@%C$@$@$@$`9#g$@$@$@$@%C$@$@$@$`9#g$@$@$@$@%C$@$@$@$`9#g$@$@$@$@%C$@$@$@$`9#g$@$@$@$@%Cq|?9˗KϞ=W^z cn طokN9bǏrY XMY+2|ptܹ4iD "ԨQC+gA$@#~z߲ea:uJt"Eɓ'V. "H)VZ% . II$yr9p̛7Oe&ݺu0 @-#<q>m۶rՐd&vk2dt-+ۏ>(V+VH߾}%E,Y2_nZn޼)k֬u.? ]wB%(jժRdɐd&#vld:)??P t(Fu 5|X/oߖ[*_#F87\.]$?L6MeG}T}* = a?K>_~] f\bs}g͚%}Q+/| EsΕ˕+Wdܸq駟ʮ]E&ԗOp.҅@@F%o\vMvuƒ/R`XsQԩ̜9S|Mׯy`&O^-@0:۷o`n޼Y)b0:˗/n8%JY*ysy0zq>|UHGku[nI_7|#cǎzJ 30L,6mŋѣe„ ?PwS~RٳgO/%JDnL$8a.ŋCi>mӟ{L토޽[}TOi>͟L}׼yswϋ7H:_1Gʕ+???'nݺ>8N$' x -ڞ={oe˖,iƥ/_|7nѩӌZyU{U5C՟_-UuL{ X_&>|$8N`iʔ)L2~1ec͛W6mTMcdC'K&?FYVti9t~zjbiԩ'O5ZyH Џ UP/+:tP3)W1Hz_?6mZ^hQ|#H%* PuxR|`*_QS(+)Ԅړa(~ 5L#aE?ޗ`>($y* ؚ U[76]|U̔zy!gi1S&M$`"՗ K"66!RX`7C,Xt7au/V#j@,Y_;v,j_|C$`[ ,7]rm %UȂI B?BYm AJ*8qԮ]o^|Y?}b… * Z.^^WOȋs1e |Oaq 7ԩS  {tY/p6L$@%sgG@l#Gq H1W q3|}3Hxf"[LbgyF0]6EwyGF:F1p@Yh `(Tg}/B0:,Y"" cmu apc.20F{f!tzFeRJH$`>|j3,B;l܁oc1#*$J1cE4g>} >iȑn:>_/A,)SpXx8f"H^TM( ?F#BVZAꫯEK@RLgΜ9rBɀXə2e %{@yP%<\@zR#[o R'V% E'!88?zRcZZ1+#0.\ش+}I|Txm8>%LHB!~ ;H!@C,*ƍm,-E##(?rȆC Q1QVDN&'eOa)=1fN$@nSğQ'W0 UX$_Cb H%st     P  K|Y: @hHHHH%@C\,HHHH D4TCl$@$@$@$@j._N$@$@$@$"!c6    s P5/K'    1 h˗ H elAo1cD\'۷oŋKʔY]r%L( 8ŋSN'NHѢEm!ϖ-[l!.Q˞A [lOȅ _EܹsqF9~͛א2-QFRXpa~p5U/;v찅vGj֬)SLRVZE\ ]ɴ wէ$` ;ʯn.K.5PB$)7oޔ9s￷(ڍ@}T$qnܸ!cǎUr/_\8N L$qSFjddPC+PG?ҥKJ)Rȸq' QF)wLtZJ:AX+ ؈ U5Eq&#GPanذaTR Dkdĉ> 2 ŊIhڠ(s `u)S7hyfٽ{s$@6mX ֡CJ%@CճMOō 0uT~z02zXH#cߒL$e^&AI #F@2 x͜9SnݺBC7~ ltl gʜ9s덩 Iw興"y x U585>$#!qfI$v:|^ $Hjh $N - !C6- W`7E; |ʕ@="@C5!2@PXB.H#LETq9sF.\0?'PD3SI $6Uׅo.: %pUٸqc~^dIyN Csԏ&p2G{iSZ[re9sJ\Hb68mT巪ٛEJ4iC|%JM' I K,?=8qB-\)SF?W H@b}'O}*XX8Ƨ$@$@$@$`g4T:HHHH%mڴ$UT} 3' )|rՏ\R6m$ΝSuϟ_?}w E(3<՗ٳGov{*U~}I2eJC"@C5BY0n:5k̛7OV^:9s9F3W`A %]ձc( Fc / ÇKʔ)bŊR^=yGQ& k \|Y.\Ko߮(QK;ŬHMÐݽ{/Yjƈl,YTѨQ#iܸ@n&96o "LO>]M&:uJiذҴiSy'ԡ|$T N0AfΜ)K,Q>0H4h|` +\З,ZHΝ1ڋ\=㡘lFA]7NU1zQ{ќٳgnZRF+FLH#KTTXBDQHL;9p(+fg ͛7֭[Kڵ뀓u!@C5MaEo믿ѣU#lz)[ectAԩ/[yQ/H;N:U%q*{4/ x5۷Ox߸qJ//|lݪ7rh& y1BT`b:g}V#`'a'XbGEmO##~aÆJ9E_ҤIf.ҝ{׮]2|p2do~*T@,&@Cb.LǏʏ?DlR >aE:rH##oZ- GS}ɓ'xڵa>(3f.|[o)#>cƌDDVjibݻ5JzפSN_Ƃ2|w*c=&}TZ5L ,1ХK&„!Џ#vJ@P8 :Hr`j {{4R@ث01E!ƫV` xA/^Ba!"įD0` СC*|ӕ$@CL,;d0^}U7 b(aG `,,T'򰢹RJ;w^$@H)+)8?COH 4T͠2C&R/^ _TlCئMzL2bڵkeҤIHEprb[F&p6RM35<1G52X Xs{janlC`ĉjW^%a:PD(X_l~ψDN&?XJ,4ؑFNV˖#Xcf͚)7-p  EQ$YN_5vYT4 T0G.Xjm۶7|S~L$D4HuVշ8Q'ɌP2dP۴bŋe)6m/um5 v\~N#ݬ5Š+WnݺVIH tOVA9zawQ+C/9%,XB!sҥ?p|IjL|oX̀';wVSt\`x+M O *T k֬\$5_ڜũ]؆*6^پ}ԫWOO˹sB-)򓴓^%Gjq1|} /37 Vc^zImɷzjs&<;W_ɧ~*u3`SLQ;K/=L#ШQ#^Jk׮]ƍٖ U6 sYt̙3Gm]:ujw)2m%KBWZJΜ9 L$I?ZtYP!v`do]?H 4Ts"z<?Vyl-L!*b+6`m"W^^xAx  My!fEp 
`"PM͛7_W7b$;09@tTD]h0u%)BInڴi#L#E0V 5+ $F 62K~G@JIB&'O"EE5O/T.k)<̈́K #rp>|X ŋ+&^0Ubp`"3L:U-)H5rdĈ~(Ǐ!C0VDq PK"i&U! m3#tFVo$}7ock .T1ї?~>Q[jfp0L{lc { `GYf)c'WYjf)VƖይ99%l)~+ Q}衇rL$kX+L ̙3F `աC-pH ٣Ge~ż"Ppa*6mZuL$4TyMӾ[ԴiS5&Md!_3LӾ{*b%2@(o߾^yP`9 z`& + $ ~lM{ 2f؁%o vR$y"={v7o_e-V6A.S*Ԙd?h{ՖޥA%н{wh̘14R2gάabl mIqTak~z&L )S'l.*crA L&H@>}T"!J<~Y@ڵkjd5wmhzITZ;'5$ԱcG !؅B<q* %p  nE˝1cƸ PuwݹslNxn*ۂVvfŊRhQ7G]$0|yG"^zY>8RJ?͍}n4Td# ~4jH6G޼y:I@@\ڵk˥KL$m6g2j(A&Hի/(?Cb;wjOsAΘ1F]S:밫|̰͛7] ӧU(˫3ikUj_~^S2?p;|prgz@X]vt< ֭[Pv0N'OLv/_ !OyѢE!,N$@CՉf/*n:&n,W; Э[7Yl#Gw(E-,%;?"C#1beJ9M瘦zPw8sdޖMdP&MzJMϐB%wd͚Ub&b*6|>iܸZsbq6:—~ժUjw a>O/Bb۬29@>}$C ;8KpJ:ٳgWLӮk qDk-G_8_rE.]~$_V%+#z1)Z`T3@ \|Yc{U.tkU?GT0Ν+}5n"н{w9x 6MjZzW\'swU8~swHsy` 45uڤ.ȑ~R" ƏO&$@CՁx2dtQRJNQKxeڴirQb7o۷O^{2l@S۪Z\53 U :)SvNj5 ?U,`J͚5TRҐ@<^~eYhP$yhڸq 7ƍK|(e@7T3=`ԩS*?{HD)H q5<8P]'Ёu*Hjb$`߿_S3TÇKte˖ü$`lDl$rMeE>Cǔ0vXiӦ@ҥjժ2zh° EV$}6b@^x9yIla4T%͛7fhݺeJi-lP %?@*THW3FJ<3sdݻw v FSϟW۪:Mvɋޜ9sJݺuݦ*PuG;&n.rڵk'y.O "UuY,䩧)RDVN!C/S3gN%hFz$B-ZP1UoݺeHy,$xXԶyf. s؄@?L @Cp_n4i$r"IsdժUuϞ=[2f(j4*lKp-39 UgSXRjzU3@$ ,YRb֬Yu}+Xlٲx@hd<9b#!L$dp$$2-' ,Gy$2V0@*UԂ@>bh 8c78DN'# n,Ov*ٲere%k9 @dɤaÆF&j6 K;w:WmB@\|M$K, ?hMMJN:ߝTٿiڿ’ =bŊa$`ٲeTZ`^fMk+em$`\˘!ظqI5XP5MYtTVQٴ}(Vp`80/ ؆gfϞ}mZ$aAh&pH%bvZqo$/^䈪YPzhP #ra l}H-W7TE`eZW)k" /do4T>aIuV|a$`'{Kݨ`&O[Ypoڽ(_pM##gg%g4Tmim0T˕+gi  o&k/3k­eΜ؃jȜS4T^4puזsP;vLΜ9P1 `A\Z8jfnۻwFɓ'ujg߾}JbŊN6 Du{%} Poۄ%ٵkRPaf#B&s ߿_RN-y5"N Ppa*U P N;xrǏF@7TaD1KXrf!D_FF֚IrH~Cw%!Cə3'GBcA< X,>pm_zUN8a(I,4TcpχÇKL$k֬Q@ ud.0f;sЯm%kj!~i5P)6 $I#Ι%wutf_Z@r)ȑÉSfo\BI`*I0łx c'lr(kN96HIӦM> Ң~O߬`ԇ^7%۷o}ܹsot^{78 U{O9yDu2dWa={2vؐے' ua._U%?{xOCQ$)9\#^*F5S-ԩS#->8K.ժU)SZ*-/tR$L:_L:1/)Z+ K֟u ֯u`:;[猎V~72Bu<쳶 YVƠ\p|GuW+udׯӤIc^%&얾DCW]S3_f2#`0Ox2w͛2|AKlu9uTؓO>)իWkq93ftIf͚%7ow}Wu-3JժU(:_}ܹS~i)^2X{97oʕKc=&yחԛ]vʕ+5kTy_?gϞ &(J*jlZ|&W([{u¾͛ΧM4Cԧ>ciiСӫcw}*viu? ?fc ħ:wU_UqЧuw4CЧ|ZsQ~~;v4ڧ8}!ӌE 4cZ?%4it[}XiOL} |ƍ+mȷm6U |B7)TL_|{xO:iF:Ph ͈#|eqުN[-[Xx }IB-~+MK@K/{G_|̾$uڬkn\>*9GkBMKcT_PK,m_{?$O0B0>쳈hgաl_  8$a8EMIYUuCǴCԱuG~mU=NQ>os0+{Yx e,j#>z[H Uݽ{MEiJ'O41ףGxnйQFc=xvLCPnsBu;ʰ27Χ.YYAK% ]8k׮J'm$ѯ~ +WCY}I Ϝ9ӗ"E ߅ 2i*J` UdNM/IA }IBuڵ/NZ[ǩ7rc:95 0TBtAQ F,K>-j%K\ҦM@*]:СCUazMӁF"%,X5Cҟ}AyF4mH~@3Ŕ(QBhoBaP(> fǴUzfbuWD%s-oKҥK|@_1;J/ ޽{f8Z W`eqY+uҏo{%}Ū:YOhQg§n%EJR7A5X0gXYȑ79sӥv([n]r~[6N.b<:MѣGifbQu-W)١гs_(T5 7Zwj_~%(_,UB}cV%1D%byfC_frK4Tˁݜ<Ԧ 6vqt?ZtQxL81v|G`IyBIϫꝾ/(fz=NyA0 WNA}I$Kʕ+jv&ZٗG%cKk=  OeUiULc:+wܦ;`)矋LT`SrTiFB… +W*F) 8ei-owSִ눪VC[ &'N0KOؗpvƔ̰Bi^i#zd͚5ҧO.`j(n顨VXE@FQV C @G8,iU~zv%$wWV |bE;H-^S! e5lPa):]`?\yc2nڹ/I}#n_sg3@׷\F-ȅgmjKg&v|I% 1/['Z%H@a2Vc%c$ұcT׾}{߇~Vj~q|yXmM82͇0MEFc*! !O_:޴kN~IF}AF* ra?Bii* `kR +۶mbn*vRL;,"I]oxGrTX,! ~]sDGxH:a?bPj 5jMLe"@p]\pF4&?]3 ̌H',/.۷o f&;um )Rx J/*C Dac4Tm8V|ymg0? ؍-Wr<_ c5-] J޳g^d_4T6aK߮]]] ěu[lBDضm ?z-F4TÀgG); %j0;7rǐG4v{'%G7Ga)Gs*`jJ{={v:zܹR@͛'3gJ*AN:ҕ+WQ @_khڿ–Gٳg] HuS#-G"EȬY4ƨ@IDAT:uD,#L'@Cm7ݻw޽{.@DGG… 3SdNC52Yq0zmiذq$P5 } "Lc6$Xh\r 3, U>~ae ժU1ٟ UQLR4h ӦM ,@";$qCZ@nݺ&M>}z`$Θ1C4iF)j%VҎ`]-Z U@Hps0a:fӫ|TTT`$%KȱcǤe˖a¬Vj%լY3I*L4)Rjŋts֭C+ #6CS +UƍffJ,iU'L4T2dM ~L$4n˗//%Jp讓뺦uBnݒ' x4T^aI'`ڃBƍب裏ʘ1cl$E! 
`&ɓҪUO!@C6Ma ϒ% :X D?P[?b% /1B~ֈL!@C9mkӦ.~WiԨЊXG{'}2 ˓'O:@ r"?@H<(7{Tɥ]v2l0k ؝%mڴO{C# xP2e/f5K`.W\&{h߾Z &3 bfgXcrj/CԩZ{!Cc!$`W?FSi(P@-%!8fϞ-;vG,l?wҿb$`L]xQ^u`axweٲer0Kbv0@߾}aÆ*~ydP5E7|SM\pƒR4Tݷ~aɞz!Z|7Ry Ԛ.]xSPuj˅)wǎU U $B~};&{XDv#TlҠAFy$L`a_ݷo 7 ؁*UpT'doQ>2d{ K L}]~G#kK%姟~KL,@\M4]R$***lSoV#W*UQFTJ%[nOƎ+-[nӕ#nk`͛7!f% 0n89j\|2 mfbM,#WoѢE`xm pDնMc`_|vy浮bD1\vM֮][uW\YmЀL$)/#ӧOMFJ k (ƀtt)P3"AW^һwo>0EAu´ifbH `pCb̜9sʟp>h[mX(?ꫯb5kXX3"8z2R?C;.Rt kBѝJ'lBw66`RqU:_|BH ![lYAX &$t;ff*V :*Enݺ*FT\C\Ӕ)U=Wxq&0|w5;wn3bXbڭ Df} C_frN9 P̘1 "&C_ Ԅ99_:u$o;v̩jPn޽?~nkhPD(.\i!|T`yeҮ];h)59BYLTZUf*`?o&0 / fRadne96$w< U&0ٳgՔ5nTé=$ ѣ7ZDF6ÇW4RjrU؈&0@ǎUQpWcr/Nm y3gԫWϐ2Yw ܹSU 4Ȼ <9v Šl7K,#@u&{F͛-gy!4iݺ,\P֭['緱_^d̘Q,Y"S`X0da2e U 9Nz)j4T]ݼ)002d ;M4bD ~zRuٲejGҭ[- Fx=i| Ϣ[*aN4IvءXU/q/R]COFi`5Y|7W 6?6hiѢdΜY37PZK`qհaø*\[yS7ސ_|Q~iٰaC| A `DXe.F;/ь5JyeA$|r_+ү_?w*I&p iҤ2:V\I z/F`4x03Ꙧ6PQl{ z!KfQn"{n?ӽpI8njpupbӷX\Huo~.fڶm^EY|hG'P3CT\9 s:$jR["bD=ǀ 'M Z-\)?< $g}VL#@CsMnׯ_\DJeT2q:!H{tTXٳ;]%o"۷KڵC3f`TY;iӦI͕|ܙ}UC?S=Xf A#G=jܹ "EFOo߾*4[֐jxbRҤIB}SQj2!cLRB/Ν;[o%.WacڴiFDF(P,[L`"̙3,eـ^bСCe„ o@*`74T".OcƌQFkƍaOڵkn({~~doVjݪ ~͚5\sְzvZ6Y8& !j[#CUׯ7\lUKGF)5} L2EU&3fu֩ֈқItYTREuD FoF*$Q-cG}$?_ł&Lw3OF8p ?zꩧ":uY#4篿_zU6Na4 @lٷo_7o^e"8ccԴiT_2ge-V?/{K'EC|Ȭ$Bh+fmEH%nժZkR+ȑ#2~x7СmVri;y) ؍޽{U68qB<3jW%JZrDEESNIÆ U_ҬY3I*W0PO{j4aE'}1غuki޼+V3e/dɒETvZB|$i>}Z|h#3 c M69r[v)}'F: U原:^7nQL5nXf֫WOeɒ%*Tn?"'tZR^;xL:U= bF,N Ä>ӧ)ac+ ؈ U5E1G b:d,´xʔ)U jirmpw9Yb,]T"(*U|衇ΙH%6YfߏM>P:CC.AXtD6%@Cզ CL pI\7l٢V"n+\e˪+F^]&XuV'6\=˱ApR 4 $~ .0\a OKJ.V;n5!3g-Fї@E+7uf @CL%%p% DlHصpu _|3|p2dPĴ;FpqC4}ttzE((?L=zT0PO SX 0D$`o ޏǐ \П`'f?ҦMI#C>!f_Q]#CWԁ~ x؆*b(sE?6f jQƥ~82.7'ܨtXH e"p .1GTăl0 F/ˈmx-Z[; )4T=T9F#؝ej?? #& bzB"%GEayРAah(a&KU $Aj5 DhD$@lCELÇ璀 D%wVԉ$H/_V+u 1c;'N(qe"+M`},1bDe :t Z4c A$ 8@%0l0ޕH%~zz_? Ы૗ PrS ^Z>DL$@$(L\tE3g Gn#@Cm-J},%ʱ#!6+#GO*N1ue`N4IW$lv*m۷o+4DZ"2 @R,Y"9/ĕQ1/֦믿T 0aBB_8 5*ִz`&*^my6n.(SyXdD$@رc`(#_IshzɩqP]r e 9s]0QUNIhό9H@GpbbH" %<]6V誄qp#nlUd:XݏRÊ]܄HH >2ydiBjc$z ?ƹ^u*HhժΝ_v9r4kL?^T){! 
`vUΟ??uVٸq:tHvVYFeN:.\بX ۷eϞ=l߉'͛G%Gr]wقdڏNﱧa88oݺF)Ϻn:T$"]ڷobnod+RݻRK∪ݚD ::Z`2D*WP/t[\|KjSW~#1\payaѪўhڳ](U-*ʕ lC& LH͛7y|C$@1 > R]+VH߾}%E,Y2_nZn޼)k֬O+ ,\P>#gM \I>lV*˒%Kʕ+RR%iذ2,Qid޽1cFyҥKю7nH ҳgO gL$@!`F__?bԄ'@C5q>CWVoذ!XEQSL)X0Xo,q^D,bUvJH~G$%GK#@1I4T B@/TkESۂOjw˗/WGsΩs{ ܼy EӢE 뮻dر2h 駟J6m/$@&H_w@E_pAko"a}ǏLz3ߓh7Q&pi?4ŋ ]n>mO[BOXuhS}Z8)UCiq}^۾__i7"O?>C(?ocl,D`pKPwY̾gyƷh"_|TsN%Kէ |lOW͙3GڰW& r;qɠ,k K'NHܹE3TE_d$0 `آ0!?RlFhFڴicr=b‚f@|VA8pKZ]i } G;b[-L.\PpL>VI qTnL/En⼸F*g"&@z*UxAg$bE PuV{QZ    ij*J$@$@$@"@CYEiIHHH3hz( 8 Ug%   Ꙧ$@$@$@$,4T^HHHL2E1 @._,9rh?On#TnkRc9/T ?m۶Y.+$p>Sו"CuBԀ @C,&O,>DJ*=@= @HF|y޽iӦa&pnjMƍ2|p#_IH X5w\>D'y U/6u5B,X B#Gʕ+  b)?rs,,A4T=T8ǏOb̘1G @ ?.˖-K2/O 7֥n悑#GL 8faVX!qGO;j(GFI \4T%%]֬Ys@Ξ=\< L`ر239JW[zM7n.(H3031z30( !@C5~$@ aB7HH !uքVÜOhdh< &]ffΜ)Hw30 %< O4I^)}zb;p-;]*#=z4(Vbw)R2`^H#p55R:uj׷oVab} x@2ԙ$p ɝ;,^Xj?̉k+JoRDA"]"(*#*H+w(E J齗{fn6d{6ɝ3RE, ׯCPPUD`q    K PQ?+'   Dj 2]͛'aϟ_+&w,XP>gΜZi\9w`7={Ν;eӦMmۦo>PZV*իW7ܜ($@ %s2{l=J(zwJ| VM*PROƍO?T`HƍK!Vp̙YfoU[a1$mTTMك?(=z+ܹԬYSҤIcY^WY飷 aڭ[7m-ٲγbp (#G *L|4h mڴҥK[ڛǏ˘1cw޲o>osQa!T!e$c |x X&)UUȑC+?u&qI`ԩRP!i׮TXQo 3fB# )RDvBN"@EIŶLŋ;hZdXB۠\h/r:m4ٰavmz_PH#k.y駥VZDe]WݲeVao6V4Q*"lp {)KPBk4hx/^Xdf̘o2&L0D$xs޽{eٲevkذ8 & ؝U2X`m9AX;:2>n2FPvmWnZJL!<#G( ؕyN.&˕+W Ən#8ptutRϤCɓ套^X]1H 2rE8R7^uׯ;NȄE!31.=OL0nRR+"?C?SH'x#ߜ[TB6} |Z Pt2;'@r锥E"FAF(HJ.N!0̆Zl)ݻw\+F%) QF s'RRHNi4ؖe˖qH3dVyv+"H) #yweĉ:ƣų8p=_>C4iN띩U֜9ss@0jP@&U!zL0Ȓd܂sH 8_0 U8lbխ^cMCĉ:`eѢE6m8gxu: V{8`lSXQEd߾}^sȂ m 0C$ԳfƆ]8޽[g)ߑ \vM@`?%6(_~]+ XAᩬ:"-.]hQQn$8CAi DŮ 2r-DAFXC$Q(^n6lg#?51!$Ud̮UnZ+әʺjؙߤDҿnԡ>}Zx )$EQ"lVe޽{K׮]eR@JtwN… tXw@PqC/Ue…:|UW 0US08pV:v쨽sR *=z4mT;V)W ]biRH Xm&7nYg@XG@Æ RNպ^"={Vrz_~vK8eR'w^)RN/o!$ ?tv2~w駟 Ԥ G3gLż\D)a뎘ț7tIz%ΝK<B$@g5slB{#Ç @E2p4(WPڴiø!c4C(;w^FAjШxepCÀ;0`<JgCŋCϔ)_ȥK,l zeOXB(?iJ* ҬRY $/w+W@غk|AAf)B$e&/ڵjO$-^X+&"H1M&o1mkCs!u EF==cʞ7N.\ XOJ*5kV2dHp|+T yqPT i|($ITT#IeMO=Dva,@'OsA)$p"K&Mݨ!r%cƌJ}ĻzwmCҥKuz7A JȾ}䧟~rPTɓ%UTRv ᕱtM:ĉ ߘMDYi̙ <>xΘ1eI&zժU%]tnZW^=eÆ k9$TT4Zkyiz.lUBn'pTؙJ1@b${2w\s fi$G %K)Q'_8B?~Y" ؈իC&qD($)TT#EE1S9"ʕ ^?2eVZ d)T_:K$\Rti4e-[Ļ޺uz؅Uƍ{)R:7xdӦMȒH0N )Sp@$PQU6-[}v9, 0ݼys 8V˹$ńロsI{^ۓ]TRiӧuZV~oJƌ#R 뮻Me7|ʶ@Hvޭw=b Yll۶Mԩ#>l$} F* VAv9/ʽ+|wҧONs1z߿}eIFb.YnKtS {dyƬ!N:%}#G$^L>]}QݺuKB 5jTDjE0 RvmOJ,3+v(*XG Gc ?ǎn9sf]z*dvjVq!ߐ =zT#%>'9sD5 Fl(SC6@!k QNG3feի`֯_/9b!(O'NcJ6mt&vY\Wi+8Hƍ +  2ذ .96iD1Tؼ hLߵE:InyFmߛM*} vh/|l2rHP^|3K.IFd…5kVG31X0b͙f2H*#'f *Qv|^C|;um5{)0=h޼|'2i$#n&PPqs;d)K,?S پ}Vx Js&pڱ뭷ԍٿIF2d  jӧO3?p6\Ev +1)y lٲ[o"y#xm߾_` {Ve˖b\&O1`~1S|#v,P?6 ؊?hqձcGCl*W)Z0N<s\) 79V%)ScJ{O81\xcsLj^w @IDATB7Ҫ}-~a}Zf͚ʪr1l0ey$` J3ßmQ*;D)Zkc(gFC~T7 :4V{zoݧ3gΘU$!Qu hojM׫q / U qK4hcbUcR`E_f f\&Af\x$7<28u.1\oꡢvDo}HؒVlz|%b$B`cW s(J;#j%E۫KUk70;#Vb01)%&?Pbփ1t͚5ڎԿ‚ +ǛuUw{mxrdaB>w?א'8租~-:ߖ}xPr![?~\a{A q8[MFlܹ3 }b&&r0v*ܗ M f.E#5~`zf @D/-lGhѢ7]*63jW^yPvL*~/_>^3TCiӦƀb>W T_[n]֟xrǛ5kf4jW` 0a{GbW!1ݯ#'ۂ[VathC\O|լ6t]NU&!;~[Gj׮q.AK(b& D9A3QHdd,Pl?;._Xh ԧN!/dmڴikey$`j@z4U;%hS$T-cKH<QҼY]3*2&9;riS'ge+L!7(]vts웲i}l v'*W*W\'OQ ;IKNBn&R <};W+R^ SH RF, O>)p#pj}g 8Zp.1,^*-QY" ?**ؖ@*UDԶuP`O6o<_ WTI+v{_?l#*IժU]5*lurʒ"E 0aӚn.]T'@0 x>+/CyQرc{ёV)+*rt7j(4-"EȽֲ$>5jhl(Vs ֫WϜY  @E5D{"N%|*CL\pXM͝;7< ~ǹ}1D:uNUʔ)Sx+WH%_|O] x*!4ۃ>I"_^J,)Æ jj L2;^mgΜժUo6;^z;#[l;ӱ`I,s#HSOUˁ`ҥu˗ l~)$TTUHvۇ}UOI7ܥK&|6?%xv7|z}%~ sPQ5#K&'|R>,N:U;.T^]`*yfތ5zll ۷Ol߾|Ǒ^x=l<9lo!C7&лwo\1 ̉xKO~& zH ױ? 
)$`.z˓E`̏(Pn$zj3GT  [0޿'t]g0aMDH F9+4s='زC*lmS#0^VTIx>+ 'OQDȑ#c V)3g|gү_?)OG^c͚L&pe}s9t萬\iA_EG֬Y#3g6:#midl2׵;ԩ##FGk?`B3EYIzX\9I2 dnR+A5j *wdࡐ }ԩL&17tScpwygyF۾#?eJ%dƌ:$yY9 I vPڋ2h+V6ey ,/'رcL2EƎ+6loYwrڵ!Sգ>*-Zǰ {lٲ믿s^8p@ǖEI&QI۴YI#`-ZTxie7ol2d ڵ֭[$"i_%`Զ٘~&Mٶm^MW_k&p#dΝZIE<2F߶}ex* $HLPXq;wNoWΜ9S͛'p2&Dsr#,N^Ǐ6)IӧO,%|nРAʫWb _vƌ.T/3Ug[$K*W,/^Ԋw; !ۻw %$r:9D|[YYΝv_矵Msҥq?y?ydiԨԬYS;X٤h…RV-&zk   pE`&Cď?(ٳg~Xۨ\- XbzSPR!pB܈}[YŠ nwq *BPHmJ*%y}5!AUR6G}$UTYfϞM%m(r K gߨz.\pE*9P7RCł_*ݡ 辣S7j5kflݺ2xL̙3Fw<_m {v2CVѣG}OTa 倩犾}ڽlw G KcOPNVƪUOaTPAXTҥKxoqox_j$$7o6 .l([KCy:g˗7Jb۴iSC$zn3TCٛdk5IM61f*2.]:C S TFmU! 2PH!:dLT]wݥ'k(j:u2n̩uCyPKE߿ UH" ܄=]pM*W8ZvaeVU}2dq)+S e+Jht1?{@i<_@lTTc;—#G#8[@=jӓ98F{x4ۈ}SOi[*gS,_;~xC%G0T.bŊӧM@`Em۶;ߋVQ^=[b*ܓq`BM~Vs)X6uE<q0[(^8VB:;^`OYSAc57[2&UҥKafϊL|1aPWJv?:{=ZA衇 SwI. YN0vmEDa9<|b~s=7C𛅩Ja|.sUqoB%.̇PfD!TKq/UwZq_x*-;UNQcxѢEV7uZ h{U1E9fJԥV)EʊҡSnڴIWZjP_[8& ]{7cJI0Y`Ν;|Cv"3i޼]Q!D)Zvj m܁ D$$@)Up[ GTH b3[vwݧWI3}x5XQۖ$`$*OQ:vaZv(sVZYE j%v:J\b#\ATee*#"Ve)$`#FC-a7b:({V(h7&&0|QN?r Yf][&~e_]H3{Rܲ%M6-|Oj)~Vn6_ڷo`(Iʇ0@+VF9QxFzuuҤINPT4mwYrea2vXH[Evff=}O` NTN1UKHP Kh3VS+\p ^GQUV/[Gu>8 be?[>xX= E:͛U/.)xI F8ܞy]>DZUo\p2e؅fVղطo_QªRF~V]*W!䧟~VZyJgm}Cv+ hQDhQ`L!53g78xBNxdԆ !eqHEj j%}f!U=zȓO>֭[ڭi53&Vh)<^TV6AW)cG*V#`G(]PQHb 6Q!X) [qo.ʰ66mhS81ے;`X°/G F#j!ڏbݸ*!Xb:*(m2,˻zw]3fm4x 5VZQ*P|78 B>AׯW`X1VZs4N;8WLFLAA@!+vLz꥝AͬeAe؊Hx CRN: T*CAf)3DBՙ+ Uu!L5PH c-~8k GƸ^$@v*^Y`׵iV)$`7Lea{&IYf0!mZn-CaJH!nܐiᑌTH 4 *R~G:4')\(=RF1 4(e|':[ҹRH&&fDPNamH AJÄVqEٳGPVr՞[V<pˏ7N?9q{ $PT1a'-zFF>,UlH fJ -k#G(R^DKBJ** A& @t\r:O`ڣ ;2?lkĪ}뭷tC^> $P F5Trv!ʋ/W"@@-=!"٧J5,XP}Q,)\@z8o 6"vh#8ShL ؅@r4 p |ڹ q?#-OSzpIѦo}p B` Y(#m6)U$`u !.&@E`a%u%X@x57|gŠ p0|駽q2eʔzU;%sa%'b[o pil+ R}KlR{(cq[(V1ixÆ cЊ+$O<G /WU{ Y  @E5v8N!`N`י )#sN)S`5uҤIڑ1T{qDVmRQm rSwQҧO0 4k֬J*ɉ'1SL%J e8`UW]PQuڈ1 m= zYED嫯_]xb_ yʑ}lٲc6-K_8`k_ H<Ž K,ҫW/v=5*6.BB?e؟@ܹj̙\ra7-E`Μ9 R]V;:o\IThve){ja!p11zkX@T b:{ʕu`6%l+VݏiӦMR<0vYfW&*Ѥͺ&pQ߿ފB`qdcҤIU5y@-Z$ŊpW1R=mv1o!)$MTTIuMXi۶me &l`ּys/k kEqY쩧&=u:oG!:tfWS@PQi60@+7~ - uf1l+vmr#9"UT9WӥKe$8fyРA~G"K ydg$`CA=U o[h!xM u\Td$Zr . _쀴oߞun1,Lm*OSOtN$ٳ R~g 9 Kjy}  WS# edƍ:,V)!f)^`uҥz"25T$!z"Mj  p55l(!`# Ǟv;*RRH XwyԩSiU"@E5,|8N:%>~ט*ҰmT~0VPon9)O  ܹs($T]tѩaC!HI,;lPR%K&B9s4wq^C~yJ~7,b Y`t]B/WzF|Ime{ PQ#+W.  K`~p$ՀhWʕ+z궰~ v3t$jժɔ)Skj>wNU/ B6 PR%f)$)TT#EE… һwo@U5 ѣuRuʐ!C $oݺUJ,P̙MBf@VI&(f˲HGڊ* U!Ll+[l)~) k%&3GZT Dtd˖M;F|ITTd%VRV$k֬kd-{O^sDf%ʿ.^7M4l#&mܹ"Fpv.($`6*feya9r>|X+!a\I]vUïJ^z%fQc05qD:u  D^JHhTT=:vkO>͛ QH  ꔟȎ V*,Q$O\{`׬Yӫ(o O^^|Eu Z*LGׁ}ұw^A $Fjժ:&K3oCƍ$ &=܍0U3i뮻*{@2e-&W!>(F P bv2vX0`J ]gmH>80U3i̞=[6oެd*{}ݧWr\rzef͚% 0nU~}7w}s:ҥKeƍi1*]f0Rh#V2e$3gLeӥK=7'`a0Qe513q-DAD8{ hvo'O={p9pFA& xX?c.0eʔ*T b}78p ]n0c?{o-89sf.5)"8Z<'AD&gxW\'j\z.]ZL\~6mX#F^x!IS8Q>c7v>!`= N/F%M47__߿_2fS%$<ɗoăB'9s ϝs뭷:>l9]ts C9~Rw )7;wޙ vWN}o 6L^ H">B&O7M.&7 s YfUs{ÖہU;ۀԢEv`IHZn-;wy9/Z?nB v*⦺զΓ x=#+V!D -[ B*9^l##U dϞ]jժ%t_أ5ԬGի'^zIB$@$NmڴY֭[W'@E5YA\cǎFB { <:NI,F/ g}&Փy1 VUǍ''NpQؕh-ҬG3gl޼Y'  hܸ|2j(w=4UӑK*W,B3  H.4lP݈&*Ѥ֮]+K.7x$} TݺuX[go&@E5l, XXM-^<^HH\@zHް $@E5=\ݻe\M ;j*yG/yOsg̙37tRVnMǪI)$,*yaݻʕKԩV9^CRZ5>|t]yA̛7Oڷo3.9"y2uT˛ՠAI29!@E9cؖyذaҩS'AgzN2En&gYpdȐAu&vBr<ڵkKɒ%pHڣq sHӦMu@;[nDSF9mꜱrlK>/UT:c;/YDv~I>s%Ydڞ1gk͚5C QH %(… S׶jJo.˖-3lN\rڦW/^ >ij]iDy}PY~{LΜ9#Kܹ婧q+W\J*ɽ+AozΓ'~o>}=]O?kԨVLpݳ>+PF\3gN^/x3ȠAʪq@by)!< uC1bi쥌L2ꆡذaWC9}JY0ʦn$c„ !C{c3f0dɢ} /`(eS9QlY}xHPmN tJՃL.)Y` vQhQ#uԆRtn(EҨYfa(3XǎuRX بPqQ}Έ# g(EX;s۶m3 OݨoC)1+%(Q6|G'Ny0/p}.]9(&(SLX(qC9;akSF0sz5~5O6n6C)z1B?PqHu[ef(eP&UK.2=tL]Iy!?)%xСC ej($="B A-[^~RU($IPQ1mYn,*\mNx&-[*U(GP}*N|뭷1ecjM+!PԪm?ѧO*XJ" ʦO0*qu!J*1,XHE/\Kp 7|\bGEܹs2TB_O(uGDwء=#SCb;_rO?sm冭ux`/asz,Z/_8iҤۇ)RDXo߀! 
`#K|1d޽xb}lբKm/aA`5"|P& ¡?=l8{p86ʆI >2Ar 3꯿09b$u.?vbVv%@Eծ#v!5ﶼ7#ڒJ c 6cR|y6 lONJJO-(% LHDoح~qw #xЁ|qgǬL6FŊu8!ۂepD؃%" O\aO\6y`3N!PQM? V1 jU+"p{ >&&Q%P +Xu{f!xѣGIK̘GІ@s^A!@"!@ܹse}!L/3`%mѢErl)RfXCxϜ9>&MhE_y?`#VI۵k4J*ʀ}|'zヒ9*<>mb!i")= 8p@ۣ" BX!B,Xr}قᆱð7B5o\}ts šaNy3 pzXō;r\WYJ\3"~7P)$PHTo(e.[z2jP Oa۶m[acfiPӧOCW!|r~޻O/pesՏ@x#Cc AAY}e༸rȋ^ ( ;/_1xb X+G7Ce1 s!l:<>?"Jعu0T\፨J5oLSX/x>J?PzXwr\d*=UZ%.0#{O=Hc}bjKBK!||Dx*Y($S!@?(HSJJ(w/\PzZujP\ ]EE5!>W0D꘨j%PTN x1z}1)/ѡCb?'TT#PU߼o?$oH+k@qTB12(Gv``'x UmPC-ׅGʿ=`8,#Oč"ು+D%߇ww\vpRY/ j `O {θ7p$@$@$ V+ {*aQeWIHH e뭣S|  k B1V5\+U;_!H!  @\ܹssU5)󖴁ڃU{cZ!؎ׯ$K̲vgɒE0i׮eme&Vro7,9s?h+Wl׎\|pGvi[ٲe)Sij]$nbXT<wߕ VSNM"O{ңGI6sҪU+uVTm06HHbĉdrTT]>I^߾}nݺ1(|  $AJUmk렢j-[ 6Hm.6HH jչťt`CW_}%K~8y Do߾ɊOj۲ƿKOT[E$@$@ժU%C 2f̘PQu`ӕkڵkS %  H"ԩSQm]%TTcoϟ?/C ֭[|ӶiB$@$@ MNG&@EgVӿ{AVZU$!  (W͛'m]TTcoA%K۴ !  $KL߿0N$@EՁff-Z$[lPY @4nXbŊɊGjX۲&zGXblE$@$@ {BU^Mߵk̞=$@$@Sĉ˖OLSb~$W\RfMǴ %  ׯ/'ON0;wN.m۶C-ב r-*PQuxݛQF-^z)kx" ؕVUg̘! Nq*$6l(2eJu<HHHY.]$3gδcئ PQ /CH[Jv HHb O<"xAE㘤^| I$j<HH?ojKPO7|EG[n5$oUQ[o5o,\P!"]v20%Kf<ӆ PXyCv Rf^,1uTCdG g(3M'N4TP=Kx GNvx(EiMg{ c<U~ ziO8wK{hNԊvP eCfNAMٷT?ȫyZAݾuBA3fQhQYfMєÇz2O:ujm۶&ز. l w9l`D\l1;8 S̡qK3gdjӦMyC؍AmZA&Ѿ 29N\&?<|PHC̙3ҡCvG`zZH8qvU>y即?l)"@EYTk/UTA]N>-׫xFXMڲS 7xCi'۲l xjNm˷~#`5n(%ݺue̅^|}G$w\Uۗ4PQ N# `[XJ(W~A|Ye 6|05^ŋ+6l*D!0p@)]Vz /vc|Ɍ3D9ʗ_~M193*[ q>W/ɓ' ?a;'pO֮]+XeV=sTXpBKPGo6A ~'I-։J"n:'5?bZpAq*D[ #GvO3&_KݺuUV`ݤZ c2A]tAĎW Y"2|{=DxE&;p^$2WU5LJR~xSD>CuGi;-70رΆ* 6̱7L"0`5رcd ptCqsGbtϛ7/iH{aێ]+uut 7 N``*>23+NaR`uݻ2x`iٲp̟?_B1S%?a0Q1u%ح@*\1J83w*_63ץ8@(Ɓ7 veslXvRt*SlO2 ^} "I͎H[U)t[)).N6PaqQ nF֬YI`7] dQNm7d+C0ǏW6Ѿ}me˖J jwʨUNBEJ@&>e(U͊vEJ:f Dv%jFYe*N j~C S^Q,XN*DX"e>} 崢EZMjeD;[QXNx@qd:sL;vFNB . U7 3b$OJTfCJgP[ΆT޽{ nDr匹sQ e`FzrP6;1i$4*-Am]Rӷ ^Nk(E,dsz嗵?XT# ҝ;wѩqwqTZ#`˂,!-9so%ۥR_oyu ٲejr(?ؓbufB)&kS ym`! %peC5~/I)eU6ӡ+aKR7[N `R馛={8+!_v} 7\mkj̙3 ݉H`wbr2`j,rJi.~) |dž}ӁF9SN9+ۿ|r=_nٲ%93+0ؓU{KV!VQO L9֕@=Wy۹p*;fҥ:t j%2PY;+2ʕ+Oj͚59pmө:lhauuXk.| OnJ6l蹕aÆɉ'Yf#W^ 2? #GdŊ?ի!^puPׯ/ @IE"(H9s9sfǃL޼y5HSPeO<Sw`SW{{(Q4ATX@^(l ,(* 쀽و`7 bWDIT5o;z׻˻wwݽ{;wΜ9?Q7/˶#.#(Z-\_.֗cL%3gTc=ֽꥎ~W>&R|!RAFFvMO~]SRbab?pߡCzY/첱&x{9eRe5 z& ;s_|ԏ;6Fe8e~eڋB׾ / )d{@t~y ^p ꫯzQ~zQZ=6̟tIqExʟxmڴiZWr \&.2򼋢hx16 /L[jUƓHK$ËKʾV/=/' zapv$=nݺyQ2iK>K.R/|^l:T;w=z^߿%rR[&J|G,TMڵI28a"@#~Ĉ)aub[([ΊO}5Y<̬0(·Qh={f)\Ւo9YH;x!%ϠÀ#+)2Zq0sL\ѥ2W\EUVk>auMEL%~~X/ %>e! +n|X_Ė+}z m(EKN=T}NVLx3`ӷ\wukq|={+;PqD8g 09FUJTq&?# LlP 2D Gxw8Q>[?8õ G ~[p"Jlu~DB $xmڀ裏v2`k]L oo)I-la%H@Kg"ցo$:$lΑ0}In?BK@ta_& {A Q-SNwY[",(rd# 5:/ӰGجV"Xb ~,oF82N9rdIQߒ"%0ϔ$8[8X_M_B_xM7ͽ7m-Y W{Q;F.k=VTSZK3@y)ayB DlVéKeK~=9\;L Ujy2)68JriC/ /Npl-4>Ƥ:ZKNTtbwT^( _}UEdTԔQϊЉ^ ,~D> ƀ8A-w0#^t8Kc01MC97`:N [F _J; &K"/E$ڗ0ɬ$aXq@`w_`} ? 09STk`+ip,Ғ-VvhFI);v`((tRW|g+!@1"a(4|R(maX f%:" (ŁŌ#ϊw+!DZr &B=c('|6lnr(6Q\ -7F{dƌX_rM7ݔIvkDKUܾS`?"~M2%n%_1!jEhJ g@@IP=DeKYjYC'L[FqS%-31cx(vyg/v&x*19 NWJC=wqzp܋^ˬ]ブJVU4Oq=tfodJEb>ZoFm'Nqő{뭷R q-7cx[E!`#b}sm*mUd=g%]/y'6(~֬Y^T{AL#gS( E`]7)#yi&)A`eW^\'xznРAe xXe%a afnn8V-9 OrG\3ثS$ nr/ pl%o =\ܤZi7j:lcVJupFe-֗пS?s^8␊8wRVM+R'ntep|ˣrnJ4i$;ضCsB|J*YV|J*a%* B,q% N%Yd-tf5pSTk\D &"*HJjKDzJV?91E5EuIGlЧ(5 Pl*^xmEUt7BߢPxI/t*T9s;uj&$BVVxf*vjjJx*=R%))NblE}h ڀXʖ,q/H]\~7q>!*CQߵIo4lYo o(7TSTk}g'5Ԥ0BCU/ /\ď\8F@(>u=,A. Sb2E51hK'*f7jiO7W}G󅯴 ^biTp';8ԧr=s= ‰+"Ey7pV1C=(=zЃYl'6ojmpJu̙N͉* ?$?ӱcǦ ۷[s5v3p8s9uz,PJqҘIp +)l-j쫜馛NT^0Gb[7j *[nEmӟsèQC=Ǝ KX.BתUSZj)5sqͶ33U5TST}z]lR{'kDe<w`WwX:Thd&7P؍ǻvU;B5It#če^ӃL#u9Gk. 
'3|{np0_*bW(i/xWݲ.[pv76vuWDZdC΢k6`M aÆ5akVd;BfW_i3%`M6+~ \hE;P]tE@Rݩ;8(g '# Zj0EʀaJX`}ӦMsly7phI6ș|Rvqvy-/V8 LLjfDЂKSq!vpTf)UbRoV`/PonۥXguuf k}5@Űsz,?Poam:yXr%.F#p':޻iMƍƌij!~DsvRK Mjx"{i5H1-;/tU^L)ӘS*1ybZk+{H"]1b_b%7YL^|E/Jj/[^vV*ҞY-#20_zQN(b_|q/I(M8[QD-?2)W^yEOp R"W3<=szDd,i9d ^sӜTML#t]DR8VL‘/*ZI͜9_uU{-( *l rtR-xaZN(�uT/v^]1(Ujx 駟zJ !AIXCK/e?3Y؏"z}뮻֍8x>Oqlk^{C9bj`#^V r-9/UUA%%'i[|j3xǩvuv{;v쨃G;nV.2UPrQ&N=Xܪ e4!r&|Bà-juc{mVřhȉdZOUpkЅL* nWwnUT,l eG)3ov%nLQb b>#~a)E=[W"'VirJ}730t`F L=I|J6#0gT(?C3Xbc##%a\ꫯ[l;aBo{nj-}KEqOCb;nB>>\DKcO ["`X$M(xǓOS`9a8--6tn}(7AWO9L.`UN:ɋY&}!BvŒO~ԗl&yVaia| (o)+ ~cJofi/rҝթ|STϩIJT49-@t- .(cW\qE"SDnư) һwo]KrȊv-aM%F\y?[ɬ͙3쩪 @`{Y|ţP8r+:i/^ZE􁾵kABgۈpb@}STc4O8`ϊI4Νv]4o6ZyWr{ $T |UeN;n4Tɣ>UNFKC,; <}w V0;ԓDGQpOPYe裏 ~&"`j"Չ `c曷#KD*IۧO/'H C݈2VYaЪQx`߿ KVI `U%&E@W•롢bb~BTGVjGy\j:qǖ?~EW cwCaJŌ\WN?oj5\Vzl'*  U p2Xe x衇" ,)`.`R:8;hHe>Qgŏ[m/{K&,L ,php oMh8 -49F*KQiG.pq 2c&m()S6Zݶ "t8T v# f9K:Y=EkJ^5ƹ's)ʊٹIy`+x/ryx&{܁fXA8"-CKTX v~`tI*)45)&֭[[`X;vл%Lv11u]5js3iQd&LL9"CŸ(=zy\m

Ne W) '5p&TƏs h۶mt|ᴔ7x#튮-![￿:]Er/7aN҂6 I2y(S ,AziX9d0>v;!7> ճin(Wa @X]N64hPtN~z0(h頯P}wdС~VkV_Yl0IST5)w<4BQWNk\)-V X9 rɝQQc2EY w9VsOLHYUE!Nb.;8 ImFK.ھ)qP.]v#+yQL9xZˌSmݦ#q]:KhٳujI&řdŅ & 0n>hb[bɍ?>3Zmrbk8]yN^D<9?Ķ9NޜX8q pb&_' 6'| + rK:9 a&RnIsBCd~'!9މNI#tm `ڒNnٵo:%'N5z%ZQ^d ݉2GvMS&CKڷoL}' ى}PA9mݻw׾E{bذaNn.{WǓibBd5A!ZJ,f8Qޝ,4Vj_&X M Seeʉݮv ,(|iY!q)(zJ)l=:YPUVtab OvnշċLz(dE;T9]}1/L:Sn9^DmƌNP, {f1aF(}8%nsj[/R[nI_y=>Ç7WI(bgC9&@@16᯶΃}QgDl8'#DXpah.M*>t]w/zy4 9IT9IkF,fS _TNJYY71< N*9'"0+xnԩ&X9܇ m[Ƌ=zAm!߿{'ebgV[meK\))#(8NC))~g 8KtQ*ljrR,5"[s eo2`8Ǯ2 31F ;l)uM2)5jJm{} 6&ةrmעLQurx9wbA#yn3Ș4rڒQygLN^iBZjxrVXfGUr׆VƔF4JͶcPSTc1c4|b7w8ޤT ݁8NCcGJg L|;AaTMSTcRΛV:Fvf!ëf1q}4rxwi''u_&+@btuc=%ئ438ǡ"{w籆(P ajL?7[&|;# _xסCF+';찃:YM>uU 1݄ YN1t&))cJCn <ԘMAeR9V{WYn۱ߍ1-"1fQ+ ,xh}9 kMV7#GTj"Lkٳgu3`UVYE'+afTς [mSƀ^.QDNUcV[m5e;[vp\i,&@K@a/6 3 #bǻ(.LU/Yk$GZՅ)"S @P T[:={qݹ瞫t"xM 0,vtȘp"!FںN:! (guVrx nIwao㉼ʱO?tSnLQN>C J'Nԁw>y-! oٝxz%!*]qt[Qu瞫[[ۻuYG梔GDs5coeNS yW!`"'"bmw5o`Rz嗻ZX}71JA`7W'+v-9v_,TeQO&V3c=? bo:@- pc‹IVE4Fm{j/}m۶cU58zV_7)STN=9EGRGqx[bbGѭ[7I#_|ζ!P)K-z/q{キڰVgggE qSToD/dܹz;wqM!P)lA_k+ &[P1ye]VwMJ2ԈW^y;0WKp\vm0)STډ3g֥Bl,#?Bq!km8Әk\Ȧ'(."-av®(҃$@o߾No8WqdN.0&!`jy9̷rKO?QdQT!vmK)Xnsw뭷Vo8MYf9N);ݐ!ClWnY)R̊ؽ6mZʱpV6E ̙4a3S/G9ٚVk嗯[>E]TK.NKb`{~wFac@#nӦ}lܘ1c~K.`&9AIGJgly{ĩȯ)%VN!oK?NyR+%VCz쩫 O=gĂS8XEenb N;ͱs]p͖TW+NTP@jVDV?V\qEubkH tڐxgJ_e i||jބ}1U`b=1~׵_ᄴ'lՈ)%W_-Pu`?vI'A)%&@X{s5Z1$=s:.Ӄ,֤;'xLhڭLף>:uT*8 !VVZi%{MkV&_P JC=4 ٲ<e#0p@Gf5z8˻¦Anlk/׻woUR[oβaTAaaO+yE&+JO="N6ȿ/~v8;1ɢc;￿n=Qx$fB0}ΌQ%Z[PP'O6ijS q4U \ij1>l~cfVK3I&)'@xlb4B84*8FUVYEvm "о}{]ر/^mjm5EDMQ=zP^c>6]vSw{:!~f`R*lM*G`Μ9JQgyNz612eщG.3m)EzDVܩlvr^xd@@3Һ[*D#o}\FaW!x!&f.*~jFj@Qe6MU+| Z뮻εiF韊}ݧܨ|.A'!,`=Sw5xMJC ;VP uǠb+gb43w>Šjj:Ļgj:Ȃ9䐪<|cqݮ8=͈cb.Dv6ز6;C]n3 hY&@(s9slO>*fW%FHlcII4m4׹sgw7;xQ\ E71s9G>XHY)4ɏ}0L C u]W]]yuR)I 믿>㤓jMQ-P8Q%߭JT;{b2(@y]I6`->}Zk~@VXu첋${lU5ʦNT_|}uOVGmə]6V#z)7uTu†gp uЅ/6 ;k@Yd/΋rhImsJ2X51 Xvetr-EPݑ5k?(N-@^xAa1 v8evqTMQm#Eu7|8;@`[R~tdGV'OBa׷o_mǾ_V:ɾe V]uU="vɵ)lsgСCLQ͇]7 C0 C)~K0 C0 C CƮ!`!`STk %n!`!ST!c C0 C0j)57 C0 Cȇ)놀!`!`5E՚o!`!`C|uC0 C0 "`jM C0 C0!`j>d!`!`@MHG/{ΝuYnȐ!^~ 'Ov7|s౅+TB{nΜ9ؽL>]|G:zZM4ɝp nĉ3l!Cw< YĆ@\~v'i8˕ Eoq>;C=Usӟ 7N?t?]tEYaݨQ%\Jsgy{ {Ex?Hnע=éSǻ.}駥eBǎ@x g ?'Xc 8P4*q+K.z6l뮻>s{-nРA G֭*%hlaX--z߾}{קO .kkў({wGZ|}Oў4Cƞ-ϢxiT.W*ՠQ$o[ ,^vm{_|1.vڹ6mD kܲEz d >w-'PK\xCw)C Yjٷ%[bv٭:E'tbg*^bz3v-\ޭC*zv}SLqo}Tϗ PRò.-++J۶mݪX~g6R G-o1 S@==$,\/I}o bZwлw\h\ec(Bs1~-ȺVǮ%^V@3AϋFk^zEs>2{̈́K^(Yr\}^)W_O>KYgѰ2 Gy$e<ӕ# ?#VV)K=C;쥓]swqG&&?ý4J/6^7}>xHkϥZ_{×pq)6yFV& bmDMO?5^X/d^QFld/er)ϋ'l׮]5[o5&aFeuSHR)J[F3)Me u<k'y ^ꫯE$dWXa'LXL4СC#F{/_?֩(Bx~gƱz%(t^2b; @QYs5vM>}^}뭷DÇQ| -P&|K<^V 믿y3tPY =[VrnxI[9Yո p/gUY tdPO_}pIRY}߼ő,s/ `(/t;C*I(b if"CX\ U&K(9(X#~M=Ei+QT) w$[a;S{YU\6xo&a2sG^vtbE&+g@NQdIEyǣ!x9yRQX?-aC2wmYT&` Pqѷ%*+G6 b%bx^>|y{@nsMQD~6\B* _x'7ʻV-Gy'2E,b[ݝ(.ߏ8=W? 4NQsLVT?Yv>V뮻n9h2Bd 7Oj%]d8s=-20o1?.p*n X?i+tQHt˿oSVuYV(w 6%knӦ<8ij+lObLⴽJRP{Ȅ:vs DVϳH.](FzD&W_ͼ+3u(+r#|F_wX?0Oe9JG;=R9`dU+s-yldw׿zp΋N3BAvDE#W D$DSS>47?;N-;cy!{]+To޵(/lF5HEM:qg}vp[d=NS-b_Pn [йGiy-fsUv5WdJȑ#so5oUn&D8Z:EEVa`1Gџج"}#χv'KYvt+ UoUݞO8 >ajObM0)F eDVlI,;s ("tldm$J][9Y!׉e&0z' [N?|k#K_pF7. 
VmP|Nd C*$,؅'(7O@;.'6(il-P[تKpD.60Y[OfҁQ '() 뀻+L ܷ~5>Jy%@?MW]jlA*)!03J=^P|B /u(Y>=mj'XD SN{k: ޅ4 +XVMQ~&yq 2aa9_{ Sns(u]nكŸISӋu&z|/ j#K8B%8ΗZ|yJB: N瞂: c ;ҌZ.-mE3^Š7oWgQDPDz3J(o1ʂ"ó`P7|FJ L(c'X "ܲA6'K43x?}b0[n~qsϬ1(Ģ4@esP$}SN (V &Nm 0qA}+$ٞH%L`wrSXS(o0b՜ j"4bPg{=ꀺC!BcAXihmQ;d% P6|FaQ9{6Q$ ^p>APG(sc Ih hÂY硫i m8I6lI /mX_] oޅi)/4 OO'Q, 1q-WrZr]oEr(Z\q%Z.D'drJc^2▁G)UD 0l&QByE) t4?x#KegҒZ&ʫkNSfՆG+"YqkqD^=Ei+Qi,=53Ys~@v¯R"[^1TIEOBh3g_wnؕ/Hס /YIiDi+Q”jRM-ew&2qk-'|R %$m?F+ _1a@֠Z,/v?Di+Q¤箚Sw7CQTsCoC0 C0 C7nI!`!`GYC0 C0  `j @$ C0 C0#=P{BIENDB`dask-0.16.0/docs/source/images/optimize_dask2.svg000066400000000000000000000434741320364734500217050ustar00rootroot00000000000000 G ((<built-in method format of str object at 0x1005d0c90>, 'count2', 'val2', 'nwords'), 'function') format out2 out2 ((<built-in method format of str object at 0x1005d0c90>, 'count2', 'val2', 'nwords'), 'function')->out2 out1 out1 ((<function print_and_return at 0x1084bf500>, 'out1'), 'function') print_and_return out1->((<function print_and_return at 0x1084bf500>, 'out1'), 'function') ((<built-in method format of str object at 0x1005d0c90>, 'count1', 'val1', 'nwords'), 'function') format ((<built-in method format of str object at 0x1005d0c90>, 'count1', 'val1', 'nwords'), 'function')->out1 ((<method 'count' of 'str' objects>, 'words', 'val1'), 'function') count count1 count1 ((<method 'count' of 'str' objects>, 'words', 'val1'), 'function')->count1 print2 print2 nwords nwords nwords->((<built-in method format of str object at 0x1005d0c90>, 'count2', 'val2', 'nwords'), 'function') nwords->((<built-in method format of str object at 0x1005d0c90>, 'count1', 'val1', 'nwords'), 'function') ((<function print_and_return at 0x1084bf500>, 'out2'), 'function') print_and_return out2->((<function print_and_return at 0x1084bf500>, 'out2'), 'function') print1 print1 ((<method 'count' of 'str' objects>, 'words', 'val2'), 'function') count count2 count2 ((<method 'count' of 'str' objects>, 'words', 'val2'), 'function')->count2 ((<function print_and_return at 0x1084bf500>, 'out1'), 'function')->print1 words words='apple orange apple pear orange pear pear' words->((<method 'count' of 'str' objects>, 'words', 'val1'), 'function') words->((<method 'count' of 'str' objects>, 'words', 'val2'), 'function') ((<built-in function len>, (<method 'split' of 'str' objects>, 'words')), 'function') len words->((<built-in function len>, (<method 'split' of 'str' objects>, 'words')), 'function') count1->((<built-in method format of str object at 0x1005d0c90>, 'count1', 'val1', 'nwords'), 'function') val2 val2='apple' val2->((<built-in method format of str object at 0x1005d0c90>, 'count2', 'val2', 'nwords'), 'function') val2->((<method 'count' of 'str' objects>, 'words', 'val2'), 'function') val1 val1='orange' val1->((<built-in method format of str object at 0x1005d0c90>, 'count1', 'val1', 'nwords'), 'function') val1->((<method 'count' of 'str' objects>, 'words', 'val1'), 'function') count2->((<built-in method format of str object at 0x1005d0c90>, 'count2', 'val2', 'nwords'), 'function') ((<built-in function len>, (<method 'split' of 'str' objects>, 'words')), 'function')->nwords ((<function print_and_return at 0x1084bf500>, 'out2'), 'function')->print2 dask-0.16.0/docs/source/images/optimize_dask3.png000066400000000000000000001374531320364734500216740ustar00rootroot00000000000000PNG  IHDR sRGB@IDATxֆI  $fQ̨b(f1_,0$̠AHF@?_lyvg[U}*:Uƨ:  p@Y`$@$@$` P" FB5 HH|>";v 6,q۷o/'|r1,[L^ټys~e<֨QC,2PߵkWywE(Es1c iٲ >\\1B99C|[+ViӦɚ5kJ*ͼ ?_ij3ŋeG}w/s#tķl3 ! ėJ|˖9#  P @| Pķl3 ! 
ėJ|˖9#  P @| Pķl3 ! ėJ|˖9#  P @| Pķl3 ! ėJ|˖9#  PWpBbA@=tMү_?Y~ka3 #>}3_-/L>ݕg͚%ᄏ[}Q袋yAGq%/ʀ\GaÆon>Dszꩂ Xux'%kVF)g϶nݺܹs[oM$+SLq唧ThҥKٰaCNa P·o5j:[:w,ꫯ϶#\Y}RlYOo$\l6Y [n)\֭e=(r'@%-[ؑIޮeV\>|yWox믿3ΐ1cĉ ΐu]I_j׮-T^=2@`Mk"q7SO=U` !#K.L9mk5!CF@ر8ks.(bX ꫯzKN:Cw áfxUAdB-ݻwOBFjH۵k'PI׆ƪU>~]ֶm74a4sI$"%]) *\itoTبʯ*FUO?{?7^{|`ΐ᾽ 4hvm;3rGkp6.xkZ |¥QHS6_J'x7Jݤ+0={4gy9v,tmHڙ'xp Fڀh{iGϜ|Z-̗_~iս*Fp裏wqG /ض|/L 1I|yP >6(^s5v$6!:SY(~UⱬO֑H F\4 /# 쫪VhG@jժWZ>%̇ҠARݣ\HGq"F|w%[ǬN$@;š BEB%1;>EEO^"(z%%3-F 7xenjs+R'|9UPO @K,;>q_"IB%bjt$@F$PJL# DJD $ (PB)1$@$*)(&H@B% 4 @DPDL  (H$@!@b2IH (TPJL# DJD $ (PB)1$@$yk.,Ŵl2{0 VbHs\5KvРAR|y;t$@9W_'U*>_~yfxw cN /_.ǏcgB%2eى2vo~!(1O( sF  h` "aԑ#i9n " J0m Nl"Æ `5~ 'Pck׮#Fw<%@)c/ȢExXn|E @A)1 rSmC  @&M${O(Tgn5v #իW$@IІRuk*,tPW:~;m^''Svlٲvj,AB%诽-.jNGQdժUi1c6fYbEZ?  o uƍV =t;_eҥ: 5L}+WXhGЦeڴi~cf 4HO M0ü)o>yn֤I)SL %K LmAK.S \^=Z*H /M7eT{իWL5mڴu HMVZ?=SRsW% ĕJ\K" PQ @\ Pĵd/ J% ĕJ\K" PQ @\ Pĵd/ J% ĕJ\K" PQ @\ Pĵd/ J% ĕJ\K" PQ @\ Pĵd/ J% ĕJ\K" PQ @\ Pĵd/ J% ĕJ\K" PQ @\ Pĵd/ J% ĕJ\K" PQ @\ Pĵd/ J% ĕJ\K" PQ @\ k7ʯ*˖-˗ˊ+)6m~N8Q6l ݻwJ*Vh?U&5jԐv~{zRJa@~%K$о64w\+B*WhCnݺ=y)l ;Ĺ>$PV|yNd BRF#Fm e˖rAI-l !i)j~G;Z>j׮-r}rQG ' II&eر::?xcN;gqaF-}V3vZcm‡.5 \0oF_a5*.|`>AРa78n8+1 !ѩS'iݺuE:co|V`tQ|lB{ {f֬Y M0v.SV-A;c:%~)TC C9ݝwV8Z} |}cg!ӑ@iV aD#<Ҏю02TF\F1.hC u;\]A[jyg; eSE檫2:T7:k.:=fn }=4}1Ìu9w 9ҜzFˍ'l dttnd! m'4:lt:̨Q3{!K GlSN5]v5:eTĜ{F{FyÑ@SFՙ.i3<蔙ϩ`tQ"@{ItJtZؼfQʆkiUCvvt Ƽ Fx\#*PAwߵ Jta2T0*ey:ѳ߿9mTUuʬS߾}@ЅF;u7%j1V<=-2Beذaf. L`S p0 !8O X\~vK.1z6A0L P/T>l];XnX& "ٽwzX!H BE]lh&X(ָw2[oժf!߭ؠg=nI8*zV5 m6{f:, y.LDR=*`1.0i}`1?VC)TذJG&0 0@MX$~ꩧ}0\'->xWftxfM 6*Q*S=}gfYnr:t#X?2 B#TrB<u01bZ< 0p@;-өS'Wυ]0Sϝ. aPX\X E( qvL}6]~0a]csC9Z)`ZS\r pErL+>F-ayP6 '`pb]~oL͚5mXjU~eF0YD_3(`Z,hPIg͂DCw0z^tӧ{cCs,C8y`B:t`[o0__aڅ#bڻwXFqX;ƾɓ'@ 2߱cGkm+Eڵk[K %ˆ]5jJJ]*С?eԨQf]I!`5'xB.Fj#}T4M$DF9]-u%3lNkҤq\I!LI۷`(_VL h|ᇢF#EF6LX`/6⋮wEvmvA H/6lWǣlW_ GI]v0{l#YR !;殽ڒcp϶v{C'?eO@5R*jN#]^Wj#eX_ J>K9#bĉs#a(RG)S5xc=&N1c5+R ڶm+|5k" _~i})68ߞB)C dH (5csu*XǐH h؁uvl,ƻeΜ9.97|Ӛȇ$l,T`"t$4l{Q08_Yc't54?O8^ 'QysP9no1z"C$.]ص +W΋(\ k0=w\]k2 ( ؛^)L)P&7ӑ@X -Aw9%YEҁQ NCňwE I*w:9 c9F(pf  ԋq(v܇a̛7Oԩd2]yJR:ud5=PB(p fa؎Y0o6 [0= 1Ǻs= ,C=ˊ=G]j!WaK0Nq>zaC `LD`dCLPA 6  DFw\V | DF+g䌄뭭},Б@bx=p@k8&0H *`K0֚HqHY$G}T\9t̰ &H J` f[ Q3*Cj;cF Dǎu8H(O?]&O,-*1a% ks*%=.Ypa`)D/DXb1 "pQGɶnшD2j(: Eß$ hss1F1B1bzn0Hw+VN8yBZ_5j(),'PC]M0A½)Agn@ňjŹ aLuV6ԴiS Ĵ[BQb ,c&/H P1ZoېvaiݺT+V`=^0MGQ&k/AŌ38::G0Z+VL8Qplpٲz+.|# i?ݤIB ҦM?e\$ vYS,sJ ,ʴm6ݳN"6l[!TZjE#gD^V힕t*0S(Tҡ@]޼y|%؆|͈<&OIBN $uۨ^:'|%e_~N%NCJ2|ac(BuuK1 ct$7mڴI~"YK)T8R)‰bD;,ʕ9fKtꜥ*W@, nc&[r82;H ԩcG K*?xckd @ |ŊY|9ېgtpUP "BeʕvFq!m  ĕw6TD8 #w9K/Y' `* ENՆ5kVA2e7n\N 0jԨt~e˖ TmFE^׮]+nA{橧*8:6[T0K-L~jCE t`0,r9At-*1Χ׉¥sΒJM۩^ QmCLEvO/ʺpa|Nq,J*EQ*{'^z?^FiM_x:Ka=W^yEp8i|vAޒ+R~Gywg-l;vh4` r'px8O>Gb_¹[yI'G0a^HiHW6 .Ln ]wuV0gsGؠQ1fq- xz۾}{iڴewg:uP=OŋeMAfPmf?ck 4P߱Nr' q nUREZnmTS՟OvYիgL.LŁgnl#+K' Ǐϴ3U,htz5~kF ԄK.c=( hC5ڋN\ꪫ G\N:ئe{-O>iZhaPA`Ǝk."sfСF+QcV^~uGK$ᮻTk|Kd/ۓXeahGƓ T_F__㎳e//ۮ  Kܨf_}ڨ2OqiMCk*X/iҎѴm;Gt^?m(;|Tx66ZV:ܐt H jtȨʼn4}ңG"A_s^.s137j3n+Գ>kOJp"щѹ\{ *YM7dk?<&~C(E ޓYd} /S&$>(ǬZ*lX<48/5Հ1~jH ,'Y}䤽[ ^2xN'gw|y~'9slvZ+':WxA׆Ԥ}*$rLU (Kƍ_t6uzs ژ;8QG]A,T Gw83;ۡNyE-2%FrXB*Z}XJqkC5<6ec?d4=묳4 2ٸq6u LGTXt\r`JQ`nO_4L} IƇRʼnv^84KPme˖~(Uttnis !\tcmڛO뭷ᢾ6 Æp^|ea6NkҤ0_raഡq"\W\ޓ;Igv,)4KA e4<"\*҂۫W/Aw8W8~S@&q Ra駟b].:q9EP8'N; 9q !8kX)i5NI3Ǐtu4'SOH(xq.C@$\=؅T%]A%&KrnWRAr!ЛF [`MNۑNq˽+=r)){ű 9/Lx˗6cVD8 tN-{qTP r;I[I#T ZlMСNQ.U x NCHL5`j/N.]/˭0*Ȫ`tdk #]O$Ч/Hm΂6}S F:Վ7lvάtm7uHBC|eUS՟K  ZP}֩x/oۦ}yQ Ip j_~ ؍6TYH+'ttXw /U>V3*8QW3{">}Q /"`9jnBB 6&%YklBB;OGϬ> 5E8NqB'!1eF ҕ܈xu@NdD|0,HEf!K)Tkt$7~TǩNǩ0BV7 0.:ZU[q/c521lu6RNZl3.4t4oܫ("#@n֬Yl* DJfaب굥^ Pa,•IOI똥*HOLfQ 7H P1J<`f= 
dask-0.16.0/docs/source/images/optimize_dask*.png / .svg (tail of the preceding image pair, entry names truncated above: binary PNG data plus flattened Graphviz SVG labels showing the word-count task graph with nodes print1, print2, out1, out2, nwords, count1, count2 over the string 'apple orange apple pear orange pear pear')
dask-0.16.0/docs/source/images/optimize_dask4.png (binary PNG data omitted)
dask-0.16.0/docs/source/images/optimize_dask4.svg (flattened Graphviz SVG labels: same word-count graph with the nwords task inlined into each format call as (len, (split, 'apple orange apple pear orange pear pear')))
dask-0.16.0/docs/source/images/optimize_dask5.png (binary PNG data omitted)
dask-0.16.0/docs/source/images/optimize_dask5.svg (flattened Graphviz SVG labels: fully fused graph; print1 and print2 each map to a single nested print_and_return / format / count / len / split task)
dask-0.16.0/docs/source/images/pipeline.png (binary PNG data omitted)
dask-0.16.0/docs/source/images/scaling-edges.png (binary PNG data omitted)
dask-0.16.0/docs/source/images/scaling-nodes.png (binary PNG data omitted)
f6!JE_L,.]BTTԼzer!++ ׯgz!zj;# C/F:N}BAAA099: UTT@RBzz:VX:άYVt:7[ e‰'hbXaٳb4?Z~Nu8pgeFFFDnh4$w}`0դ8ٵGRDkkXLSS ֯_qqDo>H$TVV;ՊL$&&⧟~bG4XhސrPQQg#`iӦMhiL&s=e.L&ömBvuuA/#hZ~c#v7oFdd?[YYY K/n݅h /8TUUܝ7L&.\ tuusSՐXzuHMV?z= E;wPhiiGk|ֆ4رcDoo/ G]r l}5om۶9==?111W[Q^cxx؟yfW_əEqyYGÇhhBjBX>Sr-HHHO:;;zj<;vp;#`B;x 4 nV۷:ػw/CݝA ={zxWW_!''Rڜ򞭡!yhlld… (++T*Enn.ѭ5W^śocϞ={wZ\.GeeeHUBuu5Z-R)JJJw_ݎO>+Wq(,,Dgg ذaVBMMM@:u y &؁+FXVܹ 8t萨CV1 HT*k׮F )>|:D;{sDcbbMMMغu+J%$ x≠|,PXXDJ---~]_{nhZRRR`0X̳@5p=ad4(##֭[Gz/_N$Jo]xΟ?ONf줈zG{1Й.ti2T__O/_%KP~~>=F: u%:Kt^IKKRhٲe$J)66vS__IRΦ~ (33xgr>l6uwwl .a&""ITRdd$iZtHR1N>wMNNROOuuuQww7]z&&&h||3*xK $%%%1L=Y,2M }tt4EEE… IJII;]^(8QXXؼSiaaaa.FaaaA!\‚5yvIENDB`dask-0.16.0/docs/source/images/scaling-edges.png000066400000000000000000001420411320364734500214410ustar00rootroot00000000000000PNG  IHDR-sBIT|d pHYsaa?i IDATxyXUU7>(2`j*9T#ZXY XYJjc*9e*H )8Cj) *N9 z8s^O瀠>s]\k}}ZDDDDDDDDDzJ!wDDDDDDDDDEaXDDDDDDDDט"""""""""5&H1EDDDDDDDDz ,""""""""kL`^cXDDDDDDDDט"""""""""5&H1EDDDDDDDDz ,""""""""kL`^cXDDDDDDDDט"""""""""5&H1EDDDDDDDDz ,""""""""kL`^cXDDDDDDDDט"J)773f@Æ abbBÒ \]] B@BBܡN:A?!Hip&L&M &&&prr›oUV!''\x¼y_'OFXX ʢ)ly/$A:QY U )H.r@De̙ڵC׮]aaa[n!!!Gƒ%KpqC-Rll,,,,g:I0cƌB׭[TQ~ODDT*;&HBXXԩZk.|7ܸq'TBCCe;7LDDT>8W^EXXc/н{wرC~B`eesss4k s9̙3 LMMagg///#//n̙3>>>SBWի})IR/&M#<EXXիSSSԫW!!!x{ɋ""#^yyy@ƍkllSbΜ9Y&W^y;vԩSk.޽FFF/: WWW>>8z(6mڄӧOԩS066 f̘H ,,LWaS 7oތ;wW^x@Yw SSSo0a;5kZЭ[7!Я_?ԯ_gΜ:w\~G6m`aa@Pe,[ ;woccc}v8qZ?mB$ѰaCJYUwy{9MVTlmaaaB$`s [[[n/((W?f!IرUgff 777!Iا3jaB It&32zh!IXbֶ/ Q^=uer=vY$JX B9sq^8 诺搐c^?SI`O~~^pttԙ< B;E^B$&& ID.]F%IΝ;=5mT3TݻW_}U>:uҊE> ""zQˋ}0`tި_^ƫbjjM9%IB~~~iٲV3 ##ХuZm/^DFF6l _8sI95O$m/^Թ jԨQ׭[H>}D>۷ޱcG8pNB4gADDEw>_^ۼysc>}5ku1668Wrr:ruuEڵqu'Oڵ[43japRSSK_VVzŽLXXXUVHLLĬYyf]ШQ#̘1 z )xU4zfjEŒ&Ê){.]T若$IxsߵjҹbS Ƃ 舞={ fffU]VΥYYU񝣸ϗy\E%˝;w =|*{nk%`kk轮{U-\oWT :NigQ`dd_~C]rB߿_˗ WWWu"YFFFh۶-ñpB@ttzjE2+}䴻z"/.1!Э[7TVMc[jj*BMLLyTZhQx*{VZ%wHTIUw>_J< ^{5Ş:?5 P~.8tֶj{U-L`nݺ CNNz$УGG /q]u{~~>>!0j(uÇu\7z&$lmm)))/qeJoE`kk !D'L07oĉu~^7o,Vm UxܹScqҥ @g}!}"q%,^X-:: hذa5L`\r,|ZK="")S //hժڵk///+HOOGBB._VZi۶-&OsI&0`gϢ}OΝxo...xWpYܹիW{ァ۹sg( L2lll IMXxKᅎ۷/7oڵ+6oތ~gϞ033  ӧtRl߾>>>prr۷q%>|_}݋<$IУGWΜ9{gϞY70j AqFxxxYYYسgSNܷGf͚زe 8ڄ>[ZZW^)*;GI/- $%%aŨ_>wڵkr 1rHunݺ3g>ch%vڅYf8s97bΝhҤ -[UVxs<U"r,}HDžpppM̜9S<~XШ?^L0A4iDXZZ ccc(z%VZs 77xCXXXSSSѤIW_Oj۽{ 7VVVZjM|ڵkZ]nfffB$ "\]] ^xsb$IB?XXP(:V-ў/N*ի'ddֹuYvҥ^066΢}b"55رcWXXX +=*>!I8}}{bڴiATԩSG?^ܻwOtIjpqѵkWaii),--Eʼn'"QE7n8aff&LMMEttPP9^Rgli}HKK$}= "BIR*BVZcǎEqaDFF۶m;D"qqdeeW, GΝ2;QYyΝcǎGff&V^#Fh})BCCvZdffYf/ѵkbKXɓ]vi_QKكݻcʔ)5kQXT7"''+&OѣGcժU>|8bbbtΥ'"z֓'O#G[ne" ܹ/<<<"Y`` """0l0,\իFx5$ ~~~h׮Fk"Z޽{!I+CTDTٰIJ;;;v{{{\=,ۢE t G^^N}:fΜ)cdDTQX[[cŊЩS'Qy8y$^}UU#N:'''T+~$ oggrEo-wDT1EEJOOGϞ=+V-bcc1k,ԪU |͛7ծjq,,,oilDDDD/ ,*_|4\x>}}Pzu}޽]vŅS H/Kg=-]<2&Hq%4lP4kLkշ,X+;+++RYxOKg٨ZhرcR9e.ϲQٟUBH}P(h߾=K,^zBtt4F {{{C$""3`c궧ObhӦ*"5k nj37 ;;W_}ɓ'Q#33S=E0&&׮]L8hݺ5~mL2oF?ڵkXz&Zn8 ""͛$Iغu+lI0|pXZZ֬Y]h޼9bcco>Ka;J.O+W ΝsqDVVVR{Xx?Ki,QQQBVVܡPB rINNXM M^B"""""Rc\pA0*777Ɉ ,"""""RrxyyQéQ)[n;<*w""""""*e-BDT"""" +++p9x""ETT%w(TƘ""""&DD$+՗(ɬ?W)(L`^cXDDDDDDT.\\\Z. 
BkSԩShذ!Ν[qpE 8vvv000vڅ^zF055E 0yd1>>7n +++iӦ9s&>}1j(ԪU hѢ֬YS佻>Lwww]vŞ={t1i$8;; @AAA) ""ԎI#Ru 08~ B?-#ƌCCCDGGرcͅs0}tt~~~AJJ bbb۷{;wD޽amm 8998w,Y|co>HLLDf kcc3f 22))) Sosqq蛓nݺ͛ݻ7 uV|6mڠQF6lJ'QǡC$IaÆ"##Cݞ-ڶm+$I,q=㤦 GGGޯ_?!I8s>=Έ#$I"%%E!ڵkxĵk׊}M;v u$޽{lu۷+WI$IbڴiZ߄$I[deeil$`Ggl!!!B$i&w}WH$&M~ add$$IZ@XCtuY$Ib\"W.$IAAA:~Vq^U)DDDTZt'Nm=~ $'ǥ{WL6 v̞=[%W… HMMnjjզ89s0bmBڵ}M!I.\1f͚GVV.^=f̘վpB+`iimĈh޼9֯_3>) IDAT{nu[nn.֯_KKKQe!ChHHH@;hlBXX/W^ i...8qx)DDDT)1…Qυ W7) ,%''C$tQk46Ca8rܹiiipvv :[n믿SNVo壏>Btt4 u VVVWV*Qy022j?rO?\E1''wAFFlll=‚ uV\x>7--M/\'OUVU:vQ+&Jz;wϟ}N::k~=XDDDTᥧ+];,[2$yKxZFm[nŀ`nn___ԯ_ժUB@||<8Qo߾żyj*,[ rٳѵkW&&&z]&+͞eh'w~~6{{{ܻw/|$ÇAnn.:wǏiӦ@͚5add!5aQQaqݻwgϞB KG9 ,"""Ўv䎈Kw$\TEoݺ5&//wE:um!!!055ʼn'ШQ#iii z“'OpQbɒ%x7qIk􏎎FPPF\Br$Ig޽{Xlj+WjlyV"LutǻuV1-\ǏnL/ra ,""""#ڵ#"":ODAAF˗ѸqcUAA>^?>LkCCC :<Ъgu 21پ}{lٲE]u-, BAA>3ߕ+WԅKCTT*/N!$"" m#G%K Ae,""ava„ Xh4iѰF2#88cǎE-Я_?СC8<}vO87n܀7֭ ccc$%%!>>...4hPըQ޽;>dggcԩÇGBB5w7oF~гgOC-;us)SаaC ...x!RRRcǎ???4h\v +|MlܸQ_}ۇ'N7n޼~ {FLL>6l@Ν1j(,\[5RSSq={GE͚51mۆ_~֭233?C:"dxɱ""" #) h/ >;+""}`,ZVVVX|96mڄ={bϞ=066U{appp5kuiӦk׮8{,V\e˖Ν;6m?(kLw۷ڵCHHf̘=SLAVVbժUFcHHH@޽ի4/y&ƌ/RgbѢE?k<-: \p ,3gtBG2999!)) f͂6l؀EѣpqqѤIucccݻs.\D ""H5 U;))QYF`-S峩t~DSݫ8Zn.G!%K^QXDDDy8xq8e*bRr2з/ о\87`g8QU鍼<`$`P` 1][HnBHDDDz]`@eE>`+""""RbdwЧ1oбQq ;"",C2** 5 hزSG""")wDDTDINNPb ,z@(Bn޼)wDDT|  wX\cǎEn4 0vXAȈwOY=>;`Dֻ""""1EզMiFx1 "STDDTQ>w!g#wDDDDD8^Ȇ  I,w(DDTlm 'N0yEDDDDXnn.~'x{{Q1}6ؿ?pPQQEخ]p}N$"b 7?X03;*"" \]]\Ũ]V.#XTb6l1yC!""=zWYY]@.rGDDDrqՎ<-y!ѽ{w鱟ڴ,-""&;"*%E%m60}:0g`n^~珊BTTF[VVV@DDDD#D֯_ ?oDDbbb4~""22޽so֯/ϟ k Θ0a/edd 4(~~~P(HNN\z AAAz* 5j Z¯Z6mڄ.]z033+^?,*;w`޽2dLMM̟*]ed;wrGDDDrh"8::b̘1044Dtt4;\;wӧOGǎ &&qqqؾ};wsN߿saɒ% -2}_~@bb"5kYbmۆN:[n((('0|رcxWqF4m#Fp!ڵ ]t zjݻ]v8-[Sc[JJ ^uԯ_#F{i&[ػw/:u+@PP֬Y5kbY&_ ^^^ϽD ,*M6!??_~FvՓ;"*r,@D*ƢEРAﰶ̚5 >>>y&\\\7n7ozIKKC֭sΩWX!ߏMjs"c[nFW_}qqq]viԩXdVAUVaXx1&O @9e}ƍhٲ%;ϳ1Xz5-[Zr% 0fxߏp=zoH`Xk֬A֭gXXXźLU:*?&6l؀ZjiǕ| 4+`@`JZ552^ǸpB׭̍JoիӦMS'gφFKKKqrrB#55uoLE9s`ڴix7U(u`޽[R%LLLtl^^^hժZjrJXZZrӧOh֭j׮Ǐk/Z$aٲe+P(=՗(MV1EvaC ""=  |5W+'"z1^w{It(Dtrr2$IBǎy{{C.|!,XG;w=--M:t(n݊_DNzG}h 0֭q)77˖-ƍq9@cd۳ 5kNNN%f"}֭ါ+ps#R %Լ[nUc[^^޽1%/$$8q5j?-- :G^ЫW/{m|ZǸ{.Ç022ҚX~}"66˖-  :Jjĉ|8.BߪU+x{{c˖-7ӱsNQzRSS鉦MiӦ]6>FBhh qiw#qcek_?pܑ>X`-Z+++,_6mBϞ=gkzzj888`͚5BݺuSkִiеkW={+WIJepL6 ǏWI}Ю];?} bbb0n8ܸq-ÇbΝ022pԨQGDD|TT"""t3???B>zĺuHLLDB"$%zIBRR<=+~!J"S/)S,33#{9|6t X1JoEhذ!ڷo~XUV)U~EDDDBY3 |Y+*{j{rGBDT1} b ,""*a㏁G7~U"*@  4i"w4DDX~=.]Hxxxߖ;,"XDDDUx{޽L^ыQԩSO?[nزe!5"""bWָ{Np6 Pk-z !uـN:@0* U99@DР|9 \@ =عSH*Q%'/@'o,>}:`n.wtTxx(8JXDDDرc@O˖rGFU`ݻrGBDDD XDDDЕ+A@6w;vpu8*]GTTm`娿 d (#88XP;Q% ̚,\+W*k\UFhYSN#8Q@@@%w8T8U&/N.]Fd_` pr*Qi,"" L :}䎆!))I0*777C 1EDDTAdeg}X[˗AA!椇6\kq""zy,NDDrs4Pj5~ ;*n UDL`#G:wVgv: RZNH"b> xyQQN#Bha]tԫl<g'䎄*&еk@׮-k`e%wDDY̝J ,""2L^{5j Á( ;[h"a dd( >|\yIC`#&FH"a%99EjдiS,ZHÇʂiiʑWhhۖd ݻ///W^˗&whDDz';x-e~^{MNpp0bLrt"(DEE!++KP1EEz???l޼YpZn.0p p`{˖rGDT"""Y>X2 ՗(;*CBHEڰan߾Yf=z""?@Plt wDD O9P!"" ,*޽{aiiׯQF}<}T(WV[S_PppܑQEҥKC>}гgOlٲ#GҥK$wxDDza`R` 历bjf1w"""*&H>Ǐ1b|wӧ,X1c`ƍ|!jl믁`H!8 Á'O䎆XT$333ZYHG{LDDN€>;'0m;"""w\sΡVZvvvBU-.MD֮ƏPg~O oF"""* XT-[b޽HMMEÆ 7nԬY}_tim""}uQy$=Kח%\Z[ \EDDD Ryʕ+5SN2DED$={Ae˘"zY)G5#H9r$VZ x{ #& PV)&*꯿n݀ ss#"\:vUbta ,"""]5k;wrGDT(MGrGCDDD ,""BܺL^+Q5bU> ,"""WN|ػpt;"ʭ^=TH#!"""}cs.8̰qrNq E9$! %~RCD$Sh!INs)a }l}>׿[ ۏf^)b:{hZ+"""O*`ŋs/PD"iSkM'GE X/JL'q/YB2Btq$*`n4`Ԩa:{j5kL'G=^{ fφY~}ӉDWVCw5sRKDDܚz'@f7͚ 9w.;g:8 DDĭ  FYKDļ֭ysIDDDQ%""n_?8^tpa][EDD:DD-M]Bп4"omªUVCwDD,X/#GZ=wDı4i0}$"""T|9l ͛Ä *^8,YE kar4"""b X""6֮瞃:uY2N$"m[8xt1M,q [@ÆP ̟3N$"cj."""*`ؽօ!:||L'q\֭Ã!CV9>t1I,qiOC>>@>f_o &Y3365GĤG7-7;r=Δ)SR5Wj >}bN#"""I,7  Aи7_V3kͨ_Cȑ0f is'|LfXNQKDBVWV筷_b:}z ^W_~~)!d~l٬hտ?  
]3ҥK7xSSDD7 !$$$U`'(n8.O,7m ^WC`VS [ŬgO2yLѣaj$(  ӦС"" RXCJ %9ZrS8\ Elg>xwr̮|SY( *VȪUPBSRML0}5Lqm*`Dx =jfpB,Xfu0}tό3|,^իl6hgn:;wJDDOX}bbTq'99dHJJ}Bۛ#Fp駟SNd 88CVsS"88Q$i IHݹt#!cFөDU؅ `HqWw$.O3DD\āLsa 3F+qmae:5VZGdž L` T Μֈg9Y"""ڴP믿#p׀!T""+<>/ M%rv;\rt/P | ,Zx%"bE(Wj."""K,cm۶j׮͛MGqK Xū `Fht"qWe,fŠSC&""tIc*`my{{ӬY3ƌCtt4;vFlݺt<{CӦCbS;$::9BC!9f6CDD  !::HQ$֣>ʣ>zgyf͚Q\9zW_}e0{8yBBoGkց@@4jd-#tI %$00ƍwaMqi7CJu+X={x%"o;vN""""iA3(P˗/syfzp.$$2g2u*t`5(? 2HEEEԓ)..PI/A0mi:6۷W` JT"e Ə_c!S&өDCJKbbbTD^X3fZ?PE9sk/pIl6rdɒ(ڵ'5kV /EDz54onXV5HDn%asCŊV3wDDD\;Hǎ'225kpa.\?Yf ѱcG0a]=W&M8y$tԉyѬY36mDFڵT)kW"+=aGAppM=L /'L'Epp0HٵS*X =zm۶75J8LBdd$LlWl޼Y=DЮ̚=za"*&GӃ#'OB|V#]Mc}DrR.ӭ[7:uFDn MX_gφ^0HDaǕ+|pDDD\:}ӜZ+"gR\`zDa[Z\ٳg?n8waO^ذad"o0h< Ԩ7B2S8Xׇŋl2hܸ7o09~YpZ=tt*I :GDDOHH!!!$$ĺ0kDDN#""i!**(LG46lգYftޝ2okvΝKMFq+CӦp | Ԭi:%w### 2rƍeݺ"+z%&&J*#iH,{nvڅ?Ev[BBcǎB &Mf[.D_4;phl_ ɘ1co}}}y $q//BNkE܉aT.kRKDDyZn5j̙ևqTqe'Y}.]2FDDD XNVZԪU9sѮF̜95kRvtH(a(]Nk8\iXtWZB~7 D֭ZjQHgώn'66qF6l@RR[f̙㋸];g __өD$=hv.%KBժ ٦MM{*X &MbС|g,ZqqCʕ2daaaΝPbq,}+† OD܋a;ѣVO,q.*`9ܹsADD:tSN3gN *"n =?8|W""e:iv?Epp0H ,[ؼjҾe  S8HLǸcO=[ytI !!!C%5ui%"/ADTbGkɠW""-Ch_EDD%"_~i-cx]kIjSHj 3g :t*`CӦ KΝ曠DD\Kbc2Bq*`[KJqdIXZVt))b:pXܺx!"""AM]Ȯ]2e '66~6ݎfo5Pıln5i_:ldn:8+ΣE xu1zt"f̘Aٲe;v,;III$''_;v oEم ЫA|Ц ̞ ӈ">>֮]k:عjքIؽf3LD\ag¢E>|8ٲeF.]B !Cm HJH!C`(R|t*qeS`5>th] TDD1"v؁fPBdzsΛcӴq|:CЧ ަSsq8""BBB 1̙ÇCduo={B|ȝ"**8Q$"8`:Q'NX8O ~%(a: w### 2[."baozu4 {> 6Wh:{RE\|-[mteˆn'..#GҤIfϞ"%_ɐ!P ,_uN%"Lɜ*bu`-O6 AtB%.b,Z={rQN>Mll,G7`… 8tLz5T`-vPJD8캼WY}f-1^jՂ+"""Trf͢u֌1p͛GٳgIII$''{n:tܹs Xz59Fnk˗wYV K97d"")Kq%$&`Ll嗡hQ;EDD$UiBѻwoˤI4iOQI!C6mӧ=?~RR]tW_tҩY$%Avl,YLoi=;7pVCVdSkNB;XYǎk:RExzz2uT"""Xt)p4jԈrO0CߦF\͚[[۔odqi=;yO"l.]ڱp@>aàkWȑtB)_<˗OsGLVc~S_Z5SV܋Hʢ&""tIc*`}[޾cjgtEUڅ и1DGChD""#aGIttMK6) {_w9//hvs U WKUIIHHDFF"iL,7W=w}oEY_V%KBHq?Ő5CLGq:2XlŋUFP̟oq7jľ{k;-X2gʖ-E8y*^& IDAT +W£N$"r4?[b蚡WXNmwl6pհpUm-w1w}wC? `[T)>&r͑#V'BӉDDRx/oN,t#ZJX j-5jք季\9xiEDD҆ XNW^tƏOӦMof#s(nn~xiq5PD""Gx0(n y#9jլ^۷CL ,Zn5puUfjSˉ\{Co>Ȝ9T"_~W3?@¦.wbFD|A?Pg} m׏aHv//(UV򋈈-\ă>h:511Pa -gקql6돦Gf(~t$EZ?'Y3oy !=oZCbO""4[Y%nUx"""A,oCO?H@@{橧b4hЀ#F).lhֵ'ibqSo/WfFE9Mb}҅UҾ}{O L5}:m ͛ÌHD܏;Ӹxcm@eݨXW T) ~{kmd|ٺpgk=իKDD^"<== [lxyy_]H"ܹT^ھcf\s/ח!n~coֺ̙ZN""I+\f#(( ".Eh fφ(DD8|o6t?|07GTƌVAU+x=k8v:VAxqX""fMȖ |7a|WHJ2jDD~h p\ttqQs?EШD""C+=ɰV.#ȓ:~ěgk}UkV277ϖkX.K.L8SN".k*O?e*^D[/Y*ˆG+WP$l=zO@P0C~0o٣Z""H3\DRR-ZMRH||n%ѽ{wYԫ7@*8&.K,D֋ټf,u1{t$q1SqUbUlO? 
^>>)ʞkٵwKW8 PR%6oLPPP??zL)c:ȽKɑvzco^vLR!;qv^ڶ voW0cs>r3A\_)f -j68|l662b~mӑM k[UVۭbֱcֲm۬Ǐÿ[yy\JЕ' 2qF*`|tq?lͼf^,h:8|*NGV."ً$XŨҥ#%v;={uW[ߟ?{xX*r]=r򈈸#D䚍~}(Tڦ: t"qdBHH}?^c&ݾ-H"if.Cb.t?nݸ>.ϙŰLZ+n **(R#.E,QHl6nif{nfiҪUV%Kpq8222Ud͘~@-XR>0[Qd=dszv?)mwUԨa=]:^DK\ X.'\RRdڵ)SF䖖.MzuXzc%""wNpiVOy_uvj.n-S&kf|BobՄ>bױciLb-y̛z1F ([Z("TrSNm۶m^z_ qs@VШ̞""Bpн܄rq$j3I)xyA|q+vIϚ5нUY5kBʐ1ceӾ}{ze:8ɓ!$ZyTI ^%5C8p8".#{vx>~Μテ=B;X,xI/xEDTrܹt q F+@0mvIK^ᓃ#LGqY3X:sj ?t(&@zVoʕ!",'7ɓ'O)P(v8zҫ""iI僺h"tysj.?J y@0}:oIK"jժume$&&2}tđг'|uewoӉDDiF-3._u?m:[٬UɒЮu?Z>:?KERI]*`vsnF"ES/%J0O@R\O`Xt"u%^ȄM~i\Ն'g#[H"n`AxE8uÕ+Vկ^D X.bժU#|ZOf}/"O9qO~O >iNܥx Y3VZQ8[aӑDr`8~zAkp||Zj kVEhRKH矷̛╈+Wb4ـ1L4˾Ȯv*|3Q2emdO=ej O0hZ+ԱWb퀸hcŊݻx|}})Z(#44㭫aPD""L2I1T_y dʜ.4=_o_ޯ>/.x{S7PUgOHN_~>Ck1]]IyxX K<W[իaD|RC/+$N9׏+WR~̙S.lW:QѩΝ;Gpp0C%,,_?dƌ >>SC#kBJܹ]'v񦏙mg/~| >Obrvw6 RDW^#HswOLLmw~ .p∎믿fرrv=z'ٳo:t ~YX}p5Yxx5Ud/b:]KqXnT6OY:WՃ -J#HRߟsuޝ'֕1cЮ]tynxU5*`'O9qO>$ΦI&N+=a'2Xރ9明#"""iH{ا~zW߭#""hѢGa̙tڨRٿN * Z4HD֮$_aɯKy_5iS +dݷfy{^ԚvAxꡧLG4 O+^8ŋUVԫWnݺѢE |||5;Z*XeArL2I1T_OJ-tT㰤UVVC:6'"""ܳMb C n=""΅^]ÆA߾P>̜iDDI=V2aD3eCi_=Ay5KTTQQQ7K ~6tFo$"""i@,g #aJ"## J-&>ڶ?~`Ȑt*N?ԭSxK2i0VZg$SJKbbbZ]VN ~ /}~MGT։'->}:9st҆_],\=g:n4cF-d:xTĈA1gzITӨ8ڵkG||<5k$_|;v3g믿2e2hJP0ȗ6l%L' ń)(CkM6ʜt֬Y%KNu*ӑDDD$%bؼyN=6A`D"..YE]Vreuߧifl6'O_4 5;Ɗ7LqDDD$%bȕ+з/-['@,S;{,_e\Ng#hV(vREv z8/EDv<#}RN u̚╈K1f?G{Z؊N2?W{]+!""֭ˎ;Lǹ/m*ZjtZډĤDqDDD>%ζlʕaVXwt*q5E9M^nBrq}]u;ڗҭZ7 0UGɑ#L6-^t^z/_>2gLjXrmcĈ8pCQn]4hsR w cMH:1{ r͛V-ӉDĕ|?902 }j(dx4MGt ̞={P`JIxx81f2d@Æ Yv̜9___|}}yxGȜ932egϞO?KCAyP[?5GDDDz`D>ᣏt*q/goKs.K~[+ k Yf}pa翹Y\\6lf͚l6ƍǙ3gZj꿘t'Þ+_«ARpMӑDDD%~ 6dX t"qV/5ſ.B*Ā'ЬT3lBbȚ5 ںukDÞ={bŊ,]__tɝVlˤIt^ڙ1xz--F&MΝA|IHL߿bι|\H@P OR h:8G7oޛ_=wȑ/W\lܸ1Ma`lñTTqzMGH*t t X;¨Q1T",X2˗{|y*>P~5Ѽts(j:8o:)Sk*ӮR;^6/y>`:DRџBfu+L /d:8W.ZEgZ|yӧFj96Q.]ŋݞ"""\HH!!!fZʙ9'>.ՠW^H""rRXCJ %}G^́7ngݟx,e!)P7{楛S"W UXUVCPBSA/3)fvbsj.""$3c bcc)_</zu{O _>obJcDDD X"wnރ޽N5 r0JDgdILsGQQMxd̐t}` Ƞ"n-̷ٞ壍'/ZkMG:R&x"iv!W^ӭ`f3xS"W JTRܥ–zE>DoA&p |JDWlB,SNe zWҪ\+|}o"2xd?.vɮXR\<wo*NVAPDD$iŋU+ȓ֯RL'S6G?"($_Yf|'T/Tf:܃,P'u\;g9vصοv.؋d̐94ch*l" À ӧT"s7g +HxڹLEl6%o^~kv;^{9p/fl]U4GQ2xzI"""NO,wqQU?wU@%DP1{--'R+wf;HdQ[f.*&2s~09 e_yɜ{[ `~PDMʍ|f3bYn1}$!j$IBshn\ۅHKѸ ?u3  [k\w[w,)[}`~WJõ&lMl1k&^ٶ;<"$I#1u6ua[GoEj~*@_]jw;w,`= (xUsgE.$&c{vl9 гEO|z'x&&rGTo͟?VVV;@6389`ԼT\J1kkәMHKPVrupƖ ~ Gxx8j XD ٳہ^6l厊jMg6K@_IJW[0UǺu-w R3fo]k][nAr^2@O҃]Ya}'8;̚k &ԙ(gƌ,jnƍ.^,5cQm_taghokŴ`cb#wxDٛڣ_~צF{z~U>k+\sQ*諐3(l9jfml >~cɓ.x%wDDT[.$_3z>%xc rIRvvxSxSBde#5/)y)H/7%/-˩4(Rc }؛kQzȒ7αEMJ,[6xA3ܼ9^:^y--[c$ ְ6{UB̂Lu`+17!5/ii03D3fYjf ,xձ<ܹG㡐hmZpp0Aa 0l{o (8kQˊֳ[=f;RR0e tDԤ)$L`gjT)^^ 8{(ڇvqԡ΍fp0k{S{D޿;9?*xBRekWkְ6f IƎ;""P_`nhi]/fOQSϲGK%HOӚ`zu?h+l4?ucSiirjܿw"23,?$_]JMFr^R. 97Z0И]~-]Jye3,Hh΢*R_'Y]E8dTkԬ:5TvP!97Y"י3ʷHOS ©"-gqQ Gxx8j XVVzEcc"G%̛Ik{n<,ž CA[rA )\]bRe]yH_wPJU73.n=dvzz%$&V8[q/z^-zeKlns4sv -O~ICgqE'F#>;buS \ZUkl c}~8GW~%::>>>rC,jnƍ.]厈jBRZ5D'F=좲3ln6nxo{x943k&sDDTj2|ft})>dh^!)̴֥HKQϜ{.r4750E+Vhiv2Pk攽}=bj`CUBԼ gq&"Z$RR4^h5s$=(*Rx^ٶ=IG=$I03` Xh8L X['NݺUXYK.V%F\9\]7 2GMDDуWy%ygwg,ȄZZDWǮ>R8eedդ IQ6=ZOAIܿSa? 
>;uIzSA!)'j$HKW^'AAկAٿ鸄K:,ӧ#G ..vvvի/_w\Ӂ@С֢EB!wTDMMUbbp92B }>B^UG57Q9J{jk׮x \rEVPPV ^z cH֐!ngVϨ*$%lMl䍉ˊUN^hg nEDdxzzí[e ^ L&[(DJp=11,8;/^vFkMnKDMaee5AGvvܡP-c_|{aml'nd S5WD'F\9\]{ I戉>Yn(i|Xr f͚>}`ZO{R*.l0r$eE,"܃sv䍧;< VE5#GuzPz:0y2ppY뭷!xntwGV}XjΆ߿_͛7z52ΜƍrsCCk &/$kŚk`mlOĔS:5:ka)^y '""""P9R'Q*+à?瀵uB j$^Y1,,&rFDDDDDXz!- <V|PhDr1"Q6 xrEDDDDD3,`N.[*?8t2Dt,=l+H$IЈ?>*#Q]Gxx8j X$!mۀٳn݀U+"J%|f3] Pb՘51֭aQV~%::>>>rC,EAΝ+FFrGET׏wh.^LX>h9VEuM`XղڧL;"F ,8W#Tpųr;,""""":թ} Vvv]Qv>[vۍO:WDDDDDԤEuB}X= 厊R v[?,Qcj]Z0iOʕ…B!wTDoaN$IX=x5ZYDDDDDtEn& &8|>1[=l/} U9"!"FbuzKakb+whDDmB@@p#;;[PEDTK.\Cu3 ܆!2 O4{Btfݺu; ""jODGGGpEDcXzd)6WW ؋#!Iܡ5H,`Hlƒ#KJsC=CC#"""""jX""zLIIwmR/aL,rFDDDDD(EDTCB\L"j$N= `}&^N^rHDDDDDԨEDT %;ȫYaf`vxǫ>rIDDDDD(ED8p"F.FK˖oFc&QK}i౸cP %1|0ݚw XDԤ)UJ{J}iK03`8b#F-Ic>>rC,"7q)]?btv_l I!wDDDDDDTX""Ybߵ}=@%T2ki+wDDDDDD$3Ω a.F=Ѱ089"#,`Qz aŅ/WkWyr&{NFrGDDDDDD XT<]'OĩS`L6MШHIėD؅0D'F<& K I9JXlڴinݺѣ,:Prsݕz>Q7Ї{,~`o$wDDDDDDԀEUrvvFRRpYC*U⧛?!B\ރ9t # C$"""""&," ;'v> ܍ tk +l,wDDDDDDİEfh @@@LQe_G0]̛he /z]&Cg#"pkegg  XTk֭[oooàJbv> '%?1;F@6rHDTc,Lb)()@H]#Gq_aTQ0109B"""""""M,`QlܸYYYw 229sR Jaj ~g=!= XT-}$aϞ=o!INV=u!· _n]ٸaAe2ٶ;<""""""ja֭[r@Քa9s`gb'"K l$$I;D""""""a(U k{a4 \PP XD մ Fȹ$&[naI`gj'wxDDDDDDD:QS AA=w K ft/'/#""""""9!~A1A(()0a=n7Fw c}cC$""""""5,`cw>Gpl0nd@[XwvVV ;""jlCZQ=STZȫ ơH=cv<)($!֭[ooo "&$Jtt4|||j XDĹs B؅0ddw2j xFrGDDDDDD$dQ]v!86щp4sLm:5$wxDDDDDDD XDuLR[?!(&{J0(,~`g wDDDDDDD XDufM#\'fO`堕GsG#""""""X"E%7 Gaidx9pIcHDŽ8y$b/SAL(050;D"""""","IMFPrelczpq;<"""""",P,}> >\AO'wDDDDDDD  XDR%#%/>N>`cb#wxDDDDDDD XDՔ]FPLN= ;;v Čn3еyW#""""""jX"Jp,bq)E"Ej? FFrHDDDDDD豀ET Apl0nfD;vXo1v-IaJ){GȘ IDATտ I!5I,`Qw.bv! ӪoóB<I,DplY89bL6u;<"""""""z Xd Gn(UbdX @@,`Q;8teaJ)hn\ ,`QTXZ| D݌&xLs^ϡw\aف/.|,m;Fx074;<"""""""z,`QQ/ & /^3ޮcb$Jn 칲*{, |6QcAy ;cwb繝ώG'NX9h%QE^AI\ك1;aah'bLlѓ 5r,`Q$ij ® ]~m!dLv 3C3C$"""""":+ii'3fݦ]H,`*%~#vW00a\cedW_ Fȹܹ:cе9̚,`Q/7APl> K#KtLܝ hڵظq#Ю];;v rEDDD>!N; _ 2τߝ SSC$""j>S>|;Zl/PEu`Xrt ZZĜs0tٺRʕ+oe˖Ν;r@_QQ.\gggW^'vX6hWDDD̝;w͛7Gǎ}v""""Rc4}t[SL #Fz;|BOWˑ5NyyyXt)|}}akk B >ʉw";;ׯ_G\\k,Z[m"""c*u)޽Wƚ5kF6mo^.w S8bٲez*uIuN<}^}UGd'Nk͑u{Sx XT_Ta̙8q޽+ctM nq\PjգxzDDT4\ߔE7n>Clݺ ,!88zB-^cooɓ'uDDDj gcӣ`'")w!JǏ[o b֭4hvZ#""7J[nUUu≈! ,ŋ#44ڵ+ۇ}QqFdee/D||<`Μ98񔒒777 >>rODDDX$!;""""+$$ nB֭͸Zx1'-[C;b5E:STT ի;}4^{5xxxmڴ pu/_/,,,`ggS"--M+V@PSkǴ1zh O4p<̙3x 333t ˖-CAAF?<,]B@HHH}k2v;v@N`bbcƍ6,G\_7sn1?:{ ґ' om۶>}oZ6vX,Ν+v!/_.7o.ŋpww|Xrݺu2-!!A sss驵cZ=wbb7 .TxV煑puuk֬۶m3f$~Zݏc֭[B$"($I!!!Zj2v7o$Nj۷S IĚ5km51?>zbm4!IBpL+RTT$B9sh;vNk>00PZz' sc}^=U,҉@hZJH$ܹ#Sd ޽0aV:!Ceh /"ŋErLgӦMB$q!BTjxV߲e˄$I"--M}…B___ !8U9}C?Vw~!I8pF'NIDXXou^7s1=X111h߾=5{#Kd޽Tt]o=S!{Jg /ꋊ%СCXXX *p,҉iS\rfB>}0m4_UcV&==K,%K`ggWai"?? 
dask-0.16.0/docs/source/images/scaling-nodes.png [binary PNG image data omitted]
dask-0.16.0/docs/source/images/simple-dask.png [binary PNG image data omitted]
dask-0.16.0/docs/source/images/trivial.png [binary PNG image data omitted]
s^^^߿ZG}DFcήu;ve"#6.++c"""tFMyyoB^QYYiE/ UгgO #j{%;}&hxilmmSyy9'{EFF}uСzmለFmhj_QQѣGKՔfBEbGD|||@R)&-A#oJ{QVj#F "iF<w*(*V9)O~ 'qDTSBBʠh&2hZÓ 6 eeeHHH:$s)}QDQٹ UZ-qY_*:}Zd"99٧Hy4GqƱ/̄"<<zDzTUU%Ciiiҥ I 3{MsOMT|F kfoN>+q$ ŬYL>nQQ&O[n|lSNJgř%>=ǹGsJ2R^^OOOskeHwADFFa0`رC⨨… 2bx饗Hv-[7|RRBJF6mڄW_}/1bΞ=>}|lSBIIY ~P^^O>GEnL>;zmt9)--zo)\͛~,]nnnf݇:u*rss6>#_w{D=QYYJUd(55j* O

uMԦM6'vpp R@DDDDDDDDDDDDDDDD|,JE(IDDDDDDDDDŢ$YDDDDDDDDDdQ,JE(IDDDDDDDDDŢ$Q3zH(IDDDDd;w/>|A)((h֮]xgpĉ FRRoXva۷cQcO}X[~v܉FR;ǎJ2hݻ;vl8ؼy3>#ڵ 'N4o111 S0]~65:7ڵí[̚S-[+V`ggu?JϤIЪU+̙3{g_~Gʜ{fΜ߿ǎCrr2ƏosOM6mڵk1w\l㙒DDDDD Æ #>7蹍ݾ3f@jj*\]]СC,ZQqt:5 /bL/GmԸr467Ӑq!?5s4=عx"z=&LcbرHLLF1lHsCBB.]yԩSسg ̹ƍҥ ЦM 4?}-=?R=5]z}|||:?,J5ѹs0k,Ke>(\\\?x׍U)97r777$%%-+..ȑ#H5TB ##fͪs)â$Q-]}􁽽aoѷo_T*L0 Νg}!RSSQPP`#϶۷c 3,Sjn{^^z ojȑ#>|xۘ2?&)J.Xh׮]r6+xg_Q1WTT`Æ 2e >ZFDDСCؼysƕ#%nXSnR͉MI&ۘ3gƎo BII凈>t<<<vڅ,ZMާV{>*hyeeeM>^^^8r57%1(1? `h4HNNƍc޽a~\m;6l؀NV3Sb˖-ԩMVL*ddd4LMSEI%6JOKi (9qKjMDDt7ׯ_{}Űapi5kB7nm۶Y57%1(1? `4h>ÕC51? c!cҥu@ٟm4 |||۷~6OÙ*?˗/Gbb"﹭ӬRtހ*5?-7-y4ѽ8;;n߾}ˡRf+W-z*ެrm8::|\sSr~3ˏOOOZ49; 55jJ ϶xyy!""NNNF˙3U~VZ8899yyyȀvmTiVQRoC VPns<^<==b8+>gƋ/3f`ٲeFsycEDDk׮9r$lll1? c][6wu\ts(>׮]CTT2L/FL<4lk4(imoJΏ79qMiQo-ZTj;,? 6@aƍظq#>S?^^^픘@sOAAfΜǏ[#GD=e~@N'`T6m~;j(୷ޒ]{֭[2(9?m쭴k~ko}Z딚""JOOG\\_3f S֭ b3l/@NN~'$''cĈu3662e |}}qIܹ ڦMrJ~" :԰?#ӘT8Ք0O~6d'''{С]TK\c[q״b 덺:CNLƾO}TSr~|{ڐqj%>Odddza*97r֭[nz7R=5Zf4mki~JlX{co@okDDDxblݺ/_nX?3lc1ѣtӛ5LS18̏19r/?UEEh"]Fzܸqtfߏ5DXX<{nsDDDDDlPxDEEeD|wfx`MK.eC 1x2~Kп 0,csi>=Vâ$ѿu +W: GBB1sLCz8;;cRA#o{̇=%ȢX$"""""""""uVň#jxzze\N2~KgqZ-233R2~KSOIJ!j.8s *"00 !p16'nۣM6&͉M 44RADDDDDDDӿVQȌ$YDDDDDDDDDdQZg..ADDDDDDD;)H[IENDB`dask-0.16.0/docs/source/images/trivial.svg000066400000000000000000003405151320364734500204270ustar00rootroot00000000000000 image/svg+xml('x', 1, 1) noop noop ('x', 3, 0) noop ('x', 1, 0) noop noop noop ('x', 3, 2) ('x', 0, 1) noop ('x', 3, 1) ('x', 1, 2) noop noop ('x', 2, 2) ('x', 0, 0) ('x', 0, 2) noop ('x', 2, 0) noop ('x', 2, 1) noop ('x', 4, 2) ('x', 4, 0) ('x', 4, 1) ('x', 1, 1) noop noop ('x', 3, 0) ('x', 2, 5) noop noop noop ('x', 1, 0) noop noop ('x', 4, 1) noop noop ('x', 3, 2) noop ('x', 3, 5) ('x', 0, 1) ('x', 3, 1) ('x', 1, 2) noop noop ('x', 3, 3) noop ('x', 2, 2) ('x', 1, 5) noop ('x', 3, 4) noop noop ('x', 0, 2) ('x', 1, 4) noop noop noop ('x', 2, 0) ('x', 0, 3) noop ('x', 2, 1) ('x', 0, 0) ('x', 0, 5) ('x', 4, 2) ('x', 4, 3) ('x', 4, 0) ('x', 0, 4) ('x', 4, 5) ('x', 2, 3) noop ('x', 4, 4) ('x', 1, 3) noop noop ('x', 2, 4) noop dask-0.16.0/docs/source/images/unghosted-neighbors.png000066400000000000000000000010421320364734500227050ustar00rootroot00000000000000PNG  IHDRI5qJbKGDIDATx1j[A_ -etBdD6*-#^A.M;H$R$8#.Sf[,˾4 j>uv^q,[Vw7Zk_UUU 7nnӷuq woUٻf\'////////////////////////////////////////////////////////////////Zf i<}wUU}j y|Og[p8xz{Fn6}l=t:bq-Ϗ=;4IENDB`dask-0.16.0/docs/source/images/unghosted-neighbors.svg000066400000000000000000000046071320364734500227320ustar00rootroot00000000000000 image/svg+xml dask-0.16.0/docs/source/index.rst000066400000000000000000000176201320364734500166260ustar00rootroot00000000000000==== Dask ==== *Dask is a flexible parallel computing library for analytic computing.* Dask is composed of two components: 1. **Dynamic task scheduling** optimized for computation. This is similar to *Airflow, Luigi, Celery, or Make*, but optimized for interactive computational workloads. 2. **"Big Data" collections** like parallel arrays, dataframes, and lists that extend common interfaces like *NumPy, Pandas, or Python iterators* to larger-than-memory or distributed environments. These parallel collections run on top of the dynamic task schedulers. Dask emphasizes the following virtues: * **Familiar**: Provides parallelized NumPy array and Pandas DataFrame objects * **Flexible**: Provides a task scheduling interface for more custom workloads and integration with other projects. * **Native**: Enables distributed computing in Pure Python with access to the PyData stack. 
* **Fast**: Operates with low overhead, low latency, and minimal serialization necessary for fast numerical algorithms * **Scales up**: Runs resiliently on clusters with 1000s of cores * **Scales down**: Trivial to set up and run on a laptop in a single process * **Responsive**: Designed with interactive computing in mind it provides rapid feedback and diagnostics to aid humans .. image:: images/collections-schedulers.png :alt: Dask collections and schedulers :width: 80% :align: center See the `dask.distributed documentation (separate website) `_ for more technical information on Dask's distributed scheduler, Familiar user interface ----------------------- **Dask DataFrame** mimics Pandas - :doc:`documentation ` .. code-block:: python import pandas as pd import dask.dataframe as dd df = pd.read_csv('2015-01-01.csv') df = dd.read_csv('2015-*-*.csv') df.groupby(df.user_id).value.mean() df.groupby(df.user_id).value.mean().compute() **Dask Array** mimics NumPy - :doc:`documentation ` .. code-block:: python import numpy as np import dask.array as da f = h5py.File('myfile.hdf5') f = h5py.File('myfile.hdf5') x = np.array(f['/small-data']) x = da.from_array(f['/big-data'], chunks=(1000, 1000)) x - x.mean(axis=1) x - x.mean(axis=1).compute() **Dask Bag** mimics iterators, Toolz, and PySpark - :doc:`documentation ` .. code-block:: python import dask.bag as db b = db.read_text('2015-*-*.json.gz').map(json.loads) b.pluck('name').frequencies().topk(10, lambda pair: pair[1]).compute() **Dask Delayed** mimics for loops and wraps custom code - :doc:`documentation ` .. code-block:: python from dask import delayed L = [] for fn in filenames: # Use for loops to build up computation data = delayed(load)(fn) # Delay execution of function L.append(delayed(process)(data)) # Build connections between variables result = delayed(summarize)(L) result.compute() The **concurrent.futures** interface provides general submission of custom tasks: - :doc:`documentation ` .. code-block:: python from dask.distributed import Client client = Client('scheduler:port') futures = [] for fn in filenames: future = client.submit(load, fn) futures.append(future) summary = client.submit(summarize, futures) summary.result() Scales from laptops to clusters ------------------------------- Dask is convenient on a laptop. It :doc:`installs ` trivially with ``conda`` or ``pip`` and extends the size of convenient datasets from "fits in memory" to "fits on disk". Dask can scale to a cluster of 100s of machines. It is resilient, elastic, data local, and low latency. For more information see documentation on the `distributed scheduler`_. This ease of transition between single-machine to moderate cluster enables users both to start simple and to grow when necessary. Complex Algorithms ------------------ Dask represents parallel computations with :doc:`task graphs`. These directed acyclic graphs may have arbitrary structure, which enables both developers and users the freedom to build sophisticated algorithms and to handle messy situations not easily managed by the ``map/filter/groupby`` paradigm common in most data engineering frameworks. We originally needed this complexity to build complex algorithms for n-dimensional arrays but have found it to be equally valuable when dealing with messy situations in everyday problems. Index ----- **Getting Started** * :doc:`install` * :doc:`use-cases` * :doc:`examples-tutorials` * :doc:`cheatsheet` .. 
toctree:: :maxdepth: 1 :hidden: :caption: Getting Started install.rst use-cases.rst examples-tutorials.rst cheatsheet.rst **Collections** Dask collections are the main interaction point for users. They look like NumPy and pandas but generate dask graphs internally. If you are a dask *user* then you should start here. * :doc:`array` * :doc:`bag` * :doc:`dataframe` * :doc:`delayed` * :doc:`futures` .. toctree:: :maxdepth: 1 :hidden: :caption: Collections array.rst bag.rst dataframe.rst delayed.rst futures.rst machine-learning.rst **Scheduling** Schedulers execute task graphs. Dask currently has two main schedulers, one for single machine processing using threads or processes, and one for distributed memory clusters. * :doc:`distributed` * :doc:`scheduler-overview` * :doc:`scheduler-choice` * :doc:`Single machine scheduler` * :doc:`scheduling-policy` .. toctree:: :maxdepth: 1 :hidden: :caption: Scheduling distributed.rst scheduler-overview.rst scheduler-choice.rst shared.rst scheduling-policy.rst **Inspecting and Diagnosing Graphs** Parallel code can be tricky to debug and profile. Dask provides a few tools to help make debugging and profiling graph execution easier. * :doc:`inspect` * :doc:`diagnostics` .. toctree:: :maxdepth: 1 :hidden: :caption: Diagnostics inspect.rst diagnostics.rst **Graphs** Internally Dask encodes algorithms in a simple format involving Python dicts, tuples, and functions. This graph format can be used in isolation from the dask collections. Working directly with dask graphs is rare unless you intend to develop new modules with Dask. Even then, :doc:`dask.delayed ` is often a better choice. If you are a *core developer*, then you should start here. * :doc:`graphs` * :doc:`spec` * :doc:`custom-graphs` * :doc:`optimize` .. toctree:: :maxdepth: 1 :hidden: :caption: Graphs graphs.rst spec.rst custom-graphs.rst optimize.rst **Help & reference** * :doc:`debugging` * :doc:`support` * :doc:`changelog` * :doc:`presentations` * :doc:`develop` * :doc:`faq` * :doc:`spark` * :doc:`caching` * :doc:`bytes` * :doc:`remote-data-services` * :doc:`custom-collections` * :doc:`cite` .. toctree:: :maxdepth: 1 :hidden: :caption: Help & reference debugging.rst support.rst changelog.rst presentations.rst develop.rst faq.rst spark.rst caching.rst bytes.rst remote-data-services.rst custom-collections.rst cite.rst funding.rst Dask is supported by `Anaconda Inc`_ and develops under the BSD 3-clause license. .. _`Anaconda Inc`: https://www.anaconda.com .. _`3-clause BSD license`: https://github.com/dask/dask/blob/master/LICENSE.txt .. _`#dask tag`: https://stackoverflow.com/questions/tagged/dask .. _`GitHub issue tracker`: https://github.com/dask/dask/issues .. _`gitter chat room`: https://gitter.im/dask/dask .. _`xarray`: https://xray.readthedocs.io/en/stable/ .. _`scikit-image`: https://scikit-image.org/docs/stable/ .. _`scikit-allel`: https://scikits.appspot.com/scikit-allel .. _`pandas`: https://pandas.pydata.org/pandas-docs/version/0.17.0/ .. _`distributed scheduler`: https://distributed.readthedocs.io/en/latest/ dask-0.16.0/docs/source/inspect.rst000066400000000000000000000030731320364734500171610ustar00rootroot00000000000000Inspecting Dask objects ======================= Dask itself is just a specification on top of normal Python dictionaries. Objects like ``dask.Array`` are just a thin wrapper around these dictionaries with a little bit of shape metadata. Users should only have to interact with the higher-level ``Array`` objects. 
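To make that format concrete, here is a small hand-written sketch (not copied
from dask's own output) of such a dictionary: keys map either to literal data
or to task tuples of the form ``(function, *arguments)``, and any scheduler
``get`` function can execute it.

.. code-block:: python

   >>> from operator import add
   >>> from dask.threaded import get

   >>> dsk = {'a': 1,                 # literal data
   ...        'b': 2,
   ...        'c': (add, 'a', 'b')}   # a task referring to the keys above

   >>> get(dsk, 'c')
   3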
Developers may want to dive more deeply into the dictionaries/task graphs
themselves.

``dask`` attribute
------------------

The first step is to look at the ``.dask`` attribute of an array

.. code-block:: python

   >>> import dask.array as da
   >>> x = da.ones((5, 15), chunks=(5, 5))
   >>> x.dask
   {('wrapped_1', 0, 0): (ones, (5, 5)),
    ('wrapped_1', 0, 1): (ones, (5, 5)),
    ('wrapped_1', 0, 2): (ones, (5, 5))}

This attribute becomes more interesting as you perform operations on your
Array objects

.. code-block:: python

   >>> (x + 1).dask
   {('wrapped_1', 0, 0): (ones, (5, 5)),
    ('wrapped_1', 0, 1): (ones, (5, 5)),
    ('wrapped_1', 0, 2): (ones, (5, 5)),
    ('x_1', 0, 0): (add, ('wrapped_1', 0, 0), 1),
    ('x_1', 0, 1): (add, ('wrapped_1', 0, 1), 1),
    ('x_1', 0, 2): (add, ('wrapped_1', 0, 2), 1)}

Visualize graphs with DOT
-------------------------

.. image:: images/simple-dask.png
   :width: 40 %
   :align: right
   :alt: basic ones + 1 graph

If you have basic graphviz tools like ``dot`` installed then dask can also
generate visual graphs from your task graphs.

.. code-block:: python

   >>> d = (x + 1).dask
   >>> from dask.dot import dot_graph
   >>> dot_graph(d)
   Writing graph to mydask.pdf

The result is shown to the right.
dask-0.16.0/docs/source/install.rst000066400000000000000000000042471320364734500171660ustar00rootroot00000000000000Install Dask
============

You can install dask with ``conda``, with ``pip``, or by installing from source.

Conda
-----

Dask is installed by default in `Anaconda
`_.

You can update Dask using `conda
`_::

   conda install dask

This installs Dask and all common dependencies, including Pandas and NumPy.
Dask packages are maintained both on the default channel and on
`conda-forge `_.

Pip
---

To install Dask with ``pip`` there are a few options, depending on which
dependencies you would like to keep up to date:

* ``pip install dask[complete]``: Install everything
* ``pip install dask[array]``: Install dask and numpy
* ``pip install dask[bag]``: Install dask and cloudpickle
* ``pip install dask[dataframe]``: Install dask, numpy, and pandas
* ``pip install dask``: Install only dask, which depends only on the standard
  library.  This is appropriate if you only want the task schedulers.

We do this so that users of the lightweight core dask scheduler aren't required
to download the more exotic dependencies of the collections (numpy, pandas,
etc.)

Install from Source
-------------------

To install dask from source, clone the repository from `github
`_::

    git clone https://github.com/dask/dask.git
    cd dask
    python setup.py install

or use ``pip`` locally if you want to install all dependencies as well::

    pip install -e .[complete]

You can view the list of all dependencies within the ``extras_require`` field
of ``setup.py``.

Test
----

Test dask with ``py.test``::

    cd dask
    py.test dask

Please be aware that installing dask naively may not install all requirements
by default.  Please read the ``pip`` section above that discusses requirements.
You may choose to install ``dask[complete]``, which includes all dependencies
for all collections.  Alternatively, you may choose to test only certain
submodules depending on the libraries within your environment.  For example,
to test only dask core and dask array, we would run tests as follows::

    py.test dask/tests dask/array/tests
dask-0.16.0/docs/source/logos.rst000066400000000000000000000004421320364734500166340ustar00rootroot00000000000000
.. image:: images/dask_icon.svg
   :alt: Dask logo

.. image:: images/dask_horizontal.svg
   :alt: Dask logo

..
image:: images/dask_horizontal_white.svg :alt: Dask logo .. image:: images/dask_stacked.svg :alt: Dask logo .. image:: images/dask_stacked_white.svg :alt: Dask logo dask-0.16.0/docs/source/machine-learning.rst000066400000000000000000000010521320364734500207100ustar00rootroot00000000000000Machine Learning ================ Dask facilitates machine learning, statistics, and optimization workloads in a variety of ways. Generally Dask tries to support other high-quality solutions within the PyData ecosystem rather than reinvent new systems. Dask makes it easier to scale single-machine libraries like Scikit-Learn where possible and makes using distributed libraries like XGBoost or Tensorflow more comfortable for everyday users. See the separate `Dask-ML documentation `_ for more information. dask-0.16.0/docs/source/optimize.rst000066400000000000000000000271551320364734500173630ustar00rootroot00000000000000Optimization ============ Performance can be significantly improved in different contexts by making small optimizations on the dask graph before calling the scheduler. The ``dask.optimize`` module contains several functions to transform graphs in a variety of useful ways. In most cases, users won't need to interact with these functions directly, as specialized subsets of these transforms are done automatically in the dask collections (``dask.array``, ``dask.bag``, and ``dask.dataframe``). However, users working with custom graphs or computations may find that applying these methods results in substantial speedups. In general, there are two goals when doing graph optimizations: 1. Simplify computation 2. Improve parallelism. Simplifying computation can be done on a graph level by removing unnecessary tasks (``cull``), or on a task level by replacing expensive operations with cheaper ones (``RewriteRule``). Parallelism can be improved by reducing inter-task communication, whether by fusing many tasks into one (``fuse``), or by inlining cheap operations (``inline``, ``inline_functions``). Below, we show an example walking through the use of some of these to optimize a task graph. Example ------- Suppose you had a custom dask graph for doing a word counting task: .. code-block:: python >>> from __future__ import print_function >>> def print_and_return(string): ... print(string) ... return string >>> def format_str(count, val, nwords): ... return ('word list has {0} occurrences of {1}, ' ... 'out of {2} words').format(count, val, nwords) >>> dsk = {'words': 'apple orange apple pear orange pear pear', ... 'nwords': (len, (str.split, 'words')), ... 'val1': 'orange', ... 'val2': 'apple', ... 'val3': 'pear', ... 'count1': (str.count, 'words', 'val1'), ... 'count2': (str.count, 'words', 'val2'), ... 'count3': (str.count, 'words', 'val3'), ... 'out1': (format_str, 'count1', 'val1', 'nwords'), ... 'out2': (format_str, 'count2', 'val2', 'nwords'), ... 'out3': (format_str, 'count3', 'val3', 'nwords'), ... 'print1': (print_and_return, 'out1'), ... 'print2': (print_and_return, 'out2'), ... 'print3': (print_and_return, 'out3')} .. image:: images/optimize_dask1.png :width: 65 % :alt: The original dask graph Here we're counting the occurrence of the words ``'orange``, ``'apple'``, and ``'pear'`` in the list of words, formatting an output string reporting the results, printing the output, then returning the output string. To perform the computation, we pass the dask graph and the desired output keys to a scheduler ``get`` function: .. 
code-block:: python >>> from dask.threaded import get >>> outputs = ['print1', 'print2'] >>> results = get(dsk, outputs) word list has 2 occurrences of apple, out of 7 words word list has 2 occurrences of orange, out of 7 words >>> results ('word list has 2 occurrences of orange, out of 7 words', 'word list has 2 occurrences of apple, out of 7 words') As can be seen above, the scheduler computed only the requested outputs (``'print3'`` was never computed). This is because the scheduler internally calls ``cull``, which removes the unnecessary tasks from the graph. Even though this is done internally in the scheduler, it can be beneficial to call it at the start of a series of optimizations to reduce the amount of work done in later steps: .. code-block:: python >>> from dask.optimize import cull >>> dsk1, dependencies = cull(dsk, outputs) .. image:: images/optimize_dask2.png :width: 60 % :alt: After culling Looking at the task graph above, there are multiple accesses to constants such as ``'val1'`` or ``'val2'`` in the dask graph. These can be inlined into the tasks to improve efficiency using the ``inline`` function. For example: .. code-block:: python >>> from dask.optimize import inline >>> dsk2 = inline(dsk1, dependencies=dependencies) >>> results = get(dsk2, outputs) word list has 2 occurrences of apple, out of 7 words word list has 2 occurrences of orange, out of 7 words .. image:: images/optimize_dask3.png :width: 40 % :alt: After inlining Now we have two sets of *almost* linear task chains. The only link between them is the word counting function. For cheap operations like this, the serialization cost may be larger than the actual computation, so it may be faster to do the computation more than once, rather than passing the results to all nodes. To perform this function inlining, the ``inline_functions`` function can be used: .. code-block:: python >>> from dask.optimize import inline_functions >>> dsk3 = inline_functions(dsk2, outputs, [len, str.split], ... dependencies=dependencies) >>> results = get(dsk3, outputs) word list has 2 occurrences of apple, out of 7 words word list has 2 occurrences of orange, out of 7 words .. image:: images/optimize_dask4.png :width: 40 % :alt: After inlining functions Now we have a set of purely linear tasks. We'd like to have the scheduler run all of these on the same worker to reduce data serialization between workers. One option is just to merge these linear chains into one big task using the ``fuse`` function: .. code-block:: python >>> from dask.optimize import fuse >>> dsk4, dependencies = fuse(dsk3) >>> results = get(dsk4, outputs) word list has 2 occurrences of apple, out of 7 words word list has 2 occurrences of orange, out of 7 words .. image:: images/optimize_dask5.png :width: 40 % :alt: After fusing Putting it all together: .. code-block:: python >>> def optimize_and_get(dsk, keys): ... dsk1, deps = cull(dsk, keys) ... dsk2 = inline(dsk1, dependencies=deps) ... dsk3 = inline_functions(dsk2, keys, [len, str.split], ... dependencies=deps) ... dsk4, deps = fuse(dsk3) ... return get(dsk4, keys) >>> optimize_and_get(dsk, outputs) word list has 2 occurrences of apple, out of 7 words word list has 2 occurrences of orange, out of 7 words In summary, the above operations accomplish the following: 1. Removed tasks unnecessary for the desired output using ``cull``. 2. Inlined constants using ``inline``. 3. Inlined cheap computations using ``inline_functions``, improving parallelism. 4. 
Fused linear tasks together to ensure they run on the same worker using ``fuse``. As stated previously, these optimizations are already performed automatically in the dask collections. Users not working with custom graphs or computations should rarely need to directly interact with them. These are just a few of the optimizations provided in ``dask.optimize``. For more information, see the API below. Rewrite Rules ------------- For context based optimizations, ``dask.rewrite`` provides functionality for pattern matching and term rewriting. This is useful for replacing expensive computations with equivalent, cheaper computations. For example, ``dask.array`` uses the rewrite functionality to replace series of array slicing operations with a more efficient single slice. The interface to the rewrite system consists of two classes: 1. ``RewriteRule(lhs, rhs, vars)`` Given a left-hand-side (``lhs``), a right-hand-side (``rhs``), and a set of variables (``vars``), a rewrite rule declaratively encodes the following operation: ``lhs -> rhs if task matches lhs over variables`` 2. ``RuleSet(*rules)`` A collection of rewrite rules. The design of ``RuleSet`` class allows for efficient "many-to-one" pattern matching, meaning that there is minimal overhead for rewriting with multiple rules in a rule set. Example ~~~~~~~ Here we create two rewrite rules expressing the following mathematical transformations: 1. ``a + a -> 2*a`` 2. ``a * a -> a**2`` where ``'a'`` is a variable: .. code-block:: python >>> from dask.rewrite import RewriteRule, RuleSet >>> from operator import add, mul, pow >>> variables = ('a',) >>> rule1 = RewriteRule((add, 'a', 'a'), (mul, 'a', 2), variables) >>> rule2 = RewriteRule((mul, 'a', 'a'), (pow, 'a', 2), variables) >>> rs = RuleSet(rule1, rule2) The ``RewriteRule`` objects describe the desired transformations in a declarative way, and the ``RuleSet`` builds an efficient automata for applying that transformation. Rewriting can then be done using the ``rewrite`` method: .. code-block:: python >>> rs.rewrite((add, 5, 5)) (mul, 5, 2) >>> rs.rewrite((mul, 5, 5)) (pow, 5, 2) >>> rs.rewrite((mul, (add, 3, 3), (add, 3, 3))) (pow, (mul, 3, 2), 2) The whole task is traversed by default. If you only want to apply a transform to the top-level of the task, you can pass in ``strategy='top_level'`` as shown: .. code-block:: python # Transforms whole task >>> rs.rewrite((sum, [(add, 3, 3), (mul, 3, 3)])) (sum, [(mul, 3, 2), (pow, 3, 2)]) # Only applies to top level, no transform occurs >>> rs.rewrite((sum, [(add, 3, 3), (mul, 3, 3)]), strategy='top_level') (sum, [(add, 3, 3), (mul, 3, 3)]) The rewriting system provides a powerful abstraction for transforming computations at a task level. Again, for many users, directly interacting with these transformations will be unnecessary. Keyword Arguments ----------------- Some optimizations take optional keyword arguments. To pass keywords from the compute call down to the right optimization, prepend the keyword with the name of the optimization. For example to send a ``keys=`` keyword argument to the ``fuse`` optimization from a compute call, use the ``fuse_keys=`` keyword: .. code-block:: python def fuse(dsk, keys=None): ... x.compute(fuse_keys=['x', 'y', 'z']) Customizing Optimization ------------------------ Dask defines a default optimization strategy for each collection type (Array, Bag, DataFrame, Delayed). However different applications may have different needs. 
To address this variability of needs, you can construct your own custom optimization function and use it instead of the default. An optimization function takes in a task graph and list of desired keys and returns a new task graph. .. code-block:: python def my_optimize_function(dsk, keys): new_dsk = {...} return new_dsk You can then register this optimization class against whichever collection type you prefer and it will be used instead of the default scheme. .. code-block:: python with dask.set_options(array_optimize=my_optimize_function): x, y = dask.compute(x, y) You can register separate optimization functions for different collections, or you can register ``None`` if you do not want particular types of collections to be optimized. .. code-block:: python with dask.set_options(array_optimize=my_optimize_function, dataframe_optimize=None, delayed_optimize=my_other_optimize_function): ... You need not specify all collections. Collections will default to their standard optimization scheme (which is usually a good choice). API --- .. currentmodule:: dask.optimize **Top level optimizations** .. autosummary:: cull fuse inline inline_functions **Utility functions** .. autosummary:: functions_of **Rewrite Rules** .. currentmodule:: dask.rewrite .. autosummary:: RewriteRule RuleSet Definitions ~~~~~~~~~~~ .. currentmodule:: dask.optimize .. autofunction:: cull .. autofunction:: fuse .. autofunction:: inline .. autofunction:: inline_functions .. autofunction:: functions_of .. currentmodule:: dask.rewrite .. autofunction:: RewriteRule .. autofunction:: RuleSet dask-0.16.0/docs/source/presentations.rst000066400000000000000000000015431320364734500204120ustar00rootroot00000000000000Presentations On Dask ===================== * PLOTCON 2016, December 2016 * `Visualizing Distributed Computations with Dask and Bokeh `__ * PyData DC, October 2016 * `Using Dask for Parallel Computing in Python `__ * SciPy 2016, July 2016 * `Dask Parallel and Distributed Computing `__ * PyData NYC, December 2015 * `Dask Parallelizing NumPy and Pandas through Task Scheduling `__ * PyData Seattle, August 2015 * `Dask: out of core arrays with task scheduling `__ * SciPy 2015, July 2015 * `Dask Out of core NumPy:Pandas through Task Scheduling `__ dask-0.16.0/docs/source/remote-data-services.rst000066400000000000000000000232441320364734500215410ustar00rootroot00000000000000Remote Data Services ==================== As described in Section :doc:`Internal Data Ingestion `, various user-facing functions (such as ``dataframe.read_csv``, ``dataframe.read_parquet``, ``bag.read_text``) and lower level byte-manipulating functions may point to data that lives not on the local storage of the workers, but on a remote system such as Amazon S3. In this section we describe how to use the various known back-end storage systems. Below we give some details for interested developers on how further storage back-ends may be provided for dask's use. Known Storage Implementations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ When specifying a storage location, a URL should be provided using the general form ``protocol://path/to/data``. If no protocol is provided, the local file-system is assumed (same as ``file://``). Two methods exist for passing parameters to the backend file-system driver: extending the URL to include username, password, server, port, etc.; and providing ``storage_options``, a dictionary of parameters to pass on. Examples: .. 
code-block:: python df = dd.read_csv('hdfs://user@server:port/path/*.csv') df = dd.read_parquet('s3://bucket/path', storage_options={'anon': True, 'use_ssl': False}) Further details on how to provide configuration for each backend is listed next. Each back-end has additional installation requirements and may not be available at runtime. The dictionary ``dask.bytes.core._filesystems`` contains the currently available file-systems. Some require appropriate imports before use. The following list gives the protocol shorthands and the back-ends they refer to: - ``file:`` - the local file system, default in the absence of any protocol - ``hdfs:`` - Hadoop Distributed File System, a for resilient, replicated files within a cluster, using the library hdfs3_ - ``s3:`` - Amazon S3 remote binary store, often used with Amazon EC2, using the library s3fs_ - ``gcs:`` or ``gs:`` - Google Cloud Storage, typically used with Google Compute resource, using gcsfs_ (in development) .. - # ``adlfs:`` - Azure Data-lake cloud storage, for use with the Microsoft Azure platform, using azure-data-lake-store-python_ .. _hdfs3: http://hdfs3.readthedocs.io/ .. _s3fs: http://s3fs.readthedocs.io/ .. .. _azure-data-lake-store-python: https://github.com/Azure/azure-data-lake-store-python .. _gcsfs: https://github.com/martindurant/gcsfs/ Local File System ----------------- Local files are always accessible, and all parameters passed as part of the URL (beyond the path itself) or with the ``storage_options`` dictionary will be ignored. This is the default back-end, and the one used if no protocol is passed at all. We assume here that each worker has access to the same file-system - either the workers are co-located on the same machine, or a network file system is mounted and referenced at the same path location for every worker node. Locations specified relative to the current working directory will, in general, be respected (as they would be with the built-in python ``open``), but this may fail in the case that the client and worker processes do not necessarily have the same working directory. HDFS ---- The Hadoop File System (HDFS) is a widely deployed, distributed, data-local file system written in Java. This file system backs many clusters running Hadoop and Spark. Within dask, HDFS is only available when the module ``distributed.hdfs`` is explicitly imported, since the usage of HDFS usually only makes sense in a cluster setting. The distributed scheduler will prefer to allocate tasks which read from HDFS to machines which have local copies of the blocks required for their work, where possible. By default, hdfs3 attempts to read the default server and port from local Hadoop configuration files on each node, so it may be that no configuration is required. However, the server, port and user can be passed as part of the url: ``hdfs://user:pass@server:port/path/to/data``. The following parameters may be passed to hdfs3 using ``storage_options``: - host, port, user: basic authentication - ticket_cache, token: kerberos authentication - pars: dictionary of further parameters (e.g., for `high availability`_) .. _high availability: http://hdfs3.readthedocs.io/en/latest/hdfs.html#high-availability-mode Important environment variables: - HADOOP_CONF_DIR or HADOOP_INSTALL: directory containing ``core-site.xml`` and/or ``hdfs-site.xml``, containing configuration information - LIBHDFS3_CONF: location of a specific xml file with configuration for the `libhdfs3 client`_; this may be the same as one of the files above. 
`Short circuit` reads should be defined in this file (`see here`_) .. _see here: http://hdfs3.readthedocs.io/en/latest/hdfs.html#short-circuit-reads-in-hdfs .. _libhdfs3 client: https://github.com/Pivotal-Data-Attic/pivotalrd-libhdfs3/wiki/Configure-Parameters S3 ----- Amazon S3 (Simple Storage Service) is a web service offered by Amazon Web Services. The S3 back-end will be available to dask is s3fs is importable when dask is imported. Authentication for S3 is provided by the underlying library boto3. As described in the `auth docs`_ this could be achieved by placing credentials files in one of several locations on each node: ``~/.aws/credentials``, ``~/.aws/config``, ``/etc/boto.cfg`` and ``~/.boto``. Alternatively, for nodes located within Amazon EC2, IAM roles can be set up for each node, and then no further configuration is required. The final authentication option, is for user credentials can be passed directly in the URL (``s3://keyID:keySecret/bucket/key/name``) or using ``storage_options``. In this case, however, the key/secret will be passed to all workers in-the-clear, so this method is only recommended on well-secured networks. .. _auth docs: http://boto3.readthedocs.io/en/latest/guide/configuration.html The following parameters may be passed to s3fs using ``storage_options``: - anon: whether access should be anonymous (default False) - key, secret: for user authentication - token: if authentication has been done with some other S3 client - use_ssl: whether connections are encrypted and secure (default True) - client_kwargs: dict passed to the `boto3 client`_, with keys such as `region_name`, `endpoint_url` - requester_pays: set True if the authenticated user will assume transfer costs, which is required by some providers of bulk data - default_block_size, default_fill_cache: these are not of particular interest to dask users, as they concern the behaviour of the buffer between successive reads - kwargs: other parameters are passed to the `boto3 Session`_ object, such as `profile_name`, to pick one of the authentication sections from the configuration files referred to above (see `here`_) .. _boto3 client: http://boto3.readthedocs.io/en/latest/reference/core/session.html#boto3.session.Session.client .. _boto3 Session: http://boto3.readthedocs.io/en/latest/reference/core/session.html .. _here: http://boto3.readthedocs.io/en/latest/guide/configuration.html#shared-credentials-file Google Cloud Storage -------------------- (gcsfs is in early development, expect the details here to change) Google Cloud Storage is a RESTful online file storage web service for storing and accessing data on Google's infrastructure. The GCS backend will be available only after importing gcsfs_. The protocol identifiers ``gcs`` and ``gs`` are identical in their effect. Authentication for GCS is based on OAuth2, and designed for user verification. Interactive authentication is available when ``token==None`` using the local browser, or by using gcloud_ to produce a JSON token file and passing that. In either case, gcsfs stores a cache of tokens in a local file, so subsequent authentication will not be necessary. .. _gcsfs: https://github.com/martindurant/gcsfs/ .. _gcloud: https://cloud.google.com/sdk/docs/ At the time of writing, ``gcsfs.GCSFileSystem`` instances pickle including the auth token, so sensitive information is passed between nodes of a dask distributed cluster. 
This will be changed to allow the use of either local JSON or pickle files for storing tokens and authenticating on each node automatically, instead of passing around an authentication token, similar to S3, above. Every use of GCS requires the specification of a project to run within - if the project is left empty, the user will not be able to perform any bucket-level operations. The project can be defined using the variable GCSFS_DEFAULT_PROJECT in the environment of every worker, or by passing something like the following .. code-block:: python dd.read_parquet('gs://bucket/path', storage_options={'project': 'myproject'} Possible additional storage options: - access : 'read_only', 'read_write', 'full_control', access privilege level (note that the token cache uses a separate token for each level) - token: either an actual dictionary of a google token, or location of a JSON file created by gcloud. Developer API ~~~~~~~~~~~~~ The prototype for any file-system back-end can be found in ``bytes.local.LocalFileSystem``. Any new implementation should provide the same API, and make itself available as a protocol to dask. For example, the following would register the protocol "myproto", described by the implementation class ``MyProtoFileSystem``. URLs of the form ``myproto://`` would thereafter be dispatched to the methods of this class. .. code-block:: python dask.bytes.core._filesystems['myproto'] = MyProtoFileSystem For a more complicated example, users may wish to also see ``dask.bytes.s3.DaskS3FileSystem``. .. currentmodule:: dask.bytes.local .. autoclass:: LocalFileSystem :members: dask-0.16.0/docs/source/scheduler-choice.rst000066400000000000000000000145561320364734500207320ustar00rootroot00000000000000Choosing between Schedulers =========================== Dask enables you to run your computations on a variety of schedulers that use different technologies, ranging from a single thread, to multiple processes, to a distributed cluster. This document helps explain the differences between the different choices so that users can make decisions that improve performance. Briefly, the current options are as follows: * ``dask.threaded.get``: Uses multiple threads in the same process. Good for numeric code that releases the GIL_ (NumPy, Pandas, SKLearn, Numba) because data is free to share. The default scheduler for ``dask.array``, ``dask.dataframe`` and ``dask.delayed`` * ``dask.multiprocessing.get``: Uses multiple processes. Good for Python bound code that needs multiple interpreters to accelerate. There are some costs to sharing data back and forth between processes. The default scheduler for ``dask.bag`` and sometimes useful with ``dask.dataframe``. * ``dask.get``: Uses the single main thread. Good for profiling and debugging because all code is run sequentially * ``distributed.Client.get``: Uses multiple machines connected over sockets. Good for larger work but also a viable alternative to ``dask.multiprocessing`` on a single machine. Also sometimes used for its improved diagnostic tools. Threads vs Processes -------------------- Threads are good because they can share data back and forth in the same memory space without transfer costs. Threads can pass large arrays between each other instantaneously. Unfortunately due to the GIL_ pure Python code (like JSON parsing) does not parallelize well under threads, and so when computing on pure Python objects, like strings or lists or our custom objects, we may prefer to use processes. 
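As a minimal sketch of what this choice looks like in practice (using the same
``get`` functions listed above), the scheduler can be selected per computation:

.. code-block:: python

   >>> import dask.bag as db
   >>> import dask.threaded
   >>> import dask.multiprocessing

   >>> b = db.from_sequence(['apple', 'orange', 'pear'] * 1000)

   >>> # Pure-Python work: multiple processes side-step the GIL
   >>> b.frequencies().compute(get=dask.multiprocessing.get)

   >>> # The same computation on a thread pool, sharing memory instead
   >>> b.frequencies().compute(get=dask.threaded.get)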
Threads are good when using numeric data and when the computation is complex with many cross-dependencies. Processes don't have issues with the GIL, but data transfer between cores when using processes can be expensive. Data transfer isn't an issue for embarrassingly parallel computations, which is fortunately the norm when dealing with pure Python computations. Single Threaded Scheduler ------------------------- Debugging, profiling, and general comprehension of code is hard when computing in parallel. Standard tools like ``pdb`` or ``cProfile`` fail to operate well when running under multiple threads or processes. To resolve this problem there is a dask scheduler, ``dask.get`` that doesn't actually run in parallel, but instead steps through your graph in the main thread. It otherwise operates exactly like the threaded and multiprocessing schedulers, and so is a faithful proxy when tracking down difficult issues. Distributed Scheduler on a Cluster ---------------------------------- The distributed scheduler is more sophisticated than the single machine schedulers (threaded, multiprocessing, and synchronous all share a common codebase). This is good because it can be significantly smarter about how it runs computations. However this also introduces extra conceptual overhead and potential setup costs. The primary reason to use the distributed scheduler is to use multiple machines on a distributed cluster. This allows computations to scale to significantly larger problems. This doesn't come for free though, as you will need to `setup the distributed scheduler`_ on those machines. .. _`setup the distributed scheduler`: https://distributed.readthedocs.io/en/latest/setup.html Distributed Scheduler on a Single Machine ----------------------------------------- It is also reasonable to use the `distributed scheduler`_ on a single machine. This is often recommended over the multiprocessing scheduler for the following reasons: 1. The multiprocessing scheduler brings intermediate values back to the main process before sending them out again for new tasks. For embarrassingly parallel workloads, such as are common in :doc:`dask.bag `, this is rarely a problem because repeated tasks are fused together and outputs are typically small, like counts, sums, or filenames to which we have written results. However for more complex workloads like a blocked matrix multiply this can be troublesome. The distributed scheduler is sophisticated enough to track which data is in which process and so can avoid costly interprocess communication. 2. The distributed scheduler supports a set of rich real-time diagnostics which can help provide feedback and diagnose performance issues. 3. The distributed scheduler supports a larger API, including asynchronous operations and computing in the background. You can create a local "cluster" and use this scheduler by default by creating a ``dask.distributed.Client`` with no arguments. .. code-block:: python from dask.distributed import Client client = Client() You may prefer to use the multiprocessing scheduler over the distributed scheduler if the following hold: 1. Your computations don't involve complex graphs that share data to multiple tasks 2. You want to avoid depending on Tornado, the technology that backs the distributed scheduler .. _`distributed scheduler`: https://distributed.readthedocs.io/en/latest/ Diagnostics ~~~~~~~~~~~ One reason to do this is to get access to the pleasant `web interface`_, which gives a real-time visualization of what's computing on your cores. .. 
_`web interface`: https://distributed.readthedocs.io/en/latest/web.html Asynchronous Interface ~~~~~~~~~~~~~~~~~~~~~~ The distributed scheduler also provides asynchronous computation, where you can submit a computation to run in the background, and only collect its results later Data Locality ~~~~~~~~~~~~~ The distributed scheduler is sometimes more efficient than the multiprocessing scheduler, particularly when tasks have complex dependency structures and require non-trivial communication. Because of how the standard library's ``multiprocessing.Pool`` works, the multiprocessing scheduler always brings intermediate results back to the master process and then sends them out to a worker process afterwards if further work needs to be done. This back-and-forth communication can dominate costs and slow down overall performance. The distributed scheduler does not have this flaw, can reason well about data-in-place, and can move small pieces of data to larger ones. .. _GIL: https://docs.python.org/3/glossary.html#term-gil dask-0.16.0/docs/source/scheduler-overview.rst000066400000000000000000000124351320364734500213400ustar00rootroot00000000000000Scheduler Overview ================== After we create a dask graph, we use a scheduler to run it. Dask currently implements a few different schedulers: - ``dask.threaded.get``: a scheduler backed by a thread pool - ``dask.multiprocessing.get``: a scheduler backed by a process pool - ``dask.get``: a synchronous scheduler, good for debugging - ``distributed.Client.get``: a distributed scheduler for executing graphs on multiple machines. This lives in the external distributed_ project. .. _distributed: https://distributed.readthedocs.io/en/latest/ The ``get`` function -------------------- The entry point for all schedulers is a ``get`` function. This takes a dask graph, and a key or list of keys to compute: .. code-block:: python >>> from operator import add >>> dsk = {'a': 1, ... 'b': 2, ... 'c': (add, 'a', 'b'), ... 'd': (sum, ['a', 'b', 'c'])} >>> get(dsk, 'c') 3 >>> get(dsk, 'd') 6 >>> get(dsk, ['a', 'b', 'c']) [1, 2, 3] Using ``compute`` methods ------------------------- When working with dask collections, you will rarely need to interact with scheduler ``get`` functions directly. Each collection has a default scheduler, and a built-in ``compute`` method that calculates the output of the collection: .. code-block:: python >>> import dask.array as da >>> x = da.arange(100, chunks=10) >>> x.sum().compute() 4950 The compute method takes a number of keywords: - ``get``: a scheduler ``get`` function, overrides the default for the collection - ``**kwargs``: extra keywords to pass on to the scheduler ``get`` function. See also: :ref:`configuring-schedulers`. The ``compute`` function ------------------------ You may wish to compute results from multiple dask collections at once. Similar to the ``compute`` method on each collection, there is a general ``compute`` function that takes multiple collections and returns multiple results. This merges the graphs from each collection, so intermediate results are shared: .. code-block:: python >>> y = (x + 1).sum() >>> z = (x + 1).mean() >>> da.compute(y, z) # Compute y and z, sharing intermediate results (5050, 50.5) Here the ``x + 1`` intermediate was only computed once, while calling ``y.compute()`` and ``z.compute()`` would compute it twice. For large graphs that share many intermediates, this can be a big performance gain. The ``compute`` function works with any dask collection, and is found in ``dask.base``. 
For convenience it has also been imported into the top level namespace of each collection. .. code-block:: python >>> from dask.base import compute >>> compute is da.compute True .. _configuring-schedulers: Configuring the schedulers -------------------------- The dask collections each have a default scheduler: - ``dask.array`` and ``dask.dataframe`` use the threaded scheduler by default - ``dask.bag`` uses the multiprocessing scheduler by default. For most cases, the default settings are good choices. However, sometimes you may want to use a different scheduler. There are two ways to do this. 1. Using the ``get`` keyword in the ``compute`` method: .. code-block:: python >>> x.sum().compute(get=dask.multiprocessing.get) 2. Using ``dask.set_options``. This can be used either as a context manager, or to set the scheduler globally: .. code-block:: python # As a context manager >>> with dask.set_options(get=dask.multiprocessing.get): ... x.sum().compute() # Set globally >>> dask.set_options(get=dask.multiprocessing.get) >>> x.sum().compute() Additionally, each scheduler may take a few extra keywords specific to that scheduler. For example, the multiprocessing and threaded schedulers each take a ``num_workers`` keyword, which sets the number of processes or threads to use (defaults to number of cores). This can be set by passing the keyword when calling ``compute``: .. code-block:: python # Compute with 4 threads >>> x.compute(num_workers=4) Alternatively, the multiprocessing and threaded schedulers will check for a global pool set with ``dask.set_options``: .. code-block:: python >>> from multiprocessing.pool import ThreadPool >>> with dask.set_options(pool=ThreadPool(4)): ... x.compute() For more information on the individual options for each scheduler, see the docstrings for each scheduler ``get`` function. Debugging the schedulers ------------------------ Debugging parallel code can be difficult, as conventional tools such as ``pdb`` don't work well with multiple threads or processes. To get around this when debugging, we recommend using the synchronous scheduler found at ``dask.get``. This runs everything serially, allowing it to work well with ``pdb``: .. code-block:: python >>> dask.set_options(get=dask.get) >>> x.sum().compute() # This computation runs serially instead of in parallel The shared memory schedulers also provide a set of callbacks that can be used for diagnosing and profiling. You can learn more about scheduler callbacks and diagnostics :doc:`here `. More Information ---------------- - See :doc:`shared` for information on the design of the shared memory (threaded or multiprocessing) schedulers - See distributed_ for information on the distributed memory scheduler dask-0.16.0/docs/source/scheduling-policy.rst000066400000000000000000000105771320364734500211450ustar00rootroot00000000000000Scheduling in Depth =================== *Note: this technical document is not optimized for user readability.* The default shared memory scheduler used by most dask collections lives in ``dask/scheduler.py``. This scheduler dynamically schedules tasks to new workers as they become available. It operates in a shared memory environment without consideration to data locality, all workers have access to all data equally. We find that our workloads are best served by trying to minimize the memory footprint. This document talks about our policies to accomplish this in our scheduling budget of one millisecond per task, irrespective of the number of tasks. 
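The depth-first numbering described below can be inspected directly.  As a
rough sketch (``dask.order`` is an internal module, and its exact output is an
implementation detail):

.. code-block:: python

   >>> from operator import add
   >>> from dask.order import order

   >>> dsk = {'a': 1,
   ...        'b': (add, 'a', 1),
   ...        'c': (add, 'a', 2),
   ...        'd': (add, 'b', 'c')}

   >>> priorities = order(dsk)   # dict mapping every key to an integer rank
   >>> len(priorities) == len(dsk)
   True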
Generally we are faced with the following situation: A worker arrives with a newly completed task. We update our data structures of execution state and have to provide a new task for that worker. In general there are very many available tasks, which should we give to the worker? *Q: Which of our available tasks should we give to the newly ready worker?* This question is simple and local and yet strongly impacts the performance of our algorithm. We want to choose a task that lets us free memory now and in the future. We need a clever and cheap way to break a tie between the set of available tasks. At this stage we choose the policy of "last in, first out." That is we choose the task that was most recently made available, quite possibly by the worker that just returned to us. This encourages the general theme of finishing things before starting new things. We implement this with a stack. When a worker arrives with its finished task we figure out what new tasks we can now compute with the new data and put those on top of the stack if any exist. We pop an item off of the top of the stack and deliver that to the waiting worker. And yet if the newly completed task makes ready multiple newly ready tasks in which order should we place them on the stack? This is yet another opportunity for a tie breaker. This is particularly important at *the beginning* of execution where we typically add a large number of leaf tasks onto the stack. Our choice in this tie breaker also strongly affects performance in many cases. We want to encourage depth first behavior where, if our computation is composed of something like many trees we want to fully explore one subtree before moving on to the next. This encourages our workers to complete blocks/subtrees of our graph before moving on to new blocks/subtrees. And so to encourage this "depth first behavior" we do a depth first search and number all nodes according to their number in the depth first search (DFS) traversal. We use this number to break ties when adding tasks on to the stack. Please note that while we spoke of optimizing the many-distinct-subtree case above this choice is entirely local and applies quite generally beyond this case. Anything that behaves even remotely like the many-distinct-subtree case will benefit accordingly, and this case is quite common in normal workloads. And yet we have glossed over another tie breaker. Performing the depth first search, when we arrive at a node with many children we can choose the order in which to traverse the children. We resolve this tie breaker by selecting those children whose result is depended upon by the most nodes. This dependence can be either direct for those nodes that take that data as input or indirect for any ancestor node in the graph. This emphasizing traversing first those nodes that are parts of critical paths having long vertical chains that rest on top of this node's result, and nodes whose data is depended upon by many nodes in the future. We choose to dive down into these subtrees first in our depth first search so that future computations don't get stuck waiting for them to complete. And so we have three tie breakers 1. Q: Which of these available tasks should I run? A: Last in, first out 2. Q: Which of these tasks should I put on the stack first? A: Do a depth first search before the computation, use that ordering. 3. Q: When performing the depth first search how should I choose between children? 
A: Choose those children on whom the most data depends We have found common workflow types that require each of these decisions. We have not yet run into a commonly occurring graph type in data analysis that is not well handled by these heuristics for the purposes of minimizing memory use. dask-0.16.0/docs/source/scripts/000077500000000000000000000000001320364734500164465ustar00rootroot00000000000000dask-0.16.0/docs/source/scripts/scheduling.py000066400000000000000000000062611320364734500211520ustar00rootroot00000000000000from toolz import merge from time import time import dask from dask import threaded, multiprocessing, scheduler from random import randint from collections import Iterator import matplotlib.pyplot as plt def noop(x): pass nrepetitions = 1 def trivial(width, height): """ Embarrassingly parallel dask """ d = {('x', 0, i): i for i in range(width)} for j in range(1, height): d.update({('x', j, i): (noop, ('x', j - 1, i)) for i in range(width)}) return d, [('x', height - 1, i) for i in range(width)] def crosstalk(width, height, connections): """ Natural looking dask with some inter-connections """ d = {('x', 0, i): i for i in range(width)} for j in range(1, height): d.update({('x', j, i): (noop, [('x', j - 1, randint(0, width)) for _ in range(connections)]) for i in range(width)}) return d, [('x', height - 1, i) for i in range(width)] def dense(width, height): """ Full barriers between each step """ d = {('x', 0, i): i for i in range(width)} for j in range(1, height): d.update({('x', j, i): (noop, [('x', j - 1, k) for k in range(width)]) for i in range(width)}) return d, [('x', height - 1, i) for i in range(width)] import numpy as np x = np.logspace(0, 4, 10) trivial_results = dict() for get in [dask.get, threaded.get, scheduler.get_sync, multiprocessing.get]: y = list() for n in x: dsk, keys = trivial(int(n), 5) start = time() get(dsk, keys) end = time() y.append(end - start) trivial_results[get] = np.array(y) ######## # Plot # ######## f, (left, right) = plt.subplots(nrows=1, ncols=2, sharex=True, figsize=(12, 5), squeeze=True) for get in trivial_results: left.loglog(x * 5, trivial_results[get], label=get.__module__) right.loglog(x * 5, trivial_results[get] / x, label=get.__module__) left.set_title('Cost for Entire graph') right.set_title('Cost per task') left.set_ylabel('Duration (s)') right.set_ylabel('Duration (s)') left.set_xlabel('Number of tasks') right.set_xlabel('Number of tasks') plt.legend() plt.savefig('images/scaling-nodes.png') ##################### # Crosstalk example # ##################### x = np.linspace(1, 100, 10) crosstalk_results = dict() for get in [threaded.get, scheduler.get_sync]: y = list() for n in x: dsk, keys = crosstalk(1000, 5, int(n)) start = time() get(dsk, keys) end = time() y.append(end - start) crosstalk_results[get] = np.array(y) ######## # Plot # ######## f, (left, right) = plt.subplots(nrows=1, ncols=2, sharex=True, figsize=(12, 5), squeeze=True) for get in crosstalk_results: left.plot(x, crosstalk_results[get], label=get.__module__) right.semilogy(x, crosstalk_results[get] / 5000. 
/ x, label=get.__module__) left.set_title('Cost for Entire graph') right.set_title('Cost per edge') left.set_ylabel('Duration (s)') right.set_ylabel('Duration (s)') left.set_xlabel('Number of edges per task') right.set_xlabel('Number of edges per task') plt.legend() plt.savefig('images/scaling-edges.png') dask-0.16.0/docs/source/shared.rst000066400000000000000000000117431320364734500167650ustar00rootroot00000000000000Shared Memory ============= The asynchronous scheduler requires an ``apply_async`` function and a ``Queue``. These determine the kind of worker and parallelism that we exploit. ``apply_async`` functions can be found in the following places: * ``multiprocessing.Pool().apply_async`` - uses multiple processes * ``multiprocessing.pool.ThreadPool().apply_async`` - uses multiple threads * ``dask.local.apply_sync`` - uses only the main thread (useful for debugging) Full dask ``get`` functions exist in each of ``dask.threaded.get``, ``dask.multiprocessing.get`` and ``dask.get`` respectively. Policy ------ The asynchronous scheduler maintains indexed data structures that show which tasks depend on which data, what data is available, and what data is waiting on what tasks to complete before it can be released, and what tasks are currently running. It can update these in constant time relative to the number of total and available tasks. These indexed structures make the dask async scheduler scalable to very many tasks on a single machine. .. image:: images/async-embarrassing.gif :width: 50 % :align: right :alt: Embarrassingly parallel dask flow To keep the memory footprint small, we choose to keep ready-to-run tasks in a LIFO stack such that the most recently made available tasks get priority. This encourages the completion of chains of related tasks before new chains are started. This can also be queried in constant time. More info: :doc:`scheduling policy <scheduling-policy>`. Performance ----------- *EDIT: The experiments run in this section are now outdated. Anecdotal testing shows that performance has improved significantly. There is now about 200 us overhead per task and about 1 ms startup time.* **tl;dr** The threaded scheduler overhead behaves roughly as follows: * 1ms overhead per task * 100ms startup time (if you wish to make a new ThreadPool each time) * Constant scaling with number of tasks * Linear scaling with number of dependencies per task Schedulers introduce overhead. This overhead effectively limits the granularity of our parallelism. Below we measure overhead of the async scheduler with different apply functions (threaded, sync, multiprocessing), and under different kinds of load (embarrassingly parallel, dense communication). The quickest/simplest test we can do is to use IPython's ``timeit`` magic: .. code-block:: python In [1]: import dask.array as da In [2]: x = da.ones(1000, chunks=(2,)).sum() In [3]: len(x.dask) Out[3]: 1001 In [4]: %timeit x.compute() 1 loops, best of 3: 550 ms per loop So this takes about 500 microseconds per task. About 100ms of this is from overhead: .. code-block:: python In [6]: x = da.ones(1000, chunks=(1000,)).sum() In [7]: %timeit x.compute() 10 loops, best of 3: 103 ms per loop Most of this overhead is from spinning up a ThreadPool each time. This may be mediated by using a global or contextual pool: .. code-block:: python >>> from multiprocessing.pool import ThreadPool >>> pool = ThreadPool() >>> da.set_options(pool=pool) # set global threadpool or >>> with set_options(pool=pool) # use threadpool throughout with block ... ...
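If you prefer plain Python over the ``timeit`` magic, a minimal sketch of the same overhead measurement with a reused pool might look like the following (this only mirrors the experiment above; exact numbers will vary by machine):

.. code-block:: python

    import time
    from multiprocessing.pool import ThreadPool

    import dask.array as da

    da.set_options(pool=ThreadPool())        # reuse one pool instead of creating one per compute

    x = da.ones(1000, chunks=(2,)).sum()     # a graph of roughly one thousand tiny tasks

    start = time.time()
    x.compute()
    elapsed = time.time() - start
    print("%.0f us per task" % (elapsed / len(x.dask) * 1e6))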
We now measure scaling the number of tasks and scaling the density of the graph: .. image:: images/trivial.png :width: 30 % :align: right :alt: Adding nodes Linear scaling with number of tasks ``````````````````````````````````` As we increase the number of tasks in a graph, we see that the scheduling overhead grows linearly. The asymptotic cost per task depends on the scheduler. The schedulers that depend on some sort of asynchronous pool have costs of a few milliseconds and the single threaded schedulers have costs of a few microseconds. .. image:: images/scaling-nodes.png .. image:: images/crosstalk.png :width: 40 % :align: right :alt: Adding edges Linear scaling with number of edges ``````````````````````````````````` As we increase the number of edges per task, the scheduling overhead again increases linearly. Note: Neither the naive core scheduler nor the multiprocessing scheduler are good at workflows with non-trivial cross-task communication; they have been removed from the plot. .. image:: images/scaling-edges.png `Download scheduling script`_ Known Limitations ----------------- The shared memory scheduler has some notable limitations: 1. It works on a single machine 2. The threaded scheduler is limited by the GIL on Python code, so if your operations are pure python functions, you should not expect a multi-core speedup 3. The multiprocessing scheduler must serialize functions between workers, which can fail 4. The multiprocessing scheduler must serialize data between workers and the central process, which can be expensive 5. The multiprocessing scheduler cannot transfer data directly between worker processes; all data routes through the master process. .. _`Download scheduling script`: https://github.com/dask/dask/tree/master/docs/source/scripts/scheduling.py dask-0.16.0/docs/source/spark.rst000066400000000000000000000202771320364734500166410ustar00rootroot00000000000000Comparison to PySpark ===================== Spark_ is a popular distributed computing tool with a decent Python API PySpark_. Spark is growing to become a dominant name today in Big Data analysis alongside Hadoop, for which MRJob_ is possibly the dominant Python layer. Dask has several elements that appear to intersect this space and we are often asked, "How does Dask compare with Spark?" Answering such comparison questions in an unbiased and informed way is hard, particularly when the differences can be somewhat technical. This document tries to do this; we welcome any corrections. Summary ------- Apache Spark is an all-inclusive framework combining distributed computing, SQL queries, machine learning, and more that runs on the JVM and is commonly co-deployed with other Big Data frameworks like Hadoop. It was originally optimized for bulk data ingest and querying common in data engineering and business analytics but has since broadened out. Spark is typically used on small to medium sized cluster but also runs well on a single machine. Dask is a parallel programming library that combines with the Numeric Python ecosystem to provide parallel arrays, dataframes, machine learning, and custom algorithms. It is based on Python and the foundational C/Fortran stack. Dask was originally designed to complement other libraries with parallelism, particular for numeric computing and advanced analytics, but has since broadened out. Dask is typically used on a single machine, but also runs well on a distributed cluster. Generally Dask is smaller and lighter weight than Spark. 
This means that it has fewer features and instead is intended to be used in conjunction with other libraries, particularly those in the numeric Python ecosystem. User-Facing Differences ----------------------- Scale ~~~~~ Spark began its life aimed at the thousand node cluster case. As such it thinks well about worker failures and integration with data-local file systems like the Hadoop FileSystem (HDFS). That being said, Spark can run in standalone mode on a single machine. Dask began its life building out parallel algorithms for numerical array computations on a single computer. As such it thinks well about low-latency scheduling, low memory footprints, shared memory, and efficient use of local disk. That being said dask can run on a distributed_ cluster, making use of HDFS and other Big Data technologies. .. _distributed: https://distributed.readthedocs.io/ Java Python Performance ~~~~~~~~~~~~~~~~~~~~~~~ Spark is written in Scala, a multi-paradigm language built on top of the Java Virtual Machine (JVM). Since the rise of Hadoop, Java based languages have steadily gained traction on data warehousing tasks and are good at managing large amounts of heterogeneous data such as you might find in JSON blobs. The Spark development team is now focusing more on binary and native data formats with their new effort, Tungsten. Dask is written in Python, a multi-paradigm language built on top of the C/Fortran native stack. This stack benefits from decades of scientific research optimizing very fast computation on numeric data. As such, dask is already very good on analytic computations on data such as you might find in HDF5 files or analytic databases. It can also handle JSON blob type data using Python data structures (which are `surprisingly fast`_) using the cytoolz_ library in parallel. Java Python Disconnect ~~~~~~~~~~~~~~~~~~~~~~ Python users on Spark sometimes express frustration by how far separated they are from computations. Some of this is inevitable; distributed debugging is a hard problem. Some of it however is due to having to hop over the JVM. Spark workers spin up JVMs which in turn spin up Python processes. Data moving back and forth makes extra trips both through a distributed cluster and also through extra serialization layers (see py4j_) and computation layers. Limitations like the Java heap size and large Java stack traces come as a surprise to users accustomed to native code execution. Dask has an advantage for Python users because it is itself a Python library, so serialization and debugging when things go wrong happens more smoothly. However, Dask only benefits Python users while Spark is useful in a variety of JVM languages (Scala, Java, Clojure) and also has limited support in Python and R. New Spark projects like the DataFrame skip serialization and boxed execution issues by forgoing the Python process entirely and instead have Python code drive native Scala code. APIs for these libraries tend to lag a bit behind their Scala counterparts. Scope ~~~~~ Spark was originally built around the RDD, an unordered collection allowing repeats. Most spark add-ons were built on top of this construct, inheriting both its abilities and limitations. Dask is built on a lower-level and more general construct of a generic task graph with arbitrary data dependencies. This allows more general computations to be built by users within the dask framework. This is probably the largest fundamental difference between the two projects. 
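To make this concrete, the task graph construct that underlies every Dask collection is just a Python dict of tasks with arbitrary data dependencies (see the specification page in these docs); a minimal, hand-written sketch:

.. code-block:: python

    from operator import add
    from dask.threaded import get

    # keys map either to data or to tasks; dependencies may form any directed acyclic graph
    dsk = {'a': 1,
           'b': 2,
           'c': (add, 'a', 'b'),
           'd': (sum, ['a', 'b', 'c'])}

    get(dsk, 'd')  # 6

Higher-level operations on ``dask.array`` or ``dask.dataframe`` simply generate many such tasks automatically.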
Dask gives up high-level understanding to allow users to express more complex parallel algorithms. This ended up being essential when writing complex projects like ``dask.array``, datetime algorithms in ``dask.dataframe`` or non-trivial algorithms in machine learning. Developer-Facing Differences ---------------------------- Graph Granularity ~~~~~~~~~~~~~~~~~ Both Spark and Dask represent computations with directed acyclic graphs. These graphs however represent computations at very different granularities. One operation on a Spark RDD might add a node like ``Map`` and ``Filter`` to the graph. These are high-level operations that convey meaning and will eventually be turned into many little tasks to execute on individual workers. This many-little-tasks state is only available internally to the Spark scheduler. Dask graphs skip this high-level representation and go directly to the many-little-tasks stage. As such one ``map`` operation on a dask collection will immediately generate and add possibly thousands of tiny tasks to the dask graph. This difference in the scale of the underlying graph has implications on the kinds of analysis and optimizations one can do and also on the generality that one exposes to users. Dask is unable to perform some optimizations that Spark can because Dask schedulers do not have a top-down picture of the computation they were asked to perform. However, dask is able to easily represent far more `complex algorithms`_ and expose the creation of these algorithms to normal users. Dask.bag, the equivalent of the Spark.RDD, is just one abstraction built on top of dask. Others exist. Alternatively power-users can forego high-level collections entirely and jump straight to direct low-level task scheduling. Coding Styles ~~~~~~~~~~~~~ Both Spark and Dask are written in a functional style. Spark will probably be more familiar to those who enjoy algebraic types while dask will probably be more familiar to those who enjoy Lisp and "code as data structures". Conclusion ---------- Spark is mature and all-inclusive. If you want a single project that does everything and you're already on Big Data hardware then Spark is a safe bet, especially if your use cases are typical ETL + SQL and you're already using Scala. Dask is lighter weight and is easier to integrate into existing code and hardware. If your problems vary beyond typical ETL + SQL and you want to add flexible parallelism to existing solutions then dask may be a good fit, especially if you are already using Python and associated libraries like NumPy and Pandas. If you are looking to manage a terabyte or less of tabular CSV or JSON data then you should forget both Spark and Dask and use Postgres_ or MongoDB_. .. _Spark: https://spark.apache.org/ .. _PySpark: https://spark.apache.org/docs/latest/api/python/ .. _Hadoop: https://hadoop.apache.org/ .. _MRJob: https://mrjob.readthedocs.io .. _`surprisingly fast`: https://www.youtube.com/watch?v=PpBK4zIaFLE .. _cytoolz: https://toolz.readthedocs.io .. _py4j: http://py4j.sourceforge.net/ .. _Postgres: http://www.postgresql.org/ .. _MongoDB: https://www.mongodb.org/ .. 
_`complex algorithms`: http://matthewrocklin.com/blog/work/2015/06/26/Complex-Graphs dask-0.16.0/docs/source/spec.rst000066400000000000000000000075711320364734500164550ustar00rootroot00000000000000Specification ============= Dask is a specification to encode a graph -- specifically, a directed acyclic graph of tasks with data dependencies -- using ordinary Python data structures, namely dicts, tuples, functions, and arbitrary Python values. Definitions ----------- A **dask graph** is a dictionary mapping **keys** to **computations**: .. code-block:: python {'x': 1, 'y': 2, 'z': (add, 'x', 'y'), 'w': (sum, ['x', 'y', 'z']), 'v': [(sum, ['w', 'z']), 2]} A **key** is any hashable value that is not a **task**: .. code-block:: python 'x' ('x', 2, 3) A **task** is a tuple with a callable first element. Tasks represent atomic units of work meant to be run by a single worker. Example: .. code-block:: python (add, 'x', 'y') We represent a task as a tuple such that the *first element is a callable function* (like ``add``), and the succeeding elements are *arguments* for that function. An *argument* may be any valid **computation**. A **computation** may be one of the following: 1. Any **key** present in the dask graph like ``'x'`` 2. Any other value like ``1``, to be interpreted literally 3. A **task** like ``(inc, 'x')`` (see below) 4. A list of **computations**, like ``[1, 'x', (inc, 'x')]`` So all of the following are valid **computations**: .. code-block:: python np.array([...]) (add, 1, 2) (add, 'x', 2) (add, (inc, 'x'), 2) (sum, [1, 2]) (sum, ['x', (inc, 'x')]) (np.dot, np.array([...]), np.array([...])) [(sum, ['x', 'y']), 'z'] To encode keyword arguments, we recommend the use of ``functools.partial`` or ``toolz.curry``. What functions should expect ---------------------------- In cases like ``(add, 'x', 'y')``, functions like ``add`` receive concrete values instead of keys. A dask scheduler replaces keys (like ``'x'`` and ``'y'``) with their computed values (like ``1``, and ``2``) *before* calling the ``add`` function. Entry Point - The ``get`` function ---------------------------------- The ``get`` function serves as entry point to computation for all :doc:`schedulers `. This function gets the value associated to the given key. That key may refer to stored data, as is the case with ``'x'``, or a task as is the case with ``'z'``. In the latter case, ``get`` should perform all necessary computation to retrieve the computed value. .. _scheduler: scheduler-overview.rst .. code-block:: python >>> from dask.threaded import get >>> from operator import add >>> dsk = {'x': 1, ... 'y': 2, ... 'z': (add, 'x', 'y'), ... 'w': (sum, ['x', 'y', 'z'])} .. code-block:: python >>> get(dsk, 'x') 1 >>> get(dsk, 'z') 3 >>> get(dsk, 'w') 6 Additionally if given a ``list``, get should simultaneously acquire values for multiple keys: .. code-block:: python >>> get(dsk, ['x', 'y', 'z']) [1, 2, 3] Because we accept lists of keys as keys, we support nested lists. .. code-block:: python >>> get(dsk, [['x', 'y'], ['z', 'w']]) [[1, 2], [3, 6]] Internally ``get`` can be arbitrarily complex, calling out to distributed computing, using caches, and so on. Why use tuples -------------- With ``(add, 'x', 'y')`` we wish to encode "the result of calling ``add`` on the values corresponding to the keys ``'x'`` and ``'y'``. We intend the following meaning: .. 
code-block:: python add('x', 'y') # after x and y have been replaced But this will err because Python executes the function immediately, before we know values for ``'x'`` and ``'y'``. We delay the execution by moving the opening parenthesis one term to the left, creating a tuple: .. code:: Before: add( 'x', 'y') After: (add, 'x', 'y') This lets us store the desired computation as data that we can analyze using other Python code, rather than cause immediate execution. LISP users will identify this as an s-expression, or as a rudimentary form of quoting. dask-0.16.0/docs/source/support.rst000066400000000000000000000025121320364734500172250ustar00rootroot00000000000000Contact and Support =================== Where to ask for help --------------------- Dask conversation happens in the following places: 1. `StackOverflow #dask tag`_: for usage questions 2. `Github Issue Tracker`_: for new features or established bugs 3. `Gitter chat`_: for real-time discussion among developers For usage questions and bug reports we strongly prefer the use of StackOverflow and Github issues respectively over Gitter chat. Github and StackOverflow are more easily searchable by future users and so is more efficient for everyone's time. Gitter chat is strictly reserved for developer and community discussion. When asking questions it is ideal to create `minimal, complete, verifiable examples `_. This significantly reduces the time that answerers spend understanding your situation and so usually results in higher quality answers much more quickly. .. _`StackOverflow #dask tag`: http://stackoverflow.com/questions/tagged/dask .. _`Github Issue Tracker`: https://github.com/dask/dask/issues/ .. _`Gitter chat`: https://gitter.im/dask/dask Paid support ------------ Dask is an open source project that originated at `Anaconda Inc. `_. In addition to the previous options, Anaconda offers paid training and support: ``_ dask-0.16.0/docs/source/use-cases.rst000066400000000000000000000377131320364734500174140ustar00rootroot00000000000000Use Cases ========= Dask is a versatile tool that supports a variety of workloads. This page contains brief and illustrative examples for how people use Dask in practice. This page emphasizes breadth and hopefully inspires readers to find new ways that Dask can serve them beyond their original intent. Overview -------- Dask use cases can be roughly divided in the following two categories: 1. Large NumPy/Pandas/Lists with :doc:`dask.array`, :doc:`dask.dataframe`, :doc:`dask.bag` to analyze large datasets with familiar techniques. This is similar to Databases, Spark_, or big array libraries. 2. Custom task scheduling. You submit a graph of functions that depend on each other for custom workloads. This is similar to Luigi_, Airflow_, Celery_, or Makefiles_. Most people today approach Dask assuming it is a framework like Spark, designed for the first use case around large collections of uniformly shaped data. However, many of the more productive and novel use cases fall into the second category, using Dask to parallelize custom workflows. Dask compute environments can be divided into the following two categories: 1. Single machine parallelism with threads or processes: The Dask single-machine scheduler leverages the full CPU power of a laptop or a large workstation and changes the space limitation from "fits in memory" to "fits on disk". This scheduler is simple to use and doesn't have the computational or conceptual overhead of most "big data" systems. 2. 
Distributed cluster parallelism on multiple nodes: The Dask distributed scheduler coordinates the actions of multiple machines on a cluster. It scales anywhere from a single machine to a thousand machines, but not significantly beyond. The single machine scheduler is useful to more individuals (more people have personal laptops than have access to clusters) and probably accounts for 80+% of the use of Dask today. The distributed machine scheduler is useful to larger organizations like universities, research labs, or private companies. .. _Airflow: http://airflow.incubator.apache.org/ .. _Luigi: https://luigi.readthedocs.io/en/latest/ .. _Celery: http://www.celeryproject.org/ .. _Spark: https://spark.apache.org/ .. _Makefiles: https://en.wikipedia.org/wiki/Make_(software) Below we give specific examples of how people use Dask. We start with large NumPy/Pandas/List examples because they're somewhat more familiar to people looking at "big data" frameworks. We then follow with custom scheduling examples, which tend to be applicable more often, and are arguably a bit more interesting. Collection Examples ------------------- Dask contains large parallel collections for n-dimensional arrays (similar to NumPy), dataframes (similar to Pandas), and lists (similar to PyToolz or PySpark). On disk arrays ~~~~~~~~~~~~~~ Scientists studying the earth have 10GB to 100GB of regularly gridded weather data on their laptop's hard drive stored as many individual HDF5 or NetCDF files. They use :doc:`dask.array` to treat this stack of HDF5 or NetCDF files as a single NumPy_ array (or a collection of NumPy arrays with the XArray_ project). They slice, perform reductions, perform seasonal averaging etc. all with straight Numpy syntax. These computations take a few minutes (reading 100GB from disk is somewhat slow) but previously infeasible computations become convenient from the comfort of a personal laptop. It's not so much parallel computing that is valuable here but rather the ability to comfortably compute on larger-than-memory data without special hardware. .. code-block:: python import h5py dataset = h5py.File('myfile.hdf5')['/x'] import dask.array as da x = da.from_array(dataset, chunks=dataset.chunks) y = x[::10] - x.mean(axis=0) y.compute() .. _NumPy: http://www.numpy.org/ .. _XArray: http://xarray.pydata.org/en/stable/ Directory of CSV or tabular HDF files ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Analysts studying time series data have a large directory of CSV, HDF, or otherwise formatted tabular files. They usually use Pandas_ for this kind of data but either the volume is too large or dealing with a large number of files is confusing. They use :doc:`dask.dataframe` to logically wrap all of these different files into one logical dataframe that is built on demand to save space. Most of their Pandas workflow is the same (Dask.dataframe is a subset of Pandas) so they switch from Pandas to Dask.dataframe and back easily without significantly changing their code. .. code-block:: python import dask.dataframe as dd df = dd.read_csv('data/2016-*.*.csv', parse_dates=['timestamp']) df.groupby(df.timestamp.dt.hour).value.mean().compute() .. _Pandas: http://pandas.pydata.org/ Directory of CSV files on HDFS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The same analyst as above uses dask.dataframe with the dask.distributed_ scheduler to analyze terabytes of data on their institution's Hadoop cluster straight from Python. 
This uses the HDFS3_ Python library for HDFS management This solution is particularly attractive because it stays within the Python ecosystem, and uses the speed and algorithm set of Pandas_, a tool with which the analyst is already very comfortable. .. code-block:: python from dask.distributed import Client client = Client('cluster-address:8786') import dask.dataframe as dd df = dd.read_csv('hdfs://data/2016-*.*.csv', parse_dates=['timestamp']) df.groupby(df.timestamp.dt.hour).value.mean().compute() .. _HDFS3: https://hdfs3.readthedocs.io/en/latest/ Directories of custom format files ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The same analyst has a bunch of files of a custom format not supported by Dask.dataframe, or perhaps these files are in a directory structure that encodes important information about his data (such as the date or other metadata.) They use :doc:`dask.delayed` to teach Dask.dataframe how to load the data and then pass into dask.dataframe for tabular algorithms. * Example Notebook: https://gist.github.com/mrocklin/e7b7b3a65f2835cda813096332ec73ca JSON data ~~~~~~~~~ Data Engineers with click stream data from a website or mechanical engineers with telemetry data from mechanical instruments have large volumes of data in JSON or some other semi-structured format. They use :doc:`dask.bag` to manipulate many Python objects in parallel either on their personal machine, where they stream the data through memory or across a cluster. .. code-block:: python import dask.bag as db import json records = db.read_text('data/2015-*-*.json').map(json.loads) records.filter(lambda d: d['name'] == 'Alice').pluck('id').frequencies() Custom Examples --------------- The large collections (array, dataframe, bag) are wonderful when they fit the application, for example if you want to perform a groupby on a directory of CSV data. However several parallel computing applications don't fit neatly into one of these higher level abstractions. Fortunately, Dask provides a wide variety of ways to parallelize more custom applications. These use the same machinery as the arrays and dataframes, but allow the user to develop custom algorithms specific to their problem. Embarrassingly parallel computation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ A programmer has a function that they want to run many times on different inputs. Their function and inputs might use arrays or dataframes internally, but conceptually their problem isn't a single large array or dataframe. They want to run these functions in parallel on their laptop while they prototype but they also intend to eventually use an in-house cluster. They wrap their function in :doc:`dask.delayed` and let the appropriate dask scheduler parallelize and load balance the work. .. code-block:: python def process(data): ... return ... **Normal Sequential Processing**: .. code-block:: python results = [process(x) for x in inputs] **Build Dask Computation**: .. code-block:: python from dask import compute, delayed values = [delayed(process)(x) for x in inputs] **Multiple Threads**: .. code-block:: python import dask.threaded results = compute(*values, get=dask.threaded.get) **Multiple Processes**: .. code-block:: python import dask.multiprocessing results = compute(*values, get=dask.multiprocessing.get) **Distributed Cluster**: .. 
code-block:: python from dask.distributed import Client client = Client("cluster-address:8786") results = compute(*values, get=client.get) Complex dependencies ~~~~~~~~~~~~~~~~~~~~ A financial analyst has many models that depend on each other in a complex web of computations. .. code-block:: python data = [load(fn) for fn in filenames] reference = load_from_database(query) A = [model_a(x, reference) for x in data] B = [model_b(x, reference) for x in data] roll_A = [roll(A[i], A[i + 1]) for i in range(len(A) - 1)] roll_B = [roll(B[i], B[i + 1]) for i in range(len(B) - 1)] compare = [compare_ab(a, b) for a, b in zip(A, B)] results = summarize(compare, roll_A, roll_B) These models are time consuming and need to be run on a variety of inputs and situations. The analyst has his code now as a collection of Python functions and is trying to figure out how to parallelize such a codebase. They use dask.delayed to wrap their function calls and capture the implicit parallelism. .. code-block:: python from dask import compute, delayed data = [delayed(load)(fn) for fn in filenames] reference = delayed(load_from_database)(query) A = [delayed(model_a)(x, reference) for x in data] B = [delayed(model_b)(x, reference) for x in data] roll_A = [delayed(roll)(A[i], A[i + 1]) for i in range(len(A) - 1)] roll_B = [delayed(roll)(B[i], B[i + 1]) for i in range(len(B) - 1)] compare = [delayed(compare_ab)(a, b) for a, b in zip(A, B)] lazy_results = delayed(summarize)(compare, roll_A, roll_B) They then depend on the dask schedulers to run this complex web of computations in parallel. .. code-block:: python results = compute(lazy_results) They appreciate how easy it was to transition from the experimental code to a scalable parallel version. This code is also easy enough for their teammates to understand easily and extend in the future. Algorithm developer ~~~~~~~~~~~~~~~~~~~ A graduate student in machine learning is prototyping novel parallel algorithms. They are in a situation much like the financial analyst above except that they need to benchmark and profile their computation heavily under a variety of situations and scales. The dask profiling tools (:doc:`single machine diagnostics` and `distributed diagnostics`_) provide the feedback they need to understand their parallel performance, including how long each task takes, how intense communication is, and their scheduling overhead. They scale their algorithm between 1 and 50 cores on single workstations and then scale out to a cluster running their computation at thousands of cores. They don't have access to an institutional cluster, so instead they use dask-ec2_ to easily provision clusters of varying sizes. Their algorithm is written the same in all cases, drastically reducing the cognitive load, and letting the readers of their work experiment with their system on their own machines, aiding reproducibility. .. _`distributed diagnostics`: https://distributed.readthedocs.io/en/latest/web.html .. _dask-ec2: https://distributed.readthedocs.io/en/latest/ec2.html Scikit-Learn or Joblib User ~~~~~~~~~~~~~~~~~~~~~~~~~~~ A data scientist wants to scale their machine learning pipeline to run on their cluster to accelerate parameter searches. They already use the ``sklearn`` ``njobs=`` parameter to accelerate their computation on their local computer with Joblib_. Now they wrap their ``sklearn`` code with a context manager to parallelize the exact same code across a cluster (also available with IPyParallel_) .. 
code-block:: python import distributed.joblib with joblib.parallel_backend('distributed', scheduler_host=('192.168.1.100', 8786)): result = GridSearchCV( ... ) # normal sklearn code .. _IPyParallel: https://ipyparallel.readthedocs.io/en/latest/ Academic Cluster Administrator ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ A system administrator for a university compute cluster wants to enable many researchers to use the available cluster resources, which are currently lying idle. The research faculty and graduate students lack experience with job schedulers and MPI, but are comfortable interacting with Python code through a Jupyter notebook. Teaching the faculty and graduate students to parallelize software has proven time consuming. Instead the administrator sets up dask.distributed_ on a sandbox allocation of the cluster and broadly publishes the address of the scheduler, pointing researchers to the `dask.distributed quickstart`_. Utilization of the cluster climbs steadily over the next week as researchers are more easily able to parallelize their computations without having to learn foreign interfaces. The administrator is happy because resources are being used without significant hand-holding. As utilization increases the administrator has a new problem; the shared dask.distributed cluster is being overused. The administrator tracks use through Dask diagnostics to identify which users are taking most of the resources. They contact these users and teach them how to launch_ their own dask.distributed clusters using the traditional job scheduler on their cluster, making space for more new users in the sandbox allocation. .. _`dask.distributed quickstart`: https://distributed.readthedocs.io/en/latest/quickstart.html .. _launch: https://distributed.readthedocs.io/en/latest/setup.html Financial Modeling Team ~~~~~~~~~~~~~~~~~~~~~~~ Similar to the case above, a team of modelers working at a financial institution run a complex network of computational models on top of each other. They started using :doc:`dask.delayed` individually, as suggested above, but realized that they often perform highly overlapping computations, such as always reading the same data. Now they decide to use the same Dask cluster collaboratively to save on these costs. Because Dask intelligently hashes computations in a way similar to how Git works, they find that when two people submit similar computations the overlapping part of the computation runs only once. Ever since working collaboratively on the same cluster they find that their frequently running jobs run much faster, because most of the work is already done by previous users. When they share scripts with colleagues they find that those repeated scripts complete immediately rather than taking several hours. They are now able to iterate and share data as a team more effectively, decreasing their time to result and increasing their competitive edge. As this becomes more heavily used on the company cluster they decide to set up an auto-scaling system. They use their dynamic job scheduler (perhaps SGE, LSF, Mesos, or Marathon) to run a single ``dask-scheduler`` 24/7 and then scale up and down the number of ``dask-workers`` running on the cluster based on computational load. This solution ends up being more responsive (and thus more heavily used) than their previous attempts to provide institution-wide access to parallel computing but because it responds to load it still acts as a good citizen in the cluster. 
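A minimal sketch of the shared-cluster pattern described in this section, assuming a ``dask-scheduler`` is already running at a known address; the address, file names, and model functions below are placeholders standing in for the team's real code:

.. code-block:: python

    from dask import delayed
    from dask.distributed import Client

    client = Client('cluster-address:8786')       # every analyst connects to the same scheduler

    def load(fn):                                 # placeholder loading and model functions
        return fn

    def model_a(x, reference):
        return (x, reference)

    filenames = ['inputs-1.csv', 'inputs-2.csv']  # placeholder inputs

    # pure=True gives deterministic keys, so overlapping work already submitted
    # by colleagues on the same cluster can be shared, as described above
    reference = delayed(load, pure=True)('reference-data')
    A = [delayed(model_a, pure=True)(delayed(load, pure=True)(fn), reference)
         for fn in filenames]

    results = client.gather(client.compute(A))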
Streaming data engineering ~~~~~~~~~~~~~~~~~~~~~~~~~~ A data engineer responsible for watching a data feed needs to scale out a continuous process. They `combine dask.distributed with normal Python Queues`_ to produce a rudimentary but effective stream processing system. Because dask.distributed is elastic, they can scale up or scale down their cluster resources in response to demand. .. _`combine dask.distributed with normal Python Queues`: https://distributed.readthedocs.io/en/latest/queues.html .. _Joblib: https://pythonhosted.org/joblib/parallel.html .. _dask.distributed: https://distributed.readthedocs.io/en/latest/ dask-0.16.0/setup.cfg000066400000000000000000000013131320364734500143460ustar00rootroot00000000000000[flake8] # References: # https://flake8.readthedocs.io/en/latest/user/configuration.html # https://flake8.readthedocs.io/en/latest/user/error-codes.html # Note: there cannot be spaces after comma's here exclude = __init__.py ignore = # Extra space in brackets E20, # Multiple spaces around "," E231,E241, # Comments E26, # Import formatting E4, # Comparing types instead of isinstance E721, # Assigning lambda expression E731, # Ambiguous variable names E741 max-line-length = 120 [versioneer] VCS = git style = pep440 versionfile_source = dask/_version.py versionfile_build = dask/_version.py tag_prefix = parentdir_prefix = dask- [aliases] test = pytest dask-0.16.0/setup.py000077500000000000000000000032341320364734500142460ustar00rootroot00000000000000#!/usr/bin/env python import sys from os.path import exists from setuptools import setup import versioneer # NOTE: These are tested in `continuous_integration/travis/test_imports.sh` If # you modify these, make sure to change the corresponding line there. extras_require = { 'array': ['numpy', 'toolz >= 0.7.3'], 'bag': ['cloudpickle >= 0.2.1', 'toolz >= 0.7.3', 'partd >= 0.3.8'], 'dataframe': ['numpy', 'pandas >= 0.19.0', 'toolz >= 0.7.3', 'partd >= 0.3.8', 'cloudpickle >= 0.2.1'], 'distributed': ['distributed >= 1.20'], 'delayed': ['toolz >= 0.7.3'], } extras_require['complete'] = sorted(set(sum(extras_require.values(), []))) packages = ['dask', 'dask.array', 'dask.bag', 'dask.store', 'dask.bytes', 'dask.dataframe', 'dask.dataframe.io', 'dask.dataframe.tseries', 'dask.diagnostics'] tests = [p + '.tests' for p in packages] # Only include pytest-runner in setup_requires if we're invoking tests if {'pytest', 'test', 'ptr'}.intersection(sys.argv): setup_requires = ['pytest-runner'] else: setup_requires = [] setup(name='dask', version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), description='Parallel PyData with Task Scheduling', url='http://github.com/dask/dask/', maintainer='Matthew Rocklin', maintainer_email='mrocklin@gmail.com', license='BSD', keywords='task-scheduling parallel numpy pandas pydata', packages=packages + tests, long_description=open('README.rst').read() if exists('README.rst') else '', setup_requires=setup_requires, tests_require=['pytest'], extras_require=extras_require, zip_safe=False) dask-0.16.0/versioneer.py000066400000000000000000002003231320364734500152620ustar00rootroot00000000000000 # Version: 0.16 """The Versioneer - like a rocketeer, but for versions. The Versioneer ============== * like a rocketeer, but for versions! 
* https://github.com/warner/python-versioneer * Brian Warner * License: Public Domain * Compatible With: python2.6, 2.7, 3.3, 3.4, 3.5, and pypy * [![Latest Version] (https://pypip.in/version/versioneer/badge.svg?style=flat) ](https://pypi.python.org/pypi/versioneer/) * [![Build Status] (https://travis-ci.org/warner/python-versioneer.png?branch=master) ](https://travis-ci.org/warner/python-versioneer) This is a tool for managing a recorded version number in distutils-based python projects. The goal is to remove the tedious and error-prone "update the embedded version string" step from your release process. Making a new release should be as easy as recording a new tag in your version-control system, and maybe making new tarballs. ## Quick Install * `pip install versioneer` to somewhere to your $PATH * add a `[versioneer]` section to your setup.cfg (see below) * run `versioneer install` in your source tree, commit the results ## Version Identifiers Source trees come from a variety of places: * a version-control system checkout (mostly used by developers) * a nightly tarball, produced by build automation * a snapshot tarball, produced by a web-based VCS browser, like github's "tarball from tag" feature * a release tarball, produced by "setup.py sdist", distributed through PyPI Within each source tree, the version identifier (either a string or a number, this tool is format-agnostic) can come from a variety of places: * ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows about recent "tags" and an absolute revision-id * the name of the directory into which the tarball was unpacked * an expanded VCS keyword ($Id$, etc) * a `_version.py` created by some earlier build step For released software, the version identifier is closely related to a VCS tag. Some projects use tag names that include more than just the version string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool needs to strip the tag prefix to extract the version identifier. For unreleased software (between tags), the version identifier should provide enough information to help developers recreate the same tree, while also giving them an idea of roughly how old the tree is (after version 1.2, before version 1.3). Many VCS systems can report a description that captures this, for example `git describe --tags --dirty --always` reports things like "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has uncommitted changes. The version identifier is used for multiple purposes: * to allow the module to self-identify its version: `myproject.__version__` * to choose a name and prefix for a 'setup.py sdist' tarball ## Theory of Operation Versioneer works by adding a special `_version.py` file into your source tree, where your `__init__.py` can import it. This `_version.py` knows how to dynamically ask the VCS tool for version information at import time. `_version.py` also contains `$Revision$` markers, and the installation process marks `_version.py` to have this marker rewritten with a tag name during the `git archive` command. As a result, generated tarballs will contain enough information to get the proper version. To allow `setup.py` to compute a version too, a `versioneer.py` is added to the top level of your source tree, next to `setup.py` and the `setup.cfg` that configures it. 
This overrides several distutils/setuptools commands to compute the version when invoked, and changes `setup.py build` and `setup.py sdist` to replace `_version.py` with a small static file that contains just the generated version data. ## Installation First, decide on values for the following configuration variables: * `VCS`: the version control system you use. Currently accepts "git". * `style`: the style of version string to be produced. See "Styles" below for details. Defaults to "pep440", which looks like `TAG[+DISTANCE.gSHORTHASH[.dirty]]`. * `versionfile_source`: A project-relative pathname into which the generated version strings should be written. This is usually a `_version.py` next to your project's main `__init__.py` file, so it can be imported at runtime. If your project uses `src/myproject/__init__.py`, this should be `src/myproject/_version.py`. This file should be checked in to your VCS as usual: the copy created below by `setup.py setup_versioneer` will include code that parses expanded VCS keywords in generated tarballs. The 'build' and 'sdist' commands will replace it with a copy that has just the calculated version string. This must be set even if your project does not have any modules (and will therefore never import `_version.py`), since "setup.py sdist" -based trees still need somewhere to record the pre-calculated version strings. Anywhere in the source tree should do. If there is a `__init__.py` next to your `_version.py`, the `setup.py setup_versioneer` command (described below) will append some `__version__`-setting assignments, if they aren't already present. * `versionfile_build`: Like `versionfile_source`, but relative to the build directory instead of the source directory. These will differ when your setup.py uses 'package_dir='. If you have `package_dir={'myproject': 'src/myproject'}`, then you will probably have `versionfile_build='myproject/_version.py'` and `versionfile_source='src/myproject/_version.py'`. If this is set to None, then `setup.py build` will not attempt to rewrite any `_version.py` in the built tree. If your project does not have any libraries (e.g. if it only builds a script), then you should use `versionfile_build = None`. To actually use the computed version string, your `setup.py` will need to override `distutils.command.build_scripts` with a subclass that explicitly inserts a copy of `versioneer.get_version()` into your script file. See `test/demoapp-script-only/setup.py` for an example. * `tag_prefix`: a string, like 'PROJECTNAME-', which appears at the start of all VCS tags. If your tags look like 'myproject-1.2.0', then you should use tag_prefix='myproject-'. If you use unprefixed tags like '1.2.0', this should be an empty string, using either `tag_prefix=` or `tag_prefix=''`. * `parentdir_prefix`: a optional string, frequently the same as tag_prefix, which appears at the start of all unpacked tarball filenames. If your tarball unpacks into 'myproject-1.2.0', this should be 'myproject-'. To disable this feature, just omit the field from your `setup.cfg`. This tool provides one script, named `versioneer`. That script has one mode, "install", which writes a copy of `versioneer.py` into the current directory and runs `versioneer.py setup` to finish the installation. 
To versioneer-enable your project: * 1: Modify your `setup.cfg`, adding a section named `[versioneer]` and populating it with the configuration values you decided earlier (note that the option names are not case-sensitive): ```` [versioneer] VCS = git style = pep440 versionfile_source = src/myproject/_version.py versionfile_build = myproject/_version.py tag_prefix = parentdir_prefix = myproject- ```` * 2: Run `versioneer install`. This will do the following: * copy `versioneer.py` into the top of your source tree * create `_version.py` in the right place (`versionfile_source`) * modify your `__init__.py` (if one exists next to `_version.py`) to define `__version__` (by calling a function from `_version.py`) * modify your `MANIFEST.in` to include both `versioneer.py` and the generated `_version.py` in sdist tarballs `versioneer install` will complain about any problems it finds with your `setup.py` or `setup.cfg`. Run it multiple times until you have fixed all the problems. * 3: add a `import versioneer` to your setup.py, and add the following arguments to the setup() call: version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), * 4: commit these changes to your VCS. To make sure you won't forget, `versioneer install` will mark everything it touched for addition using `git add`. Don't forget to add `setup.py` and `setup.cfg` too. ## Post-Installation Usage Once established, all uses of your tree from a VCS checkout should get the current version string. All generated tarballs should include an embedded version string (so users who unpack them will not need a VCS tool installed). If you distribute your project through PyPI, then the release process should boil down to two steps: * 1: git tag 1.0 * 2: python setup.py register sdist upload If you distribute it through github (i.e. users use github to generate tarballs with `git archive`), the process is: * 1: git tag 1.0 * 2: git push; git push --tags Versioneer will report "0+untagged.NUMCOMMITS.gHASH" until your tree has at least one tag in its history. ## Version-String Flavors Code which uses Versioneer can learn about its version string at runtime by importing `_version` from your main `__init__.py` file and running the `get_versions()` function. From the "outside" (e.g. in `setup.py`), you can import the top-level `versioneer.py` and run `get_versions()`. Both functions return a dictionary with different flavors of version information: * `['version']`: A condensed version string, rendered using the selected style. This is the most commonly used value for the project's version string. The default "pep440" style yields strings like `0.11`, `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section below for alternative styles. * `['full-revisionid']`: detailed revision identifier. For Git, this is the full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". * `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that this is only accurate if run in a VCS checkout, otherwise it is likely to be False or None * `['error']`: if the version string could not be computed, this will be set to a string describing the problem, otherwise it will be None. It may be useful to throw an exception in setup.py if this is set, to avoid e.g. creating tarballs with a version string of "unknown". Some variants are more useful than others. 
Including `full-revisionid` in a bug report should allow developers to reconstruct the exact code being tested (or indicate the presence of local changes that should be shared with the developers). `version` is suitable for display in an "about" box or a CLI `--version` output: it can be easily compared against release notes and lists of bugs fixed in various releases. The installer adds the following text to your `__init__.py` to place a basic version in `YOURPROJECT.__version__`: from ._version import get_versions __version__ = get_versions()['version'] del get_versions ## Styles The setup.cfg `style=` configuration controls how the VCS information is rendered into a version string. The default style, "pep440", produces a PEP440-compliant string, equal to the un-prefixed tag name for actual releases, and containing an additional "local version" section with more detail for in-between builds. For Git, this is TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and that this commit is two revisions ("+2") beyond the "0.11" tag. For released software (exactly equal to a known tag), the identifier will only contain the stripped tag, e.g. "0.11". Other styles are available. See details.md in the Versioneer source tree for descriptions. ## Debugging Versioneer tries to avoid fatal errors: if something goes wrong, it will tend to return a version of "0+unknown". To investigate the problem, run `setup.py version`, which will run the version-lookup code in a verbose mode, and will display the full contents of `get_versions()` (including the `error` string, which may help identify what went wrong). ## Updating Versioneer To upgrade your project to a new release of Versioneer, do the following: * install the new Versioneer (`pip install -U versioneer` or equivalent) * edit `setup.cfg`, if necessary, to include any new configuration settings indicated by the release notes * re-run `versioneer install` in your source tree, to replace `SRC/_version.py` * commit any changed files ### Upgrading to 0.16 Nothing special. ### Upgrading to 0.15 Starting with this version, Versioneer is configured with a `[versioneer]` section in your `setup.cfg` file. Earlier versions required the `setup.py` to set attributes on the `versioneer` module immediately after import. The new version will refuse to run (raising an exception during import) until you have provided the necessary `setup.cfg` section. In addition, the Versioneer package provides an executable named `versioneer`, and the installation process is driven by running `versioneer install`. In 0.14 and earlier, the executable was named `versioneer-installer` and was run without an argument. ### Upgrading to 0.14 0.14 changes the format of the version string. 0.13 and earlier used hyphen-separated strings like "0.11-2-g1076c97-dirty". 0.14 and beyond use a plus-separated "local version" section strings, with dot-separated components, like "0.11+2.g1076c97". PEP440-strict tools did not like the old format, but should be ok with the new one. ### Upgrading from 0.11 to 0.12 Nothing special. ### Upgrading from 0.10 to 0.11 You must add a `versioneer.VCS = "git"` to your `setup.py` before re-running `setup.py setup_versioneer`. This will enable the use of additional version-control systems (SVN, etc) in the future. 
## Future Directions This tool is designed to make it easily extended to other version-control systems: all VCS-specific components are in separate directories like src/git/ . The top-level `versioneer.py` script is assembled from these components by running make-versioneer.py . In the future, make-versioneer.py will take a VCS name as an argument, and will construct a version of `versioneer.py` that is specific to the given VCS. It might also take the configuration arguments that are currently provided manually during installation by editing setup.py . Alternatively, it might go the other direction and include code from all supported VCS systems, reducing the number of intermediate scripts. ## License To make Versioneer easier to embed, all its code is dedicated to the public domain. The `_version.py` that it creates is also in the public domain. Specifically, both are released under the Creative Commons "Public Domain Dedication" license (CC0-1.0), as described in https://creativecommons.org/publicdomain/zero/1.0/ . """ from __future__ import print_function try: import configparser except ImportError: import ConfigParser as configparser import errno import json import os import re import subprocess import sys class VersioneerConfig: """Container for Versioneer configuration parameters.""" def get_root(): """Get the project root directory. We require that all commands are run from the project root, i.e. the directory that contains setup.py, setup.cfg, and versioneer.py . """ root = os.path.realpath(os.path.abspath(os.getcwd())) setup_py = os.path.join(root, "setup.py") versioneer_py = os.path.join(root, "versioneer.py") if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): # allow 'python path/to/setup.py COMMAND' root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) setup_py = os.path.join(root, "setup.py") versioneer_py = os.path.join(root, "versioneer.py") if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): err = ("Versioneer was unable to run the project root directory. " "Versioneer requires setup.py to be executed from " "its immediate directory (like 'python setup.py COMMAND'), " "or in a way that lets it use sys.argv[0] to find the root " "(like 'python path/to/setup.py COMMAND').") raise VersioneerBadRootError(err) try: # Certain runtime workflows (setup.py install/develop in a setuptools # tree) execute all dependencies in a single python process, so # "versioneer" may be imported multiple times, and python's shared # module-import table will cache the first one. So we can't use # os.path.dirname(__file__), as that will find whichever # versioneer.py was first imported, even in later projects. me = os.path.realpath(os.path.abspath(__file__)) if os.path.splitext(me)[0] != os.path.splitext(versioneer_py)[0]: print("Warning: build in %s is using versioneer.py from %s" % (os.path.dirname(me), versioneer_py)) except NameError: pass return root def get_config_from_root(root): """Read the project setup.cfg file to determine Versioneer config.""" # This might raise EnvironmentError (if setup.cfg is missing), or # configparser.NoSectionError (if it lacks a [versioneer] section), or # configparser.NoOptionError (if it lacks "VCS="). See the docstring at # the top of versioneer.py for instructions on writing your setup.cfg . 
setup_cfg = os.path.join(root, "setup.cfg") parser = configparser.SafeConfigParser() with open(setup_cfg, "r") as f: parser.readfp(f) VCS = parser.get("versioneer", "VCS") # mandatory def get(parser, name): if parser.has_option("versioneer", name): return parser.get("versioneer", name) return None cfg = VersioneerConfig() cfg.VCS = VCS cfg.style = get(parser, "style") or "" cfg.versionfile_source = get(parser, "versionfile_source") cfg.versionfile_build = get(parser, "versionfile_build") cfg.tag_prefix = get(parser, "tag_prefix") if cfg.tag_prefix in ("''", '""'): cfg.tag_prefix = "" cfg.parentdir_prefix = get(parser, "parentdir_prefix") cfg.verbose = get(parser, "verbose") return cfg class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" # these dictionaries contain VCS-specific tools LONG_VERSION_PY = {} HANDLERS = {} def register_vcs_handler(vcs, method): # decorator """Decorator to mark a method as the handler for a particular VCS.""" def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): """Call the given command(s).""" assert isinstance(commands, list) p = None for c in commands: try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None)) break except EnvironmentError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue if verbose: print("unable to run %s" % dispcmd) print(e) return None else: if verbose: print("unable to find command, tried %s" % (commands,)) return None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: stdout = stdout.decode() if p.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) return None return stdout LONG_VERSION_PY['git'] = ''' # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. # This file is released into the public domain. Generated by # versioneer-0.16 (https://github.com/warner/python-versioneer) """Git implementation of _version.py.""" import errno import os import re import subprocess import sys def get_keywords(): """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). 
git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" keywords = {"refnames": git_refnames, "full": git_full} return keywords class VersioneerConfig: """Container for Versioneer configuration parameters.""" def get_config(): """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py cfg = VersioneerConfig() cfg.VCS = "git" cfg.style = "%(STYLE)s" cfg.tag_prefix = "%(TAG_PREFIX)s" cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" cfg.verbose = False return cfg class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" LONG_VERSION_PY = {} HANDLERS = {} def register_vcs_handler(vcs, method): # decorator """Decorator to mark a method as the handler for a particular VCS.""" def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): """Call the given command(s).""" assert isinstance(commands, list) p = None for c in commands: try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None)) break except EnvironmentError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue if verbose: print("unable to run %%s" %% dispcmd) print(e) return None else: if verbose: print("unable to find command, tried %%s" %% (commands,)) return None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: stdout = stdout.decode() if p.returncode != 0: if verbose: print("unable to run %%s (error)" %% dispcmd) return None return stdout def versions_from_parentdir(parentdir_prefix, root, verbose): """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both the project name and a version string. """ dirname = os.path.basename(root) if not dirname.startswith(parentdir_prefix): if verbose: print("guessing rootdir is '%%s', but '%%s' doesn't start with " "prefix '%%s'" %% (root, dirname, parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None} @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs): """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. 
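    # (Illustrative: when the keywords have been expanded, the scraped result
    # looks like {"refnames": " (tag: 1.2.3)", "full": "<40-hex commit sha>"}.)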
keywords = {} try: f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) f.close() except EnvironmentError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords(keywords, tag_prefix, verbose): """Get version information from git keywords.""" if not keywords: raise NotThisMethod("no keywords at all, weird") refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = set([r.strip() for r in refnames.strip("()").split(",")]) # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %%d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = set([r for r in refs if re.search(r'\d', r)]) if verbose: print("discarding '%%s', no digits" %% ",".join(refs-tags)) if verbose: print("likely tags: %%s" %% ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] if verbose: print("picking %%s" %% r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags"} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. """ if not os.path.exists(os.path.join(root, ".git")): if verbose: print("no .git in %%s" %% root) raise NotThisMethod("no .git directory") GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out = run_command(GITS, ["describe", "--tags", "--dirty", "--always", "--long", "--match", "%%s*" %% tag_prefix], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None # parse describe_out. 
It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? pieces["error"] = ("unable to parse git-describe output: '%%s'" %% describe_out) return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%%s' doesn't start with prefix '%%s'" print(fmt %% (full_tag, tag_prefix)) pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" %% (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits return pieces def plus_or_dot(pieces): """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" def render_pep440(pieces): """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty Exceptions: 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_pre(pieces): """TAG[.post.devDISTANCE] -- No -dirty. Exceptions: 1: no tags. 0.post.devDISTANCE """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += ".post.dev%%d" %% pieces["distance"] else: # exception #1 rendered = "0.post.dev%%d" %% pieces["distance"] return rendered def render_pep440_post(pieces): """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards (a dirty tree will appear "older" than the corresponding clean one), but you shouldn't be releasing software with -dirty anyways. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%%s" %% pieces["short"] else: # exception #1 rendered = "0.post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += "+g%%s" %% pieces["short"] return rendered def render_pep440_old(pieces): """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. Eexceptions: 1: no tags. 
0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" else: # exception #1 rendered = "0.post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" return rendered def render_git_describe(pieces): """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render_git_describe_long(pieces): """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. The distance/hash is unconditional. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", "full-revisionid": pieces.get("long"), "dirty": None, "error": pieces["error"]} if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": rendered = render_git_describe(pieces) elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: raise ValueError("unknown style '%%s'" %% style) return {"version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None} def get_versions(): """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which # case we can only use expanded keywords. cfg = get_config() verbose = cfg.verbose try: return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass try: root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. for i in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to find root of source tree"} try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) return render(pieces, cfg.style) except NotThisMethod: pass try: if cfg.parentdir_prefix: return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) except NotThisMethod: pass return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to compute version"} ''' @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs): """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. 
When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. keywords = {} try: f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) f.close() except EnvironmentError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords(keywords, tag_prefix, verbose): """Get version information from git keywords.""" if not keywords: raise NotThisMethod("no keywords at all, weird") refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = set([r.strip() for r in refnames.strip("()").split(",")]) # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = set([r for r in refs if re.search(r'\d', r)]) if verbose: print("discarding '%s', no digits" % ",".join(refs-tags)) if verbose: print("likely tags: %s" % ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] if verbose: print("picking %s" % r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags"} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. 
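
    For example (illustrative), 'git describe --tags --dirty --always --long'
    output such as "1.2.3-14-g0123abc-dirty" is split below into the closest
    tag ("1.2.3"), the distance in commits (14), the short hash ("0123abc"),
    and the dirty flag.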
""" if not os.path.exists(os.path.join(root, ".git")): if verbose: print("no .git in %s" % root) raise NotThisMethod("no .git directory") GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out = run_command(GITS, ["describe", "--tags", "--dirty", "--always", "--long", "--match", "%s*" % tag_prefix], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? pieces["error"] = ("unable to parse git-describe output: '%s'" % describe_out) return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits return pieces def do_vcs_install(manifest_in, versionfile_source, ipy): """Git-specific installation logic for Versioneer. For Git, this means creating/changing .gitattributes to mark _version.py for export-time keyword substitution. """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] files = [manifest_in, versionfile_source] if ipy: files.append(ipy) try: me = __file__ if me.endswith(".pyc") or me.endswith(".pyo"): me = os.path.splitext(me)[0] + ".py" versioneer_file = os.path.relpath(me) except NameError: versioneer_file = "versioneer.py" files.append(versioneer_file) present = False try: f = open(".gitattributes", "r") for line in f.readlines(): if line.strip().startswith(versionfile_source): if "export-subst" in line.strip().split()[1:]: present = True f.close() except EnvironmentError: pass if not present: f = open(".gitattributes", "a+") f.write("%s export-subst\n" % versionfile_source) f.close() files.append(".gitattributes") run_command(GITS, ["add", "--"] + files) def versions_from_parentdir(parentdir_prefix, root, verbose): """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both the project name and a version string. 
""" dirname = os.path.basename(root) if not dirname.startswith(parentdir_prefix): if verbose: print("guessing rootdir is '%s', but '%s' doesn't start with " "prefix '%s'" % (root, dirname, parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None} SHORT_VERSION_PY = """ # This file was generated by 'versioneer.py' (0.16) from # revision-control system data, or from the parent directory name of an # unpacked source archive. Distribution tarballs contain a pre-generated copy # of this file. import json import sys version_json = ''' %s ''' # END VERSION_JSON def get_versions(): return json.loads(version_json) """ def versions_from_file(filename): """Try to determine the version from _version.py if present.""" try: with open(filename) as f: contents = f.read() except EnvironmentError: raise NotThisMethod("unable to read _version.py") mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", contents, re.M | re.S) if not mo: raise NotThisMethod("no version_json in _version.py") return json.loads(mo.group(1)) def write_to_version_file(filename, versions): """Write the given version number to the given _version.py file.""" os.unlink(filename) contents = json.dumps(versions, sort_keys=True, indent=1, separators=(",", ": ")) with open(filename, "w") as f: f.write(SHORT_VERSION_PY % contents) print("set %s to '%s'" % (filename, versions["version"])) def plus_or_dot(pieces): """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" def render_pep440(pieces): """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty Exceptions: 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_pre(pieces): """TAG[.post.devDISTANCE] -- No -dirty. Exceptions: 1: no tags. 0.post.devDISTANCE """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += ".post.dev%d" % pieces["distance"] else: # exception #1 rendered = "0.post.dev%d" % pieces["distance"] return rendered def render_pep440_post(pieces): """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards (a dirty tree will appear "older" than the corresponding clean one), but you shouldn't be releasing software with -dirty anyways. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%s" % pieces["short"] else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += "+g%s" % pieces["short"] return rendered def render_pep440_old(pieces): """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. Eexceptions: 1: no tags. 
0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" return rendered def render_git_describe(pieces): """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render_git_describe_long(pieces): """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. The distance/hash is unconditional. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", "full-revisionid": pieces.get("long"), "dirty": None, "error": pieces["error"]} if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": rendered = render_git_describe(pieces) elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: raise ValueError("unknown style '%s'" % style) return {"version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None} class VersioneerBadRootError(Exception): """The project root directory is unknown or missing key files.""" def get_versions(verbose=False): """Get the project version from whatever source is available. Returns dict with two keys: 'version' and 'full'. """ if "versioneer" in sys.modules: # see the discussion in cmdclass.py:get_cmdclass() del sys.modules["versioneer"] root = get_root() cfg = get_config_from_root(root) assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" handlers = HANDLERS.get(cfg.VCS) assert handlers, "unrecognized VCS '%s'" % cfg.VCS verbose = verbose or cfg.verbose assert cfg.versionfile_source is not None, \ "please set versioneer.versionfile_source" assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" versionfile_abs = os.path.join(root, cfg.versionfile_source) # extract version from first of: _version.py, VCS command (e.g. 'git # describe'), parentdir. This is meant to work for developers using a # source checkout, for users of a tarball created by 'setup.py sdist', # and for users of a tarball/zipball created by 'git archive' or github's # download-from-tag feature or the equivalent in other VCSes. 
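    # (Illustrative: whichever method succeeds, the result is a dict shaped
    #  like {"version": "1.2.3+4.g0123abc", "full-revisionid": "<40-hex sha>",
    #  "dirty": False, "error": None}.)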
get_keywords_f = handlers.get("get_keywords") from_keywords_f = handlers.get("keywords") if get_keywords_f and from_keywords_f: try: keywords = get_keywords_f(versionfile_abs) ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) if verbose: print("got version from expanded keyword %s" % ver) return ver except NotThisMethod: pass try: ver = versions_from_file(versionfile_abs) if verbose: print("got version from file %s %s" % (versionfile_abs, ver)) return ver except NotThisMethod: pass from_vcs_f = handlers.get("pieces_from_vcs") if from_vcs_f: try: pieces = from_vcs_f(cfg.tag_prefix, root, verbose) ver = render(pieces, cfg.style) if verbose: print("got version from VCS %s" % ver) return ver except NotThisMethod: pass try: if cfg.parentdir_prefix: ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) if verbose: print("got version from parentdir %s" % ver) return ver except NotThisMethod: pass if verbose: print("unable to compute version") return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to compute version"} def get_version(): """Get the short version string for this project.""" return get_versions()["version"] def get_cmdclass(): """Get the custom setuptools/distutils subclasses used by Versioneer.""" if "versioneer" in sys.modules: del sys.modules["versioneer"] # this fixes the "python setup.py develop" case (also 'install' and # 'easy_install .'), in which subdependencies of the main project are # built (using setup.py bdist_egg) in the same python process. Assume # a main project A and a dependency B, which use different versions # of Versioneer. A's setup.py imports A's Versioneer, leaving it in # sys.modules by the time B's setup.py is executed, causing B to run # with the wrong versioneer. Setuptools wraps the sub-dep builds in a # sandbox that restores sys.modules to it's pre-build state, so the # parent is protected against the child's "import versioneer". By # removing ourselves from sys.modules here, before the child build # happens, we protect the child from the parent's versioneer too. # Also see https://github.com/warner/python-versioneer/issues/52 cmds = {} # we add "version" to both distutils and setuptools from distutils.core import Command class cmd_version(Command): description = "report generated version string" user_options = [] boolean_options = [] def initialize_options(self): pass def finalize_options(self): pass def run(self): vers = get_versions(verbose=True) print("Version: %s" % vers["version"]) print(" full-revisionid: %s" % vers.get("full-revisionid")) print(" dirty: %s" % vers.get("dirty")) if vers["error"]: print(" error: %s" % vers["error"]) cmds["version"] = cmd_version # we override "build_py" in both distutils and setuptools # # most invocation pathways end up running build_py: # distutils/build -> build_py # distutils/install -> distutils/build ->.. # setuptools/bdist_wheel -> distutils/install ->.. # setuptools/bdist_egg -> distutils/install_lib -> build_py # setuptools/install -> bdist_egg ->.. # setuptools/develop -> ? 
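    # Illustrative setup.py usage of the mapping returned here (see also
    # CONFIG_ERROR below):
    #
    #     import versioneer
    #     setup(version=versioneer.get_version(),
    #           cmdclass=versioneer.get_cmdclass(), ...)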
# we override different "build_py" commands for both environments if "setuptools" in sys.modules: from setuptools.command.build_py import build_py as _build_py else: from distutils.command.build_py import build_py as _build_py class cmd_build_py(_build_py): def run(self): root = get_root() cfg = get_config_from_root(root) versions = get_versions() _build_py.run(self) # now locate _version.py in the new build/ directory and replace # it with an updated value if cfg.versionfile_build: target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) cmds["build_py"] = cmd_build_py if "cx_Freeze" in sys.modules: # cx_freeze enabled? from cx_Freeze.dist import build_exe as _build_exe class cmd_build_exe(_build_exe): def run(self): root = get_root() cfg = get_config_from_root(root) versions = get_versions() target_versionfile = cfg.versionfile_source print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) _build_exe.run(self) os.unlink(target_versionfile) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] f.write(LONG % {"DOLLAR": "$", "STYLE": cfg.style, "TAG_PREFIX": cfg.tag_prefix, "PARENTDIR_PREFIX": cfg.parentdir_prefix, "VERSIONFILE_SOURCE": cfg.versionfile_source, }) cmds["build_exe"] = cmd_build_exe del cmds["build_py"] # we override different "sdist" commands for both environments if "setuptools" in sys.modules: from setuptools.command.sdist import sdist as _sdist else: from distutils.command.sdist import sdist as _sdist class cmd_sdist(_sdist): def run(self): versions = get_versions() self._versioneer_generated_versions = versions # unless we update this, the command will keep using the old # version self.distribution.metadata.version = versions["version"] return _sdist.run(self) def make_release_tree(self, base_dir, files): root = get_root() cfg = get_config_from_root(root) _sdist.make_release_tree(self, base_dir, files) # now locate _version.py in the new base_dir directory # (remembering that it may be a hardlink) and replace it with an # updated value target_versionfile = os.path.join(base_dir, cfg.versionfile_source) print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, self._versioneer_generated_versions) cmds["sdist"] = cmd_sdist return cmds CONFIG_ERROR = """ setup.cfg is missing the necessary Versioneer configuration. You need a section like: [versioneer] VCS = git style = pep440 versionfile_source = src/myproject/_version.py versionfile_build = myproject/_version.py tag_prefix = parentdir_prefix = myproject- You will also need to edit your setup.py to use the results: import versioneer setup(version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), ...) Please read the docstring in ./versioneer.py for configuration instructions, edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. """ SAMPLE_CONFIG = """ # See the docstring in versioneer.py for instructions. Note that you must # re-run 'versioneer.py setup' after changing this section, and commit the # resulting files. 
[versioneer] #VCS = git #style = pep440 #versionfile_source = #versionfile_build = #tag_prefix = #parentdir_prefix = """ INIT_PY_SNIPPET = """ from ._version import get_versions __version__ = get_versions()['version'] del get_versions """ def do_setup(): """Main VCS-independent setup function for installing Versioneer.""" root = get_root() try: cfg = get_config_from_root(root) except (EnvironmentError, configparser.NoSectionError, configparser.NoOptionError) as e: if isinstance(e, (EnvironmentError, configparser.NoSectionError)): print("Adding sample versioneer config to setup.cfg", file=sys.stderr) with open(os.path.join(root, "setup.cfg"), "a") as f: f.write(SAMPLE_CONFIG) print(CONFIG_ERROR, file=sys.stderr) return 1 print(" creating %s" % cfg.versionfile_source) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] f.write(LONG % {"DOLLAR": "$", "STYLE": cfg.style, "TAG_PREFIX": cfg.tag_prefix, "PARENTDIR_PREFIX": cfg.parentdir_prefix, "VERSIONFILE_SOURCE": cfg.versionfile_source, }) ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") if os.path.exists(ipy): try: with open(ipy, "r") as f: old = f.read() except EnvironmentError: old = "" if INIT_PY_SNIPPET not in old: print(" appending to %s" % ipy) with open(ipy, "a") as f: f.write(INIT_PY_SNIPPET) else: print(" %s unmodified" % ipy) else: print(" %s doesn't exist, ok" % ipy) ipy = None # Make sure both the top-level "versioneer.py" and versionfile_source # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so # they'll be copied into source distributions. Pip won't be able to # install the package without this. manifest_in = os.path.join(root, "MANIFEST.in") simple_includes = set() try: with open(manifest_in, "r") as f: for line in f: if line.startswith("include "): for include in line.split()[1:]: simple_includes.add(include) except EnvironmentError: pass # That doesn't cover everything MANIFEST.in can do # (http://docs.python.org/2/distutils/sourcedist.html#commands), so # it might give some false negatives. Appending redundant 'include' # lines is safe, though. if "versioneer.py" not in simple_includes: print(" appending 'versioneer.py' to MANIFEST.in") with open(manifest_in, "a") as f: f.write("include versioneer.py\n") else: print(" 'versioneer.py' already in MANIFEST.in") if cfg.versionfile_source not in simple_includes: print(" appending versionfile_source ('%s') to MANIFEST.in" % cfg.versionfile_source) with open(manifest_in, "a") as f: f.write("include %s\n" % cfg.versionfile_source) else: print(" versionfile_source already in MANIFEST.in") # Make VCS-specific changes. For git, this means creating/changing # .gitattributes to mark _version.py for export-time keyword # substitution. do_vcs_install(manifest_in, cfg.versionfile_source, ipy) return 0 def scan_setup_py(): """Validate the contents of setup.py against Versioneer's expectations.""" found = set() setters = False errors = 0 with open("setup.py", "r") as f: for line in f.readlines(): if "import versioneer" in line: found.add("import") if "versioneer.get_cmdclass()" in line: found.add("cmdclass") if "versioneer.get_version()" in line: found.add("get_version") if "versioneer.VCS" in line: setters = True if "versioneer.versionfile_source" in line: setters = True if len(found) != 3: print("") print("Your setup.py appears to be missing some important items") print("(but I might be wrong). 
Please make sure it has something")
        print("roughly like the following:")
        print("")
        print(" import versioneer")
        print(" setup( version=versioneer.get_version(),")
        print(" cmdclass=versioneer.get_cmdclass(), ...)")
        print("")
        errors += 1
    if setters:
        print("You should remove lines like 'versioneer.VCS = ' and")
        print("'versioneer.versionfile_source = ' . This configuration")
        print("now lives in setup.cfg, and should be removed from setup.py")
        print("")
        errors += 1
    return errors


if __name__ == "__main__":
    cmd = sys.argv[1]
    if cmd == "setup":
        errors = do_setup()
        errors += scan_setup_py()
        if errors:
            sys.exit(1)
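
# Illustrative command-line usage (per the docstring and CONFIG_ERROR above):
# after adding the [versioneer] section to setup.cfg, run
# "python versioneer.py setup" to install/update the version machinery, then
# "python setup.py version" to check the reported version string.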