ubelt-1.3.7/.gitattributes:

*.py text eol=lf
*.md text eol=lf
*.ini text eol=lf
*.txt text eol=lf
*.yml text eol=lf

ubelt-1.3.7/.github/ISSUE_TEMPLATE/bug_report.md:

---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''

---

**Describe the bug**
A clear and concise description of what the bug is.

**To Reproduce**
Describe steps to reproduce the behavior. Ideally provide a minimal working example:

```python
import ubelt as ub
ub.call_that_is_behaving_weird()
```

**Expected behavior**
A clear and concise description of what you expected to happen.

**Desktop (please complete the following information):**
 - OS: [e.g. iOS]
 - Ubelt version
 - Python version

**Additional context**
Add any other context about the problem here.

ubelt-1.3.7/.github/ISSUE_TEMPLATE/feature_request.md:

---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''

---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context or screenshots about the feature request here.

ubelt-1.3.7/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md:

# PR Details

## Checklist

- [ ] My code follows the code style of this project.
- [ ] I have updated the documentation accordingly.
- [ ] I have added tests to cover my changes.
- [ ] All new and existing tests passed.
ubelt-1.3.7/.github/dependabot.yml:

version: 2
updates:
  # Maintain dependencies for GitHub Actions
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"
      day: "friday"

ubelt-1.3.7/.github/workflows/tests.yml:

# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
# Based on ~/code/xcookie/xcookie/rc/tests.yml.in
# Now based on ~/code/xcookie/xcookie/builders/github_actions.py
# See: https://github.com/Erotemic/xcookie

name: PurePyCI

on:
  push:
  pull_request:
    branches: [ main ]

jobs:
  lint_job:
    ##
    # Run quick linting and typing checks.
    # To disable all linting add "linter=false" to the xcookie config.
    # To disable type checks add "notypes" to the xcookie tags.
    ##
    runs-on: ubuntu-latest
    steps:
    - name: Checkout source
      uses: actions/checkout@v4.1.1
    - name: Set up Python 3.13 for linting
      uses: actions/setup-python@v5.1.1
      with:
        python-version: '3.13'
    - name: Install dependencies
      run: |-
        python -m pip install --upgrade pip
        python -m pip install flake8
    - name: Lint with flake8
      run: |-
        # stop the build if there are Python syntax errors or undefined names
        flake8 ./ubelt --count --select=E9,F63,F7,F82 --show-source --statistics
    - name: Typecheck with mypy
      run: |-
        python -m pip install mypy
        pip install -r requirements/runtime.txt
        mypy --install-types --non-interactive ./ubelt
        mypy ./ubelt
  build_and_test_sdist:
    ##
    # Build the pure python package from source and test it in the
    # same environment.
    ##
    name: Build sdist
    runs-on: ubuntu-latest
    steps:
    - name: Checkout source
      uses: actions/checkout@v4.1.1
    - name: Set up Python 3.13
      uses: actions/setup-python@v5.1.1
      with:
        python-version: '3.13'
    - name: Upgrade pip
      run: |-
        python -m pip install --upgrade pip
        python -m pip install --prefer-binary -r requirements/tests.txt
        python -m pip install --prefer-binary -r requirements/runtime.txt
    - name: Build sdist
      shell: bash
      run: |-
        python -m pip install "setuptools>=0.8" wheel build twine
        python -m build --sdist --outdir wheelhouse
        python -m twine check ./wheelhouse/ubelt*.tar.gz
    - name: Install sdist
      run: |-
        ls -al wheelhouse
        pip install --prefer-binary wheelhouse/ubelt*.tar.gz -v
    - name: Test minimal loose sdist
      run: |-
        pwd
        ls -al
        # Run in a sandboxed directory
        WORKSPACE_DNAME="testsrcdir_minimal_${CI_PYTHON_VERSION}_${GITHUB_RUN_ID}_${RUNNER_OS}"
        mkdir -p $WORKSPACE_DNAME
        cd $WORKSPACE_DNAME
        # Run the tests
        # Get path to installed package
        MOD_DPATH=$(python -c "import ubelt, os; print(os.path.dirname(ubelt.__file__))")
        echo "MOD_DPATH = $MOD_DPATH"
        python -m pytest --verbose --cov=ubelt $MOD_DPATH ../tests
        cd ..
    - name: Test full loose sdist
      run: |-
        pwd
        ls -al
        true
        # Run in a sandboxed directory
        WORKSPACE_DNAME="testsrcdir_full_${CI_PYTHON_VERSION}_${GITHUB_RUN_ID}_${RUNNER_OS}"
        mkdir -p $WORKSPACE_DNAME
        cd $WORKSPACE_DNAME
        # Run the tests
        # Get path to installed package
        MOD_DPATH=$(python -c "import ubelt, os; print(os.path.dirname(ubelt.__file__))")
        echo "MOD_DPATH = $MOD_DPATH"
        python -m pytest --verbose --cov=ubelt $MOD_DPATH ../tests
        cd ..
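    # NOTE: the two sandboxed test steps above deliberately run pytest against
    # the *installed* package (located via ubelt.__file__) rather than the
    # repo checkout, so packaging problems are caught here.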
    - uses: actions/upload-artifact@v4.4.0
      name: Upload sdist artifact
      with:
        name: sdist_wheels
        path: ./wheelhouse/ubelt*.tar.gz
  build_purepy_wheels:
    ##
    # Build the pure-python wheels. They are tested in the independent
    # test_purepy_wheels environments below.
    ##
    name: ${{ matrix.python-version }} on ${{ matrix.os }}, arch=${{ matrix.arch }} with ${{ matrix.install-extras }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os:
        - ubuntu-latest
        python-version:
        - '3.13'
        arch:
        - auto
    steps:
    - name: Checkout source
      uses: actions/checkout@v4.1.1
    - name: Set up QEMU
      uses: docker/setup-qemu-action@v3.0.0
      if: runner.os == 'Linux' && matrix.arch != 'auto'
      with:
        platforms: all
    - name: Setup Python
      uses: actions/setup-python@v5.1.1
      with:
        python-version: ${{ matrix.python-version }}
    - name: Build pure wheel
      shell: bash
      run: |-
        python -m pip install "setuptools>=0.8" wheel build twine
        python -m build --wheel --outdir wheelhouse
        python -m twine check ./wheelhouse/ubelt*.whl
    - name: Show built files
      shell: bash
      run: ls -la wheelhouse
    - uses: actions/upload-artifact@v4.4.0
      name: Upload wheels artifact
      with:
        name: wheels-${{ matrix.os }}-${{ matrix.arch }}
        path: ./wheelhouse/ubelt*.whl
  test_purepy_wheels:
    name: ${{ matrix.python-version }} on ${{ matrix.os }}, arch=${{ matrix.arch }} with ${{ matrix.install-extras }}
    if: "! startsWith(github.event.ref, 'refs/heads/release')"
    runs-on: ${{ matrix.os }}
    needs:
    - build_purepy_wheels
    strategy:
      fail-fast: false
      matrix:
        # Xcookie generates an explicit list of environments that will be used
        # for testing instead of using the more concise matrix notation.
        include:
        - python-version: '3.6'
          install-extras: tests-strict,runtime-strict
          os: ubuntu-20.04
          arch: auto
        - python-version: '3.6'
          install-extras: tests-strict,runtime-strict
          os: macos-13
          arch: auto
        - python-version: '3.6'
          install-extras: tests-strict,runtime-strict
          os: windows-latest
          arch: auto
        - python-version: '3.13'
          install-extras: tests-strict,runtime-strict,optional-strict
          os: ubuntu-latest
          arch: auto
        - python-version: '3.13'
          install-extras: tests-strict,runtime-strict,optional-strict
          os: macOS-latest
          arch: auto
        - python-version: '3.13'
          install-extras: tests-strict,runtime-strict,optional-strict
          os: windows-latest
          arch: auto
        - python-version: '3.13'
          install-extras: tests
          os: macOS-latest
          arch: auto
        - python-version: '3.13'
          install-extras: tests
          os: windows-latest
          arch: auto
        - python-version: '3.6'
          install-extras: tests,optional
          os: ubuntu-20.04
          arch: auto
        - python-version: '3.7'
          install-extras: tests,optional
          os: ubuntu-latest
          arch: auto
        - python-version: '3.8'
          install-extras: tests,optional
          os: ubuntu-latest
          arch: auto
        - python-version: '3.9'
          install-extras: tests,optional
          os: ubuntu-latest
          arch: auto
        - python-version: '3.10'
          install-extras: tests,optional
          os: ubuntu-latest
          arch: auto
        - python-version: '3.11'
          install-extras: tests,optional
          os: ubuntu-latest
          arch: auto
        - python-version: '3.12'
          install-extras: tests,optional
          os: ubuntu-latest
          arch: auto
        - python-version: '3.13'
          install-extras: tests,optional
          os: ubuntu-latest
          arch: auto
        - python-version: pypy-3.9
          install-extras: tests,optional
          os: ubuntu-latest
          arch: auto
        - python-version: '3.6'
          install-extras: tests,optional
          os: macos-13
          arch: auto
        - python-version: '3.7'
          install-extras: tests,optional
          os: macos-13
          arch: auto
        - python-version: '3.8'
          install-extras: tests,optional
          os: macOS-latest
          arch: auto
        - python-version: '3.9'
          install-extras: tests,optional
          os: macOS-latest
          arch: auto
        - python-version: '3.10'
          install-extras: tests,optional
          os: macOS-latest
          arch: auto
        - python-version: '3.11'
          install-extras: tests,optional
          os: macOS-latest
          arch: auto
        - python-version: '3.12'
          install-extras: tests,optional
          os: macOS-latest
          arch: auto
        - python-version: '3.13'
          install-extras: tests,optional
          os: macOS-latest
          arch: auto
        - python-version: pypy-3.9
          install-extras: tests,optional
          os: macOS-latest
          arch: auto
        - python-version: '3.6'
          install-extras: tests,optional
          os: windows-latest
          arch: auto
        - python-version: '3.7'
          install-extras: tests,optional
          os: windows-latest
          arch: auto
        - python-version: '3.8'
          install-extras: tests,optional
          os: windows-latest
          arch: auto
        - python-version: '3.9'
          install-extras: tests,optional
          os: windows-latest
          arch: auto
        - python-version: '3.10'
          install-extras: tests,optional
          os: windows-latest
          arch: auto
        - python-version: '3.11'
          install-extras: tests,optional
          os: windows-latest
          arch: auto
        - python-version: '3.12'
          install-extras: tests,optional
          os: windows-latest
          arch: auto
        - python-version: '3.13'
          install-extras: tests,optional
          os: windows-latest
          arch: auto
        - python-version: pypy-3.9
          install-extras: tests,optional
          os: windows-latest
          arch: auto
    steps:
    - name: Checkout source
      uses: actions/checkout@v4.1.1
    - name: Enable MSVC 64bit
      uses: ilammy/msvc-dev-cmd@v1
      if: matrix.os == 'windows-latest'
    - name: Set up QEMU
      uses: docker/setup-qemu-action@v3.0.0
      if: runner.os == 'Linux' && matrix.arch != 'auto'
      with:
        platforms: all
    - name: Setup Python
      uses: actions/setup-python@v5.1.1
      with:
        python-version: ${{ matrix.python-version }}
    - uses: actions/download-artifact@v4.1.8
      name: Download wheels
      with:
        pattern: wheels-*
        merge-multiple: true
        path: wheelhouse
    - name: Install wheel ${{ matrix.install-extras }}
      shell: bash
      env:
        INSTALL_EXTRAS: ${{ matrix.install-extras }}
      run: |-
        echo "Finding the path to the wheel"
        ls wheelhouse || echo "wheelhouse does not exist"
        echo "Installing helpers"
        pip install "setuptools>=0.8" setuptools_scm wheel build -U
        pip install tomli pkginfo
        export WHEEL_FPATH=$(python -c "if 1:
            import pathlib
            dist_dpath = pathlib.Path('wheelhouse')
            candidates = list(dist_dpath.glob('ubelt*.whl'))
            candidates += list(dist_dpath.glob('ubelt*.tar.gz'))
            fpath = sorted(candidates)[-1]
            print(str(fpath).replace(chr(92), chr(47)))
        ")
        export MOD_VERSION=$(python -c "if 1:
            from pkginfo import Wheel, SDist
            fpath = '$WHEEL_FPATH'
            cls = Wheel if fpath.endswith('.whl') else SDist
            print(cls(fpath).version)
        ")
        echo "WHEEL_FPATH=$WHEEL_FPATH"
        echo "INSTALL_EXTRAS=$INSTALL_EXTRAS"
        echo "MOD_VERSION=$MOD_VERSION"
        pip install --prefer-binary "ubelt[$INSTALL_EXTRAS]==$MOD_VERSION" -f wheelhouse
        echo "Install finished."
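    # NOTE: the install step above locates the freshly built wheel, extracts
    # its exact version with pkginfo, and installs "ubelt[extras]==version"
    # with "-f wheelhouse" so pip prefers the local artifact over an index.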
    - name: Test wheel ${{ matrix.install-extras }}
      shell: bash
      env:
        CI_PYTHON_VERSION: py${{ matrix.python-version }}
      run: |-
        echo "Creating test sandbox directory"
        export WORKSPACE_DNAME="testdir_${CI_PYTHON_VERSION}_${GITHUB_RUN_ID}_${RUNNER_OS}"
        echo "WORKSPACE_DNAME=$WORKSPACE_DNAME"
        mkdir -p $WORKSPACE_DNAME
        echo "cd-ing into the workspace"
        cd $WORKSPACE_DNAME
        pwd
        ls -altr
        # Get the path to the installed package and run the tests
        export MOD_DPATH=$(python -c "import ubelt, os; print(os.path.dirname(ubelt.__file__))")
        export MOD_NAME=ubelt
        echo "
        ---
        MOD_DPATH = $MOD_DPATH
        ---
        running the pytest command inside the workspace
        ---
        "
        python -m pytest --verbose -p pytester -p no:doctest --xdoctest --cov-config ../pyproject.toml --cov-report term --durations=100 --cov="$MOD_NAME" "$MOD_DPATH" ../tests
        echo "pytest command finished, moving the coverage file to the repo root"
        ls -al
        # Move coverage file to a new name
        mv .coverage "../.coverage.$WORKSPACE_DNAME"
        echo "changing directory back to the repo root"
        cd ..
        ls -al
    - name: Combine coverage Linux
      if: runner.os == 'Linux'
      run: |-
        echo '############ PWD'
        pwd
        cp .wheelhouse/.coverage* . || true
        ls -al
        python -m pip install coverage[toml]
        echo '############ combine'
        coverage combine . || true
        echo '############ XML'
        coverage xml -o ./coverage.xml || true
        echo '### The cwd should now have a coverage.xml'
        ls -altr
        pwd
    - uses: codecov/codecov-action@v4.5.0
      name: Codecov Upload
      with:
        file: ./coverage.xml
        token: ${{ secrets.CODECOV_TOKEN }}
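  # The two deploy jobs below differ mainly in their target index and
  # credentials: test_deploy uploads to test.pypi.org on ordinary pushes, and
  # live_deploy uploads to pypi.org on release tags/branches. Both GPG-sign
  # the artifacts and timestamp them with the opentimestamps client before
  # uploading with twine.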
  test_deploy:
    name: Deploy Test
    runs-on: ubuntu-latest
    if: github.event_name == 'push' && ! startsWith(github.event.ref, 'refs/tags') && ! startsWith(github.event.ref, 'refs/heads/release')
    needs:
    - build_and_test_sdist
    - build_purepy_wheels
    steps:
    - name: Checkout source
      uses: actions/checkout@v4.1.1
    - uses: actions/download-artifact@v4.1.8
      name: Download wheels
      with:
        pattern: wheels-*
        merge-multiple: true
        path: wheelhouse
    - uses: actions/download-artifact@v4.1.8
      name: Download sdist
      with:
        name: sdist_wheels
        path: wheelhouse
    - name: Show files to upload
      shell: bash
      run: ls -la wheelhouse
    - name: Sign and Publish
      env:
        TWINE_REPOSITORY_URL: https://test.pypi.org/legacy/
        TWINE_USERNAME: __token__
        TWINE_PASSWORD: ${{ secrets.TEST_TWINE_PASSWORD }}
        CI_SECRET: ${{ secrets.CI_SECRET }}
      run: |-
        GPG_EXECUTABLE=gpg
        $GPG_EXECUTABLE --version
        openssl version
        $GPG_EXECUTABLE --list-keys
        echo "Decrypting Keys"
        openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:CI_SECRET -d -a -in dev/ci_public_gpg_key.pgp.enc | $GPG_EXECUTABLE --import
        openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:CI_SECRET -d -a -in dev/gpg_owner_trust.enc | $GPG_EXECUTABLE --import-ownertrust
        openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:CI_SECRET -d -a -in dev/ci_secret_gpg_subkeys.pgp.enc | $GPG_EXECUTABLE --import
        echo "Finish Decrypt Keys"
        $GPG_EXECUTABLE --list-keys || true
        $GPG_EXECUTABLE --list-keys || echo "first invocation of gpg creates directories and returns 1"
        $GPG_EXECUTABLE --list-keys
        VERSION=$(python -c "import setup; print(setup.VERSION)")
        pip install twine
        pip install urllib3 requests[security] twine
        GPG_KEYID=$(cat dev/public_gpg_key)
        echo "GPG_KEYID = '$GPG_KEYID'"
        GPG_SIGN_CMD="$GPG_EXECUTABLE --batch --yes --detach-sign --armor --local-user $GPG_KEYID"
        WHEEL_PATHS=(wheelhouse/*.whl wheelhouse/*.tar.gz)
        WHEEL_PATHS_STR=$(printf '"%s" ' "${WHEEL_PATHS[@]}")
        echo "$WHEEL_PATHS_STR"
        for WHEEL_PATH in "${WHEEL_PATHS[@]}"
        do
            echo "------"
            echo "WHEEL_PATH = $WHEEL_PATH"
            $GPG_SIGN_CMD --output $WHEEL_PATH.asc $WHEEL_PATH
            $GPG_EXECUTABLE --verify $WHEEL_PATH.asc $WHEEL_PATH || echo "hack: the first run of gpg --verify fails"
            $GPG_EXECUTABLE --verify $WHEEL_PATH.asc $WHEEL_PATH
        done
        ls -la wheelhouse
        pip install opentimestamps-client
        ots stamp wheelhouse/*.whl wheelhouse/*.tar.gz wheelhouse/*.asc
        ls -la wheelhouse
        twine upload --username __token__ --password "$TWINE_PASSWORD" --repository-url "$TWINE_REPOSITORY_URL" wheelhouse/*.whl wheelhouse/*.tar.gz --skip-existing --verbose || { echo "failed to twine upload" ; exit 1; }
    - uses: actions/upload-artifact@v4.4.0
      name: Upload deploy artifacts
      with:
        name: deploy_artifacts
        path: |-
          wheelhouse/*.whl
          wheelhouse/*.zip
          wheelhouse/*.tar.gz
          wheelhouse/*.asc
          wheelhouse/*.ots
  live_deploy:
    name: Deploy Live
    runs-on: ubuntu-latest
    if: github.event_name == 'push' && (startsWith(github.event.ref, 'refs/tags') || startsWith(github.event.ref, 'refs/heads/release'))
    needs:
    - build_and_test_sdist
    - build_purepy_wheels
    steps:
    - name: Checkout source
      uses: actions/checkout@v4.1.1
    - uses: actions/download-artifact@v4.1.8
      name: Download wheels
      with:
        pattern: wheels-*
        merge-multiple: true
        path: wheelhouse
    - uses: actions/download-artifact@v4.1.8
      name: Download sdist
      with:
        name: sdist_wheels
        path: wheelhouse
    - name: Show files to upload
      shell: bash
      run: ls -la wheelhouse
    - name: Sign and Publish
      env:
        TWINE_REPOSITORY_URL: https://upload.pypi.org/legacy/
        TWINE_USERNAME: __token__
        TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
        CI_SECRET: ${{ secrets.CI_SECRET }}
      run: |-
        GPG_EXECUTABLE=gpg
        $GPG_EXECUTABLE --version
        openssl version
        $GPG_EXECUTABLE --list-keys
        echo "Decrypting Keys"
        openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:CI_SECRET -d -a -in dev/ci_public_gpg_key.pgp.enc | $GPG_EXECUTABLE --import
        openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:CI_SECRET -d -a -in dev/gpg_owner_trust.enc | $GPG_EXECUTABLE --import-ownertrust
        openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:CI_SECRET -d -a -in dev/ci_secret_gpg_subkeys.pgp.enc | $GPG_EXECUTABLE --import
        echo "Finish Decrypt Keys"
        $GPG_EXECUTABLE --list-keys || true
        $GPG_EXECUTABLE --list-keys || echo "first invocation of gpg creates directories and returns 1"
        $GPG_EXECUTABLE --list-keys
        VERSION=$(python -c "import setup; print(setup.VERSION)")
        pip install twine
        pip install urllib3 requests[security] twine
        GPG_KEYID=$(cat dev/public_gpg_key)
        echo "GPG_KEYID = '$GPG_KEYID'"
        GPG_SIGN_CMD="$GPG_EXECUTABLE --batch --yes --detach-sign --armor --local-user $GPG_KEYID"
        WHEEL_PATHS=(wheelhouse/*.whl wheelhouse/*.tar.gz)
        WHEEL_PATHS_STR=$(printf '"%s" ' "${WHEEL_PATHS[@]}")
        echo "$WHEEL_PATHS_STR"
        for WHEEL_PATH in "${WHEEL_PATHS[@]}"
        do
            echo "------"
            echo "WHEEL_PATH = $WHEEL_PATH"
            $GPG_SIGN_CMD --output $WHEEL_PATH.asc $WHEEL_PATH
            $GPG_EXECUTABLE --verify $WHEEL_PATH.asc $WHEEL_PATH || echo "hack: the first run of gpg --verify fails"
            $GPG_EXECUTABLE --verify $WHEEL_PATH.asc $WHEEL_PATH
        done
        ls -la wheelhouse
        pip install opentimestamps-client
        ots stamp wheelhouse/*.whl wheelhouse/*.tar.gz wheelhouse/*.asc
        ls -la wheelhouse
        twine upload --username __token__ --password "$TWINE_PASSWORD" --repository-url "$TWINE_REPOSITORY_URL" wheelhouse/*.whl wheelhouse/*.tar.gz --skip-existing --verbose || { echo "failed to twine upload" ; exit 1; }
    - uses: actions/upload-artifact@v4.4.0
      name: Upload deploy artifacts
      with:
        name: deploy_artifacts
        path: |-
          wheelhouse/*.whl
          wheelhouse/*.zip
          wheelhouse/*.tar.gz
          wheelhouse/*.asc
          wheelhouse/*.ots
  release:
    name: Create Github Release
    if: github.event_name == 'push' && (startsWith(github.event.ref, 'refs/tags') || startsWith(github.event.ref, 'refs/heads/release'))
    runs-on: ubuntu-latest
    permissions:
      contents: write
    needs:
    - live_deploy
    steps:
    - name: Checkout source
      uses: actions/checkout@v4.1.1
    - uses: actions/download-artifact@v4.1.8
      name: Download artifacts
      with:
        name: deploy_artifacts
        path: wheelhouse
    - name: Show files to release
      shell: bash
      run: ls -la wheelhouse
    - run: 'echo "Automatic Release Notes. TODO: improve" > ${{ github.workspace }}-CHANGELOG.txt'
    - name: Tag Release Commit
      if: (startsWith(github.event.ref, 'refs/heads/release'))
      run: |-
        export VERSION=$(python -c "import setup; print(setup.VERSION)")
        git tag "v$VERSION"
        git push origin "v$VERSION"
    - uses: softprops/action-gh-release@v1
      name: Create Release
      id: create_release
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      with:
        body_path: ${{ github.workspace }}-CHANGELOG.txt
        tag_name: ${{ github.ref }}
        name: Release ${{ github.ref }}
        body: Automatic Release
        generate_release_notes: true
        draft: true
        prerelease: false
        files: |-
          wheelhouse/*.whl
          wheelhouse/*.asc
          wheelhouse/*.ots
          wheelhouse/*.zip
          wheelhouse/*.tar.gz

###
# Unfortunately we cant (yet) use the yaml docstring trick here
# https://github.community/t/allow-unused-keys-in-workflow-yaml-files/172120
#__doc__: |
#    # How to run locally
#    # https://packaging.python.org/guides/using-testpypi/
#    git clone https://github.com/nektos/act.git $HOME/code/act
#    chmod +x $HOME/code/act/install.sh
#    (cd $HOME/code/act && ./install.sh -b $HOME/.local/opt/act)
#
#    load_secrets
#    unset GITHUB_TOKEN
#    $HOME/.local/opt/act/act \
#        --secret=EROTEMIC_TWINE_PASSWORD=$EROTEMIC_TWINE_PASSWORD \
#        --secret=EROTEMIC_TWINE_USERNAME=$EROTEMIC_TWINE_USERNAME \
#        --secret=EROTEMIC_CI_SECRET=$EROTEMIC_CI_SECRET \
#        --secret=EROTEMIC_TEST_TWINE_USERNAME=$EROTEMIC_TEST_TWINE_USERNAME \
#        --secret=EROTEMIC_TEST_TWINE_PASSWORD=$EROTEMIC_TEST_TWINE_PASSWORD
ubelt-1.3.7/.gitignore:

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other info into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject

.pytest_cache
profile_output.*
_skbuild
pip-wheel-metadata/
wheelhouse

ubelt-1.3.7/.mailmap:

Jon Crall  Jon Crall
Jon Crall  jon.crall
Jon Crall  joncrall
Jon Crall  joncrall

ubelt-1.3.7/.readthedocs.yml:

# .readthedocs.yml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
#
# See Also:
# https://readthedocs.org/dashboard/ubelt/advanced/

# Required
version: 2

build:
  os: "ubuntu-22.04"
  tools:
    python: "3.11"

sphinx:
  configuration: docs/source/conf.py

formats: all

python:
  install:
    - requirements: requirements/docs.txt
    - method: pip
      path: .

ubelt-1.3.7/CHANGELOG.md:

# Changelog

We are currently working on porting this changelog to the specifications in
[Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
This project (loosely) adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Version 1.3.7 - Unreleased

### Added
* Added `requestkw` to `ub.download`.

### Fixed
* Minor test issues.
* `ub.IndexableWalker.diff` for empty inputs
* Bug in `memoize_method` which could produce incorrect results if methods
  from different instances are assigned to variables.
* Rare error in `test_copy_directory_cases` test due to list sorting.

### Changed
* Added module name printout to `schedule_deprecation`
* `ub.hash_data` now supports `datetime.datetime`, `datetime.date`, and
  `decimal.Decimal` objects.

## Version 1.3.6 - Released 2024-06-08

### Added
* Add `ub.IndexableWalker.diff`
* Support for UNIX special permission (suid/sgid/svtx) codes in `Path.chmod`.

### Fixed
* Added workarounds for copy / symlinks via `ub.Path` and `ub.symlink` on pypy.
* `ub.import_module_from_path` now correctly accepts `PathLike` objects.
* `ub.modname_to_modpath` fixed in cases where editable installs use type
  annotations in their MAPPING definition.

### Changed
* Moved windows dependencies from requires to optional. Windows users that
  make use of these will need to update their ubelt install or explicitly
  depend on them as well.

## Version 1.3.5 - Released 2024-03-20

### Added
* New wrapper around `pathlib.Path.chmod` in `ubelt.Path.chmod`. Can now
  specify string codes like "u+x" or "+rw". Old stat logic works as it
  previously did.

### Changed
* Allow the argument to `ubelt.cmd` to be a `PathLike` object, which we will
  expect to be an executable.

### Fixed
* `ub.modname_to_modpath` now handles cases where editable packages have
  modules where the name is different than the package.
* Fixed deprecated usage of `ast.Num`

## Version 1.3.4 - 2023-10-27

### Added
* Add `backend` option to `highlight_code`, which can be "pygments" or "rich"
  (example below).
* Support for Python 3.12
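A minimal sketch of the new `backend` option (illustration only, not part of
the original changelog; assumes `pygments` is installed for the "pygments"
backend):

```python
import ubelt as ub
code = 'def f():\n    return 1'
# Colorize python source text; backend can be "pygments" or "rich"
print(ub.highlight_code(code, backend='pygments'))
```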
### Changed
* Improve speed of inplace dictionary set operations.

### Fixed
* Align in the case of `nobraces=1` for `ubelt.urepr`.

## Version 1.3.3 - 2023-07-10

### Fixed
* Bug where the first call to `ub.hash_data` would error if it was given a
  ndarray with object type.
* Actually exposed `ChDir`.

### Changed
* Docs and typing improvements

### Notes
* Skipped a release version due to a bad github tag.

## Version 1.3.0 - 2023-06-14

### Changed
* Removed experimental warning from `Path.copy` and `Path.move`; they are now
  well enough tested.
* The `util_format` submodule has been renamed to `util_repr`. The
  `util_format` module is now a deprecated alias for `util_repr`.
* Stub out merge methods for SetDict.
* Renamed `FormatterExtensions` to `ReprExtensions`, with the former now being
  a deprecated alias.
* `ubelt.cmd` now returns an object that ducktypes
  `subprocess.CompletedProcess`.
* `ubelt.cmd` now contains a `capture` argument which will disable capturing
  of output.
* `ubelt.cmd` detach messages now only print if verbosity is > 1

### Fixed
* Exposed `ChDir`.
* Removed usage of deprecated `pipes` module
* `ubelt.cmd` return dictionaries are now more consistent

### Deprecated
* The `recreate` argument to `ensuredir`

## Version 1.2.4 - 2023-02-28

### Added
* Added `ChDir` to `util_path`.
* Add `transient` option to `JobPool` so references to futures are released
  after they are yielded to the user.
* Added "base32" as an option for `hash_data`. Note that it is unpadded, hence
  non-RFC compliant.
* Added `pattern` as a convenience option to `ubelt.Path.ls`.

### Changed
* Updated vendored ProgIter to match progiter 1.3.0
* The download progress bar now gives more relevant information and updates
  less frequently.
* `IndexableWalker.allclose` can now take unwrapped objects.

### Fixed
* `ubelt.cmd` now respects `cwd` when `system=True`.

## Version 1.2.3 - Released 2022-12-03

### Added
* Support for FreeBSD in `util_platform`
* `ub.Path.copy` and `ub.Path.move`
* Tentative Python 3.11 support
* `ub.urepr`, which is `ub.repr2` with new defaults (example below).
* `ub.IndexableWalker.allclose` method to replace `indexable_allclose`

### Changed
* `ub.schedule_deprecation` can now accept the strings "soon" or "now" for
  "when" arguments.
* `ub.schedule_deprecation` can now accept `stacklevel` as an argument.
* `ub.Path.appdir` can now be called without arguments.
* Deprecate `TempDir`
* `ub.Timer` can now accept `ns` as a keyword to enable nanosecond resolution.
* `ProgIter.format_message` return value has changed.
* `ub.Path.mkdir` now returns itself
* Speedup `dict_hist` in the basic case by 2x

### Fixed
* Issue in `indexable_allclose` where tolerances were not respected.
* Issue in `modname_to_modpath` with exclude and editable installs.
* Incompatibility with pathlib in `PythonPathContext`
* Fixed issue in progiter with line clears
* Issue in `import_module_from_modname` with editable installs where it would
  try to match modules that had the requested module as a prefix.
* The `timeout` argument is now respected in the thread / process case of
  `JobPool`

### Deprecated
* `ub.indexable_allclose`. Use `ub.IndexableWalker.allclose` instead.
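A minimal sketch of `ub.urepr` (illustration only; the `nl` newline keyword is
shared with `repr2`):

```python
import ubelt as ub
data = {'a': [1.0, 2.0], 'b': {'c': 3}}
# Produce a nice, evaluatable repr with one level of newlines
print(ub.urepr(data, nl=1))
```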
## Version 1.2.2 - Released 2022-09-05

### Added
* Add `ubelt.util_dict.UDict.take`.
* Added `__add__`, `__radd__`, `startswith`, and `endswith` methods to
  `ubelt.Path` to make it a better drop-in replacement for string based paths.

### Changed
* Reverse dunder methods of `ubelt.SetDict` now promote the type.
* Add `cls` keyword argument to `ubelt.SetDict` methods.
* Deprecate: `ensure_app_cache_dir`, `ensure_app_config_dir`,
  `ensure_app_data_dir`, `get_app_cache_dir`, `get_app_config_dir`,
  `get_app_data_dir`, `readfrom`, `writeto`, `ensure_unicode`.
* Initial work on new Path methods for move and copy.
* `CacheStamp.renew` now does nothing and returns None if the stamp is
  disabled.
* AutoDict, SetDict, and UDict are now always ordered. In Python 3.6 it
  inherits from OrderedDict. Otherwise it uses regular dict, which will be
  ordered in 3.7+
* AutoDict now inherits from UDict.
* PathLike objects can now be used in args to ub.cmd when the command is an
  iterable.

### Deprecated
* Deprecate AutoOrderedDict, which is now indistinguishable from AutoDict

### Fixed
* Tentative fixes for new `__editable__` based install path with
  `ub.modname_to_modpath`

## Version 1.2.1 - Released 2022-08-06

### Fixed
* Implemented inplace and reverse versions of dictionary set operations
* Added copy to setdict

## Version 1.2.0 - Released 2022-08-02

### Added
* Added keywords argument to `ub.compatible`.
* Added `warncls` argument to `ub.schedule_deprecation`.
* Experimental SetDict, UDict and aliases sdict, udict.

### Fixed
* Race condition on win32 in `ubelt.symlink`
* Issue with `ubelt.memoize_method` where the method name and docstring were
  not wrapped correctly.
* The `timeout` argument now works correctly with `ub.cmd` when `tee=True`.
* Added `appname` to `ubelt.download`, which was supposed to exist as
  indicated by the docs, but didn't.
* The resources used by `ubelt.cmd` are now properly closed.

### Changed
* `ub.compatible` no longer errors on positional-only functions; instead it
  returns the keyword-compatible arguments.
* An issue in `ubelt.symlink` with unintuitive behavior when an empty string
  was given as the link path. This now raises an error.
* The main implementations of `ubelt.sorted_vals` and `ubelt.map_vals` were
  renamed to `ubelt.sorted_values` and `ubelt.map_values`, but the old names
  are still available as aliases.
* Positional arguments in `Path.augment` have been modified.
* In `Path.augment`, deprecate overloaded `suffix` and introduce `stemsuffix`
  as an alternative.
* Added cls to a lot of util_dict funcs

## Version 1.1.2 - Released 2022-06-30

### Added
* Added new module `util_deprecate` with the function `schedule_deprecation`,
  which is generally useful for library maintenance.

### Fixed
* Fixed issue where ubelt Cacher triggered its own warnings
* Fixed deprecated usage of LooseVersion

### Changed
* Tentative deprecation or backwards incompatible change in ub.Path.augment
  with suffix or prefix keywords

## Version 1.1.1 - Released 2022-06-09

### Changed
* Removed warning from `ubelt.Cacher` when depends is not specified.
* `ub.timestamp` / `ub.timeparse` now respects a `default_timezone` argument
  and handles `datetime.date` objects.
* Type stubs are now included in the distribution

### Fixed
* Issue #113, where a `ub.find_exe` test failed on Gentoo. Fixed by #114
* Issue where older versions of CacheStamp would be interpreted as 1.1 stamps.

## Version 1.1.0 - Released 2022-06-03

### Added
* New method: `ub.timeparse` can parse the result of `ub.timestamp` into a
  `datetime` object. Can optionally use `dateutil.parser.parse` under the
  hood.
* `ub.Path.ls`, a convenience function that aliases `list(path.iterdir())`
  (example below).
* `ub.Path.walk` to wrap `os.walk`.
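A minimal sketch of the new path and time helpers (illustration only; the
demo app directory name is arbitrary):

```python
import ubelt as ub
dpath = ub.Path.appdir('ubelt/demo').ensuredir()
(dpath / 'note.txt').touch()
print(dpath.ls())                  # shorthand for list(dpath.iterdir())
dt = ub.timeparse(ub.timestamp())  # round-trip a timestamp to a datetime
```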
### Changed
* Register `pathlib.Path` with `ub.repr2`
* Can now register global `ub.repr2` extensions via `ub.repr2.register`
* Can now register global `ub.hash_data` extensions via
  `ub.hash_data.register`
* Removed deprecated arguments from `ubelt.cmd`.
* `ub.CacheStamp` will now check the mtime and size to quickly check if the
  products have changed and force expiration.
* `ub.CacheStamp` now takes an `expires` keyword arg, which will keep the
  cache valid only for the specified amount of time.
* `ub.CacheStamp` now takes a `hash_prefix` keyword arg, which will check that
  it matches the hash of the product.
* `ub.cmd` now has a `system` argument for modularity with `os.system`.
* `ub.cmd` now accepts a `timeout` argument (tee support is pending).
* `ub.JobPool` now contains a protected `_prog` variable allowing the user
  finer-grained progress controls.
* `ub.JobPool` now contains a convenience method `join` that executes all jobs
  and returns a list of results.
* `ub.timestamp` can now accept a `datetime` object as an argument, and will
  return the timestamp for that object.
* The `ubelt.util_download.grabdata` function now uses `CacheStamp` instead of
  implementing its own stamp solution.
* The `ubelt.util_hash.HashableExtensions` implementation was updated to use
  `functools.singledispatch` instead of the custom solution. This seems faster
  and should not have any API impact.

### Deprecated
* `product` and `cfgstr` arguments to `CacheStamp.expired`
* `product` and `cfgstr` arguments to `CacheStamp.renew`
* Passing `hasher` as an instance to functions like `grabdata` or `CacheStamp`
  can cause unexpected hashes as they may be used more than once.

### Fixed
* `ub.hash_data` now recognizes subclasses of registered types.
* `ub.timestamp()` has been outputting incorrect (negated) UTC offsets. This
  is now fixed.
* `ub.timestamp()` now works correctly when the year has less than 4 digits.

## Version 1.0.1 - Released 2022-02-20

### Fixed
* Bug where six was used but not listed as a dependency. Six is now removed as
  a dependency.
* Fixed out of date docs in some places.

## Version 1.0.0 - Released 2022-02-15

### Added
* :func:`ubelt.Path.appdir` which functions like the `get_app_*_dir` methods
  in `util_platform`.
* Add `tail` argument to :func:`ubelt.Path.augment` and
  :func:`ubelt.util_path.augpath`
* Add json `backend` option to Cacher.

### Changed
* `IndexableWalker` behavior has been changed; each time `iter` is called it
  resets its global state.
* Remove support for Python 2.7 and Python 3.5
* Removed deprecated functions scheduled for removal.
* :func:`ubelt.util_dict.dict_diff` now preserves original dictionary order in
  Python 3.7+.
* `ub.hash_data` can now hash slice objects.
* INTENTION OF BREAKING CHANGE NOTIFICATION: `ubelt.util_format.repr2` may no
  longer sort dictionaries by default. Looking into a backwards compatible way
  to work around this.

## Version 0.11.1 - Released 2022-02-15

### Added
* More `ubelt.Path` extensions for `delete`
* Add `timeout` parameter to `ubelt.download`

### Changed
* Modified default `ubelt.Path` behavior for `touch` to return a
  self-reference for chaining

## Version 0.11.0 - Released 2022-01-03

### Added
* Added `ubelt.Path`, as an extension and quicker-to-type version of
  pathlib.Path with extra functionality.
* Added `progkw` as argument to `JobPool.as_completed` to control progress
  reporting
* Added `progkw` as argument to `ub.download` / `ub.grabdata` to control
  progress reporting
* Added `util_zip` with the `zopen` function. Access a file inside a zipfile
  with a standard `open` like interface (sketch below).
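A rough sketch of `zopen` usage (illustration only; the archive is created
just for the demo, and the "zipfile-path/member-path" addressing form is the
assumed convention):

```python
import zipfile
import ubelt as ub
dpath = ub.Path.appdir('ubelt/demo').ensuredir()
zpath = dpath / 'archive.zip'
with zipfile.ZipFile(zpath, 'w') as zf:
    zf.writestr('inside/data.txt', 'hello')
# Address the member through the zipfile with an open()-like call
with ub.zopen(str(zpath) + '/inside/data.txt', mode='r') as file:
    print(file.read())
```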
### Fixed
* `ubelt.hash_data` now handles non-numeric float values.
* `ubelt.chunks` now works correctly when nchunks is specified.

### Changed
* Changed default of `_hashable_sequence` `types` arg from True to False to be
  consistent, but kept existing types=True behavior in hashable extensions.
  Changes should be backwards compatible, but in the future we may introduce a
  breaking change to make hash behavior more consistent.

## Version 0.10.2 - Released 2021-12-07

### Added
* Added pyi type annotation files. (Used a custom script to export docstring
  type annotations)
* Added `default` keyword argument to signature of `ub.peek`

### Fixed
* Added `map` function to the executor classes.
* `ub.Executor` now correctly returns itself from `__enter__`
* Docstrings now have better type annotations
* ProgIter had a bug in `time_thresh`, where it was never used (modified
  adjustment rules).
* Fixed performance regression in ProgIter

### Changed
* New CI GPG Keys: Erotemic-CI: 70858F4D01314BF21427676F3D568E6559A34380; for
  reference, the old signing key was 98007794ED130347559354B1109AC852D297D757.
* Verbose test from symlink previously showed "real -> link", which makes no
  sense because a link should be the object that "is pointing". Thus it now
  shows "link -> real"
* `ub.download` should now generate less stdout text
* New in-repo "dev" benchmarks

## Version 0.10.1 - Released 2021-08-23

### Changed
* Documentation fixes

## Version 0.10.0 - Released 2021-08-22

### Added
* new hashing 36-character base with alias (alphanum / abc123 / 36)
* Added "compact" argument to `ub.repr2`
* added candidate utilities: `named_product`, `varied_values` to `util_dict`
* added candidate utilities: `compatible` to `util_func`
* Added `util_indexable` and `IndexableWalker` (ported from kwcoco)
* Added `util_futures` with `ub.JobPool` and `ub.Executor` (ported from
  kwcoco)
* Added `util_download_manager` with simple implementation of
  `ub.DownloadManager`
* Added candidate functions to `ubelt` proper

### Fixed
* `ubelt.download` now errors earlier if the parent directory does not exist
* PyTest no longer throws warnings
* Fixed issue with `download` and ByteIO objects in 3.8
* Bug in Python 3.8+ on win32 that did not account for the change in
  `os.readlink` behavior

### Changed
* Modified corner cases in `ub.repr2` to move towards behavior that is easier
  to reason about.
* Remove support for Python 3.4

## Version 0.9.5 - Released 2021-02-05

### Added
* `blake3` is now an optional hasher

### Changed
* `ubelt.hash_data` can now hash dictionaries and sets by default.
* increased test speed
* Internal change in how external hashers are maintained.

### Fixed
* On windows colorama init is no longer called if it was ever initialized
  before. This fixes rare infinite recursion bugs when using pytest.

## Version 0.9.4 - Released 2021-01-15

### Added
* Added `maxbytes` parameter to `hash_file` to allow for only hashing a
  prefix (example below).

### Fixed
* Docs seem to be building correctly now

### Changed
* Made import time 13x faster (was 109680, is now 8120) by using lazy external
  type registration in `util_hash` and removing other eager imports.
* Removed import dependency on six. There is still a runtime dependency, but
  we are moving away from six. This is a first step to deprecating Python2
  support
* Changed default of "blocksize" in `hash_file` to `2 ** 20` based on
  benchmarks.
* Removing Travis-CI, will soon migrate to Circle-CI
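A minimal sketch of prefix hashing with `maxbytes` (illustration only; the
file is created just for the demo):

```python
import ubelt as ub
fpath = ub.Path.appdir('ubelt/demo').ensuredir() / 'data.bin'
fpath.write_bytes(b'x' * 4096)
# Hash only the first kilobyte of the file
print(ub.hash_file(fpath, maxbytes=1024))
```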
## Version 0.9.3 - Released 2020-10-24

### Added
* Added `meta` and `depends` to `CacheStamp` to agree with `Cacher`
* `ProgIter.step` can now accept the `force` keyword argument to force display
* `ProgIter.step` returns True if the display was written

### Fixed
* Bug in `dict_isect` where order was not taken into account
* Bug in `ProgIter` display frequency adjustment

### Changed
* Tweaked display frequency adjustment in `ProgIter`
* `ProgIter` no longer displays wall time by default. Set `show_wall=True` to
  regain this functionality. When true this now shows the date and time.

## [Version 0.9.2] - 2020-08-26

### Added
* `ub.repr2` now accepts type name strings at register time (which makes it
  easier to lazy-load heavy libraries)
* `ub.repr2` now handles pandas.DataFrame objects by default
* `ub.repr2` now accepts the `align` keyword arg, which will align dictionary
  kv separators.
* functions in `ub.util_color` now respect a global `NO_COLOR` flag which
  prevents ANSI coloration.

### Changed
* `ProgIter.step` now respects update freq, and will not update the estimates
  if too few iterations have passed. This prevents `ub.download` from
  generating extremely large amounts of standard out.
* `ub.Cacher` now reports the file size of the cache file.
* `ub.Cacher` now defaults to the latest pickle protocol (-1), which may cause
  compatibility issues.

### Fixed
* `ProgIter` now correctly checks if it needs to display a message on every
  iteration.
* Fixed uninitialized `_cursor_at_newline` variable in `ProgIter`.

## [Version 0.9.1] - 2020-03-30

### Changed
* `ub.repr2` now encodes inf and nan as `float('inf')` and `float('nan')` to
  allow output to be evaluated.
* `ub.grabdata` now uses the hasher name in the cached hash stamp file.

## [Version 0.9.0] - 2020-02-22

### Fixed
* Fixed issue in setup.py that broke the previous release.

## [Version 0.8.9] - 2020-02-20

NOTE: THIS RELEASE WAS BROKEN DUE TO AN ISSUE WITH THE SETUP SCRIPT

### Added
* `dpath` and `fname` keywords to the `ub.download` function.
* `modname_to_modpath` can now find modules referenced by egg-link files.
* `ub.sorted_keys` and `ub.sorted_vals` for sorting dictionaries

### Fixed
* `ub.download` now accepts `sha256` and `md5` hashes.

### Changed
* The argument names in `ub.group_items`: `groupids` was changed to `key`.
* The argument names in `ub.dict_hist`: `item_list` was changed to `items`,
  `weight_list` was changed to `weights`.
* The argument names in `ub.flatten`: `nested_list` was changed to `nested`

## [Version 0.8.8] - 2020-01-12

### Added
* Added `check` kwarg to `ub.cmd`, which when True will raise a
  `CalledProcessError` if the exit-code is non-zero.
* Added support for pypy.

### Changed
* Moved `timerit` to its own module.

## [Version 0.8.7] - 2019-12-06

### Fixed
* Fixed corner case where `util_hash` raised an import error when python was
  not compiled with OpenSSL.

## [Version 0.8.6] - 2019-12-05

### Fixed
* Removed the `NoParam.__call__` method. This method should not have been
  defined, and by existing it caused issues when using `NoParam` as a
  column-key in pandas.

## [Version 0.8.5] - 2019-11-26

### Added
* Timerit now has 3 new properties: `measures`, `rankings`, and `consistency`.
  These keep track of and analyze differences in timings between labeled
  timerit runs (sketch below).
* `ub.take` now accepts `default=NoParam` keyword argument.

### Changed
* Substantially improved documentation.
* The following functions are now officially deprecated: `dict_take`
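A minimal sketch of the Timerit loop and its new properties (illustration
only):

```python
import ubelt as ub
ti = ub.Timerit(num=100, label='demo')
for timer in ti:
    with timer:
        sum(range(1000))  # the code being benchmarked
print(ti.measures)  # aggregated timing statistics per label
```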
## [Version 0.8.4] - 2019-11-20

### Changed
* The following functions are now officially deprecated: `startfile`,
  `truepath`, `compressuser`, `editfile`, `platform_resource_dir`,
  `get_app_resource_dir`, `ensure_app_resource_dir`, and `dict_take`
* Improve docs
* `Timerit` and `ProgIter` are back; remove dependency on the external
  modules.

## [Version 0.8.3] - 2019-11-06

### Changed
* `PythonPathContext` now works in more corner cases, although some rarer
  corner cases will now break. This trade-off should be a net positive.

## [Version 0.8.2] - 2019-07-11

### Added
* Added `dpath` as an argument to `ub.augpath`

### Fixed
* Custom extensions for `ub.hash_data` are fixed. Previously they were not
  passed down more than a single level.
* The `convert` option for `ub.hash_data` was previously not hooked up.
* Correctly expose `dict_diff`
* Fixed issue in `ub.augpath` where `multidot` did not preserve the original
  extension

### Changed
* `ub.Cacher` no longer ensures that the `dpath` exists on construction. This
  check is delayed until `save` is called.
* `ub.CacheStamp` now accepts the `enabled` keyword.
* `modpath_to_modname` now properly handles compiled modules with ABI tags.

## [Version 0.8.0] - 2019-05-12

### Added
* Add `ub.dict_diff`, which removes keys from a dictionary similar to `set`
  difference.
* Add `ub.paragraph`, which helps with writing log messages
* Add some benchmarks
* Add lots more documentation.

### Changed
* `ub.identity` now accepts `*args` and `**kwargs` and defaults the first
  argument to `None`, but still only returns the first argument.
* The `sort` kwarg of `ub.repr2` can now accept a callable, which will act as
  a key to the `sorted` function
* `ub.hash_data` now accepts the `extensions` kwarg, which allows the user to
  define how particular types are hashed.

### Fixed
* Fix GH #53
* the `index` argument in `import_module_from_path` is now correctly used.

## [Version 0.7.1] - 2019-03-19

### Fixed
* Fixed bug in `ub.dict_hist` when `ordered=True` (half of the keys would be
  lost). Also affected `dict_take`.
* `platform_data_dir` now correctly raises an exception when the operating
  system is unknown.

## [Version 0.7.0] - 2019-03-12

### Added
* Add `memoize_property`

### Changed
* `ub.cmd` now reports `cwd` on exception
* Reworked requirements to minimize dependencies.
* The `xxhash` and `pygments` dependencies are now optional.
* The testing dependencies are now optional.

## [Version 0.6.3] - ???

### Added
* new tests
* add `util_stream`

### Fixed
* Fixed issue in `ub.download` with bad content header urls

## [Version 0.6.2] - 2019-02-14

### Added
* `ub.platform_cache_dir` and `ub.platform_config_dir` now respect XDG
  environs on Linux systems.

### Changed
* `ub.download` can now accept `fpath` as either a file path or an
  `io.BytesIO` object
* `ub.FormatterExtensions.register` can now accept a type or tuple of types.

### Deprecated
* `ub.platform_resource_dir` is deprecated in favor of
  `ub.platform_config_dir`.

## [Version 0.6.1] - 2019-01-08

### Changed
* `ub.repr2` now accepts negative values for `newlines`, which means use
  newlines until the current height is only `-newline`.
* `ub.repr2` now keeps track of nesting depth from the bottom
* Make the result of `ub.memoize_method` appear more like a bound method.

### Added
* Add custom extensions to `ub.repr2` and expose `ub.FormatterExtensions`
* Add `dict_isect` to `util_dict` (example below).
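A minimal sketch of `dict_isect` (illustration only):

```python
import ubelt as ub
d = {'a': 1, 'b': 2, 'c': 3}
# Keep only the requested keys, preserving the order of `d`
print(ub.dict_isect(d, ['c', 'a']))  # -> {'a': 1, 'c': 3}
```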
### Fixed
* Fixed misspelling in docs
* Fixed misspelled `detach` kwarg in `ub.cmd` (the old `detatch` argument is
  now deprecated and will be removed)

## [Version 0.6.0] - 2018-11-10

### Added
* Add class variable `FORCE_DISABLE` to `ub.Cacher`
* Add the `xxhash` algorithm as an option to `ub.hash_data`
* Add `ub.peek` - 4-letter syntactic sugar for `lambda x: next(iter(x))`

## [Version 0.5.3] - 2018-10-24

### Added
* Add `key` to `ub.find_duplicates`

### Changed
* Renamed first argument of `ub.chunks` from `sequence` to `items`
* Improved type hints in google-style docstrings
* `ub.cmd` verbose >= 3 now uses nicer Unicode characters if possible

### Fixed
* Fixed GH#41

## [Version 0.5.2] - 2018-09-04

### Added
* Add verbose flag to `ub.CacheStamp`

### Changed
* `ub.group_items` argument names have changed, and it can now take a callable
  as the second argument. The `sorted_` argument is now deprecated.
* Symlink now reports the location of the old target when the new target does
  not match
* Docstrings now use `PathLike` as the type for arguments and attributes that
  should be considered paths (note strings are still accepted).
* `ub.download` will now keep a potentially corrupted file if the hash does
  not match.
* `ub.grabdata` will compute the hash of an existing file on disk if the .hash
  stamp is missing to try and avoid re-downloading the file.
* Improved efficiency of `ub.argmax`

## [Version 0.5.0] - 2018-07-23

### Added
* added `ub.expandpath`

### Changed
* Certain imports are now lazy to optimize startup time
* change `ub.cmd` `tee` parameter to `tee_backend` (BREAKING CHANGE)
* change `ub.cmd` `verbout` parameter to `tee` (BREAKING CHANGE)
* `import_module_from_path` can now handle zip-imports where the zip-file is
  followed by a slash rather than a colon

### Removed
* `tee` parameter from `ub.cmd` (replaced by `tee_backend`)
* `verbout` parameter from `ub.cmd` (replaced by `tee`)

## [Version 0.4.0] - 2018-07-12

### Added
* `ub.find_exe` - a python implementation of `which`
* `ub.find_path` - finds matching files in your PATH variables
* `ub.CacheStamp`

### Modified
* Replace in-house implementation of `OrderedSet` with the ordered-set PyPI
  package.
* `ub.download` now accepts `hash_prefix` and `hasher` args.
* `ub.hash_file` now accepts `types` args
* `ub.augpath` now accepts `multidot` args
* `ub.cmd` now accepts `cwd` and `env` args
* Changing default behavior of `util_hash` (BREAKING CHANGE):
    - Default of `ub.Cacher` `maxlen` changed to 40 for sha1 considerations
    - Default of `ub.hash_data` `base` changed from `abc` to `hex`
    - Default of `ub.hash_data` `types` changed from True to False.
    - Moved argument position of `hashlen` to the end.

### Removed
* Remove `ub.OrderedSet.extend`

### Fixed
* `ub.NoParam` is now Falsey

## [Version 0.3.0] - 2019-07-12

### Changed
* `ub.import_module_from_path` can now import modules within zip-files

### Removed
* `ub.PY2` and `ub.PY3`. Use `six` instead.

## [Version 0.2.1] - 2018-05-27

### Modified
* `ub.dzip` now accepts a backend dict class as a keyword argument
* `OrderedSet.intersection` can now handle a single argument
* `Timerit` `num` now defaults to 1
* Add function `print` to Timerit

## [Version 0.2.0] - 2018-05-05

* Fix timezone issue with negative time-zones
* Move internal `__init__` auto-generation logic to new `mkinit` repo
* Network tests no longer run by default and require `--network`

## [Version 0.1.1] - 2018-04-20

* Add `ub.argmin` and `ub.argmax`
* `ub.Cacher` can now be used as a decorator.
* Rename `util_decor.py` to `util_memoize.py`
* Add `key` argument to `ub.unique` and `ub.unique_flags`
* Add `ub.argunique`
* `import_module_from_path` now prefers the path module when there are name
  conflicts
* Fix `ub.repr2` precision with numpy scalars
* Add `ub.dzip`

## [Version 0.1.0] - 2018-04-02

### Added
* Add `inject_method` to `util_func.py`.
* Add `allsame`

### Modified
* simplified dynamic imports
* `memoize_method` now handles kwargs
* Can now update `ProgIter` description on the fly
* Add methods to `OrderedSet` to complete the set API (e.g. `intersection`,
  `difference`, etc...)
* Can now index into an `OrderedSet` using a slice
* Add `appname` keyword argument to `grabdata`
* Add `extend` to ordered set API
* Increase `tqdm` compatibility with `ProgIter`

### Fixed
* Fixed issue with `OrderedSet.union` where it ignored `self`
* Fixed issue with `OrderedSet.union` where `__eq__` and `isdisjoint` were
  wrong
* Fix issue with `ub.repr2` dictionaries with newlines in keys
* Fix issue with relative paths and symlink

## [Version 0.0.44] - 2018-03-12

### Added
* `ub.iter_window`

## [Version 0.0.43] - 2018-03-09

### Modified
* Spelling: changed the `Timer.ellapsed` attribute to `Timer.elapsed`.
* Verbosity of `Timer` and `Timerit` now depends on if a label was specified.
* `Timer.tic` now returns a reference to the `Timer` instance.

### Removed
* Remove `util_stress`; it was out of scope.

## [Version 0.0.42] - 2018-02-26

### Modified
* `hash_data` can now accept `OrderedDict` input
* `dict_union` now returns `OrderedDict` if the first argument is one

### Fixed
* bug in `hash_data` where negative integers did not work.

## [Version 0.0.41] - ???

### Added
* `OrderedSet` / `oset`
* Add `symlink` function that works on UNIX and Windows*. (*if the user has
  symlink permissions, it works just like UNIX without caveats. Otherwise
  `ub.symlink` falls back to using junctions and hard links, which should
  still work mostly the same, except `os.path.islink` and `os.readlink` will
  not work, among other minor issues).

### Modified
* Add `base` to `augpath`
* `ub.delete` now treats nested junctions as symlinks, unlike `shutil.rmtree`.

## [Version 0.0.40] - 2018-02-04

### Modified
* Add `numpy` support to `ub.repr2`

## [Version 0.0.39] - 2018-01-18

### Modified
* Changed `ub.Timerit.call` API to return a reference to the Timerit object
  instead of the average seconds. Note this change is backwards incompatible.

## [Version 0.0.38] - ???

### Added
* `ub.hash_data` and `ub.hash_file` for easy hashing of arbitrary structured
  data and files.
* `ub.dict_union` combines multiple dictionaries and returns the result.

### Modified
* `ub.Timerit` reports better measures of expected time.
* New argument `total` to `ub.chunks` lets you specify how long an iterable is
  if `len` is not available (for generators)

## [Version 0.0.37] - ???

### Added
* Add `ub.TempDir`
* Add `ub.import_module_from_path` (example below)
* Add `ub.import_module_from_name`

### Modified
* can now choose `ub.cmd` tee backend (select or thread) on POSIX.
* `ProgIter` now supports a more `tqdm`-like API
* Add standard deviation to `timerit`
* Minor enhancements to `ub.Cacher`

### Fixed
* fixed unused argument `chunksize` in `util_download`
* `ub.cmd` tests now work on windows
* terminal colors now work on windows

### Deprecated
* Remove most of the `static_analysis` module. Use code in xdoctest for now.
  Note: some of this functionality may return as general utilities in the
  future, but the existing constructs were only needed for doctests, which are
  now done via xdoctest.
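A minimal sketch of `import_module_from_path` (illustration only; the module
file is created just for the demo, using today's `ub.Path` API):

```python
import ubelt as ub
dpath = ub.Path.appdir('ubelt/demo').ensuredir()
fpath = dpath / 'demo_mod.py'
fpath.write_text('x = 42')
# Import a module directly from a file path, no sys.path edits needed
mod = ub.import_module_from_path(str(fpath))
print(mod.x)  # -> 42
```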
## [Version 0.0.34] - 2017-11-11

### Added
- `ub.truepath`
- `ub.iterable`
- `util_func.py` with `ub.identity`
- `util_download.py` with `ub.download` and `ub.grabdata`

### Changed
- The `__init__` imports are now statically generated; this fixes the random
  third party attributes (e.g. `expanduser`, `Thread`) that were exposed in
  the `__init__` file.
- `ProgIter` now uses scientific notation when it is small
- `ub.AutoOrderedDict` now inherits from `ub.AutoDict`
- tests are now running using `pytest` and `xdoctest`
- `ub.cmd` now uses thread based logging

### Fixed
- Fixed many failing tests on windows
- Small bug and documentation fixes.

### Issues
- `ub.cmd` does not work correctly on windows
- some Unicode formatting does not work correctly on windows

## [Version 0.0.33] - 2017-09-13

### Added
- `ub.repr2` and `ub.hzcat`
- `ub.color_text`

## [Version 0.0.31] - 2017-09-04

### Added
- Add `ub.argflag` and `ub.argval`

## [Version 0.0.28] - 2017-07-05

### Added
- `ub.AutoDict` and `ub.AutoOrderedDict`.
- Many undocumented changes
- Starting a changelog

## [Version 0.0.1] - 2017-02-01

### Added
- First release of ubelt
- Changes from and before this time are undocumented

ubelt-1.3.7/LICENSE:

Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction, and
distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by the
copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all other
entities that control, are controlled by, or are under common control with
that entity. For the purposes of this definition, "control" means (i) the
power, direct or indirect, to cause the direction or management of such
entity, whether by contract or otherwise, or (ii) ownership of fifty percent
(50%) or more of the outstanding shares, or (iii) beneficial ownership of such
entity.

"You" (or "Your") shall mean an individual or Legal Entity exercising
permissions granted by this License.

"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation source, and
configuration files.

"Object" form shall mean any form resulting from mechanical transformation or
translation of a Source form, including but not limited to compiled object
code, generated documentation, and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or Object form,
made available under the License, as indicated by a copyright notice that is
included in or attached to the work (an example is provided in the Appendix
below).

"Derivative Works" shall mean any work, whether in Source or Object form, that
is based on (or derived from) the Work and for which the editorial revisions,
annotations, elaborations, or other modifications represent, as a whole, an
original work of authorship. For the purposes of this License, Derivative
Works shall not include works that remain separable from, or merely link (or
bind by name) to the interfaces of, the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright 2022 "Jon Crall" Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ubelt-1.3.7/MANIFEST.in000066400000000000000000000000321472470106000143700ustar00rootroot00000000000000include requirements/*.txt

ubelt-1.3.7/README.rst000066400000000000000000001667701472470106000143440ustar00rootroot00000000000000

|GithubActions| |ReadTheDocs| |Pypi| |Downloads| |Codecov| |CircleCI| |Appveyor|

.. .. |CodeQuality| |TwitterFollow|

.. The large version won't work because github strips rst image rescaling.
   https://i.imgur.com/AcWVroL.png

.. image:: https://i.imgur.com/PoYIsWE.png
   :height: 100px
   :align: left

.. .. raw:: html

.. Ubelt is a utility library for Python with a stdlib-like feel.

Elevator Pitch:
===============

Is the Python standard library good? Yes. Could its conciseness be improved?
Yes. Ubelt aims to provide a quicker way to express things you can do in the
standard library. Progress? ``ub.ProgIter``. Hashing? ``ub.hash_data`` /
``ub.hash_file``. Caching? ``ub.Cacher`` / ``ub.CacheStamp``. Shell commands?
``ub.cmd``.
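For a quick taste, here is a minimal sketch exercising a few of these (hedged:
it assumes a POSIX ``echo`` and elides the hash and progress output):

.. code:: python

    >>> import ubelt as ub
    >>> for _ in ub.ProgIter(range(100), desc='working'):
    ...     pass                               # progress without threads
    >>> key = ub.hash_data({'a': [1, 2]})      # hash nested containers
    >>> info = ub.cmd('echo hello')            # run a shell command
    >>> assert info['out'].strip() == 'hello'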
There are similar functions for downloading data, futures-based parallel (or
serial) job execution, pretty reprs, path management, iteration, and one of my
favorites: set-operation-enriched dictionaries: ``ub.udict``.

There are 120ish functions and classes to help make your code shorter and
easier to express concisely. The library is fast to install and import; all
dependencies are optional. As of 2023 it is 6 years old, regularly maintained,
and mature. It is well tested and has moderate usage. To learn more, the
function usefulness chart is a good place to start. It shows how often I use
particular functions, and while some of the less used ones are candidates for
removal, some of them are still worth checking out. For a slightly slower
start, read the introduction:

Introduction:
=============

Ubelt is a lightweight library of robust, tested, documented, and simple
functions that extend the Python standard library. It has a flat API that
behaves similarly on Windows, Mac, and Linux (up to some small unavoidable
differences). Almost every function in ``ubelt`` was written with a doctest.
This provides helpful documentation and example usage, and helps achieve 100%
test coverage (with minor exceptions on Windows).

* Goal: provide simple functions that accomplish common tasks not yet
  addressed by the python standard library.
* Constraints: Must be low-impact pure python; it should be easy to install
  and use.
* Method: All functions are written with docstrings and doctests to ensure
  that a baseline level of documentation and testing always exists (even if
  functions are copy/pasted into other libraries)
* Motto: Good utilities lift all codes.

Read the docs here: http://ubelt.readthedocs.io/en/latest/auto/

These are some of the tasks that ubelt's API enables:

- extended pathlib with expand, ensuredir, endswith, augment, delete (ub.Path)
- get paths to cross platform data/cache/config directories (ub.Path.appdir, ...)
- perform set operations on dictionaries (SetDict)
- a dictionary with extended helper methods like subdict, take, peek_value, invert, sorted_keys, sorted_vals (UDict)
- hash common data structures like list, dict, int, str, etc. (hash_data)
- hash files (hash_file)
- cache a block of code (Cacher, CacheStamp)
- time a block of code (Timer)
- show loop progress with less overhead than tqdm (ProgIter)
- download a file with optional caching and hash verification (download, grabdata)
- run shell commands (cmd)
- find a file or directory in candidate locations (find_path, find_exe)
- string-repr for nested data structures (urepr)
- color text with ANSI tags (color_text)
- horizontally concatenate multiline strings (hzcat)
- create cross platform symlinks (symlink)
- import a module using the path to that module (import_module_from_path)
- check if a particular flag or value is on the command line (argflag, argval)
- memoize functions (memoize, memoize_method, memoize_property)
- build ordered sets (oset)
- argmax/min/sort on lists and dictionaries (argmin, argsort)
- get a histogram of items or find duplicates in a list (dict_hist, find_duplicates)
- group a sequence of items by some criterion (group_items)

Ubelt is small. Its top-level API is defined using roughly 40 lines:

..
code:: python from ubelt.util_arg import (argflag, argval,) from ubelt.util_cache import (CacheStamp, Cacher,) from ubelt.util_colors import (NO_COLOR, color_text, highlight_code,) from ubelt.util_const import (NoParam,) from ubelt.util_cmd import (cmd,) from ubelt.util_dict import (AutoDict, AutoOrderedDict, SetDict, UDict, ddict, dict_diff, dict_hist, dict_isect, dict_subset, dict_union, dzip, find_duplicates, group_items, invert_dict, map_keys, map_vals, map_values, named_product, odict, sdict, sorted_keys, sorted_vals, sorted_values, udict, varied_values,) from ubelt.util_deprecate import (schedule_deprecation,) from ubelt.util_download import (download, grabdata,) from ubelt.util_download_manager import (DownloadManager,) from ubelt.util_func import (compatible, identity, inject_method,) from ubelt.util_repr import (ReprExtensions, urepr,) from ubelt.util_futures import (Executor, JobPool,) from ubelt.util_io import (delete, touch,) from ubelt.util_links import (symlink,) from ubelt.util_list import (allsame, argmax, argmin, argsort, argunique, boolmask, chunks, compress, flatten, iter_window, iterable, peek, take, unique, unique_flags,) from ubelt.util_hash import (hash_data, hash_file,) from ubelt.util_import import (import_module_from_name, import_module_from_path, modname_to_modpath, modpath_to_modname, split_modpath,) from ubelt.util_indexable import (IndexableWalker, indexable_allclose,) from ubelt.util_memoize import (memoize, memoize_method, memoize_property,) from ubelt.util_mixins import (NiceRepr,) from ubelt.util_path import (ChDir, Path, TempDir, augpath, ensuredir, expandpath, shrinkuser, userhome,) from ubelt.util_platform import (DARWIN, LINUX, POSIX, WIN32, find_exe, find_path, platform_cache_dir, platform_config_dir, platform_data_dir,) from ubelt.util_str import (codeblock, hzcat, indent, paragraph,) from ubelt.util_stream import (CaptureStdout, CaptureStream, TeeStringIO,) from ubelt.util_time import (Timer, timeparse, timestamp,) from ubelt.util_zip import (split_archive, zopen,) from ubelt.orderedset import (OrderedSet, oset,) from ubelt.progiter import (ProgIter,) Installation: ============= Ubelt is distributed on pypi as a universal wheel and can be pip installed on Python 3.6+. Installations are tested on CPython and PyPy implementations. :: pip install ubelt Note that our distributions on pypi are signed with GPG. The signing public key is ``D297D757``; this should agree with the value in `dev/public_gpg_key`. For older versions of Python, this table provides the last officially supported version of ubelt. +------------------+---------------------------------------------+ | Python Version | Most Recent Supported Ubelt Version | +==================+=============================================+ | 3.8 - 3.13 | latest | +------------------+---------------------------------------------+ | 3.6 - 3.7 | latest (will remove support in 1.4.0) | +------------------+---------------------------------------------+ | 2.7, 3.5 | 0.11.1 | +------------------+---------------------------------------------+ | 3.4 | 0.6.1 | +------------------+---------------------------------------------+ Function Usefulness =================== When I had to hand pick a set of functions that I thought were the most useful I chose these and provided some comment on why: .. 
code:: python

    import ubelt as ub

    ub.Path        # inherits from pathlib.Path with quality of life improvements
    ub.UDict       # inherits from dict with keywise set operations and quality of life improvements
    ub.Cacher      # configuration based on-disk caching
    ub.CacheStamp  # indirect caching with corruption detection
    ub.hash_data   # hash mutable python containers, useful with Cacher to config strings
    ub.cmd         # combines the best of subprocess.Popen and os.system
    ub.download    # download a file with a single command. Also see grabdata for the same thing, but caching from CacheStamp.
    ub.JobPool     # easy multi-threading / multi-processing / or single-threaded processing
    ub.ProgIter    # a minimal progress iterator. It's single threaded, informative, and faster than tqdm.
    ub.memoize     # like ``functools.cache``, but uses ub.hash_data if the args are not hashable.
    ub.urepr       # readable representations of nested data structures

But a better way might be to objectively measure the frequency of usage and
build a histogram of usefulness. I generated this histogram using
``python dev/maintain/gen_api_for_docs.py``, which roughly counts the number
of times I've used a ubelt function in another project. Note: this measure is
biased towards older functions.

===================================================================================================================================================== ================
Function name Usefulness
===================================================================================================================================================== ================
`ubelt.urepr `__ 4327
`ubelt.Path `__ 2125
`ubelt.paragraph `__ 1349
`ubelt.ProgIter `__ 747
`ubelt.cmd `__ 657
`ubelt.codeblock `__ 611
`ubelt.udict `__ 603
`ubelt.expandpath `__ 508
`ubelt.take `__ 462
`ubelt.oset `__ 342
`ubelt.ddict `__ 341
`ubelt.iterable `__ 313
`ubelt.flatten `__ 303
`ubelt.group_items `__ 287
`ubelt.NiceRepr `__ 270
`ubelt.ensuredir `__ 267
`ubelt.map_vals `__ 265
`ubelt.peek `__ 262
`ubelt.NoParam `__ 248
`ubelt.dzip `__ 239
`ubelt.odict `__ 236
`ubelt.hash_data `__ 200
`ubelt.argflag `__ 184
`ubelt.grabdata `__ 161
`ubelt.dict_hist `__ 156
`ubelt.identity `__ 156
`ubelt.dict_isect `__ 152
`ubelt.Timer `__ 145
`ubelt.memoize `__ 142
`ubelt.argval `__ 134
`ubelt.allsame `__ 133
`ubelt.color_text `__ 129
`ubelt.schedule_deprecation `__ 123
`ubelt.augpath `__ 120
`ubelt.dict_diff `__ 117
`ubelt.IndexableWalker `__ 116
`ubelt.compress `__ 116
`ubelt.JobPool `__ 107
`ubelt.named_product `__ 104
`ubelt.hzcat `__ 90
`ubelt.delete `__ 88
`ubelt.unique `__ 84
`ubelt.WIN32 `__ 78
`ubelt.dict_union `__ 76
`ubelt.symlink `__ 76
`ubelt.indent `__ 69
`ubelt.ensure_app_cache_dir `__ 67
`ubelt.iter_window `__ 62
`ubelt.invert_dict `__ 58
`ubelt.memoize_property `__ 57
`ubelt.import_module_from_name `__ 56
`ubelt.argsort `__ 55
`ubelt.timestamp `__ 54
`ubelt.modname_to_modpath `__ 53
`ubelt.find_duplicates `__ 53
`ubelt.hash_file `__ 51
`ubelt.find_exe `__ 50
`ubelt.map_keys `__ 50
`ubelt.dict_subset `__ 50
`ubelt.Cacher `__ 49
`ubelt.chunks `__ 47
`ubelt.sorted_vals `__ 40
`ubelt.CacheStamp `__ 38
`ubelt.highlight_code `__ 37
`ubelt.argmax `__ 36
`ubelt.writeto `__ 36
`ubelt.ensure_unicode `__ 32
`ubelt.sorted_keys `__ 30
`ubelt.memoize_method `__ 29
`ubelt.compatible `__ 24
`ubelt.import_module_from_path `__ 24
`ubelt.Executor `__ 23
`ubelt.readfrom `__ 23
`ubelt.modpath_to_modname `__ 17
`ubelt.AutoDict `__ 17
`ubelt.touch `__ 17
`ubelt.inject_method `__ 14
`ubelt.timeparse `__ 13
`ubelt.ChDir `__ 11
`ubelt.shrinkuser `__ 11
`ubelt.argmin `__ 10
`ubelt.varied_values `__ 9
`ubelt.split_modpath `__ 8
`ubelt.LINUX `__ 8
`ubelt.download `__ 7
`ubelt.NO_COLOR `__ 7
`ubelt.OrderedSet `__ 6
`ubelt.zopen `__ 6
`ubelt.CaptureStdout `__ 6
`ubelt.DARWIN `__ 5
`ubelt.boolmask `__ 4
`ubelt.find_path `__ 4
`ubelt.get_app_cache_dir `__ 4
`ubelt.indexable_allclose `__ 3
`ubelt.UDict `__ 3
`ubelt.SetDict `__ 2
`ubelt.AutoOrderedDict `__ 2
`ubelt.argunique `__ 2
`ubelt.map_values `__ 1
`ubelt.unique_flags `__ 1
`ubelt.userhome `__ 0
`ubelt.split_archive `__ 0
`ubelt.sorted_values `__ 0
`ubelt.sdict `__ 0
`ubelt.platform_data_dir `__ 0
`ubelt.platform_config_dir `__ 0
`ubelt.platform_cache_dir `__ 0
`ubelt.get_app_data_dir `__ 0
`ubelt.get_app_config_dir `__ 0
`ubelt.ensure_app_data_dir `__ 0
`ubelt.ensure_app_config_dir `__ 0
`ubelt.TempDir `__ 0
`ubelt.TeeStringIO `__ 0
`ubelt.ReprExtensions `__ 0
`ubelt.POSIX `__ 0
`ubelt.DownloadManager `__ 0
`ubelt.CaptureStream `__ 0
===================================================================================================================================================== ================

Examples
========

The most up to date examples are the doctests. We also have a Jupyter
notebook: https://github.com/Erotemic/ubelt/blob/main/docs/notebooks/Ubelt%20Demo.ipynb

Here are examples of some of the features inside ``ubelt``.

Paths
-----

Ubelt extends ``pathlib.Path`` by adding several new (often chainable)
methods. Namely, ``augment``, ``delete``, ``expand``, ``ensuredir``, and
``shrinkuser``. It also modifies the behavior of ``touch`` to be chainable.
(New in 1.0.0)

.. code:: python

    >>> # Ubelt extends pathlib functionality
    >>> import ubelt as ub
    >>> dpath = ub.Path('~/.cache/ubelt/demo_path').expand().ensuredir()
    >>> fpath = dpath / 'text_file.txt'
    >>> aug_fpath = fpath.augment(suffix='.aux', ext='.jpg').touch()
    >>> aug_dpath = dpath.augment('demo_path2')
    >>> assert aug_fpath.read_text() == ''
    >>> fpath.write_text('text data')
    >>> assert aug_fpath.exists()
    >>> assert not aug_fpath.delete().exists()
    >>> assert dpath.exists()
    >>> assert not dpath.delete().exists()
    >>> print(f'{fpath.shrinkuser()}')
    >>> print(f'{dpath.shrinkuser()}')
    >>> print(f'{aug_fpath.shrinkuser()}')
    >>> print(f'{aug_dpath.shrinkuser()}')
    ~/.cache/ubelt/demo_path/text_file.txt
    ~/.cache/ubelt/demo_path
    ~/.cache/ubelt/demo_path/text_file.aux.jpg
    ~/.cache/ubelt/demo_pathdemo_path2

Hashing
-------

The ``ub.hash_data`` function constructs a hash for common Python nested data
structures. Extensions to allow it to hash custom types can be registered. By
default it handles lists, dicts, sets, slices, uuids, and numpy arrays.

.. code:: python

    >>> import ubelt as ub
    >>> data = [('arg1', 5), ('lr', .01), ('augmenters', ['flip', 'translate'])]
    >>> ub.hash_data(data, hasher='sha256')
    0d95771ff684756d7be7895b5594b8f8484adecef03b46002f97ebeb1155fb15
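Beyond the defaults, the ``hasher``, ``base``, and ``types`` arguments select
the backend algorithm, the output alphabet, and whether type information is
mixed into the hash. A minimal sketch (exact digests omitted, since they
depend on the inputs):

.. code:: python

    >>> import ubelt as ub
    >>> data = [('arg1', 5), ('lr', .01)]
    >>> # including type information changes the resulting hash
    >>> assert ub.hash_data(data, types=True) != ub.hash_data(data, types=False)
    >>> # base='abc' restricts the output alphabet to lowercase letters
    >>> assert ub.hash_data(data, base='abc').isalpha()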
Support for torch tensors and pandas data frames is also included, but it
needs to be explicitly enabled. There also exists a non-public plugin
architecture to extend this function to arbitrary types. While not officially
supported, it is usable and will become better integrated in the future. See
``ubelt/util_hash.py`` for details.

Caching
-------

Cache intermediate results from blocks of code inside a script with minimal
boilerplate or modification to the original code.

For direct caching of data, use the ``Cacher`` class. By default results will
be written to ubelt's appdir cache, but the exact location can be specified
via the ``dpath`` or ``appname`` arguments. Additionally, process dependencies
can be specified via the ``depends`` argument, which allows for implicit cache
invalidation. As far as I can tell, this is the most concise way (4 lines of
boilerplate) to cache a block of code with existing Python syntax (as of
2022-06-03).

.. code:: python

    >>> import ubelt as ub
    >>> depends = ['config', {'of': 'params'}, 'that-uniquely-determine-the-process']
    >>> cacher = ub.Cacher('test_process', depends=depends, appname='myapp')
    >>> # start fresh
    >>> cacher.clear()
    >>> for _ in range(2):
    >>>     data = cacher.tryload()
    >>>     if data is None:
    >>>         myvar1 = 'result of expensive process'
    >>>         myvar2 = 'another result'
    >>>         data = myvar1, myvar2
    >>>         cacher.save(data)
    >>> myvar1, myvar2 = data

For indirect caching, use the ``CacheStamp`` class. This simply writes a
"stamp" file that marks that a process has completed. Additionally you can
specify criteria for when the stamp should expire. If you let ``CacheStamp``
know about the expected "product", it will expire the stamp if that file has
changed, which can be useful in situations where caches might become corrupt
or need invalidation.

.. code:: python

    >>> import ubelt as ub
    >>> dpath = ub.Path.appdir('ubelt/demo/cache').delete().ensuredir()
    >>> params = {'params1': 1, 'param2': 2}
    >>> expected_fpath = dpath / 'file.txt'
    >>> stamp = ub.CacheStamp('name', dpath=dpath, depends=params,
    >>>                       hasher='sha256', product=expected_fpath,
    >>>                       expires='2101-01-01T000000Z', verbose=3)
    >>> # Start fresh
    >>> stamp.clear()
    >>>
    >>> for _ in range(2):
    >>>     if stamp.expired():
    >>>         expected_fpath.write_text('expensive process')
    >>>         stamp.renew()

See the documentation for more details about ``Cacher`` and ``CacheStamp``.

Loop Progress
-------------

``ProgIter`` is a no-threads-attached progress meter that writes to stdout.
It is a mostly drop-in alternative to ``tqdm``. *The advantage of ``ProgIter``
is that it does not use any python threading*, and therefore can be safer with
code that makes heavy use of multiprocessing. (Note: ``ProgIter`` is also
defined in a standalone module: ``pip install progiter``)

.. code:: python

    >>> import ubelt as ub
    >>> def is_prime(n):
    ...     return n >= 2 and not any(n % i == 0 for i in range(2, n))
    >>> for n in ub.ProgIter(range(1000), verbose=2):
    >>>     # do some work
    >>>     is_prime(n)
    0/1000... rate=0.00 Hz, eta=?, total=0:00:00, wall=14:05 EST
    1/1000... rate=82241.25 Hz, eta=0:00:00, total=0:00:00, wall=14:05 EST
    257/1000... rate=177204.69 Hz, eta=0:00:00, total=0:00:00, wall=14:05 EST
    642/1000... rate=94099.22 Hz, eta=0:00:00, total=0:00:00, wall=14:05 EST
    1000/1000... rate=71886.74 Hz, eta=0:00:00, total=0:00:00, wall=14:05 EST

Command Line Interaction
------------------------

The builtin Python ``subprocess.Popen`` module is great, but it can be a bit
clunky at times. The ``os.system`` command is easy to use, but it doesn't have
much flexibility. The ``ub.cmd`` function aims to fix this. It is as simple to
run as ``os.system``, but it returns a dictionary containing the return code,
standard out, standard error, and the ``Popen`` object used under the hood.
This utility is designed to provide behavior that is as consistent as possible
across different platforms. We aim to support Windows, Linux, and OSX.

.. code:: python

    >>> import ubelt as ub
    >>> info = ub.cmd('gcc --version')
    >>> print(ub.urepr(info))
    {
        'command': 'gcc --version',
        'err': '',
        'out': 'gcc (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609\nCopyright (C) 2015 Free Software Foundation, Inc.\nThis is free software; see the source for copying conditions.  There is NO\nwarranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\n',
        'proc': <subprocess.Popen object at 0x...>,
        'ret': 0,
    }

Also note the use of ``ub.urepr`` (previously ``ub.repr2``) to nicely format
the output dictionary.

Additionally, if you specify ``verbose=True``, ``ub.cmd`` will simultaneously
capture the standard output and display it in real time (i.e. it will "tee"
the output).

.. code:: python

    >>> import ubelt as ub
    >>> info = ub.cmd('gcc --version', verbose=True)
    gcc (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
    Copyright (C) 2015 Free Software Foundation, Inc.
    This is free software; see the source for copying conditions.  There is NO
    warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

A common use case for ``ub.cmd`` is parsing version numbers of programs

.. code:: python

    >>> import ubelt as ub
    >>> cmake_version = ub.cmd('cmake --version')['out'].splitlines()[0].split()[-1]
    >>> print('cmake_version = {!r}'.format(cmake_version))
    cmake_version = 3.11.0-rc2

This allows you to easily run a command line executable as part of a python
process, see what it is doing, and then do something based on its output, just
as you would if you were interacting with the command line itself. The idea is
that ``ub.cmd`` removes the need to think about whether you need to pass a
list of args or a string. Both will work.
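For example, this small sketch (POSIX assumed) shows the two equivalent
invocation styles:

.. code:: python

    >>> import ubelt as ub
    >>> info1 = ub.cmd('echo "hello world"')     # a command string ...
    >>> info2 = ub.cmd(['echo', 'hello world'])  # ... or a list of args
    >>> assert info1['out'] == info2['out'] == 'hello world\n'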
New in ``1.0.0``: a third variant with different consequences for executing
shell commands. Using the ``system=True`` kwarg will directly use
``os.system`` instead of ``Popen`` entirely. In this mode it is not possible
to ``tee`` the output because the program executes directly in the foreground.
This is useful for doing things like spawning a vim session and returning if
the user manages to quit vim.

Downloading Files
-----------------

The function ``ub.download`` provides a simple interface to download a URL
and save its data to a file.

.. code:: python

    >>> import ubelt as ub
    >>> url = 'http://i.imgur.com/rqwaDag.png'
    >>> fpath = ub.download(url, verbose=0)
    >>> print(ub.shrinkuser(fpath))
    ~/.cache/ubelt/rqwaDag.png

The function ``ub.grabdata`` works similarly to ``ub.download``, but whereas
``ub.download`` will always re-download the file, ``ub.grabdata`` will check
if the file exists and only re-download it if it needs to.

.. code:: python

    >>> import ubelt as ub
    >>> url = 'http://i.imgur.com/rqwaDag.png'
    >>> fpath = ub.grabdata(url, verbose=0, hash_prefix='944389a39')
    >>> print(ub.shrinkuser(fpath))
    ~/.cache/ubelt/rqwaDag.png

New in version 0.4.0: both functions now accept the ``hash_prefix`` keyword
argument, which if specified will check that the hash of the file matches the
provided value. The ``hasher`` keyword argument can be used to change which
hashing algorithm is used (it defaults to ``"sha512"``).
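When there are many files to fetch, ``ub.DownloadManager`` queues downloads
and runs them concurrently. The following is a hedged sketch, assuming the
``download_root`` argument and the ``submit`` / ``as_completed`` pattern from
its docstring:

.. code:: python

    >>> import ubelt as ub
    >>> # assumption: DownloadManager exposes submit/as_completed like JobPool
    >>> dl_root = ub.Path.appdir('ubelt/demo/dl').ensuredir()
    >>> manager = ub.DownloadManager(download_root=dl_root)
    >>> for url in ['http://i.imgur.com/rqwaDag.png']:
    ...     manager.submit(url)
    >>> fpaths = [job.result() for job in manager.as_completed()]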
Dictionary Set Operations
-------------------------

Dictionary operations that are analogous to set operations. See each
function's documentation for more details on the behavior of the values.
Typically the last seen value is given priority. I hope Python decides to add
these to the stdlib someday.

* ``ubelt.dict_union`` corresponds to ``set.union``.
* ``ubelt.dict_isect`` corresponds to ``set.intersection``.
* ``ubelt.dict_diff`` corresponds to ``set.difference``.

.. code:: python

    >>> d1 = {'a': 1, 'b': 2, 'c': 3}
    >>> d2 = {'c': 10, 'e': 20, 'f': 30}
    >>> d3 = {'e': 10, 'f': 20, 'g': 30, 'a': 40}
    >>> ub.dict_union(d1, d2, d3)
    {'a': 40, 'b': 2, 'c': 10, 'e': 10, 'f': 20, 'g': 30}
    >>> ub.dict_isect(d1, d2)
    {'c': 3}
    >>> ub.dict_diff(d1, d2)
    {'a': 1, 'b': 2}

New in Version 1.2.0: Ubelt now contains a dictionary subclass with set
operations that can be invoked as ``ubelt.SetDict`` or ``ub.sdict``. Note that
n-ary operations are supported.

.. code:: python

    >>> d1 = ub.sdict({'a': 1, 'b': 2, 'c': 3})
    >>> d2 = {'c': 10, 'e': 20, 'f': 30}
    >>> d3 = {'e': 10, 'f': 20, 'g': 30, 'a': 40}
    >>> d1 | d2 | d3
    {'a': 40, 'b': 2, 'c': 10, 'e': 10, 'f': 20, 'g': 30}
    >>> d1 & d2
    {'c': 3}
    >>> d1 - d2
    {'a': 1, 'b': 2}
    >>> ub.sdict.intersection({'a': 1, 'b': 2, 'c': 3}, ['b', 'c'], ['c', 'e'])
    {'c': 3}

Note this functionality and more is available in ``ubelt.UDict`` or
``ub.udict``.

Grouping Items
--------------

Given a list of items and corresponding ids, create a dictionary mapping each
id to a list of its corresponding items. In other words, given a sequence of
items of type ``VT`` and corresponding keys of type ``KT`` (specified by a
function or a corresponding list), group them into a ``Dict[KT, List[VT]]``
such that each key maps to a list of the values associated with it. This is
similar to ``pandas.DataFrame.groupby``.

Group ids can be specified by a second list containing the id for each
corresponding item.

.. code:: python

    >>> import ubelt as ub
    >>> # Group via a corresponding list
    >>> item_list    = ['ham', 'jam', 'spam', 'eggs', 'cheese', 'bannana']
    >>> groupid_list = ['protein', 'fruit', 'protein', 'protein', 'dairy', 'fruit']
    >>> dict(ub.group_items(item_list, groupid_list))
    {'dairy': ['cheese'], 'fruit': ['jam', 'bannana'], 'protein': ['ham', 'spam', 'eggs']}

They can also be given by a function that is executed on each item in the list

.. code:: python

    >>> import ubelt as ub
    >>> # Group via a function
    >>> item_list = ['ham', 'jam', 'spam', 'eggs', 'cheese', 'bannana']
    >>> def grouper(item):
    ...     return item.count('a')
    >>> dict(ub.group_items(item_list, grouper))
    {1: ['ham', 'jam', 'spam'], 0: ['eggs', 'cheese'], 3: ['bannana']}

Dictionary Histogram
--------------------

Find the frequency of items in a sequence. Given a list or sequence of items,
this returns a dictionary mapping each unique value in the sequence to the
number of times it appeared. This is similar to
``pandas.DataFrame.value_counts``.

.. code:: python

    >>> import ubelt as ub
    >>> item_list = [1, 2, 39, 900, 1232, 900, 1232, 2, 2, 2, 900]
    >>> ub.dict_hist(item_list)
    {1232: 2, 1: 1, 2: 4, 900: 3, 39: 1}

Each item can also be given a weight

.. code:: python

    >>> import ubelt as ub
    >>> item_list = [1, 2, 39, 900, 1232, 900, 1232, 2, 2, 2, 900]
    >>> weights   = [1, 1, 0, 0, 0, 0, 0.5, 0, 1, 1, 0.3]
    >>> ub.dict_hist(item_list, weights=weights)
    {1: 1, 2: 3, 39: 0, 900: 0.3, 1232: 0.5}
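Relatedly, when you need to sweep a grid of named parameters (the benchmark
scripts under ``dev/bench`` in this repo use this pattern), ``ub.named_product``
yields a dictionary for each combination in the Cartesian product. A small
sketch:

.. code:: python

    >>> import ubelt as ub
    >>> basis = {'method': ['a', 'b'], 'size': [1, 2]}
    >>> for params in ub.named_product(basis):
    ...     print(params)
    {'method': 'a', 'size': 1}
    {'method': 'a', 'size': 2}
    {'method': 'b', 'size': 1}
    {'method': 'b', 'size': 2}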
Dictionary Manipulation
-----------------------

Map functions across dictionaries to transform the keys or values in a
dictionary. The ``ubelt.map_keys`` function applies a function to each key in
a dictionary and returns this transformed copy of the dictionary. Key conflict
behavior currently raises an error, but may be configurable in the future. The
``ubelt.map_vals`` function is the same except the function is applied to each
value instead. I think these functions are useful enough to be ported to
Python itself.

.. code:: python

    >>> import ubelt as ub
    >>> dict_ = {'a': [1, 2, 3], 'bb': [], 'ccc': [2,]}
    >>> dict_keymod = ub.map_keys(len, dict_)
    >>> dict_valmod = ub.map_vals(len, dict_)
    >>> print(dict_keymod)
    >>> print(dict_valmod)
    {1: [1, 2, 3], 2: [], 3: [2]}
    {'a': 3, 'bb': 0, 'ccc': 1}

Take a subset of a dictionary. Note this is similar to ``ub.dict_isect``,
except this will raise an error if the given keys are not in the dictionary.

.. code:: python

    >>> import ubelt as ub
    >>> dict_ = {'K': 3, 'dcvs_clip_max': 0.2, 'p': 0.1}
    >>> subdict_ = ub.dict_subset(dict_, ['K', 'dcvs_clip_max'])
    >>> print(subdict_)
    {'K': 3, 'dcvs_clip_max': 0.2}

The ``ubelt.take`` function works on dictionaries (and lists). It is similar
to ``ubelt.dict_subset``, except that it returns just a list of the values,
and discards information about the keys. It is also possible to specify a
default value.

.. code:: python

    >>> import ubelt as ub
    >>> dict_ = {1: 'a', 2: 'b', 3: 'c'}
    >>> print(list(ub.take(dict_, [1, 3, 4, 5], default=None)))
    ['a', 'c', None, None]

Invert the mapping defined by a dictionary. By default ``invert_dict`` assumes
that all dictionary values are distinct (i.e. the mapping is one-to-one /
injective).

.. code:: python

    >>> import ubelt as ub
    >>> mapping = {0: 'a', 1: 'b', 2: 'c', 3: 'd'}
    >>> ub.invert_dict(mapping)
    {'a': 0, 'b': 1, 'c': 2, 'd': 3}

However, by specifying ``unique_vals=False`` the inverted dictionary builds a
set of keys that were associated with each value.

.. code:: python

    >>> import ubelt as ub
    >>> mapping = {'a': 0, 'A': 0, 'b': 1, 'c': 2, 'C': 2, 'd': 3}
    >>> ub.invert_dict(mapping, unique_vals=False)
    {0: {'A', 'a'}, 1: {'b'}, 2: {'C', 'c'}, 3: {'d'}}

New in Version 1.2.0: Ubelt now contains a dictionary subclass ``ubelt.UDict``
with these quality of life operations (and it also inherits from
``ubelt.SetDict``). The alias ``ubelt.udict`` can be used for quicker access.

.. code:: python

    >>> import ubelt as ub
    >>> d1 = ub.udict({'a': 1, 'b': 2, 'c': 3})
    >>> d1 & {'a', 'c'}
    {'a': 1, 'c': 3}
    >>> d1.map_keys(ord)
    {97: 1, 98: 2, 99: 3}
    >>> d1.invert()
    {1: 'a', 2: 'b', 3: 'c'}
    >>> d1.subdict(['b', 'c', 'e'], default=None)
    {'b': 2, 'c': 3, 'e': None}
    >>> d1.sorted_keys()
    OrderedDict([('a', 1), ('b', 2), ('c', 3)])
    >>> d1.peek_key()
    'a'
    >>> d1.peek_value()
    1

The next time you have a default configuration dictionary and you allow the
developer to pass keyword arguments to modify these behaviors, consider using
dictionary intersection (&) to separate out only the relevant parts and
dictionary union (|) to update those relevant parts. You can also use
dictionary difference (-) if you need to check for unused arguments.

.. code:: python

    import ubelt as ub

    def run_multiple_algos(**kwargs):
        algo1_defaults = {'opt1': 10, 'opt2': 11}
        algo2_defaults = {'src': './here/', 'dst': './there'}

        kwargs = ub.udict(kwargs)
        algo1_specified = kwargs & algo1_defaults
        algo2_specified = kwargs & algo2_defaults

        algo1_config = algo1_defaults | algo1_specified
        algo2_config = algo2_defaults | algo2_specified

        unused_kwargs = kwargs - (algo1_defaults | algo2_defaults)

        print('algo1_specified = {}'.format(ub.urepr(algo1_specified, nl=1)))
        print('algo2_specified = {}'.format(ub.urepr(algo2_specified, nl=1)))
        print(f'algo1_config={algo1_config}')
        print(f'algo2_config={algo2_config}')
        print(f'The following kwargs were unused {unused_kwargs}')

    print(chr(10))
    print('-- Run with some specified --')
    run_multiple_algos(src='box', opt2='fox')

    print(chr(10))
    print('-- Run with extra unspecified --')
    run_multiple_algos(a=1, b=2)

Produces:
.. code::

    -- Run with some specified --
    algo1_specified = {
        'opt2': 'fox',
    }
    algo2_specified = {
        'src': 'box',
    }
    algo1_config={'opt1': 10, 'opt2': 'fox'}
    algo2_config={'src': 'box', 'dst': './there'}
    The following kwargs were unused {}

    -- Run with extra unspecified --
    algo1_specified = {}
    algo2_specified = {}
    algo1_config={'opt1': 10, 'opt2': 11}
    algo2_config={'src': './here/', 'dst': './there'}
    The following kwargs were unused {'a': 1, 'b': 2}

Find Duplicates
---------------

Find all duplicate items in a list. More specifically, ``ub.find_duplicates``
searches for items that appear more than ``k`` times, and returns a mapping
from each duplicate item to the positions it appeared in.

.. code:: python

    >>> import ubelt as ub
    >>> items = [0, 0, 1, 2, 3, 3, 0, 12, 2, 9]
    >>> ub.find_duplicates(items, k=2)
    {0: [0, 1, 6], 2: [3, 8], 3: [4, 5]}

Cross-Platform Config and Cache Directories
-------------------------------------------

If you have an application which writes configuration or cache files, the
standard place to dump those files differs depending on whether you are on
Windows, Linux, or Mac. Ubelt offers unified functions for determining what
these paths are.

New in version 1.0.0: the ``ub.Path.appdir`` classmethod provides a way to
achieve the above with a chainable object-oriented interface. The
``ub.Path.appdir(..., type='cache')``, ``ub.Path.appdir(..., type='config')``,
and ``ub.Path.appdir(..., type='data')`` functions find the correct
platform-specific location for these files, and calling ``ensuredir`` ensures
that the directories exist.

The config root directory is ``~/AppData/Roaming`` on Windows, ``~/.config``
on Linux, and ``~/Library/Application Support`` on Mac. The cache root
directory is ``~/AppData/Local`` on Windows, ``~/.cache`` on Linux, and
``~/Library/Caches`` on Mac.

Example usage on Linux might look like this:

.. code:: python

    >>> import ubelt as ub
    >>> print(ub.Path.appdir('my_app').ensuredir().shrinkuser())  # default is cache
    ~/.cache/my_app
    >>> print(ub.Path.appdir('my_app', type='config').ensuredir().shrinkuser())
    ~/.config/my_app

Symlinks
--------

The ``ub.symlink`` function will create a symlink similar to ``os.symlink``.
The main differences are that 1) it will not error if the symlink exists and
already points to the correct location, and 2) it works\* on Windows (\*hard
links and junctions are used if real symlinks are not available).

.. code:: python

    >>> import ubelt as ub
    >>> dpath = ub.Path.appdir('ubelt', 'demo_symlink').ensuredir()
    >>> real_path = dpath / 'real_file.txt'
    >>> link_path = dpath / 'link_file.txt'
    >>> real_path.write_text('foo')
    >>> ub.symlink(real_path, link_path)

AutoDict - Autovivification
---------------------------

While the ``collections.defaultdict`` is nice, it is sometimes more convenient
to have an infinitely nested dictionary of dictionaries.

.. code:: python

    >>> import ubelt as ub
    >>> auto = ub.AutoDict()
    >>> print('auto = {!r}'.format(auto))
    auto = {}
    >>> auto[0][10][100] = None
    >>> print('auto = {!r}'.format(auto))
    auto = {0: {10: {100: None}}}
    >>> auto[0][1] = 'hello'
    >>> print('auto = {!r}'.format(auto))
    auto = {0: {1: 'hello', 10: {100: None}}}

String-based imports
--------------------

Ubelt contains functions to import modules dynamically without using the
python ``import`` statement. While ``importlib`` exists, the ``ubelt``
implementation is simpler to use and does not have the disadvantage of
breaking ``pytest``.
Note ``ubelt`` simply provides an interface to this functionality; the core
implementation is in ``xdoctest`` (however, as of version ``0.7.0``, the code
is statically copied into an autogenerated file such that ``ubelt`` does not
actually depend on ``xdoctest`` during runtime).

.. code:: python

    >>> import ubelt as ub
    >>> try:
    >>>     # This is where I keep ubelt on my machine, so it is not expected to work elsewhere.
    >>>     module = ub.import_module_from_path(ub.expandpath('~/code/ubelt/ubelt'))
    >>>     print('module = {!r}'.format(module))
    >>> except OSError:
    >>>     pass
    >>>
    >>> module = ub.import_module_from_name('ubelt')
    >>> print('module = {!r}'.format(module))
    >>> #
    >>> try:
    >>>     module = ub.import_module_from_name('does-not-exist')
    >>>     raise AssertionError
    >>> except ModuleNotFoundError:
    >>>     pass
    >>> #
    >>> modpath = ub.Path(ub.util_import.__file__)
    >>> print(ub.modpath_to_modname(modpath))
    >>> modname = ub.util_import.__name__
    >>> assert ub.Path(ub.modname_to_modpath(modname)).resolve() == modpath.resolve()
    module = <module 'ubelt' from '...'>

    >>> module = ub.import_module_from_name('ubelt')
    >>> print('module = {!r}'.format(module))
    module = <module 'ubelt' from '...'>

Related to this functionality are the functions ``ub.modpath_to_modname`` and
``ub.modname_to_modpath``, which *statically* transform (i.e. no code in the
target modules is imported or executed) between module names
(e.g. ``ubelt.util_import``) and module paths
(e.g. ``~/.local/conda/envs/cenv3/lib/python3.5/site-packages/ubelt/util_import.py``).

.. code:: python

    >>> import ubelt as ub
    >>> modpath = ub.util_import.__file__
    >>> print(ub.modpath_to_modname(modpath))
    ubelt.util_import
    >>> modname = ub.util_import.__name__
    >>> assert ub.modname_to_modpath(modname) == modpath

Horizontal String Concatenation
-------------------------------

Sometimes it's just prettier to horizontally concatenate two blocks of text.

.. code:: python

    >>> import ubelt as ub
    >>> B = ub.urepr([[1, 2], [3, 4]], nl=1, cbr=True, trailsep=False)
    >>> C = ub.urepr([[5, 6], [7, 8]], nl=1, cbr=True, trailsep=False)
    >>> print(ub.hzcat(['A = ', B, ' * ', C]))
    A = [[1, 2], * [[5, 6],
         [3, 4]]    [7, 8]]

Timing
------

Quickly time a single line.

.. code:: python

    >>> import math
    >>> import ubelt as ub
    >>> timer = ub.Timer('Timer demo!', verbose=1)
    >>> with timer:
    >>>     math.factorial(100000)
    tic('Timer demo!')
    ...toc('Timer demo!')=0.1453s
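Parallel Jobs
-------------

``ub.JobPool`` (and the lower-level ``ub.Executor``) wrap
``concurrent.futures`` so the same code can run with threads, processes, or
serially. The following is a minimal sketch based on the documented
``submit`` / ``as_completed`` pattern:

.. code:: python

    >>> import ubelt as ub
    >>> def work(x):
    ...     return x ** 2
    >>> pool = ub.JobPool(mode='thread', max_workers=4)
    >>> for i in range(10):
    ...     pool.submit(work, i)
    >>> results = sorted(job.result() for job in pool.as_completed())
    >>> assert results[:3] == [0, 1, 4]

Setting ``mode='serial'`` runs the same code without any concurrency, which
makes stepping through it in a debugger much easier.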
External tools
--------------

Some of the tools in ``ubelt`` also exist as standalone modules. I haven't
decided if it's best to statically copy them into ubelt or rely on pypi to
satisfy the dependency. There are some tools that are not used by default
unless you explicitly allow for them.

Code that is currently statically included (vendored):

- ProgIter - https://github.com/Erotemic/progiter
- OrderedSet - https://github.com/LuminosoInsight/ordered-set

Code that is completely optional, and only used in specific cases:

- Numpy - ``ub.urepr`` will format a numpy array nicely by default
- xxhash - this can be specified as a hasher to ``ub.hash_data``
- Pygments - used by the ``util_color`` module.
- dateutil - used by the ``util_time`` module.

Similar Tools
-------------

UBelt is one of many Python utility libraries. A selection of similar
libraries is listed here.

Libraries that contain a broad scope of utilities:

* Boltons: https://github.com/mahmoud/boltons
* Toolz: https://github.com/pytoolz/toolz
* CyToolz: https://github.com/pytoolz/cytoolz/
* UnStdLib: https://github.com/shazow/unstdlib.py

Libraries that contain a specific scope of utilities:

* More-Itertools: iteration tools: https://pypi.org/project/more-itertools/
* Funcy: functional tools: https://github.com/Suor/funcy
* Rich: pretty CLI displays - https://github.com/willmcgugan/rich
* tempora: time related tools - https://github.com/jaraco/tempora

Libraries that contain one specific data structure or utility:

* Benedict: dictionary tools - https://pypi.org/project/python-benedict/
* tqdm: progress bars - https://pypi.org/project/tqdm/
* pooch: data downloading - https://pypi.org/project/pooch/
* timerit: snippet timing for benchmarks - https://github.com/Erotemic/timerit

Jaraco (i.e. Jason R. Coombs) has an extensive library of utilities:

* jaraco.classes - https://github.com/jaraco/jaraco.classes
* jaraco.collections - https://github.com/jaraco/jaraco.collections
* jaraco.context - https://github.com/jaraco/jaraco.context
* jaraco.crypto - https://github.com/jaraco/jaraco.crypto
* jaraco.functools - https://github.com/jaraco/jaraco.functools
* jaraco.geo - https://github.com/jaraco/jaraco.geo
* jaraco.imaging - https://github.com/jaraco/jaraco.imaging
* jaraco.itertools - https://github.com/jaraco/jaraco.itertools
* jaraco.logging - https://github.com/jaraco/jaraco.logging
* jaraco.media - https://github.com/jaraco/jaraco.media
* jaraco.path - https://github.com/jaraco/jaraco.path
* jaraco.text - https://github.com/jaraco/jaraco.text
* jaraco.util - https://github.com/jaraco/jaraco.util
* jaraco.windows - https://github.com/jaraco/jaraco.windows
* and many others not listed here. See: https://github.com/jaraco?tab=repositories&q=jaraco.

Ubelt is included in the `bestof-python list
<https://github.com/ml-tooling/best-of-python>`_, which contains many other
tools that you should check out.

History:
========

Ubelt is a migration of the most useful parts of
``utool`` (https://github.com/Erotemic/utool) into a standalone module with
minimal dependencies.

The ``utool`` library contains a number of useful utility functions, but it
also contained non-useful functions, as well as the kitchen sink. A number of
the functions were too specific or not well documented. ``ubelt`` is a port of
the simplest and most useful parts of ``utool``.

Note that there are other cool things in ``utool`` that are not in ``ubelt``.
Notably, the doctest harness ultimately became
`xdoctest <https://github.com/Erotemic/xdoctest>`__. Code introspection and
dynamic analysis tools were ported to
`xinspect <https://github.com/Erotemic/xinspect>`__. The more IPython-y tools
were ported to `xdev <https://github.com/Erotemic/xdev>`__. Parts of it made
their way into ``scriptconfig``. The init-file generation was moved to
`mkinit <https://github.com/Erotemic/mkinit>`__. Some vim and system-y things
can be found in `vimtk <https://github.com/Erotemic/vimtk>`__.

Development on ubelt started 2017-01-30. Development of utool mostly stopped
later that year, though it received patches until about 2020. Ubelt achieved
1.0.0, and removed support for Python 2.7 and 3.5, on 2022-01-07.

Notes.
------

PRs are welcome.
Also check out my other projects which are powered by ubelt: - xinspect https://github.com/Erotemic/xinspect - xdev https://github.com/Erotemic/xdev - vimtk https://github.com/Erotemic/vimtk - graphid https://github.com/Erotemic/graphid - ibeis https://github.com/Erotemic/ibeis - kwarray https://github.com/Kitware/kwarray - kwimage https://github.com/Kitware/kwimage - kwcoco https://github.com/Kitware/kwcoco And my projects related to ubelt: - ProgIter https://github.com/Erotemic/progiter - Timerit https://github.com/Erotemic/timerit - mkinit https://github.com/Erotemic/mkinit - xdoctest https://github.com/Erotemic/xdoctest .. |CircleCI| image:: https://circleci.com/gh/Erotemic/ubelt.svg?style=svg :target: https://circleci.com/gh/Erotemic/ubelt .. |Travis| image:: https://img.shields.io/travis/Erotemic/ubelt/main.svg?label=Travis%20CI :target: https://travis-ci.org/Erotemic/ubelt?branch=main .. |Appveyor| image:: https://ci.appveyor.com/api/projects/status/github/Erotemic/ubelt?branch=main&svg=True :target: https://ci.appveyor.com/project/Erotemic/ubelt/branch/main .. |Codecov| image:: https://codecov.io/github/Erotemic/ubelt/badge.svg?branch=main&service=github :target: https://codecov.io/github/Erotemic/ubelt?branch=main .. |Pypi| image:: https://img.shields.io/pypi/v/ubelt.svg :target: https://pypi.python.org/pypi/ubelt .. |Downloads| image:: https://img.shields.io/pypi/dm/ubelt.svg :target: https://pypistats.org/packages/ubelt .. |ReadTheDocs| image:: https://readthedocs.org/projects/ubelt/badge/?version=latest :target: http://ubelt.readthedocs.io/en/latest/ .. |CodeQuality| image:: https://api.codacy.com/project/badge/Grade/4d815305fc014202ba7dea09c4676343 :target: https://www.codacy.com/manual/Erotemic/ubelt?utm_source=github.com&utm_medium=referral&utm_content=Erotemic/ubelt&utm_campaign=Badge_Grade .. |GithubActions| image:: https://github.com/Erotemic/ubelt/actions/workflows/tests.yml/badge.svg?branch=main :target: https://github.com/Erotemic/ubelt/actions?query=branch%3Amain .. |TwitterFollow| image:: https://img.shields.io/twitter/follow/Erotemic.svg?style=social :target: https://twitter.com/Erotemic ubelt-1.3.7/clean.sh000077500000000000000000000003601472470106000142560ustar00rootroot00000000000000#!/bin/bash __doc__=" Remove intermediate clutter files " find . -regex ".*\(__pycache__\|\.py[co]\)" -delete || find . -iname "*.pyc" -delete || find . 
-iname "*.pyo" -delete rm -rf build rm -rf htmlcov rm -f .coverage rm -f .coverage.* ubelt-1.3.7/conftest.py000066400000000000000000000003051472470106000150330ustar00rootroot00000000000000# content of conftest.py import pytest # NOQA def pytest_addoption(parser): # Allow --network to be passed in as an option on sys.argv parser.addoption("--network", action="store_true") ubelt-1.3.7/dev/000077500000000000000000000000001472470106000134145ustar00rootroot00000000000000ubelt-1.3.7/dev/bench/000077500000000000000000000000001472470106000144735ustar00rootroot00000000000000ubelt-1.3.7/dev/bench/bench_defaultdict.py000066400000000000000000000210631472470106000204760ustar00rootroot00000000000000""" This code is based on a template that lives in: https://github.com/Erotemic/timerit/blob/main/examples/benchmark_template.py Or typically on a dev machine in ~/code/timerit/examples/benchmark_template.py SeeAlso: ~/code/ubelt/dev/bench/bench_dict_hist.py CommandLine: python ~/code/ubelt/dev/bench/bench_defaultdict.py """ def benchmark_template(): import ubelt as ub import pandas as pd import timerit from collections import defaultdict plot_labels = { 'x': 'Input Size', 'y': 'Time', 'title': 'Dict[Any, List] Accumulation Benchmark', } # Some bookkeeping needs to be done to build a dictionary that maps the # method names to the functions themselves. method_lut = {} def register_method(func): method_lut[func.__name__] = func return func # Define the methods you want to benchmark. The arguments should be # parameters that you want to vary in the test. @register_method def accum_defaultdict(input_data): d = defaultdict(list) for item in input_data: d[item].append(item) return d @register_method def accumulate_dict_setdefault(input_data): d = dict() for item in input_data: d.setdefault(item, []) d[item].append(item) return d @register_method def accumulate_dict_ifcontains(input_data): d = dict() for item in input_data: if item not in d: d[item] = [] d[item].append(item) return d @register_method def accumulate_dict_tryexcept(input_data): d = dict() for item in input_data: try: d[item].append(item) except KeyError: d[item] = [] d[item].append(item) return d # @register_method # def accumulate_dict_tryexcept_v3(input_data): # d = dict() # for item in input_data: # try: # d[item].append(item) # except KeyError: # d[item] = [item] # return d # @register_method # def accumulate_dict_tryexcept_v2(input_data): # d = dict() # for item in input_data: # try: # arr = d[item] # except KeyError: # arr = d[item] = [] # arr.append(item) # return d def generate_input(input_size, num_unique_items): import random rng = random.Random() pool = range(num_unique_items) input_data = rng.choices(pool, k=input_size) return input_data # Change params here to modify number of trials ti = timerit.Timerit(1000, bestof=100, verbose=1) # if True, record every trail run and show variance in seaborn # if False, use the standard timerit min/mean measures RECORD_ALL = True # These are the parameters that we benchmark over basis = { 'method': list(method_lut), # i.e. ['method1', 'method2'] 'input_size': [10, 100, 500, 1000, 2500, 5000, 7500, 10_000, 20_000], 'num_unique_items': [ # 1, 10, 100 100, ], # 'zparam': [2, 3] # 'param_name': [param values], } xlabel = 'input_size' group_labels = { 'style': ['num_unique_items'], # 'size': ['zparam'], } group_labels['hue'] = list( (ub.oset(basis) - {xlabel}) - set.union(*map(set, group_labels.values()))) grid_iter = list(ub.named_product(basis)) # For each variation of your experiment, create a row. 
rows = [] for params in grid_iter: params = ub.udict(params) group_keys = {} for gname, labels in group_labels.items(): group_keys[gname + '_key'] = ub.urepr( params & labels, compact=1, si=1) key = ub.urepr(params, compact=1, si=1) method = method_lut[params['method']] # Timerit will run some user-specified number of loops. # and compute time stats with similar methodology to timeit for timer in ti.reset(key): # Put any setup logic you dont want to time here. # ... input_size = params['input_size'] num_unique_items = params['num_unique_items'] input_data = generate_input(input_size, num_unique_items) with timer: # Put the logic you want to time here method(input_data) if RECORD_ALL: # Seaborn will show the variance if this is enabled, otherwise # use the robust timerit mean / min times # chunk_iter = ub.chunks(ti.times, ti.bestof) # times = list(map(min, chunk_iter)) # TODO: timerit method for this times = ti.robust_times() for time in times: row = { # 'mean': ti.mean(), 'time': time, 'key': key, **group_keys, **params, } rows.append(row) else: row = { 'mean': ti.mean(), 'min': ti.min(), 'key': key, **group_keys, **params, } rows.append(row) time_key = 'time' if RECORD_ALL else 'min' # The rows define a long-form pandas data array. # Data in long-form makes it very easy to use seaborn. data = pd.DataFrame(rows) data = data.sort_values(time_key) if RECORD_ALL: # Show the min / mean if we record all min_times = data.groupby('key').min().rename({'time': 'min'}, axis=1) mean_times = data.groupby('key')[['time']].mean().rename({'time': 'mean'}, axis=1) stats_data = pd.concat([min_times, mean_times], axis=1) stats_data = stats_data.sort_values('min') else: stats_data = data USE_OPENSKILL = 0 if USE_OPENSKILL: # Lets try a real ranking method # https://github.com/OpenDebates/openskill.py import openskill method_ratings = {m: openskill.Rating() for m in basis['method']} other_keys = sorted(set(stats_data.columns) - {'key', 'method', 'min', 'mean', 'hue_key', 'size_key', 'style_key'}) for params, variants in stats_data.groupby(other_keys): variants = variants.sort_values('mean') ranking = variants['method'].reset_index(drop=True) mean_speedup = variants['mean'].max() / variants['mean'] stats_data.loc[mean_speedup.index, 'mean_speedup'] = mean_speedup min_speedup = variants['min'].max() / variants['min'] stats_data.loc[min_speedup.index, 'min_speedup'] = min_speedup if USE_OPENSKILL: # The idea is that each setting of parameters is a game, and each # "method" is a player. We rank the players by which is fastest, # and update their ranking according to the Weng-Lin Bayes ranking # model. This does not take the fact that some "games" (i.e. # parameter settings) are more important than others, but it should # be fairly robust on average. old_ratings = [[r] for r in ub.take(method_ratings, ranking)] new_values = openskill.rate(old_ratings) # Not inplace new_ratings = [openskill.Rating(*new[0]) for new in new_values] method_ratings.update(ub.dzip(ranking, new_ratings)) print('Statistics:') print(stats_data) if USE_OPENSKILL: from openskill import predict_win win_prob = predict_win([[r] for r in method_ratings.values()]) skill_agg = pd.Series(ub.dzip(method_ratings.keys(), win_prob)).sort_values(ascending=False) print('Aggregated Rankings =\n{}'.format(skill_agg)) plot = True if plot: # import seaborn as sns # kwplot autosns works well for IPython and script execution. # not sure about notebooks. 
import kwplot sns = kwplot.autosns() plt = kwplot.autoplt() plotkw = {} for gname, labels in group_labels.items(): if labels: plotkw[gname] = gname + '_key' # Your variables may change ax = kwplot.figure(fnum=1, doclf=True).gca() sns.lineplot(data=data, x=xlabel, y=time_key, marker='o', ax=ax, **plotkw) ax.set_title(plot_labels['title']) ax.set_xlabel(plot_labels['x']) ax.set_ylabel(plot_labels['y']) ax.set_xscale('log') ax.set_yscale('log') try: __IPYTHON__ except NameError: plt.show() if __name__ == '__main__': benchmark_template() ubelt-1.3.7/dev/bench/bench_dict_diff_impl.py000066400000000000000000000241371472470106000211470ustar00rootroot00000000000000 def variant(): import random import ubelt as ub num_items = 100 num_other = 1 first_keys = [random.randint(0, 1000) for _ in range(num_items)] remove_sets = [list(ub.unique(random.choices(first_keys, k=10) + [random.randint(0, 1000) for _ in range(num_items)])) for _ in range(num_other)] first_dict = {k: k for k in first_keys} args = [first_dict] + [{k: k for k in ks} for ks in remove_sets] dictclass = dict import timerit ti = timerit.Timerit(100, bestof=10, verbose=2) for timer in ti.reset('orig'): with timer: keys = set(first_dict) keys.difference_update(*map(set, args[1:])) new0 = dictclass((k, first_dict[k]) for k in keys) for timer in ti.reset('alt1'): with timer: remove_keys = {k for ks in args[1:] for k in ks} new1 = dictclass((k, v) for k, v in first_dict.items() if k not in remove_keys) for timer in ti.reset('alt2'): with timer: remove_keys = set.union(*map(set, args[1:])) new2 = dictclass((k, v) for k, v in first_dict.items() if k not in remove_keys) for timer in ti.reset('alt3'): with timer: remove_keys = set.union(*map(set, args[1:])) new3 = dictclass((k, first_dict[k]) for k in first_dict.keys() if k not in remove_keys) # Cannot use until 3.6 is dropped (it is faster) for timer in ti.reset('alt4'): with timer: remove_keys = set.union(*map(set, args[1:])) new4 = {k: v for k, v in first_dict.items() if k not in remove_keys} assert new1 == new0 assert new2 == new0 assert new3 == new0 assert new4 == new0 def benchmark_dict_diff_impl(): import ubelt as ub import pandas as pd import timerit import random def method_diffkeys(*args): first_dict = args[0] keys = set(first_dict) keys.difference_update(*map(set, args[1:])) new0 = dict((k, first_dict[k]) for k in keys) return new0 def method_diffkeys_list(*args): first_dict = args[0] remove_keys = set.union(*map(set, args[1:])) keep_keys = [k for k in first_dict.keys() if k not in remove_keys] new = dict((k, first_dict[k]) for k in keep_keys) return new def method_diffkeys_oset(*args): first_dict = args[0] keys = ub.oset(first_dict) keys.difference_update(*map(set, args[1:])) new0 = dict((k, first_dict[k]) for k in keys) return new0 def method_ifkeys_setcomp(*args): first_dict = args[0] remove_keys = {k for ks in args[1:] for k in ks} new1 = dict((k, v) for k, v in first_dict.items() if k not in remove_keys) return new1 def method_ifkeys_setunion(*args): first_dict = args[0] remove_keys = set.union(*map(set, args[1:])) new2 = dict((k, v) for k, v in first_dict.items() if k not in remove_keys) return new2 def method_ifkeys_getitem(*args): first_dict = args[0] remove_keys = set.union(*map(set, args[1:])) new3 = dict((k, first_dict[k]) for k in first_dict.keys() if k not in remove_keys) return new3 def method_ifkeys_dictcomp(*args): # Cannot use until 3.6 is dropped (it is faster) first_dict = args[0] remove_keys = set.union(*map(set, args[1:])) new4 = {k: v for k, v in first_dict.items() 
if k not in remove_keys} return new4 def method_ifkeys_dictcomp_getitem(*args): # Cannot use until 3.6 is dropped (it is faster) first_dict = args[0] remove_keys = set.union(*map(set, args[1:])) new4 = {k: first_dict[k] for k in first_dict.keys() if k not in remove_keys} return new4 method_lut = locals() # can populate this some other way def make_data(num_items, num_other, remove_fraction, keytype): if keytype == 'str': keytype = str if keytype == 'int': keytype = int first_keys = [random.randint(0, 1000) for _ in range(num_items)] k = int(remove_fraction * len(first_keys)) remove_sets = [list(ub.unique(random.choices(first_keys, k=k) + [random.randint(0, 1000) for _ in range(num_items)])) for _ in range(num_other)] first_dict = {keytype(k): k for k in first_keys} args = [first_dict] + [{keytype(k): k for k in ks} for ks in remove_sets] return args ti = timerit.Timerit(200, bestof=1, verbose=2) basis = { 'method': [ # Cant use because unordered # 'method_diffkeys', # Cant use because python 3.6 'method_ifkeys_dictcomp', 'method_ifkeys_dictcomp_getitem', 'method_ifkeys_setunion', 'method_ifkeys_getitem', 'method_diffkeys_list', # Probably not good # 'method_ifkeys_setcomp', # 'method_diffkeys_oset', ], 'num_items': [10, 100, 1000], 'num_other': [1, 3, 5], # 'num_other': [1], 'remove_fraction': [0, 0.2, 0.5, 0.7, 1.0], # 'remove_fraction': [0.2, 0.8], 'keytype': ['str', 'int'], # 'keytype': ['str'], # 'param_name': [param values], } xlabel = 'num_items' kw_labels = ['num_items', 'num_other', 'remove_fraction', 'keytype'] group_labels = { 'style': ['num_other', 'keytype'], 'size': ['remove_fraction'], } group_labels['hue'] = list( (ub.oset(basis) - {xlabel}) - set.union(*map(set, group_labels.values()))) grid_iter = list(ub.named_product(basis)) # For each variation of your experiment, create a row. rows = [] for params in grid_iter: group_keys = {} for gname, labels in group_labels.items(): group_keys[gname + '_key'] = ub.repr2( ub.dict_isect(params, labels), compact=1, si=1) key = ub.repr2(params, compact=1, si=1) kwargs = ub.dict_isect(params.copy(), kw_labels) args = make_data(**kwargs) method = method_lut[params['method']] # Timerit will run some user-specified number of loops. # and compute time stats with similar methodology to timeit for timer in ti.reset(key): # Put any setup logic you dont want to time here. # ... with timer: # Put the logic you want to time here method(*args) row = { 'mean': ti.mean(), 'min': ti.min(), 'key': key, **group_keys, **params, } rows.append(row) # The rows define a long-form pandas data array. # Data in long-form makes it very easy to use seaborn. data = pd.DataFrame(rows) data = data.sort_values('min') print(data) # for each parameter setting, group all methods with that used those exact # comparable params. Then rank how good each method did. That will be a # preference profile. We will give that preference profile a weight (e.g. # based on the fastest method in the bunch) and then aggregate them with # some voting method. 
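    # A minimal sketch of the weighted-rank aggregation described above,
    # with two hypothetical methods and per-setting weights (the real
    # implementation below uses the same idea with timing-based weights):
    #   profiles = [(['m1', 'm2'], 0.5), (['m2', 'm1'], 0.1)]
    #   score = {m: 0.0 for m in ['m1', 'm2']}
    #   for ranking, weight in profiles:
    #       for rank, m in enumerate(ranking):
    #           score[m] += weight * rank  # lower aggregate score is better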
USE_OPENSKILL = 1 if USE_OPENSKILL: # Lets try a real ranking method # https://github.com/OpenDebates/openskill.py import openskill method_ratings = {m: openskill.Rating() for m in basis['method']} weighted_rankings = ub.ddict(lambda: ub.ddict(float)) for params, variants in data.groupby(['num_other', 'keytype', 'remove_fraction', 'num_items']): variants = variants.sort_values('mean') ranking = variants['method'].reset_index(drop=True) if USE_OPENSKILL: # The idea is that each setting of parameters is a game, and each # "method" is a player. We rank the players by which is fastest, # and update their ranking according to the Weng-Lin Bayes ranking # model. This does not take the fact that some "games" (i.e. # parameter settings) are more important than others, but it should # be fairly robust on average. old_ratings = [[r] for r in ub.take(method_ratings, ranking)] new_values = openskill.rate(old_ratings) # Not inplace new_ratings = [openskill.Rating(*new[0]) for new in new_values] method_ratings.update(ub.dzip(ranking, new_ratings)) # Choose a ranking weight scheme weight = variants['mean'].min() # weight = 1 for rank, method in enumerate(ranking): weighted_rankings[method][rank] += weight weighted_rankings[method]['total'] += weight # Probably a more robust voting method to do this weight_rank_rows = [] for method_name, ranks in weighted_rankings.items(): weights = ub.dict_diff(ranks, ['total']) p_rank = ub.map_values(lambda w: w / ranks['total'], weights) for rank, w in p_rank.items(): weight_rank_rows.append({'rank': rank, 'weight': w, 'name': method_name}) weight_rank_df = pd.DataFrame(weight_rank_rows) piv = weight_rank_df.pivot(['name'], ['rank'], ['weight']) print(piv) if USE_OPENSKILL: from openskill import predict_win win_prob = predict_win([[r] for r in method_ratings.values()]) skill_agg = pd.Series(ub.dzip(method_ratings.keys(), win_prob)).sort_values(ascending=False) print('skill_agg =\n{}'.format(skill_agg)) aggregated = (piv * piv.columns.levels[1].values).sum(axis=1).sort_values() print('weight aggregated =\n{}'.format(aggregated)) plot = True if plot: # import seaborn as sns # kwplot autosns works well for IPython and script execution. # not sure about notebooks. 
import kwplot sns = kwplot.autosns() plotkw = {} for gname, labels in group_labels.items(): if labels: plotkw[gname] = gname + '_key' # Your variables may change ax = kwplot.figure(fnum=1, doclf=True).gca() sns.lineplot(data=data, x=xlabel, y='min', marker='o', ax=ax, **plotkw) ax.set_title('Benchmark') ax.set_xlabel('A better x-variable description') ax.set_ylabel('A better y-variable description') ubelt-1.3.7/dev/bench/bench_dict_hist.py000066400000000000000000000077651472470106000201750ustar00rootroot00000000000000""" SeeAlso: ~/code/ubelt/dev/bench/bench_defaultdict.py """ def bench_dict_hist_finalize(): """ CommandLine: xdoctest -m ~/code/ubelt/dev/bench_dict_hist.py bench_dict_hist_finalize Results: Timed best=48.330 µs, mean=49.437 ± 1.0 µs for dict_subset_iter Timed best=59.392 µs, mean=63.395 ± 11.9 µs for dict_subset_list Timed best=47.203 µs, mean=47.632 ± 0.2 µs for direct_itemgetter """ import operator as op import ubelt as ub import timerit import random import string rng = random.Random(0) items = [rng.choice(string.printable) for _ in range(5000)] hist_ = ub.ddict(lambda: 0) for item in items: hist_[item] += 1 OrderedDict = ub.odict ti = timerit.Timerit(1000, bestof=10, verbose=1) for timer in ti.reset('dict_subset_iter'): with timer: getval = op.itemgetter(1) key_order = (key for (key, value) in sorted(hist_.items(), key=getval)) hist = ub.dict_subset(hist_, key_order) for timer in ti.reset('dict_subset_list'): with timer: getval = op.itemgetter(1) key_order = [key for (key, value) in sorted(hist_.items(), key=getval)] hist = ub.dict_subset(hist_, key_order) for timer in ti.reset('direct_itemgetter'): with timer: # WINNER getval = op.itemgetter(1) hist = OrderedDict([ (key, value) for (key, value) in sorted(hist_.items(), key=getval) ]) del hist def bench_dict_hist(): """ CommandLine: xdoctest -m ~/code/ubelt/dev/bench_dict_hist.py bench_dict_hist_finalize Results: Timed best=48.330 µs, mean=49.437 ± 1.0 µs for dict_subset_iter Timed best=59.392 µs, mean=63.395 ± 11.9 µs for dict_subset_list Timed best=47.203 µs, mean=47.632 ± 0.2 µs for direct_itemgetter """ import ubelt as ub import timerit import random import string import collections rng = random.Random(0) items = [rng.choice(string.printable) for _ in range(5000)] ti = timerit.Timerit(1000, bestof=10, verbose=1) for timer in ti.reset('ub.dict_hist'): with timer: hist1 = ub.dict_hist(items) for timer in ti.reset('collections.Counter direct, no postprocess'): with timer: hist2 = collections.Counter(items) for timer in ti.reset('collections.Counter direct'): with timer: hist2 = dict(collections.Counter(items)) for timer in ti.reset('collections.Counter iterated'): with timer: hist3 = collections.Counter() for item in items: hist3.update(item) hist3 = dict(hist3) assert hist2 == hist1 assert hist3 == hist1 def bench_sort_dictionary(): """ CommandLine: xdoctest -m ~/code/ubelt/dev/bench_dict_hist.py bench_sort_dictionary Results: Timed best=25.484 µs, mean=25.701 ± 0.1 µs for itemgetter Timed best=28.810 µs, mean=29.138 ± 0.3 µs for lambda """ import operator as op import ubelt as ub import random import string rng = random.Random(0) items = [rng.choice(string.printable) for _ in range(5000)] hist_ = ub.ddict(lambda: 0) for item in items: hist_[item] += 1 ti = ub.Timerit(1000, bestof=10, verbose=1) for timer in ti.reset('itemgetter'): with timer: # WINNER getval = op.itemgetter(1) key_order = [key for (key, value) in sorted(hist_.items(), key=getval)] for timer in ti.reset('lambda'): with timer: key_order = [key for (key, 
value) in sorted(hist_.items(), key=lambda x: x[1])] del key_order if __name__ == '__main__': """ CommandLine: python ~/code/ubelt/dev/bench/bench_dict_hist.py bench_sort_dictionary python ~/code/ubelt/dev/bench/bench_dict_hist.py bench_dict_hist_finalize """ import fire fire.Fire() ubelt-1.3.7/dev/bench/bench_dict_operations.py000066400000000000000000000044651472470106000214030ustar00rootroot00000000000000def bench_dict_isect(): import ubelt as ub def random_dict(n): import random keys = set(random.randint(0, n) for _ in range(n)) return {k: k for k in keys} d1 = random_dict(1000) d2 = random_dict(1000) import xdev xdev.profile_now(ub.dict_isect)(d1, d2) xdev.profile_now(dict_isect_variant0)(d1, d2) xdev.profile_now(dict_isect_variant1)(d1, d2) xdev.profile_now(dict_isect_variant2)(d1, d2) xdev.profile_now(dict_isect_variant3)(d1, d2) import timerit ti = timerit.Timerit(100, bestof=10, verbose=2) for timer in ti.reset('current'): with timer: ub.dict_isect(d1, d2) for timer in ti.reset('inline'): with timer: {k: v for k, v in d1.items() if k in d2} for timer in ti.reset('dict_isect_variant0'): with timer: dict_isect_variant0(d1, d2) for timer in ti.reset('dict_isect_variant1'): with timer: dict_isect_variant1(d1, d2) for timer in ti.reset('dict_isect_variant2'): with timer: dict_isect_variant1(d1, d2) for timer in ti.reset('dict_isect_variant3'): with timer: dict_isect_variant3(d1, d2) print('ti.rankings = {}'.format(ub.repr2(ti.rankings['min'], precision=8, align=':', nl=1, sort=0))) def dict_isect_variant0(d1, d2): return {k: v for k, v in d1.items() if k in d2} def dict_isect_variant1(*args): if not args: return {} else: dictclass = args[0].__class__ common_keys = set.intersection(*map(set, args)) first_dict = args[0] return dictclass((k, first_dict[k]) for k in first_dict if k in common_keys) def dict_isect_variant2(*args): if not args: return {} else: dictclass = args[0].__class__ common_keys = set.intersection(*map(set, args)) first_dict = args[0] return dictclass((k, first_dict[k]) for k in common_keys) def dict_isect_variant3(*args): if not args: return {} else: common_keys = set.intersection(*map(set, args)) first_dict = args[0] return {k: first_dict[k] for k in common_keys} if __name__ == '__main__': """ CommandLine: python ~/code/ubelt/dev/bench_dict_operations.py """ bench_dict_isect() ubelt-1.3.7/dev/bench/bench_group_items.py000066400000000000000000000210731472470106000205440ustar00rootroot00000000000000""" This code is based on a template that lives in: https://github.com/Erotemic/timerit/blob/main/examples/benchmark_template.py Or typically on a dev machine in ~/code/timerit/examples/benchmark_template.py SeeAlso: ~/code/ubelt/dev/bench/bench_dict_hist.py CommandLine: python ~/code/ubelt/dev/bench/bench_defaultdict.py """ def benchmark_template(): import ubelt as ub import pandas as pd import timerit from collections import defaultdict from itertools import groupby def get_0(arr): return arr[0] plot_labels = { 'x': 'Input Size', 'y': 'Time', 'title': 'Dict[Any, List] Accumulation Benchmark', } def keyfunc(r): return r['group_id'] # Some bookkeeping needs to be done to build a dictionary that maps the # method names to the functions themselves. method_lut = {} def register_method(func): method_lut[func.__name__] = func return func # Define the methods you want to benchmark. The arguments should be # parameters that you want to vary in the test. 
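    # Methods are registered with the decorator below. A hypothetical extra
    # variant would be added the same way (kept commented out here so it is
    # not picked up by basis['method'] and does not change the benchmark):
    # @register_method
    # def groupitems_setdefault(items):
    #     id_to_items = {}
    #     for item in items:
    #         id_to_items.setdefault(keyfunc(item), []).append(item)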
@register_method def groupitems_simple_append(items): pair_list = ((keyfunc(item), item) for item in items) id_to_items = defaultdict(list) # Insert each item into the correct group for key, item in pair_list: id_to_items[key].append(item) @register_method def groupitems_groupby_and_extend_v1(items): pair_list = ((keyfunc(item), item) for item in items) id_to_items = defaultdict(list) # Is this more efficient? Seems like the answer is no. for key, grouper in groupby(pair_list, key=get_0): id_to_items[key].extend(grouper) @register_method def groupitems_groupby_and_extend_v2(items): id_to_items = defaultdict(list) # Is this more efficient? Seems like the answer is no. # even when we optimize for the case of key being a callable # EXCEPT in the case where there are large numbers of consecutive items. for key, grouper in groupby(items, key=keyfunc): id_to_items[key].extend(grouper) def generate_input(input_size, num_unique_items): """ input_size, num_unique_items = 10, 10 """ import random rng = random.Random() pool = range(num_unique_items) group_ids = rng.choices(pool, k=input_size) items = [] for index, group_id in enumerate(group_ids): items.append({ 'index': index, 'group_id': group_id, }) return items # Change params here to modify number of trials ti = timerit.Timerit(100, bestof=10, verbose=1) # if True, record every trail run and show variance in seaborn # if False, use the standard timerit min/mean measures RECORD_ALL = True # These are the parameters that we benchmark over basis = { 'method': list(method_lut), # i.e. ['method1', 'method2'] 'input_size': [10, 100, 500, 1000, 2500, 5000, 7500, 10_000, 20_000], 'num_unique_items': [ # 1, # 2, 10, # 100 # 100, ], # 'zparam': [2, 3] # 'param_name': [param values], } xlabel = 'input_size' group_labels = { 'style': ['num_unique_items'], # 'size': ['zparam'], } group_labels['hue'] = list( (ub.oset(basis) - {xlabel}) - set.union(*map(set, group_labels.values()))) grid_iter = list(ub.named_product(basis)) # For each variation of your experiment, create a row. rows = [] for params in grid_iter: params = ub.udict(params) group_keys = {} for gname, labels in group_labels.items(): group_keys[gname + '_key'] = ub.urepr( params & labels, compact=1, si=1) key = ub.urepr(params, compact=1, si=1) method = method_lut[params['method']] # Timerit will run some user-specified number of loops. # and compute time stats with similar methodology to timeit for timer in ti.reset(key): # Put any setup logic you dont want to time here. # ... input_size = params['input_size'] num_unique_items = params['num_unique_items'] input_data = generate_input(input_size, num_unique_items) with timer: # Put the logic you want to time here method(input_data) if RECORD_ALL: # Seaborn will show the variance if this is enabled, otherwise # use the robust timerit mean / min times # chunk_iter = ub.chunks(ti.times, ti.bestof) # times = list(map(min, chunk_iter)) # TODO: timerit method for this times = ti.robust_times() for time in times: row = { # 'mean': ti.mean(), 'time': time, 'key': key, **group_keys, **params, } rows.append(row) else: row = { 'mean': ti.mean(), 'min': ti.min(), 'key': key, **group_keys, **params, } rows.append(row) time_key = 'time' if RECORD_ALL else 'min' # The rows define a long-form pandas data array. # Data in long-form makes it very easy to use seaborn. 
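    # For example, with RECORD_ALL enabled each row is a flat dict roughly
    # like the following (a sketch; the values vary per trial):
    #   {'time': 1.3e-05,
    #    'key': 'method=groupitems_simple_append,input_size=100,num_unique_items=10',
    #    'method': 'groupitems_simple_append',
    #    'input_size': 100, 'num_unique_items': 10, ...}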
data = pd.DataFrame(rows) data = data.sort_values(time_key) if RECORD_ALL: # Show the min / mean if we record all min_times = data.groupby('key').min().rename({'time': 'min'}, axis=1) mean_times = data.groupby('key')[['time']].mean().rename({'time': 'mean'}, axis=1) stats_data = pd.concat([min_times, mean_times], axis=1) stats_data = stats_data.sort_values('min') else: stats_data = data USE_OPENSKILL = 0 if USE_OPENSKILL: # Lets try a real ranking method # https://github.com/OpenDebates/openskill.py import openskill method_ratings = {m: openskill.Rating() for m in basis['method']} other_keys = sorted(set(stats_data.columns) - {'key', 'method', 'min', 'mean', 'hue_key', 'size_key', 'style_key'}) for params, variants in stats_data.groupby(other_keys): variants = variants.sort_values('mean') ranking = variants['method'].reset_index(drop=True) mean_speedup = variants['mean'].max() / variants['mean'] stats_data.loc[mean_speedup.index, 'mean_speedup'] = mean_speedup min_speedup = variants['min'].max() / variants['min'] stats_data.loc[min_speedup.index, 'min_speedup'] = min_speedup if USE_OPENSKILL: # The idea is that each setting of parameters is a game, and each # "method" is a player. We rank the players by which is fastest, # and update their ranking according to the Weng-Lin Bayes ranking # model. This does not take the fact that some "games" (i.e. # parameter settings) are more important than others, but it should # be fairly robust on average. old_ratings = [[r] for r in ub.take(method_ratings, ranking)] new_values = openskill.rate(old_ratings) # Not inplace new_ratings = [openskill.Rating(*new[0]) for new in new_values] method_ratings.update(ub.dzip(ranking, new_ratings)) print('Statistics:') print(stats_data) if USE_OPENSKILL: from openskill import predict_win win_prob = predict_win([[r] for r in method_ratings.values()]) skill_agg = pd.Series(ub.dzip(method_ratings.keys(), win_prob)).sort_values(ascending=False) print('Aggregated Rankings =\n{}'.format(skill_agg)) plot = True if plot: # import seaborn as sns # kwplot autosns works well for IPython and script execution. # not sure about notebooks. 
        import kwplot
        sns = kwplot.autosns()
        plt = kwplot.autoplt()

        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        ax = kwplot.figure(fnum=1, doclf=True).gca()
        sns.lineplot(data=data, x=xlabel, y=time_key, marker='o', ax=ax, **plotkw)
        ax.set_title(plot_labels['title'])
        ax.set_xlabel(plot_labels['x'])
        ax.set_ylabel(plot_labels['y'])
        ax.set_xscale('log')
        ax.set_yscale('log')

        try:
            __IPYTHON__
        except NameError:
            plt.show()


if __name__ == '__main__':
    benchmark_template()
ubelt-1.3.7/dev/bench/bench_hash.py000066400000000000000000000063221472470106000171320ustar00rootroot00000000000000
def benchmark_hash_data():
    """
    CommandLine:
        python ~/code/ubelt/dev/bench_hash.py --convert=True --show
        python ~/code/ubelt/dev/bench_hash.py --convert=False --show
    """
    import ubelt as ub
    #ITEM = 'JUST A STRING' * 100
    ITEM = [0, 1, 'a', 'b', ['JUST A STRING'] * 4]
    HASHERS = ['sha1', 'sha512', 'xxh32', 'xxh64', 'blake3']
    scales = list(range(5, 13))
    results = ub.AutoDict()
    # Using json conversion is faster, or at least as fast, in most cases.
    # xxhash is also significantly faster than sha512.
    convert = ub.argval('--convert', default='True').lower() == 'true'
    print('convert = {!r}'.format(convert))
    ti = ub.Timerit(9, bestof=3, verbose=1, unit='ms')
    for s in ub.ProgIter(scales, desc='benchmark', verbose=3):
        N = 2 ** s
        print(' --- s={s}, N={N} --- '.format(s=s, N=N))
        data = [ITEM] * N
        for hasher in HASHERS:
            for timer in ti.reset(hasher):
                ub.hash_data(data, hasher=hasher, convert=convert)
            results[hasher].update({N: ti.mean()})
        col = {h: results[h][N] for h in HASHERS}
        sortx = ub.argsort(col)
        ranking = ub.dict_subset(col, sortx)
        print('walltime: ' + ub.repr2(ranking, precision=9, nl=0))
        best = next(iter(ranking))
        #pairs = list(ub.iter_window( 2))
        pairs = [(k, best) for k in ranking]
        ratios = [ranking[k1] / ranking[k2] for k1, k2 in pairs]
        nicekeys = ['{}/{}'.format(k1, k2) for k1, k2 in pairs]
        relratios = ub.odict(zip(nicekeys, ratios))
        print('speedup: ' + ub.repr2(relratios, precision=4, nl=0))
    # xdoc +REQUIRES(--show)
    # import pytest
    # pytest.skip()
    import pandas as pd
    df = pd.DataFrame.from_dict(results)
    df.columns.name = 'hasher'
    df.index.name = 'N'
    ratios = df.copy().drop(columns=df.columns)
    for k1, k2 in [('sha512', 'xxh32'), ('sha1', 'xxh32'), ('xxh64', 'xxh32')]:
        ratios['{}/{}'.format(k1, k2)] = df[k1] / df[k2]
    print()
    print('Seconds per iteration')
    print(df.to_string(float_format='%.9f'))
    print()
    print('Ratios of seconds')
    print(ratios.to_string(float_format='%.2f'))
    print()
    print('Average Ratio (over all N)')
    print('convert = {!r}'.format(convert))
    print(ratios.mean().sort_values())
    if ub.argflag('--show'):
        import kwplot
        kwplot.autompl()
        xdata = sorted(ub.peek(results.values()).keys())
        ydata = ub.map_values(lambda d: [d[x] for x in xdata], results)
        kwplot.multi_plot(xdata, ydata, xlabel='N', ylabel='seconds',
                          title='convert = {}'.format(convert))
        kwplot.show_if_requested()


def benchmark_hash_extensions():
    """
    xdoctest ~/code/ubelt/dev/bench/bench_hash.py benchmark_hash_extensions
    """
    import ubelt as ub
    import uuid
    import numpy as np
    datas = [
        ub.Path('/'),
        uuid.uuid4(),
        np.array([1, 2, 3])
    ]
    import timerit
    ti = timerit.Timerit(10000, bestof=10, verbose=2)
    for timer in ti.reset('time'):
        with timer:
            for data in datas:
                ub.hash_data(data)


if __name__ == '__main__':
    """
    CommandLine:
        python ~/code/ubelt/dev/bench_hash.py
    """
    benchmark_hash_data()
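# Note: the --convert flag toggles ub.hash_data(..., convert=True), which
# first attempts to serialize the input with json before hashing. A minimal
# sketch of the difference (digests are not expected to match across modes):
#   import ubelt as ub
#   ub.hash_data([1, 2, 'a'], convert=False)  # hash the structure directly
#   ub.hash_data([1, 2, 'a'], convert=True)   # hash json.dumps([1, 2, 'a'])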
ubelt-1.3.7/dev/bench/bench_hash_file.py000066400000000000000000000254111472470106000201310ustar00rootroot00000000000000import random import string from os.path import join import ubelt as ub def hash_file2(fpath, blocksize=65536, hasher='xx64'): r""" Hashes the data in a file on disk using xxHash xxHash is much faster than sha1, bringing computation time down from .57 seconds to .12 seconds for a 387M file. fpath = ub.truepath('~/Crayfish_low_contrast.jpeg') xdata = 2 ** np.array([8, 12, 14, 16]) ydatas = ub.ddict(list) for blocksize in xdata: print('blocksize = {!r}'.format(blocksize)) ydatas['sha1'].append(ub.Timerit(2).call(ub.hash_file, fpath, hasher='sha1', blocksize=blocksize).min()) ydatas['sha256'].append(ub.Timerit(2).call(ub.hash_file, fpath, hasher='sha256', blocksize=blocksize).min()) ydatas['sha512'].append(ub.Timerit(2).call(ub.hash_file, fpath, hasher='sha512', blocksize=blocksize).min()) ydatas['md5'].append(ub.Timerit(2).call(ub.hash_file, fpath, hasher='md5', blocksize=blocksize).min()) ydatas['xx32'].append(ub.Timerit(2).call(hash_file2, fpath, hasher='xx32', blocksize=blocksize).min()) ydatas['xx64'].append(ub.Timerit(2).call(hash_file2, fpath, hasher='xx64', blocksize=blocksize).min()) ydatas['blake3'].append(ub.Timerit(2).call(hash_file2, fpath, hasher='blake3', blocksize=blocksize).min()) import netharn as nh nh.util.qtensure() nh.util.multi_plot(xdata, ydatas) """ import xxhash if hasher == 'xx32': hasher = xxhash.xxh32() elif hasher == 'xx64': hasher = xxhash.xxh64() with open(fpath, 'rb') as file: buf = file.read(blocksize) # otherwise hash the entire file while len(buf) > 0: hasher.update(buf) buf = file.read(blocksize) # Get the hashed representation text = ub.util_hash._digest_hasher(hasher, base=ub.util_hash.DEFAULT_ALPHABET) return text def _random_data(rng, num): return ''.join([rng.choice(string.hexdigits) for _ in range(num)]) def _write_random_file(dpath, part_pool, size_pool, rng): namesize = 16 # Choose 1, 4, or 16 parts of data num_parts = rng.choice(size_pool) chunks = [rng.choice(part_pool) for _ in range(num_parts)] contents = ''.join(chunks) fname_noext = _random_data(rng, namesize) ext = ub.hash_data(contents)[0:4] fname = '{}.{}'.format(fname_noext, ext) fpath = join(dpath, fname) with open(fpath, 'w') as file: file.write(contents) return fpath def bench_hashfile_blocksize(): """ Test speed of hashing with various blocksize strategies """ dpath = ub.ensuredir(ub.expandpath('$HOME/raid/data/tmp')) size_pool = [10000] rng = random.Random(0) # Create a pool of random chunks of data chunksize = int(2 ** 20) pool_size = 8 part_pool = [_random_data(rng, chunksize) for _ in range(pool_size)] # Write a big file (~600 MB) fpath = _write_random_file(dpath, part_pool, size_pool, rng) import os size_mb = os.stat(fpath).st_size / 1e6 print('file size = {!r} MB'.format(size_mb)) from ubelt.util_hash import _rectify_hasher hasher_algo = 'xx64' import timerit ti = timerit.Timerit(4, bestof=2, verbose=2) # hasher = _rectify_hasher(hash_algo)() # with timer: # with open(fpath, 'rb') as file: # buf = file.read(blocksize) # while len(buf) > 0: # hasher.update(buf) # buf = file.read(blocksize) # result = hasher.hexdigest() results = [] # Constant blocksize is the winner as long as its chosen right. 
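    # The three read strategies timed below are, in sketch form:
    #   constant:       read fixed 2 ** 20 byte chunks until EOF
    #   double:         double the chunk size after every read
    #   double + limit: double the chunk size, capped at 16 * 2 ** 20 bytes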
    for timer in ti.reset('constant blocksize'):
        blocksize = int(2 ** 20)
        hasher = _rectify_hasher(hasher_algo)()
        with timer:
            with open(fpath, 'rb') as file:
                buf = file.read(blocksize)
                while len(buf) > 0:
                    hasher.update(buf)
                    buf = file.read(blocksize)
            result = hasher.hexdigest()
        results.append(result)

    for timer in ti.reset('double blocksize'):
        blocksize = int(2 ** 20)
        hasher = _rectify_hasher(hasher_algo)()
        with timer:
            with open(fpath, 'rb') as file:
                buf = file.read(blocksize)
                while len(buf) > 0:
                    hasher.update(buf)
                    blocksize *= 2
                    buf = file.read(blocksize)
            result = hasher.hexdigest()
        results.append(result)

    for timer in ti.reset('double blocksize + limit'):
        max_blocksize = int(2 ** 20) * 16
        blocksize = int(2 ** 20)
        hasher = _rectify_hasher(hasher_algo)()
        with timer:
            with open(fpath, 'rb') as file:
                buf = file.read(blocksize)
                while len(buf) > 0:
                    hasher.update(buf)
                    blocksize = min(2 * blocksize, max_blocksize)
                    buf = file.read(blocksize)
            result = hasher.hexdigest()
        results.append(result)


def bench_find_optimal_blocksize():
    r"""
    This function can help find the optimal blocksize for your use case.

    Notes:
        # Usage
        cd ~/code/ubelt/dev
        xdoctest bench_hash_file.py bench_find_optimal_blocksize \
            --dpath \
            --size \
            --hash_algo \

        # Benchmark on an HDD
        xdoctest bench_hash_file.py bench_find_optimal_blocksize \
            --size 500 \
            --dpath $HOME/raid/data/tmp \
            --hash_algo xx64

        # Benchmark on an SSD
        xdoctest bench_hash_file.py bench_find_optimal_blocksize \
            --size 500 \
            --dpath $HOME/.cache/ubelt/tmp \
            --hash_algo xx64

        # Test a small file
        xdoctest bench_hash_file.py bench_find_optimal_blocksize \
            --size 1 \
            --dpath $HOME/.cache/ubelt/tmp \
            --hash_algo xx64

        Throughout our tests on SSDs / HDDs with small and large files we
        find that a chunksize of 2 ** 20 consistently works best with xx64.

        # Test with a slower hash algo
        xdoctest bench_hash_file.py bench_find_optimal_blocksize \
            --size 500 \
            --dpath $HOME/raid/data/tmp \
            --hash_algo sha1

        Even that shows 2 ** 20 working well.
    """
    import os
    import numpy as np
    import timerit
    dpath = ub.argval('--dpath', default=None)
    if dpath is None:
        # dpath = ub.ensuredir(ub.expandpath('$HOME/raid/data/tmp'))
        dpath = ub.ensure_app_cache_dir('ubelt/hash_test')
    else:
        ub.ensuredir(dpath)
    print('dpath = {!r}'.format(dpath))

    target_size = int(ub.argval('--size', default=600))
    hash_algo = ub.argval('--hash_algo', default='xx64')
    print('hash_algo = {!r}'.format(hash_algo))
    print('target_size = {!r}'.format(target_size))

    # Write a big file (~600 MB)
    MB = int(2 ** 20)
    size_pool = [target_size]
    rng = random.Random(0)
    # pool_size = max(target_size // 2, 1)
    # pool_size = max(1, target_size // 10)
    pool_size = 8
    part_pool = [_random_data(rng, MB) for _ in range(pool_size)]
    fpath = _write_random_file(dpath, part_pool, size_pool, rng)
    print('fpath = {!r}'.format(fpath))

    size_mb = os.stat(fpath).st_size / MB
    print('file size = {!r} MB'.format(size_mb))

    ti = timerit.Timerit(4, bestof=2, verbose=2)
    results = []

    # Find an optimal constant blocksize
    min_power = 16
    max_power = 24
    blocksize_candidates = [int(2 ** e) for e in range(min_power, max_power)]
    for blocksize in blocksize_candidates:
        for timer in ti.reset('constant blocksize=2 ** {} = {}'.format(np.log2(float(blocksize)), blocksize)):
            result = ub.hash_file(fpath, blocksize=blocksize, hasher=hash_algo)
            results.append(result)

    print('ti.rankings = {}'.format(ub.repr2(ti.rankings, nl=2, align=':')))
    assert ub.allsame(results)


def benchmark_hash_file():
    """
    CommandLine:
        python ~/code/ubelt/dev/bench/bench_hash_file.py --show
    """
    import ubelt as ub
    import random
    # dpath = ub.ensuredir(ub.expandpath('$HOME/raid/data/tmp'))
    dpath = ub.ensuredir(ub.expandpath('$HOME/tmp'))
    rng = random.Random(0)
    # Create a pool of random chunks of data
    chunksize = int(2 ** 20)
    pool_size = 8
    part_pool = [_random_data(rng, chunksize) for _ in range(pool_size)]
    #ITEM = 'JUST A STRING' * 100
    HASHERS = ['sha1', 'sha512', 'xxh32', 'xxh64', 'blake3']
    scales = list(range(5, 10))
    import os
    results = ub.AutoDict()
    # Using json conversion is faster, or at least as fast, in most cases.
    # xxhash is also significantly faster than sha512.
    ti = ub.Timerit(9, bestof=3, verbose=1, unit='ms')
    for s in ub.ProgIter(scales, desc='benchmark', verbose=3):
        N = 2 ** s
        print(' --- s={s}, N={N} --- '.format(s=s, N=N))
        # Write a big file
        size_pool = [N]
        fpath = _write_random_file(dpath, part_pool, size_pool, rng)
        megabytes = os.stat(fpath).st_size / (2 ** 20)
        print('megabytes = {!r}'.format(megabytes))
        for hasher in HASHERS:
            for timer in ti.reset(hasher):
                ub.hash_file(fpath, hasher=hasher)
            results[hasher].update({N: ti.mean()})
        col = {h: results[h][N] for h in HASHERS}
        sortx = ub.argsort(col)
        ranking = ub.dict_subset(col, sortx)
        print('walltime: ' + ub.repr2(ranking, precision=9, nl=0))
        best = next(iter(ranking))
        #pairs = list(ub.iter_window( 2))
        pairs = [(k, best) for k in ranking]
        ratios = [ranking[k1] / ranking[k2] for k1, k2 in pairs]
        nicekeys = ['{}/{}'.format(k1, k2) for k1, k2 in pairs]
        relratios = ub.odict(zip(nicekeys, ratios))
        print('speedup: ' + ub.repr2(relratios, precision=4, nl=0))
    # xdoc +REQUIRES(--show)
    # import pytest
    # pytest.skip()
    import pandas as pd
    df = pd.DataFrame.from_dict(results)
    df.columns.name = 'hasher'
    df.index.name = 'N'
    ratios = df.copy().drop(columns=df.columns)
    for k1, k2 in [('sha512', 'xxh64'), ('sha1', 'xxh64'), ('xxh32', 'xxh64'), ('blake3', 'xxh64')]:
        ratios['{}/{}'.format(k1, k2)] = df[k1] / df[k2]
    print()
    print('Seconds per iteration')
    print(df.to_string(float_format='%.9f'))
    print()
print('Ratios of seconds') print(ratios.to_string(float_format='%.2f')) print() print('Average Ratio (over all N)') print(ratios.mean().sort_values()) if ub.argflag('--show'): import kwplot kwplot.autompl() xdata = sorted(ub.peek(results.values()).keys()) ydata = ub.map_values(lambda d: [d[x] for x in xdata], results) kwplot.multi_plot(xdata, ydata, xlabel='N', ylabel='seconds') kwplot.show_if_requested() ubelt-1.3.7/dev/bench/bench_hash_impls.py000066400000000000000000000243541472470106000203430ustar00rootroot00000000000000""" Check iterative versus recursive implementation of hash_data """ from ubelt import NoParam from ubelt.util_hash import ( _HASHABLE_EXTENSIONS, _rectify_hasher, _rectify_base, _digest_hasher, _int_to_bytes ) from xdev import profile _SEP = b'_,_' _ITER_PREFIX = b'_[_' _ITER_SUFFIX = b'_]_' @profile def _convert_to_hashable(data, types=True, extensions=None): r""" Converts ``data`` into a hashable byte representation if an appropriate hashing function is known. """ # HANDLE MOST COMMON TYPES FIRST if data is None: hashable = b'NONE' prefix = b'NULL' elif isinstance(data, bytes): hashable = data prefix = b'TXT' elif isinstance(data, str): # convert unicode into bytes hashable = data.encode('utf-8') prefix = b'TXT' elif isinstance(data, int): # warnings.warn('Hashing ints is slow, numpy is preferred') hashable = _int_to_bytes(data) # hashable = data.to_bytes(8, byteorder='big') prefix = b'INT' elif isinstance(data, float): data_ = float(data) # convert to a base-float try: a, b = data_.as_integer_ratio() except (ValueError, OverflowError): hashable = str(data_).encode('utf-8') # handle and nan, inf else: hashable = _int_to_bytes(a) + b'/' + _int_to_bytes(b) prefix = b'FLT' else: if extensions is None: extensions = _HASHABLE_EXTENSIONS # Then dynamically look up any other type hash_func = extensions.lookup(data) prefix, hashable = hash_func(data) if types: return prefix, hashable else: return b'', hashable @profile def _update_hasher_recursive(hasher, data, types=True, extensions=None): """ Converts ``data`` into a byte representation and calls update on the hasher :class:`hashlib._hashlib.HASH` algorithm. Args: hasher (Hasher): instance of a hashlib algorithm data (object): ordered data with structure types (bool): include type prefixes in the hash extensions (HashableExtensions | None): overrides global extensions Example: >>> hasher = hashlib.sha512() >>> data = [1, 2, ['a', 2, 'c']] >>> _update_hasher_recursive(hasher, data) >>> print(hasher.hexdigest()[0:8]) e2c67675 """ if extensions is None: extensions = _HASHABLE_EXTENSIONS # Determine if the data should be hashed directly or iterated through if isinstance(data, (tuple, list, zip)): needs_iteration = True else: needs_iteration = any(check(data) for check in extensions.iterable_checks) if needs_iteration: # Denote that we are hashing over an iterable # Multiple structure bytes make it harder to accidentally introduce # conflicts, but this is not perfect. 
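        # For example, hashing [1, 'a'] feeds the hasher a byte stream
        # framed roughly like this (a sketch of the layout, not the exact
        # bytes):
        #   _[_ INT<bytes-of-1> _,_ TXT b'a' _,_ _]_
        # where _[_ / _]_ bracket the iterable and _,_ separates items.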
# SEP = b'_,_' # ITER_PREFIX = b'_[_' # ITER_SUFFIX = b'_]_' iter_ = iter(data) hasher.update(_ITER_PREFIX) # first, try to nest quickly without recursive calls # (this works if all data in the sequence is a non-iterable) try: for item in iter_: prefix, hashable = _convert_to_hashable(item, types, extensions=extensions) binary_data = prefix + hashable + _SEP hasher.update(binary_data) hasher.update(_ITER_SUFFIX) except TypeError: # need to use recursive calls # Update based on current item _update_hasher_recursive(hasher, item, types, extensions=extensions) # !>> WHOOPS: THIS IS A BUG. THERE SHOULD BE A # !>> hasher.update(_SEP) # !>> SEPARATOR HERE. # !>> BUT FIXING IT WILL BREAK BACKWARDS COMPAT. # !>> We will need to expose versions of the hasher that can be # configured, and ideally new versions will have speed improvements. for item in iter_: # Ensure the items have a spacer between them _update_hasher_recursive(hasher, item, types, extensions=extensions) hasher.update(_SEP) hasher.update(_ITER_SUFFIX) else: prefix, hashable = _convert_to_hashable(data, types, extensions=extensions) binary_data = prefix + hashable hasher.update(binary_data) # EXPERIMENTAL VARIANT to attempt to speedup update_hasher @profile def _update_hasher_iterative(hasher, data, types=True, extensions=None): """ Converts ``data`` into a byte representation and calls update on the hasher :class:`hashlib._hashlib.HASH` algorithm. Args: hasher (Hasher): instance of a hashlib algorithm data (object): ordered data with structure types (bool): include type prefixes in the hash extensions (HashableExtensions | None): overrides global extensions Example: >>> hasher = hashlib.sha512() >>> data = [1, 2, ['a', 2, 'c']] >>> _update_hasher_iterative(hasher, data) >>> print(hasher.hexdigest()[0:8]) e2c67675 """ if extensions is None: extensions = _HASHABLE_EXTENSIONS DAT_TYPE = 1 SEP_TYPE = 2 stack = [(DAT_TYPE, data)] while stack: _type, data = stack.pop() if _type is SEP_TYPE: hasher.update(data) continue # Determine if the data should be hashed directly or iterated through if isinstance(data, (tuple, list, zip)): needs_iteration = True else: needs_iteration = any(check(data) for check in extensions.iterable_checks) if needs_iteration: # Denote that we are hashing over an iterable # Multiple structure bytes make it harder to accidentally introduce # conflicts, but this is not perfect. iter_ = iter(data) hasher.update(_ITER_PREFIX) # first, try to nest quickly without recursive calls # (this works if all data in the sequence is a non-iterable) try: for item in iter_: prefix, hashable = _convert_to_hashable(item, types, extensions=extensions) binary_data = prefix + hashable + _SEP hasher.update(binary_data) hasher.update(_ITER_SUFFIX) except TypeError: # need to recurse into the iterable. stack.append((SEP_TYPE, _ITER_SUFFIX)) for subitem in reversed(list(iter_)): stack.append((SEP_TYPE, _SEP)) stack.append((DAT_TYPE, subitem)) # BUG: should have a _SEP here. # !>> WHOOPS: THIS IS A BUG. THERE SHOULD BE A # !>> hasher.update(_SEP) # !>> SEPARATOR HERE. # !>> BUT FIXING IT WILL BREAK BACKWARDS COMPAT. 
# !>> We will need to expose versions of the hasher that can be stack.append((DAT_TYPE, item)) else: prefix, hashable = _convert_to_hashable(data, types, extensions=extensions) binary_data = prefix + hashable hasher.update(binary_data) @profile def hash_data_iterative(data, hasher=NoParam, base=NoParam, types=False, convert=False, extensions=None): """ """ if convert and not isinstance(data, str): # nocover import json try: data = json.dumps(data) except TypeError: # import warnings # warnings.warn('Unable to encode input as json due to: {!r}'.format(ex)) pass base = _rectify_base(base) hasher = _rectify_hasher(hasher)() # Feed the data into the hasher _update_hasher_iterative(hasher, data, types=types, extensions=extensions) # Get the hashed representation text = _digest_hasher(hasher, base) return text @profile def hash_data_recursive(data, hasher=NoParam, base=NoParam, types=False, convert=False, extensions=None): """ """ if convert and not isinstance(data, str): # nocover import json try: data = json.dumps(data) except TypeError: # import warnings # warnings.warn('Unable to encode input as json due to: {!r}'.format(ex)) pass base = _rectify_base(base) hasher = _rectify_hasher(hasher)() # Feed the data into the hasher _update_hasher_recursive(hasher, data, types=types, extensions=extensions) # Get the hashed representation text = _digest_hasher(hasher, base) return text def main(): import numpy as np import string import random np_data = np.empty((1, 1)) data = [1, 2, ['a', 2, 'c'], [1] * 100, [[[], np_data]], {'a': [1, 2, [3, 4, [5, 6]]]}] def make_nested_data(leaf_width=10, branch_width=10, depth=0): data = {} for i in range(leaf_width): key = ''.join(random.choices(string.printable, k=16)) value = ''.join(random.choices(string.printable, k=16)) data[key] = value if depth > 0: for i in range(branch_width): key = ''.join(random.choices(string.printable, k=16)) value = make_nested_data( leaf_width=leaf_width, branch_width=branch_width, depth=depth - 1) data[key] = value return data data = make_nested_data(leaf_width=10, branch_width=2, depth=8) import timerit ti = timerit.Timerit(100, bestof=10, verbose=2) for timer in ti.reset('recursive'): with timer: result1 = hash_data_recursive(data) for timer in ti.reset('iterative'): with timer: result2 = hash_data_iterative(data) print(f'result1={result1}') print(f'result2={result2}') assert result1 == result2 if __name__ == '__main__': """ CommandLine: XDEV_PROFILE=1 python ~/code/ubelt/dev/bench/bench_hash_impls.py python ~/code/ubelt/dev/bench/bench_hash_impls.py """ main() ubelt-1.3.7/dev/bench/bench_highlight.py000066400000000000000000000052641472470106000201620ustar00rootroot00000000000000""" Test if pygments or rich is faster when it comes to highlighting. 
Results: pygments is a lot faster """ import sys import ubelt as ub import warnings def _pygments_highlight(text, lexer_name, **kwargs): """ Original pygments highlight logic """ if sys.platform.startswith('win32'): # nocover # Hack on win32 to support colored output try: import colorama if not colorama.initialise.atexit_done: # Only init if it hasn't been done colorama.init() except ImportError: warnings.warn( 'colorama is not installed, ansi colors may not work') import pygments # type: ignore import pygments.lexers # type: ignore import pygments.formatters # type: ignore import pygments.formatters.terminal # type: ignore formatter = pygments.formatters.terminal.TerminalFormatter(bg='dark') lexer = pygments.lexers.get_lexer_by_name(lexer_name, **kwargs) new_text = pygments.highlight(text, lexer, formatter) return new_text def _rich_highlight(text, lexer_name): """ Alternative rich-based highlighter References: https://github.com/Textualize/rich/discussions/3076 """ from rich.syntax import Syntax from rich.console import Console import io syntax = Syntax(text, lexer_name, background_color='default') stream = io.StringIO() write_console = Console(file=stream, soft_wrap=True, color_system='standard') write_console.print(syntax) new_text = write_console.file.getvalue() return new_text def main(): # Benchmark which is faster import timerit lexer_name = 'python' ti = timerit.Timerit(100, bestof=10, verbose=2) text = 'import ubelt as ub; print(ub)' for timer in ti.reset('small-pygments'): pygments_text = _pygments_highlight(text, lexer_name) for timer in ti.reset('small-rich'): rich_text = _rich_highlight(text, lexer_name) print(pygments_text) print(rich_text) # Use bigger text try: text = ub.Path(__file__).read_text() except NameError: text = ub.Path('~/code/ubelt/dev/bench/bench_highlight.py').expand().read_text() for timer in ti.reset('big-pygments'): pygments_text = _pygments_highlight(text, lexer_name) for timer in ti.reset('big-rich'): rich_text = _rich_highlight(text, lexer_name) print(pygments_text) print(rich_text) print(ub.urepr(ti.measures['mean'], align=':', precision=8)) print(ub.urepr(ti.measures['min'], align=':', precision=8)) if __name__ == '__main__': """ CommandLine: python ~/code/ubelt/dev/bench/bench_highlight.py """ main() ubelt-1.3.7/dev/bench/bench_import_time.py000066400000000000000000000150251472470106000205370ustar00rootroot00000000000000 def benchmark_import_time(): import ubelt as ub info = ub.cmd('python -X importtime -c "import ubelt"') print(info['err']) print(info['err'].rstrip().split('\n')[-1]) info = ub.cmd('python -X importtime -c "from concurrent import futures"') print(info['err'].rstrip().split('\n')[-1]) info = ub.cmd('python -X importtime -c "import numpy"') print(info['err'].rstrip().split('\n')[-1]) info = ub.cmd('python -X importtime -c "import hashlib"') print(info['err'].rstrip().split('\n')[-1]) info = ub.cmd('python -X importtime -c "import typing"') print(info['err'].rstrip().split('\n')[-1]) info = ub.cmd('python -X importtime -c "import json"') print(info['err'].rstrip().split('\n')[-1]) info = ub.cmd('python -X importtime -c "import uuid"') print(info['err'].rstrip().split('\n')[-1]) info = ub.cmd('python -X importtime -c "import xxhash"') print(info['err'].rstrip().split('\n')[-1]) def benchmark_multi_or_combined_import(): """ Combining all imports into a single line is slightly faster """ import ubelt as ub attr_names = [ 'altsep', 'basename', 'commonpath', 'commonprefix', 'curdir', 'defpath', 'devnull', 'dirname', 'exists', 'expanduser', 
'expandvars', 'extsep', 'genericpath', 'getatime', 'getctime', 'getmtime', 'getsize', 'isabs', 'isdir', 'isfile', 'islink', 'ismount', 'join', 'lexists', 'normcase', 'normpath', 'os', 'pardir', 'pathsep', 'realpath', 'relpath', 'samefile', ] combined_lines = 'from os.path import ' + ', '.join(attr_names) multi_lines = '; '.join(['from os.path import ' + name for name in attr_names]) import timerit ti = timerit.Timerit(10, bestof=3, verbose=2) for timer in ti.reset('combined_lines'): with timer: ub.cmd('python -c "{}"'.format(combined_lines), check=True) for timer in ti.reset('multi_lines'): with timer: info = ub.cmd('python -c "{}"'.format(multi_lines)) # NOQA def benchmark_ubelt_import_time_robust(): import pandas as pd import ubelt as ub import kwplot sns = kwplot.autosns(force='Qt5Agg') # plt = kwplot.autoplt() # NOQA prog = ub.codeblock( r''' def _main(): import subprocess import ubelt as ub import pandas as pd measurements = [] num_iters = 200 num_iters = 10 for i in range(num_iters): row = {} prog = subprocess.Popen('python -X importtime -c "import ubelt"', shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) _, text = prog.communicate() text = text.decode() final_line = text.rstrip().split('\n')[-1] partial = final_line.split(':')[1].split('|') row['self_us'] = float(partial[0].strip()) row['cummulative'] = float(partial[1].strip()) measurements.append(row) df = pd.DataFrame(measurements) stats = pd.DataFrame({ 'mean': df.mean(), 'std': df.std(), 'min': df.min(), 'max': df.max(), 'total': df.sum(), }) info = stats.to_dict() info['version'] = ub.__version__ print(info) _main() ''') dpath = ub.Path(ub.ensure_app_cache_dir('ubelt/tests/test_version_import2')) repo_root = ub.Path('$HOME/code/ubelt').expand() info = ub.cmd('git branch --show-current', cwd=repo_root) current_branch = info['out'].strip() info = ub.cmd('git tag', cwd=repo_root) versions = [p for p in info['out'].split('\n') if p] branches = [current_branch, 'main'] + versions[::-1] tmp_copy = dpath / repo_root.name tmp_copy.delete() ub.cmd(f'git clone {repo_root} {tmp_copy}', cwd=dpath) fpath = tmp_copy / 'do_test.py' fpath.write_text(prog) bname_to_info = {} rows = [] try: for bname in ub.ProgIter(branches, desc='looping over versions', verbose=3): print('bname = {!r}'.format(bname)) ub.cmd('git checkout {}'.format(bname), cwd=tmp_copy, verbose=3, check=True) info = ub.cmd('python {}'.format(fpath), verbose=2, cwd=tmp_copy) dict_info = eval(info['out']) bname_to_info[bname] = dict_info for stat in ['mean', 'min', 'max']: for type in ['self_us', 'cummulative']: rows.append({ 'version': dict_info['version'], 'stat': stat, 'type': type, 'time': dict_info[stat][type], }) df = pd.DataFrame(rows[-1:]) print(df) # ax.cla() # sns.lineplot(data=df, x='version', y='time', hue='stat', style='type', ax=ax) except KeyboardInterrupt: pass finally: ub.cmd('git checkout {}'.format(current_branch), cwd=tmp_copy) df = pd.DataFrame(rows) # from packaging.version import Version from distutils.version import LooseVersion unique_versions = list(map(str, sorted(map(LooseVersion, df['version'].unique())))) df['release_index'] = df['version'].apply(lambda x: unique_versions.index(x)) xtick_to_label = ub.sorted_keys(ub.dzip( ub.oset(df['release_index']), ub.oset(df['version']) )) xticks = list(xtick_to_label.keys()) xticklabels = list(xtick_to_label.values()) kwplot.figure(fnum=2, pnum=(2, 1, 1), doclf=True) ax = sns.lineplot(data=df[df['type'] == 'cummulative'], x='release_index', y='time', hue='stat', style='type', marker='o') 
ax.set_title('Ubelt cumulative import time over release history') ax.set_xticks(xticks, labels=xticklabels, rotation='vertical') ax.set_xlabel('Version') ax.set_ylabel('Time (μs)') # ax.set_yscale('log') kwplot.figure(fnum=2, pnum=(2, 1, 2)) ax = sns.lineplot(data=df[df['type'] == 'self_us'], x='release_index', y='time', hue='stat', style='type', marker='o') ax.set_xticks(xticks, labels=xticklabels, rotation='vertical') ax.set_title('Ubelt self import time over release history') ax.set_xlabel('Version') ax.set_ylabel('Time (μs)') # ax.set_yscale('log') kwplot.show_if_requested() if __name__ == '__main__': """ CommandLine: python ~/code/ubelt/dev/bench/bench_import_time.py --show """ # benchmark_import_time() benchmark_ubelt_import_time_robust() # benchmark_multi_or_combined_import() ubelt-1.3.7/dev/bench/bench_memoize.py000066400000000000000000000021141472470106000176470ustar00rootroot00000000000000 def bench_memoize(): import ubelt as ub @ub.memoize def memoized_func(): return object() def raw_func(): return object() class Foo(object): @ub.memoize_property def a_memoized_property(self): return object() @ub.memoize_method def a_memoized_method(self): return object() @property def a_raw_property(self): return object() def a_raw_method(self): return object() self = Foo() ti = ub.Timerit(1000, bestof=100, verbose=1, unit='ns') ti.reset('memoized method').call(lambda: self.a_memoized_method()) ti.reset('raw method').call(lambda: self.a_raw_method()) ti.reset('memoized func').call(lambda: memoized_func()) ti.reset('raw func').call(lambda: raw_func()) ti.reset('memoized property').call(lambda: self.a_memoized_property) ti.reset('raw property').call(lambda: self.a_raw_property) if __name__ == '__main__': """ CommandLine: python ~/code/ubelt/dev/bench_memoize.py """ bench_memoize() ubelt-1.3.7/dev/bench/bench_modaccess_overhead.py000066400000000000000000000025601472470106000220250ustar00rootroot00000000000000 def main(): import ubelt as ub from ubelt import util_list from ubelt.util_list import take import random from math import e # # Data N = 100 array = [random.random() for _ in range(N)] indices = [random.randint(0, N - 1) for _ in range(int(N // e))] ti = ub.Timerit(2 ** 11, bestof=2 ** 8, verbose=1) for timer in ti.reset('take'): with timer: list(take(array, indices)) for timer in ti.reset('util_list.take'): with timer: list(util_list.take(array, indices)) for timer in ti.reset('ub.take'): with timer: list(ub.take(array, indices)) print('---') # import pandas as pd # df = pd.DataFrame(rankings) # print('df =\n{}'.format(df)) print('rankings = {}'.format(ub.repr2(ti.rankings, precision=9, nl=2))) print('consistency = {}'.format(ub.repr2(ti.consistency, precision=9, nl=2))) positions = ub.ddict(list) for m1, v1 in ti.rankings.items(): for pos, label in enumerate(ub.argsort(v1), start=0): positions[label].append(pos) average_position = ub.map_values(lambda x: sum(x) / len(x), positions) print('average_position = {}'.format(ub.repr2(average_position))) if __name__ == '__main__': """ CommandLine: xdoctest -m ~/code/ubelt/dev/bench_modaccess_overhead.py main """ main() ubelt-1.3.7/dev/bench/bench_perf_counters.py000066400000000000000000000164561472470106000210760ustar00rootroot00000000000000 def benchmark_template(): import ubelt as ub import pandas as pd import inspect import timerit import time from fractions import Fraction _perf_counter_ns = time.perf_counter_ns # Some bookkeeping needs to be done to build a dictionary that maps the # method names to the functions themselves. 
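    # For example, after the decorated definitions below method_lut maps
    # names to callables, e.g.:
    #   {'method_ns_frac1': method_ns_frac1, ..., 'method_perf_counter_ns_raw': ...}
    # so basis['method'] can select an implementation by its string name.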
    method_lut = {}
    def register_method(func):
        method_lut[func.__name__] = func
        return func

    @register_method
    def method_ns_frac1(n):
        # Fraction of perf_counter_ns with a local import each call
        from fractions import Fraction
        for _ in range(n):
            Fraction(time.perf_counter_ns(), 1_000_000_000)

    @register_method
    def method_ns_frac2(n):
        # Fraction of perf_counter_ns using the module-level import
        for _ in range(n):
            Fraction(time.perf_counter_ns(), 1_000_000_000)

    @register_method
    def method_ns_frac3(n):
        # Fraction of perf_counter_ns via a prebound function reference
        for _ in range(n):
            Fraction(_perf_counter_ns(), 1_000_000_000)

    @register_method
    def method_ns_float(n):
        # Float seconds derived from perf_counter_ns
        for _ in range(n):
            time.perf_counter_ns() / 1_000_000_000

    @register_method
    def method_perf_counter_raw(n):
        # Raw cost of time.perf_counter
        for _ in range(n):
            time.perf_counter()

    @register_method
    def method_perf_counter_ns_raw(n):
        # Raw cost of time.perf_counter_ns
        for _ in range(n):
            time.perf_counter_ns()

    # Change params here to modify number of trials
    ti = timerit.Timerit(100000, bestof=100, verbose=1)

    # if True, record every trial run and show variance in seaborn
    # if False, use the standard timerit min/mean measures
    RECORD_ALL = True

    # These are the parameters that we benchmark over
    basis = {
        'method': list(method_lut),
        'n': [0, 16, 64, 128, 256, 1024],
        # 'param_name': [param values],
    }
    xlabel = 'n'
    # Set these to param labels that directly transfer to method kwargs
    # kw_labels = ['n']
    kw_labels = list(inspect.signature(ub.peek(method_lut.values())).parameters)
    # Set these to empty lists if they are not used
    group_labels = {
        'style': [],
        'size': [],
    }
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel}) -
        set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        # Make any modifications you need to compute input kwargs for each
        # method here.
        kwargs = ub.dict_isect(params.copy(), kw_labels)
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops
        # and compute time stats with similar methodology to timeit.
        for timer in ti.reset(key):
            # Put any setup logic you don't want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(**kwargs)

        if RECORD_ALL:
            # Seaborn will show the variance if this is enabled, otherwise
            # use the robust timerit mean / min times
            # chunk_iter = ub.chunks(ti.times, ti.bestof)
            # times = list(map(min, chunk_iter))
            # TODO: timerit method for this
            times = ti.robust_times()
            for _time in times:
                row = {
                    # 'mean': ti.mean(),
                    'time': _time,
                    'key': key,
                    **group_keys,
                    **params,
                }
                rows.append(row)
        else:
            row = {
                'mean': ti.mean(),
                'min': ti.min(),
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    time_key = 'time' if RECORD_ALL else 'min'
    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
data = pd.DataFrame(rows) data = data.sort_values(time_key) if RECORD_ALL: # Show the min / mean if we record all min_times = data.groupby('key').min().rename({'time': 'min'}, axis=1) mean_times = data.groupby('key')[['time']].mean().rename({'time': 'mean'}, axis=1) stats_data = pd.concat([min_times, mean_times], axis=1) stats_data = stats_data.sort_values('min') else: stats_data = data USE_OPENSKILL = 0 if USE_OPENSKILL: # Lets try a real ranking method # https://github.com/OpenDebates/openskill.py import openskill method_ratings = {m: openskill.Rating() for m in basis['method']} other_keys = sorted(set(stats_data.columns) - {'key', 'method', 'min', 'mean', 'hue_key', 'size_key', 'style_key'}) for params, variants in stats_data.groupby(other_keys): variants = variants.sort_values('mean') ranking = variants['method'].reset_index(drop=True) mean_speedup = variants['mean'].max() / variants['mean'] stats_data.loc[mean_speedup.index, 'mean_speedup'] = mean_speedup min_speedup = variants['min'].max() / variants['min'] stats_data.loc[min_speedup.index, 'min_speedup'] = min_speedup if USE_OPENSKILL: # The idea is that each setting of parameters is a game, and each # "method" is a player. We rank the players by which is fastest, # and update their ranking according to the Weng-Lin Bayes ranking # model. This does not take the fact that some "games" (i.e. # parameter settings) are more important than others, but it should # be fairly robust on average. old_ratings = [[r] for r in ub.take(method_ratings, ranking)] new_values = openskill.rate(old_ratings) # Not inplace new_ratings = [openskill.Rating(*new[0]) for new in new_values] method_ratings.update(ub.dzip(ranking, new_ratings)) print('Statistics:') print(stats_data) if USE_OPENSKILL: from openskill import predict_win win_prob = predict_win([[r] for r in method_ratings.values()]) skill_agg = pd.Series(ub.dzip(method_ratings.keys(), win_prob)).sort_values(ascending=False) print('Aggregated Rankings =\n{}'.format(skill_agg)) plot = True if plot: # import seaborn as sns # kwplot autosns works well for IPython and script execution. # not sure about notebooks. 
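        # If kwplot is unavailable, a plain seaborn + matplotlib setup is a
        # reasonable substitute (a sketch, assuming those packages exist):
        #   import seaborn as sns
        #   import matplotlib.pyplot as plt
        #   ax = plt.figure(1, clear=True).gca()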
import kwplot sns = kwplot.autosns() plt = kwplot.autoplt() plotkw = {} for gname, labels in group_labels.items(): if labels: plotkw[gname] = gname + '_key' # Your variables may change ax = kwplot.figure(fnum=1, doclf=True).gca() sns.lineplot(data=data, x=xlabel, y=time_key, marker='o', ax=ax, **plotkw) ax.set_title('Benchmark Name') ax.set_xlabel('Size (todo: A better x-variable description)') ax.set_ylabel('Time (todo: A better y-variable description)') # ax.set_xscale('log') # ax.set_yscale('log') try: __IPYTHON__ except NameError: plt.show() if __name__ == '__main__': """ CommandLine: python ~/code/timerit/examples/benchmark_template.py """ benchmark_template() ubelt-1.3.7/dev/bench/bench_urepr_vs_alternatives.py000066400000000000000000000207121472470106000226340ustar00rootroot00000000000000""" Determine how slow ubelt.urepr is versus other repr methods for large data This code is based on a template that lives in: https://github.com/Erotemic/timerit/blob/main/examples/benchmark_template.py Or typically on a dev machine in ~/code/timerit/examples/benchmark_template.py """ from functools import cache @cache def make_nested_data(num_items=10_000): import numpy as np items = {} for index in range(num_items): item = { 'name': f'item_{index}', 'prop1': {'foo': None, 'bar': 1.0, 'baz': None}, 'prop2': (32, 32), 'prop3': (32, 32), 'prop4': [32, 32], 'prop5': np.array([32, 32]), 'prop6': 1, 'prop7': 1, 'prop8': 1, 'prop9': 0, } items[item['name']] = item return items def benchmark_urepr_vs_alternatives2(): items = make_nested_data() import timerit import ubelt as ub ti = timerit.Timerit(1, bestof=1, verbose=2) for timer in ti.reset('ubelt.repr2'): with timer: _ = ub.repr2(items) for timer in ti.reset('repr'): with timer: _ = repr(items) def benchmark_urepr_vs_alternatives(): import ubelt as ub import pandas as pd import timerit import pprint # import inspect plot_labels = { 'x': 'Size', 'y': 'Time', 'title': 'Benchmark Name', } # Some bookkeeping needs to be done to build a dictionary that maps the # method names to the functions themselves. method_lut = {} def register_method(func): method_lut[func.__name__] = func return func # Define the methods you want to benchmark. The arguments should be # parameters that you want to vary in the test. @register_method def ubelt_urepr(items): return ub.repr2(items) @register_method def stdlib_repr(items): return repr(items) @register_method def pprint_pformat(items): return pprint.pformat(items) # Change params here to modify number of trials ti = timerit.Timerit(100, bestof=10, verbose=1) # if True, record every trail run and show variance in seaborn # if False, use the standard timerit min/mean measures RECORD_ALL = True # These are the parameters that we benchmark over basis = { 'method': list(method_lut), # i.e. ['method1', 'method2'] # 'num_items': [0, 100, 1_000, 10_000], 'num_items': [0, 250, 500, 1_000, 2_500, 5_000], # 'num_items': [0, 250, 500, 1_000], # 'num_items': [0, 1, 2, 4, 8, 16, 32, 64, 128, 256], # 'yparam': [0, 100], # 'zparam': [2, 3] # 'param_name': [param values], } # Set these to param labels that directly transfer to method kwargs # kw_labels = list(inspect.signature(ub.peek(method_lut.values())).parameters) # i.e. # kw_labels = ['xparam', 'y', 'z'] # Set these to empty lists if they are not used, removing dict items breaks # the code. 
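    # With xlabel = 'num_items' below and no style/size labels, every
    # remaining basis key (here just 'method') lands in the seaborn hue
    # grouping via group_labels['hue'].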
xlabel = 'num_items' group_labels = { # 'style': ['yparam'], # 'size': ['zparam'], } group_labels['hue'] = list( (ub.oset(basis) - {xlabel}) - set.union(set(), *map(set, group_labels.values()))) grid_iter = list(ub.named_product(basis)) # For each variation of your experiment, create a row. rows = [] for params in grid_iter: params = ub.udict(params) group_keys = {} for gname, labels in group_labels.items(): group_keys[gname + '_key'] = ub.urepr( params & labels, compact=1, si=1) key = ub.urepr(params, compact=1, si=1) # Make any modifications you need to compute input kwargs for each # method here. items = make_nested_data(params['num_items']) kwargs = {'items': items} method = method_lut[params['method']] # Timerit will run some user-specified number of loops # and compute time stats with similar methodology to timeit for timer in ti.reset(key): # Put any setup logic you don't want to time here. # ... with timer: # Put the logic you want to time here method(**kwargs) if RECORD_ALL: # Seaborn will show the variance if this is enabled, otherwise # use the robust timerit mean / min times # chunk_iter = ub.chunks(ti.times, ti.bestof) # times = list(map(min, chunk_iter)) # TODO: timerit method for this times = ti.robust_times() for time in times: row = { # 'mean': ti.mean(), 'time': time, 'key': key, **group_keys, **params, } rows.append(row) else: row = { 'mean': ti.mean(), 'min': ti.min(), 'key': key, **group_keys, **params, } rows.append(row) time_key = 'time' if RECORD_ALL else 'min' # The rows define a long-form pandas data array. # Data in long-form makes it very easy to use seaborn. data = pd.DataFrame(rows) data = data.sort_values(time_key) if RECORD_ALL: # Show the min / mean if we record all min_times = data.groupby('key').min().rename({'time': 'min'}, axis=1) mean_times = data.groupby('key')[['time']].mean().rename({'time': 'mean'}, axis=1) stats_data = pd.concat([min_times, mean_times], axis=1) stats_data = stats_data.sort_values('min') else: stats_data = data USE_OPENSKILL = 0 if USE_OPENSKILL: # Let's try a real ranking method # https://github.com/OpenDebates/openskill.py import openskill method_ratings = {m: openskill.Rating() for m in basis['method']} other_keys = sorted(set(stats_data.columns) - {'key', 'method', 'min', 'mean', 'hue_key', 'size_key', 'style_key'}) for params, variants in stats_data.groupby(other_keys): variants = variants.sort_values('mean') ranking = variants['method'].reset_index(drop=True) mean_speedup = variants['mean'].max() / variants['mean'] stats_data.loc[mean_speedup.index, 'mean_speedup'] = mean_speedup min_speedup = variants['min'].max() / variants['min'] stats_data.loc[min_speedup.index, 'min_speedup'] = min_speedup if USE_OPENSKILL: # The idea is that each setting of parameters is a game, and each # "method" is a player. We rank the players by which is fastest, # and update their ranking according to the Weng-Lin Bayes ranking # model. This does not take into account the fact that some "games" (i.e. # parameter settings) are more important than others, but it should # be fairly robust on average.
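# NOTE: a version caveat (my addition, not part of the original template):
# this block was written against the old top-level openskill API imported
# above (openskill.Rating, openskill.rate, and later openskill.predict_win).
# If I recall correctly, newer openskill releases reorganized these into
# model classes, so a port might look roughly like:
#
#     from openskill.models import PlackettLuce  # assumption: openskill >= 5
#     model = PlackettLuce()
#     method_ratings = {m: model.rating() for m in basis['method']}
#
# Treat those exact names as assumptions; otherwise pin the old version.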
old_ratings = [[r] for r in ub.take(method_ratings, ranking)] new_values = openskill.rate(old_ratings) # Not inplace new_ratings = [openskill.Rating(*new[0]) for new in new_values] method_ratings.update(ub.dzip(ranking, new_ratings)) print('Statistics:') print(stats_data) if USE_OPENSKILL: from openskill import predict_win win_prob = predict_win([[r] for r in method_ratings.values()]) skill_agg = pd.Series(ub.dzip(method_ratings.keys(), win_prob)).sort_values(ascending=False) print('Aggregated Rankings =\n{}'.format(skill_agg)) plot = True if plot: # import seaborn as sns # kwplot autosns works well for IPython and script execution. # not sure about notebooks. import kwplot sns = kwplot.autosns() plt = kwplot.autoplt() plotkw = {} for gname, labels in group_labels.items(): if labels: plotkw[gname] = gname + '_key' # Your variables may change ax = kwplot.figure(fnum=1, doclf=True).gca() sns.lineplot(data=data, x=xlabel, y=time_key, marker='o', ax=ax, **plotkw) ax.set_title(plot_labels['title']) ax.set_xlabel(plot_labels['x']) ax.set_ylabel(plot_labels['y']) # ax.set_xscale('log') # ax.set_yscale('log') try: __IPYTHON__ except NameError: plt.show() if __name__ == '__main__': """ CommandLine: python ~/code/ubelt/dev/bench/bench_urepr_vs_alternatives.py """ benchmark_urepr_vs_alternatives() ubelt-1.3.7/dev/check_gpg.py000066400000000000000000000017071472470106000157050ustar00rootroot00000000000000 def main(): """ Checks that the latest wheels on pypi agree with the gpg key """ import requests package_name = 'ubelt' url = "https://pypi.python.org/pypi/{}/json".format(package_name) package = requests.get(url).json() max_ver = max(package["releases"].keys()) # ... check compatibility latest_wheel_info_list = package['releases'][max_ver] for wheel_info in latest_wheel_info_list: import ubelt as ub whl_fpath = ub.grabdata( wheel_info['url'], hash_prefix=wheel_info['digests']['sha256'], hasher='sha256' ) if not wheel_info['has_sig']: raise ValueError('info says no sig') sig_fpath = ub.download( wheel_info['url'] + '.asc', ) info = ub.cmd('gpg --verify {} {}'.format(sig_fpath, whl_fpath), verbose=3) assert info['ret'] == 0 if __name__ == '__main__': pass ubelt-1.3.7/dev/check_import_time.sh000066400000000000000000000001171472470106000174340ustar00rootroot00000000000000python -X importtime -c "import ubelt" 2> dump_importtime cat dump_importtime ubelt-1.3.7/dev/ci_public_gpg_key.pgp.enc000066400000000000000000000042731472470106000203340ustar00rootroot00000000000000U2FsdGVkX196fSaWP7ysr6UqXJwPUfqCv7RWjSvYaqdZ0HGVJkJBcZz3IqOpxBNu LCklqy9RENdBhqt52nqv+6nZU6LXgAT4fFyUn4wIjZ5L6PJLk4DlJ47f/vc+4p1l pixSzgOB6ah8tmh6HsbbEEShfGCwDf73EUdztCVOGb2+mE2trd2OD8mg5OBM4Ouk dYy3tXbSVr82FBiTHoYnx34FCcMGEVOLQoykjGUpjC547GsGgGQR5M7cInSU3WB1 lBzBkHIwXarSBGzK6xnC0wXm8mv2R/yVHukLT03VNUEE4uWXV9AtTZZ43s8kAxmj ltX/+a4vCDQ1i6E/Umj4YJBPHVZ/s8V0TTfKm+wd10M0wlkDSVfDQpzyMZDNM738 lwEt84neadh+kFKmu7ogaau+16To+zRBYuLuYrwAQWT/ayglWfz31rYSYRU9ibbx UW5FP9wtxzCJzHiLc0BoLESV85vN2oRCPMmJ+yQVD/jWsoFLxTeCjmw39//AyIy/ EcxGyJ+GyrYO0kXuyDXDmVlRdD5ll54z4c+jD+gTUgqfONGy/G9IPCZlfQJEqMlG N6MpPJT+NVnaSf9acjLE3O5pFu0n2060jfHKWC3t5rKpRi+6H6d1H8xrZGWupa2I gzpmR2LigovN4UjwFuNqYgtSvcmBz4Afe2yewO/wm+JnQxcKP+jofASbEisNNv+m 7uI9DBbclFpztauzTa0eopPvJJV5ZL1A6Hn3vK2+Unhmbdkb0LWtUVIG++0ugUfO cmOdgFt2Zh+0CHqiJRcsxxRn75NQdp0NWR5qqHkA9iroCIjf22wQWCHt8kGjQu69 pXCAiA4ihLRBOo+n4lLfLpJgf+Iexti3RrvM8Gcl4j4O8S8WTH1DQASxU+7i4yWK B1CbMNWJQWWHfmjDNaxcRkolD2/RGHnoF7ensEnmnBN7woBUhw8C9Xm3ZZRIwSmr eYUZPGPm3keIOwQNSSmbvMy/ietIjNBqnncltLKpgpZ+SHHjPLzD3wHwXmlz6Ha9 
RaJmIAROG/RRYFKwrU0sFWC+tzjZQJewJam/fLiOl8aZnay6uTxVCWNiYlih5BCk mcyj++ZvKREQ+5ULJeXrNlphyoSBN0vOBqIyhkdvDkEvULrJOn8LECpb7GXzs7yl r7q4qyFo+0AmQqNfQE/2ctgyt5Wj0kM/XEr2+MpcU9XGMuV72GZ58BfsmsXk2lYs edB8APBfblZo7gx+hNftV8Yuo3Udx5jswc59heWY4j94yJkj57qAGyyfpoAiZPay Ohye3Fxcp8fRsTeapA4zT9VZNmtd/rovs1MuypaLfW7Zf+SErk3V/oLDSSZtO2f7 5y2vzItJ41VCJELYikZexhAEqANZlluOoV/jF8J9GcKvLXYbEe49oXrp+Mpi8Wxy O/Ys7Gft9QUy0wmWjSAwbVR2qFo+TuAmqB58DQlGMpIVuKuLExBdxYcf9PgQKF/L 9SAsMHjk4gB3MQwVyF/ObnyvPPto0ZyijLoAfGcAoZBEPYVtSnZiCD/uPAgLH3ph hloG9jY3I527FP9CAEHCW2R/In9eDUe/Snk6nO6eIP5Oa7IJXbM5Hu4QZTQM6/DK HuUoHUNgQflGxBgTmZOB/4wHecXWVg8pXUkvU0EFi7sqOtByFRAITKmqZJEqdSyD NoTmkd4kGPY0j5NtHlTPAM9HDqrRUWX5loeYf4VRBqRX+kJNHwOIy9hgommPt+mn /izUM56RdA4bYbBWe72SJognQNU4+vjgDLGPWZc4Qg9CIeMWR9zcL1C3/6CALP1T Wi5+v5O55bRWwAKWMeAU9x4rzYGI3dM6Bj7ZsS9lv7xZwFTRuNFXBf98BxUKfrsn 8u1qFiHEJ+UdMRGQ55dsXrR1xCda9VfO33IokCY2jnwEFt948VZccirZ4kvTQ0c8 oD4u+3Q8fHqEOI2wchKjom/f8eagnp9kcHLBaOXdx2rmAL/bGlVsBA0jhKhsmVar 4N8F/aTVxreF2SYgZ4mvqeuF+OKIi32uP2DESmE7m0FBGJ8PjkMnc8OXiGKzbD+p 0qlFHsAVSZ251cysTjGW7lchjp6MJf+78cpdfYpNPjBKaAFH1OZcYfk7A1w5ha+e Ap/ejiM6z4PzK4AexxReDZ+bHrp4knS9Xca0iFRibIZcmXrXzxWK+ESYGX6fVbTE IRmFELXCYoFb58wO1l9R5w== ubelt-1.3.7/dev/ci_secret_gpg_key.pgp.enc000066400000000000000000000022221472470106000203330ustar00rootroot00000000000000U2FsdGVkX18fDz27Rs+cYMIkCTTY2smJw0xjqfVFbYWtE6XGrJa0g8Mu/3bnZH/L u9Ssc+0PbyVDu8WPoKlOIqCw41hMlphlXDY4AsnDq4Y/QD679R67IZD7L+CpRdiu OugsnBNIqOgqxKQ40x7syBdaHT1w2GBlXGjUqUTJOY5WKayl92zGtNYtl71cl6Ei 1j+PCRe9ZGdyM1Aw/0hjLB/4Q8i8eSZV7RCsPYoa29avfV+qvdvaO4/hrh4iPCu5 szKdFixmo2F6v0ocd+0G2JBlHgaLAoPGlaHd4RxhXu56BYdfvoEIRd5VkSeFc61N ulVMIjTQ1QSNuB3CWXIdw+yzSXUSgbG6felRmDgtAGX1aEKQF1/IbzUn7QqV3Tgz 8oB0udhNfSwWSTzpFGyVINCK6gWsNAFfJM5e8/i+duF8IHO9mxmducB/16MQU1LS Bzp2B/jPQBgUgMreZ2X6m1ckLhX5t+nnUcDASt0BzOypOTCcGUOupFLC+MD8scos 0IsACzWpxFYnfF7ZT6ljPs+Wtv63mNrOL3pZAi8iQW4Zt3e3sV5SuH/PlgeEQllI S/iYbsJxT34yohu/Tm7BuE7Ugk4AOgQwS4N91qmME9OWu7GXCxKrFwz7Yp/iBIEl Jehg5p9CIcW4/QxW1U2nRlwzhEQLIcQ8God9LYUsjDR87ddpcqqWPEKjPNUTq+0N guzRwoVRA4M5Y7JEJrHlLurNos2WJDvrEPqAxNLvOBpDHlKk50NpJBn7Z0BRayRr ZnrPSf68UuioHqwuIdhKM737o+bHzkqm1zmow/Piz3x3Z2XDcrVUI+gDPulUo8UL 6XBuSeyhZ020wTki6MXy4VBgmj9JT+aNBPMDLiVI7PwJuNNosvDSLXpdZOrMZPyv rApyr1X9ejFapMfilJHcuTgtq2PLfZiGR6MXsNt6nqdDJNqWeq7nv+2lUN6MuXHP LAn0X08NZzN5sIwGONg/y91ocKsh9lJVDS3ULeMzSqhjWcsW1+K6dQzPO69ZfqLA jUgFmPvi+77mr07KzNWBPdQW5jax8Y6D9KQ5wn7gnynawZ8Uj8M30jZqF4YWNIWC +MO7EvdvmXxmFV3FsXiqEU9w86W1PKcbhHlJYxA11wv9meTwplgh8SukRqfaaaQO ubelt-1.3.7/dev/ci_secret_gpg_subkeys.pgp.enc000066400000000000000000000033331472470106000212340ustar00rootroot00000000000000U2FsdGVkX18lC/fgWkC10FcF9u2guc3f9Ksn64TRkVrm4wtabl17/Y5yRE+3d7rg wtAvfDhjzvUX1BCZLM5x7H//b1Zp8VxfNSR7o9YUeLnTzT+ztB3Mb82Mzf9LqlLi zJ6vf2z917s+8bbFheNKIx6AqxPCS59k32teLR2CG4GmRVOw29ZAamubZxtelcKG 31XlYS5AICp1Sp+YRL/n+gScpFZsEAIpduTatiXsvd0FRR3GHzCGcWJJt9T0nx3T nEz1s0xyEeNhdQE8rDpDkLM/1Ckf2WS2wqQn9m5D1nMltEtZEG4H11KN38fAAvCz 4+wRy3TgX4GGt45/Ik33rm4LJ9SnQAXpTCoxC3Qz2gMpLaC8KILi8Sj5ifp7EkhV pgXTgPx5LbJfH9p3ksjbee25E2LKl9rgiexlGshPyhPyJrxRA1zFrLk5p69VU/LS oSdD3MimFs+1yapkQd3EcA3jFg2dm1wrwFTgUJrdDsx2lp4CesxK9q+/EQfwO7kb 95yGcVjcpEl2ajZNJ7RT6Qm2ok7jCRRVCVwCdwU0O1zChsem1XxDx4TY6v3/pMRY TB7ArgYUD/GQMrjE1G1oHo//DJjrcbnxCwSEX6p64aGm2ytKrndup6x21lHum1XG GJs3s3PzlC3H+jpHm3+E8oYf8lxzKt+GCtMqOBLj7ergvcmymLLXsIJsLNT5dgc8 /NkH6fl4a3STw0b1E/vrUn+Z93zyLqlvQFnPDiKPSH4FJzxz40rRUuz+a5BY+1TZ bxvrVueu+GINUIKz8GB5glPb62LL+3fbz/xijAh+EKoyyz3Ikrrhq6c0GgafIPZO ITkURK0I08iF63Tu0lQk8579Llou4xakeVX2QR7YG8zOcrMSR9e3QN+3xtUb1qcd 
XRq8dUpdZbgdF7dbVLOy9UpOgHXoZpoJGfnAZYB/esPmAiHCxHJjFAYQUkRKgHJk ta+W9QMkcq6sMwlWc9NTfYZVCy+uZIXymoVnOGkpuCXBDE9H7t6GIJibLIW61mbx VekjzDGCYmfFC7k3pf4DMkC5ny5SrTND2UXo57NpJkPwhqLnhW09XaoXxeutI4bM vOg7UROMaHOf+gDrlioJTKMQ/2QUJShdKbtIU/t3cRytT1k+I4+rxCOLY/VBu/kY svNikQyra+uY/oF0oLmED5e9zoWp60WcCZkT9ucbP5Qg6v4pFKsi57+fDiVwpPcq wGLGYWHzI9WHBVJdRLktG5py8palNGfRhZkAPxY8T5pmw63TqdWIADzRFq+St74h 4KRNbkSkJQfJJ54hPyVWcwDQ1/J4zbqPOTOfKw46E3DEPMMA7Q3AdbPEJLGCmy0X 9eG7aRXTWAFCxE2yLaU07jMNzVPvoXl7l6jm1l3hTlai7aKBjx5bK1VpnXOveHFj GYVGMMqEHl7iHbnUwRNPNsYJpyAJmvI8dpi9KZ6bvtQVd4AO0m3jKAHpaH0ms1vS umgV3D+1t9xJRiE5MsTwOCICYbiiJXoRxNmCNqD747X0pZTZHBVe0yEjzL0WiSSI GD9OjFq4IHtQwXTrkipQ2Us0ztIulF+Qwo5bdQ5Dy1FLn404lZnJBTr1+Xa/k/Ij h/p1umpsq9JkNLRzseBcRckI7q0OruZmIjhFmSEcCIxEufMDxnMR1jiwo3hEA54G V+84rP3v3qtrBlJ/Od7Y+6iQODTJOiLdd6Za9ZyGGfVGg+d7nPOWjSShOImg0ze+ ubelt-1.3.7/dev/examples/000077500000000000000000000000001472470106000152325ustar00rootroot00000000000000ubelt-1.3.7/dev/examples/example_named_product.py000066400000000000000000000015711472470106000221470ustar00rootroot00000000000000 def demo_named_product(): import ubelt as ub import pandas as pd import numpy as np def some_function(thresh1, thresh2): x, y = thresh1, thresh2 z = ((x ** 2 + y ** 2 - 1) ** 3 - x ** 2 * y ** 3) return np.log(z) s = 2.5 basis = { 'thresh1': np.linspace(-s, s, 128), 'thresh2': np.linspace(-s, s, 128), } grid_iter = ub.named_product(basis) rows = [] for params in grid_iter: key = ub.repr2(params, compact=1) row = { 'key': key, **params, } score = some_function(**ub.compatible(params, some_function)) row['score'] = score rows.append(row) data = pd.DataFrame(rows) print(data) # import seaborn as sns import kwplot sns = kwplot.autosns() sns.scatterplot(data=data, x='thresh1', y='thresh2', hue='score') ubelt-1.3.7/dev/examples/use_cases.py000066400000000000000000000050311472470106000175550ustar00rootroot00000000000000""" Ubelt Use Cases Each use case starts with a motivation and follows with a solution. This makes these cases perfect for presentations. """ def multiple_items_from_a_dictionary(): """ Spotlight: ubelt.take Motivation: Working with Lists of Dictionaries Requires: kwimage """ ... """ When working with data, a common pattern is to iterate through it, and gather information about the work to be done, so you can make a final structured pass through the data. In python we might do this by initializing an empty list and appending a **dictionary of information** to the list. (or you might yield dictionaries of information from a generator instead, either way you have a flat list). Some people might use lists of tuples instead of Lists of dictionaries, but using dictionaries makes it easy to add new information later (and it works very well with pandas). """ import ubelt as ub import kwimage kwimage_test_image_names = ['airport', 'amazon', 'astro', 'carl', 'lowcontrast'] rows = [] for test_image in kwimage_test_image_names: fpath = ub.Path(kwimage.grab_test_image_fpath(test_image)) imdata = kwimage.imread(fpath) row = { 'mean': imdata.mean(), 'std': imdata.std(), 'sum': imdata.sum(), 'max': imdata.max(), 'min': imdata.min(), } rows.append(row) """ For each row, you might want to grab multiple specific items from it. But having a separate assignment on each row wastes a lot of vertical space.
""" for row in rows: mean = row['mean'] std = row['std'] sum = row['sum'] min = row['min'] max = row['max'] """ You might put them one line explicitly, but that wastes a lot of horizontal space """ for row in rows: mean, std, sum, min, max = row['mean'], row['std'], row['sum'], row['min'], row['max'] """ What if we try to be clever? We can use a list comprehension """ for row in rows: mean, std, sum, min, max = [row[k] for k in ['mean', 'std', 'sum', 'min', 'max']] """ That's not too bad, but we can do better """ for row in rows: mean, std, sum, min, max = ub.take(row, ['mean', 'std', 'sum', 'min', 'max']) """ And now even better: """ for row in map(ub.udict, rows): mean, std, sum, min, max = row.take(['mean', 'std', 'sum', 'min', 'max']) ubelt-1.3.7/dev/experimental/000077500000000000000000000000001472470106000161115ustar00rootroot00000000000000ubelt-1.3.7/dev/experimental/async_executor_poc.py000066400000000000000000000207611472470106000223650ustar00rootroot00000000000000""" Attempt to allow ubelt.Executor to use Pythons builtin async / await Goal: be able to put ub.Executor in asyncio mode, which lets it coorporative scheduling. """ import concurrent.futures import asyncio import types # async def _async_worker(executor_reference, work_queue, initializer=None, initargs=None): # if initializer is not None: # try: # initializer(*initargs) # except BaseException: # _base.LOGGER.critical('Exception in initializer:', exc_info=True) # executor = executor_reference() # if executor is not None: # executor._initializer_failed() # return # try: # while True: # work_item = work_queue.get(block=True) # if work_item is not None: # work_item.run() # # Delete references to object. See issue16284 # del work_item # # attempt to increment idle count # executor = executor_reference() # if executor is not None: # executor._idle_semaphore.release() # del executor # continue # executor = executor_reference() # # Exit if: # # - The interpreter is shutting down OR # # - The executor that owns the worker has been collected OR # # - The executor that owns the worker has been shutdown. # if _shutdown or executor is None or executor._shutdown: # # Flag the executor as shutting down as early as possible if it # # is not gc-ed yet. # if executor is not None: # executor._shutdown = True # # Notice other workers # work_queue.put(None) # return # del executor # except BaseException: # _base.LOGGER.critical('Exception in worker', exc_info=True) async def _async_call(func, *args, **kwargs): return func(*args, **kwargs) class AsyncIOExecutor: """ Mimic concurrent.futures with asyncio This might not be possible. Defer... """ def __init__(self): self.max_workers = 0 self.loop = None self._work_queue = asyncio.Queue() try: self.loop = asyncio.get_event_loop() except RuntimeError: loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) self.loop = asyncio.get_event_loop() def __enter__(self): return self def __exit__(self, ex_type, ex_value, ex_traceback): ... 
def submit(self, fn, /, *args, **kwargs): coroutine = _async_call(fn, *args, **kwargs) task = self.loop.create_task(coroutine) return FakeFuture(task, self) # with self._shutdown_lock, _global_shutdown_lock: # if self._broken: # raise BrokenThreadPool(self._broken) # if self._shutdown: # raise RuntimeError('cannot schedule new futures after shutdown') # if _shutdown: # raise RuntimeError('cannot schedule new futures after ' # 'interpreter shutdown') # f = _AsyncFuture() # w = _AsyncWorkItem(f, fn, args, kwargs) # self._work_queue.put(w) # self._adjust_thread_count() # return f # return task def shutdown(self): ... def map(self, fn, *iterables, **kwargs): kwargs.pop('chunksize', None) kwargs.pop('timeout', None) if len(kwargs) != 0: # nocover raise ValueError('Unknown arguments {}'.format(kwargs)) fs = [self.submit(fn, *args) for args in zip(*iterables)] for f in fs: yield f.result() class FakeFuture: def __init__(self, task, executor): self.task = task self.executor = executor def result(self): return self.executor.loop.run_until_complete(self.task) class _AsyncWorkItem(object): def __init__(self, future, fn, args, kwargs): self.future = future self.fn = fn self.args = args self.kwargs = kwargs def run(self): if not self.future.set_running_or_notify_cancel(): return try: result = self.fn(*self.args, **self.kwargs) except BaseException as exc: self.future.set_exception(exc) # Break a reference cycle with the exception 'exc' self = None else: self.future.set_result(result) __class_getitem__ = classmethod(types.GenericAlias) class _AsyncFuture(concurrent.futures.Future): """ Non-threading / multiprocessing version of future for drop in compatibility with concurrent.futures. Attributes: func (Callable): function to be called args (Tuple): positional arguments to call the function with kw (Dict): keyword arguments to call the function with """ def __init__(self, func, *args, **kw): super(_AsyncFuture, self).__init__() # self.func = func # self.args = args # self.kw = kw # # self._condition = FakeCondition() # self._run_count = 0 # # fake being finished to cause __get_result to be called # self._state = concurrent.futures._base.FINISHED def _run(self): result = self.func(*self.args, **self.kw) self.set_result(result) self._run_count += 1 def set_result(self, result): """ Overrides the implementation to revert to pre python3.8 behavior Example: >>> # Just for coverage >>> from ubelt.util_futures import SerialFuture # NOQA >>> self = SerialFuture(print, 'arg1', 'arg2') >>> self.add_done_callback(lambda x: print('done callback got x = {}'.format(x))) >>> print('result() before set_result()') >>> ret = self.result() >>> print('ret = {!r}'.format(ret)) >>> self.set_result(1) >>> ret = self.result() >>> print('ret = {!r}'.format(ret)) >>> # >>> print('set_result() before result()') >>> self = SerialFuture(print, 'arg1', 'arg2') >>> self.add_done_callback(lambda x: print('done callback got x = {}'.format(x))) >>> self.set_result(1) >>> ret = self.result() >>> print('ret = {!r}'.format(ret)) """ with self._condition: self._result = result self._state = concurrent.futures._base.FINISHED # I'm cheating a little by not covering this. # Let's call it cheating in good faith. *shifty eyes* # I don't know how to test it, and it's not a critical piece of the # library. Consider it a bug; help wanted.
for waiter in self._waiters: # nocover waiter.add_result(self) self._condition.notify_all() self._invoke_callbacks() def _Future__get_result(self): # overrides private __getresult method if not self._run_count: self._run() return self._result async def expensive_async_call(): import random import asyncio time = random.randint(0, 10) sleep_coroutine = asyncio.sleep(time) return await sleep_coroutine GLOBAL_COUNTER = 0 def my_function(arg): import kwutil import random import time import asyncio global GLOBAL_COUNTER GLOBAL_COUNTER += 1 duration = random.random() * 1 time.sleep(duration) now = kwutil.datetime.now() snapshot = int(GLOBAL_COUNTER) result = {'arg': arg, 'rank': snapshot, 'time': now, 'duration': duration} return result def devcheck(): import ubelt as ub self = ub.Executor(mode='thread', max_workers=10) self = AsyncIOExecutor() futures = [] for i in ub.ProgIter(range(10), desc='submit'): future = self.submit(my_function, i) futures.append(future) self.loop.run_until_complete() with ub.Timer(label='collecting'): total = 0 for future in futures: result = future.result() print(result) total += result['duration'] print(f'total={total}') # future = async_call(func, *args) # import asyncio # loop = asyncio.get_event_loop() # self.loop.run_in_executor(func, *args) # future = self.loop.run_in_executor(None, func, *args) if __name__ == '__main__': """ CommandLine: python ~/code/ubelt/dev/experimental/async_executor_poc.py """ devcheck() ubelt-1.3.7/dev/experimental/better_deprecation.py000066400000000000000000000213251472470106000223300ustar00rootroot00000000000000""" The ubelt.schedule_deprecation function is pretty useful, but it could be generalized and would probably work better as a class. As a design / UX goal we need to ensure: 1. There is a concise way of getting minimal behavior where we raise a deprecation warning when we go into a deprecated codepath. 2. There is a way of controlling details in a readable way that is natural, expressive, but not burdensome. Such a basic API might look like: .. code:: python # Hacking import ubelt as ub import sys, os experiment_dpath = ub.Path('~/code/ubelt/dev/experimental').expand() sys.path.append(os.fspath(experiment_dpath)) from better_deprecation import * # NOQA Deprecation.schedule( ''' This is marking a feature that is deprecated and the first positional argument gives the user nearly complete control over the message. By default the warning emits now. Perhaps some extra context is added by trying to introspect which module you are currently in. ''') And perhaps the expressive API looks like .. code:: python # Hacking import ubelt as ub import sys, os experiment_dpath = ub.Path('~/code/ubelt/dev/experimental').expand() sys.path.append(os.fspath(experiment_dpath)) from better_deprecation import * # NOQA import logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) logger.info('ensure this prints for the test') import rich self = Deprecation( warn='1.1.0', error='1.2.0', remove='1.3.0', message='The foobar is deprecated.', migration=ub.paragraph( ''' This text should explain the way to migrate to non-deprecated behavior. '''), warncls=DeprecationWarning, logger=logger, print=rich.print, ) print(f'self.__dict__ = {ub.urepr(self.__dict__, nl=1)}') def foo(): self.emit() foo() We should also have a decorator API: ..
code:: python # Hacking import ubelt as ub import sys, os experiment_dpath = ub.Path('~/code/ubelt/dev/experimental').expand() sys.path.append(os.fspath(experiment_dpath)) @Deprecation('', module_name='ubelt', print=True) def old_function(): ... old_function() What should the class be called? * Deprecation? * Deprecator? * Deprecate? """ class Deprecation: """ """ def __init__( self, message=None, name=None, type=None, migration=None, deprecate='now', warn='soon', error='soon', remove='soon', module_name=None, module_version=None, logger=None, print=None, warncls=DeprecationWarning, ): self.message = message self.name = name self.type = type self.migration = migration self.deprecate = deprecate self.warn = warn self.error = error self.remove = remove self.module_name = module_name self.module_version = module_version self.warncls = warncls self.logger = logger if print is True: import builtins print = builtins.print self.print = print self.loud = False self._modname_str = None self._current_module_version = None self._deprecate_now = None self._remove_now = None self._error_now = None self._deprecate_str = None self._remove_str = None self._error_str = None self._full_message = None @classmethod def schedule( cls, message=None, name=None, type=None, migration=None, deprecate='now', error='soon', remove='soon', module_name=None, module_version=None, warncls=DeprecationWarning, stacklevel=1, ): """ Concise classmethod to construct and emit the deprecation warning. """ self = cls( message=message, name=name, type=type, migration=migration, deprecate=deprecate, error=error, remove=remove, module_name=module_name, module_version=module_version, warncls=warncls, ) self.emit(stacklevel=1 + stacklevel) return self def _resolve_module_version(self): import sys from packaging.version import parse as Version if self.module_name is not None: module = sys.modules[self.module_name] self._current_module_version = Version(module.__version__) else: # TODO: use the inspect module to get the function / module this was # called from and fill in unspecified values. self._current_module_version = 'unknown' if self.module_name is None: self._modname_str = '' else: self._modname_str = f'{self.module_name} ' def _handle_when(self, when, default): from packaging.version import parse as Version if when is None: is_now = default when_str = '' elif isinstance(when, str): if when in {'soon', 'now'}: when_str = ' {}{}'.format(self._modname_str, when) is_now = when == 'now' else: when = Version(when) when_str = ' in {}{}'.format(self._modname_str, when) if self._current_module_version == 'unknown': is_now = default else: is_now = self._current_module_version >= when else: is_now = bool(when) when_str = '' return is_now, when_str def _resolve_timeline(self): self._deprecate_now, self._deprecate_str = self._handle_when( self.deprecate, default=True ) self._remove_now, self._remove_str = self._handle_when( self.remove, default=False ) self._error_now, self._error_str = self._handle_when(self.error, default=False) def _build_full_message(self): self._resolve_module_version() self._resolve_timeline() parts = [] if self.message: parts.append(self.message) if self.name is not None: _name = self.name or "" _type = self.type or "" what_str = f'The "{_name}" {_type}' else: what_str = 'This' parts.append( f'{what_str} was deprecated{self._deprecate_str}, will cause ' f'an error{self._error_str} and will be removed{self._remove_str}. ' ) parts.append( f'The current {self._modname_str}version is {self._current_module_version}. 
' ) if self.migration: parts.append(self.migration) # TODO: make the message more customizable. self._full_message = ' '.join(parts).strip() def emit(self, stacklevel=1): """ Emit the deprecation message via the requested channels. """ import warnings self._build_full_message() if self._remove_now: error_message = ( 'Forgot to remove deprecated: ' + self._full_message + ' ' + 'Remove the function, or extend the scheduled remove version.' ) if self.logger is not None: self.logger.error(error_message, stacklevel=1 + stacklevel) if self.print: self.print(error_message) raise AssertionError(error_message) if self._error_now: if self.logger is not None: self.logger.error(self._full_message, stacklevel=1 + stacklevel) if self.print: self.print(self._full_message) raise RuntimeError(self._full_message) if self._deprecate_now: if self.logger is not None: self.logger.warn(self._full_message, stacklevel=1 + stacklevel) if self.print: self.print(self._full_message) warnings.warn(self._full_message, self.warncls, stacklevel=1 + stacklevel) return self def decorator(self, func): import functools if self.name is None: self.name = func.__name__ if self.type is None: self.type = type(func).__name__ @functools.wraps(func) def _deprecated_func(*args, **kwargs): self.emit() result = func(*args, **kwargs) return result _deprecated_func._deprecation = self return _deprecated_func def __call__(self, func): return self.decorator(func) ubelt-1.3.7/dev/experimental/demodis.py000066400000000000000000000013601472470106000201070ustar00rootroot00000000000000def demo(): import dis def func1(x): return x + 1 def func2(x): if True: return x + 1 else: return x + 2 import io file = io.StringIO() print('--- DIS1 ---') dis.dis(func1, file=file) file.seek(0) dis1 = file.read() print('--- DIS2 ---') file = io.StringIO() dis.dis(func2, file=file) file.seek(0) dis2 = file.read() print('dis1 =\n{}'.format(dis1)) print('dis2 =\n{}'.format(dis2)) print('dis1 == dis2 = {}'.format(dis1 == dis2)) print('repr(dis1) ~= repr(dis2) = {}'.format(repr(dis1)[10:] == repr(dis2)[10:])) if __name__ == '__main__': """ CommandLine: python ~/code/ubelt/dev/demodis.py """ demo() ubelt-1.3.7/dev/experimental/fsspec_downloader.py000066400000000000000000000026231472470106000221670ustar00rootroot00000000000000""" Alternative to ub.DownloadManager """ def check_fsspec(): import ubelt as ub from os.path import join dpath = ub.ensure_app_cache_dir('ubelt/simple_server') info = ub.cmd(['python', '-m', 'http.server', '--directory', dpath], detach=True) fnames = ['file_{}.txt'.format(i) for i in range(100)] for fname in fnames: ub.writeto(join(dpath, fname), ub.hash_data(fname)) # info = ub.cmd('python -m http.server --directory "{}"'.format(dpath), verbose=3) # proc = info['proc'] # TODO: ub.cmd return with some object that can tee the output on demand? 
# _proc_iteroutput = ub.util_cmd._proc_iteroutput_thread(proc) # line = next(_proc_iteroutput) urls = ['http://localhost:8000/{}'.format(fname) for fname in fnames] import fsspec file = fsspec.open(urls[0]).open().read() with ub.Timer(label='fsspec.cat', verbose=1): fs = fsspec.filesystem("http") out = fs.cat(urls) # fetches data concurrently with ub.Timer(label='ub.DownloadManager', verbose=1): dpath = ub.ensure_app_cache_dir('ubelt/simple_download_root') dman = ub.DownloadManager(dpath) for url in urls: dman.submit(url) results = [] for future in dman.as_completed(prog=True): fpath = future.result() results.append(fpath) # print('fpath = {!r}'.format(fpath)) info['proc'].terminate() # terminate the detached server process ubelt-1.3.7/dev/experimental/google_docstring_mypy_plugin.py000066400000000000000000000023431472470106000244510ustar00rootroot00000000000000""" POC google docstring plugin for mypy """ from mypy.plugin import Plugin from typing import Callable, Optional from mypy.plugin import FunctionSigContext from mypy.types import CallableType class CustomPlugin(Plugin): """ cd $HOME/code/ubelt mypy -m ubelt.util_dict stubgen -m ubelt.util_dict """ # def get_type_analyze_hook(self, fullname: str): # print('get_type_analyze_hook: fullname = {!r}'.format(fullname)) def get_function_signature_hook(self, fullname: str ) -> Optional[Callable[[FunctionSigContext], CallableType]]: """Adjust the signature of a function. This method is called before type checking a function call. Plugin may infer a better type for the function. from lib import Class, do_stuff do_stuff(42) Class() This method will be called with 'lib.do_stuff' and then with 'lib.Class'. """ if 'ubelt' in fullname or 'util_' in fullname: print('get_function_signature_hook: fullname = {!r}'.format(fullname)) return None def plugin(version: str): # ignore version argument if the plugin works with all mypy versions. return CustomPlugin ubelt-1.3.7/dev/experimental/indexable_walk_variant.py000066400000000000000000000075221472470106000231660ustar00rootroot00000000000000def _walk_iterables(self): """ EXPERIMENTAL # TODO: it would likely be helpful to have some method of directly # modifying the underlying value without needing to traverse the entire # path to get to it. # This could be implemented in a few ways: ## # 1. We send the new value to the generator, but this might result in # an awkward API with the existing "False" method of controlling # iteration. ## # 2. A new walking method that works more like os.walk where instead # of yielding for every item, we yield for every iterable. This seems # like a nicer change and the user could simply modify the returned # iterable inplace to both prevent subsequent iteration and modify # values. ## # 3. We can return the parent with the path, value. # this is kinda blegh. This is the solution for point 2. But this might not have an advantage over the existing walk if the previous walk has access to iterable values anyway... In fact this probably isn't worth it. This is a new style generator that is more similar to os.walk Yields: Tuple[List, Iterable]: path (List) - a "path" through the nested data structure level (Iterable) - the iterable at this level.
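        Note:
            A minimal usage sketch (my own illustration; the Ignore block
            below has richer demos). Assuming a small nested container:

            .. code:: python

                walker = ub.IndexableWalker({'a': {'b': 1}, 'c': [2, 3]})
                for path, nodes, leafs, data in _walk_iterables(walker):
                    print(path, nodes, leafs)
                # path=[]    nodes=['a', 'c'] leafs=[]
                # path=['c'] nodes=[]         leafs=[0, 1]  (LIFO stack order)
                # path=['a'] nodes=[]         leafs=['b']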
Ignore: >>> # A somewhat clever way of mapping a filesystem into a dict >>> fs_tree = {} >>> fs_walker = ub.IndexableWalker(fs_tree) >>> root = ub.Path.appdir('ubelt') >>> for r, ds, fs in root.walk(): >>> p = ['.'] + list(r.relative_to(root).parts) >>> fs_walker[p] = {} >>> fs_walker[p].update({f: None for f in fs}) >>> # The above gives us some richer demo data >>> fs_walker = ub.IndexableWalker(fs_tree) >>> for path, nodes, leafs, data in _walk_iterables(fs_walker): >>> print(f'path={path}') >>> print(f'nodes={nodes}') >>> print(f'leafs={leafs}') >>> import numpy as np >>> import ubelt as ub >>> data = ub.ddict(lambda: int) >>> data['foo'] = ub.ddict(lambda: int) >>> data['bar'] = np.array([1, 2, 3]) >>> data['foo']['a'] = 1 >>> data['foo']['b'] = np.array([1, 2, 3]) >>> data['foo']['c'] = [1, 2, [[1, 1, 1, 1], [], [1, {'a': [[2, 1], [3, 4]]}, 2, [2, 3]]]] >>> data['baz'] = 3 >>> print('data = {}'.format(ub.repr2(data, nl=True))) >>> # We can walk through every node in the nested tree >>> walker = ub.IndexableWalker(data) >>> for path, nodes, leafs, data in _walk_iterables(walker): >>> print(f'path={path}') >>> print(f'leafs={leafs}') """ stack = [([], self.data)] while stack: path, data = stack.pop() # Create an items iterable depending on the indexable data type if isinstance(data, self.list_cls): items = enumerate(data) elif isinstance(data, self.dict_cls): items = data.items() else: raise TypeError(type(data)) nodes = [] leafs = [] # Iterate through this level and determine which keys are # leaf-endpoints and which keys are recursable nodes. for key, value in items: if isinstance(value, self.indexable_cls): nodes.append(key) else: leafs.append(key) # The user is given: # * path - where we are in the tree # * data - the iterable at this level # * nodes - which keys will be descended into # * leafs - which keys will not be descended into yield path, nodes, leafs, data # If the user modifies: "nodes" that will change how we iterate. # The user can also modify data at this level without hassle for key in nodes: stack.append((path + [key], data[key])) ubelt-1.3.7/dev/experimental/minimal_refactor_proposal.md000066400000000000000000000056441472470106000236750ustar00rootroot00000000000000Draft: Proposal to Refactor Ubelt as an Un-Standard Library ============================================================== This may well have to be reconciled with the existing but dormant unstdlib project. I've found ubelt to be one of the most important pieces of software I've developed over the past decade. The fact that it contains everything in a single place is great for me, but for new people it can (1) prove daunting and (2) provide too many things they don't need. It would be nice if things were separated out into separate pip installable modules. This has been done to some degree with progiter and timerit. But this also has a negative impact on ubelt itself, because now it has a choice: either vendor in those libraries, or depend on them. Originally we added them as dependencies, but eventually settled on vendoring to reduce our apparent dependency footprint and because of issues with cross-domain documentation (which should be solved via intersphinx). However, this first problem is mitigated if all of the packages are available as standalone modules, because then the user can just depend on what they actually use, but it becomes difficult to interoperate with normal ubelt (all of these std functions are top-level by default) paradigms. We seek the best of both worlds.
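As a rough sketch of how the two worlds could coexist (my own illustration,
not a settled design), the top-level ubelt package would simply depend on
each standalone distribution and re-export its public names:

    # hypothetical ubelt/__init__.py after a split
    from progiter import ProgIter            # already a standalone package
    from ubhash import hash_data, hash_file  # "ubhash" is a placeholder name

Users who want everything keep `import ubelt as ub` working unchanged, while
minimal users depend directly on only the pieces they use.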
Consider if ubelt were broken into subpackages: What would the names of the standalone packages be? Some packages might get standalone love: progiter, that's about it. But misc ones could be prefixed with ub or something. Given this splitup, the ubelt package would point at each member and depend on it. It would expose it as normal. Let's try to take a subset of ubelt and map it to packages. progiter ubelt.ProgIter ubhash - `ub.hash_data` - `ub.hash_file` ubpathlib - ub.Path ubcmd - ub.cmd ubcache - Cacher, CacheStamp, memoize, memoize_property - can this be folded into memoize? memoize_method ubdownload - grabdata, download ubdict ubfutures JobPool Executor ubimport modname_to_modpath import_module_from_name import_module_from_path ubiter ? ubseq ? ublist ? ubbase? ubdevelop ? ubdev ? ubdesign ? ubmaintain ? ubwarn schedule_deprecation identity? flatten chunks group_items take compress peek allsame iterable unique named_product iter_window find_duplicates varied_values argmax argmin argsort argunique boolmask? unique_flags? dzip oset ubtext codeblock paragraph hzcat CaptureStdout ? CaptureStream ? IndexableWalker indexable_allclose do NiceRepr, NoParam, ubtime: timestamp Timer timeparse ### Cut argflag argval delete repr2 ? This might be a good function, but it really needs a new name. expandpath ensuredir map_vals dict_isect dict_diff odict ddict - This might still be useful color_text symlink - rename and put in Path? find_exe - maybe? Or just add which to path? highlight_code compatible inject_method zopen ubelt-1.3.7/dev/experimental/pipecall.py000066400000000000000000000010141472470106000202500ustar00rootroot00000000000000""" I want a way to cast to a list without having to backspace. This is a way to do it. But at what cost? """ class RorListType(type): cls = list @classmethod def __ror__(mcls, obj): return mcls.cls(obj) @classmethod def __or__(mcls, obj): return mcls.cls(obj) class RorList(RorListType.cls, metaclass=RorListType): """ Example: >>> iter(range(5)) | RorList [0, 1, 2, 3, 4] >>> L = RorList >>> range(3) | L [0, 1, 2] """ ... ubelt-1.3.7/dev/experimental/proposal_cacher.md000066400000000000000000000057721472470106000216100ustar00rootroot00000000000000
I want to focus on the context: stamp = ub.CacheStamp('demodata', depends=kwargs, dpath=dpath) if stamp.expired(): [code] stamp.renew() I could make `CacheStamp` a context manager and tell it to do it in its `__exit__` clause, as such: stamp = ub.CacheStamp('demodata', depends=kwargs, dpath=dpath) if stamp.expired(): with stamp: [code] This removes the need for the `stamp.renew`, makes the logic contiguous, has the same vertical space, however, it adds a TON of horizontal space depending on the complexity of the logic. It's often beneficial to minimize nesting and try restricting it 2 or 3 levels. BUT if we had just a tiny bit of new syntax rules we could write something like this: stamp = ub.CacheStamp('demodata', depends=kwargs, dpath=dpath) if stamp.expired() with stamp: [code] There are even more conservative changes, like requiring a colon before the `with`: `if stamp.expired(): with stamp:` I know this is just saving a line. But I use this pattern with `ubelt.CacheStamp` and `ubelt.Cacher` frequently, and I always feel a strong want for this syntactic sugar as I'm writing it. Wasting that horizontal space is not an option, and I really would like the caching logic to be contiguous. To be clear, in the proposed syntax: if [condition] with [obj]: [code] Would behave exactly as: if [condition]: with [obj]: [code] Is there any chance that this conditional context manager syntax might be considered? Does anyone but myself think this might be a good idea? On the python ideas list someone mentioned this idea is generalized by allowing the programmer to omit the newline after an if statement. There are pros and cons to the idea, but I kind of like it as an option (although it will certainly be abused in ways I will not like). ubelt-1.3.7/dev/experimental/setdict.py000066400000000000000000000523201472470106000201240ustar00rootroot00000000000000""" An implementation of a set-enabled dictionary that we might include References: .. [SetDictRecipe1] https://gist.github.com/rossmacarthur/38fa948b175abb512e12c516cc3b936d .. [SetDictRecipe2] https://code.activestate.com/recipes/577471-setdict/ """ import itertools as it from ubelt import NoParam class SetDict(dict): """ A dictionary subclass where all set operations are defined. All of the set operations are defined in a key-wise fashion, that is it is like performing the operation on sets of keys. Value-wise or item-wise operations are in general not hashable and therefore not supported. A heavier extension would be needed for that. 
Example: >>> import ubelt as ub >>> primes = SetDict({v: f'prime_{v}' for v in [2, 3, 5, 7, 11]}) >>> evens = SetDict({v: f'even_{v}' for v in [0, 2, 4, 6, 8, 10]}) >>> odds = SetDict({v: f'odd_{v}' for v in [1, 3, 5, 7, 9, 11]}) >>> squares = SetDict({v: f'square_{v}' for v in [0, 1, 4, 9]}) >>> div3 = SetDict({v: f'div3_{v}' for v in [0, 3, 6, 9]}) >>> # All of the set methods are defined >>> results1 = {} >>> ints = results1['ints'] = odds.union(evens) >>> results1['composites'] = ints.difference(primes) >>> results1['even_primes'] = evens.intersection(primes) >>> results1['odd_nonprimes_and_two'] = odds.symmetric_difference(primes) >>> print('results1 = {}'.format(ub.repr2(results1, nl=2, sort=True))) results1 = { 'composites': { 0: 'even_0', 1: 'odd_1', 4: 'even_4', 6: 'even_6', 8: 'even_8', 9: 'odd_9', 10: 'even_10', }, 'even_primes': { 2: 'even_2', }, 'ints': { 0: 'even_0', 1: 'odd_1', 2: 'even_2', 3: 'odd_3', 4: 'even_4', 5: 'odd_5', 6: 'even_6', 7: 'odd_7', 8: 'even_8', 9: 'odd_9', 10: 'even_10', 11: 'odd_11', }, 'odd_nonprimes_and_two': { 1: 'odd_1', 2: 'prime_2', 9: 'odd_9', }, } >>> # As well as their corresponding binary operators >>> assert results1['ints'] == odds | evens >>> assert results1['composites'] == ints - primes >>> assert results1['even_primes'] == evens & primes >>> assert results1['odd_nonprimes_and_two'] == odds ^ primes >>> # These can also be used as classmethods >>> assert results1['ints'] == SetDict.union(odds, evens) >>> assert results1['composites'] == SetDict.difference(ints, primes) >>> assert results1['even_primes'] == SetDict.intersection(evens, primes) >>> assert results1['odd_nonprimes_and_two'] == SetDict.symmetric_difference(odds, primes) >>> # The n-ary variants are also implemented >>> results2 = {} >>> results2['nary_union'] = SetDict.union(primes, div3, odds) >>> results2['nary_difference'] = SetDict.difference(primes, div3, odds) >>> results2['nary_intersection'] = SetDict.intersection(primes, div3, odds) >>> # Note that the definition of symmetric difference might not be what you think in the n-ary case. >>> results2['nary_symmetric_difference'] = SetDict.symmetric_difference(primes, div3, odds) >>> print('results2 = {}'.format(ub.repr2(results2, nl=2, sort=True))) results2 = { 'nary_difference': { 2: 'prime_2', }, 'nary_intersection': { 3: 'prime_3', }, 'nary_symmetric_difference': { 0: 'div3_0', 1: 'odd_1', 2: 'prime_2', 3: 'odd_3', 6: 'div3_6', }, 'nary_union': { 0: 'div3_0', 1: 'odd_1', 2: 'prime_2', 3: 'odd_3', 5: 'odd_5', 6: 'div3_6', 7: 'odd_7', 9: 'odd_9', 11: 'odd_11', }, } >>> # Lastly there is also a subdict method, which is similar to >>> # intersection, but it will error if the key doesn't exist unless >>> # a default value is given >>> sub_primes = primes.subdict([2, 3, 5]) >>> import pytest >>> with pytest.raises(KeyError): >>> sub_primes = primes.subdict([1, 3, 5]) >>> bad_sub_primes = primes.subdict([1, 3, 5], default='DEFAULT') >>> print(f'sub_primes={sub_primes}') >>> print(f'bad_sub_primes={bad_sub_primes}') sub_primes={2: 'prime_2', 3: 'prime_3', 5: 'prime_5'} bad_sub_primes={1: 'DEFAULT', 3: 'prime_3', 5: 'prime_5'}
>>> primes = SetDict({2: 'a', 3: 'b', 5: 'c', 7: 'd', 11: 'e'}) >>> primes - {2, 3} {5: 'c', 7: 'd', 11: 'e'} >>> primes & {2, 3} {2: 'a', 3: 'b'} >>> # Union does need to have a second dictionary >>> import pytest >>> with pytest.raises(AttributeError): >>> primes | {2, 3} """ # We could just use the builtin variant for this specific operation def __or__(self, other): """ The | union operator """ return self.union(other) def __and__(self, other): """ The & intersection operator """ return self.intersection(other) def __sub__(self, other): """ The - difference operator """ return self.difference(other) def __xor__(self, other): """ The ^ symmetric_difference operator """ return self.symmetric_difference(other) ### Main set operations def union(self, *others): """ Return the key-wise union of two or more dictionaries. For items with intersecting keys, dictionaries towards the end of the sequence are given precedence. Args: self (SetDict | dict): if called as a static method this must be provided. *others : other dictionary like objects that have an ``items`` method. (i.e. it must return an iterable of 2-tuples where the first item is hashable.) Returns: dict : whatever the dictionary type of the first argument is Example: >>> a = SetDict({k: 'A_' + chr(97 + k) for k in [2, 3, 5, 7]}) >>> b = SetDict({k: 'B_' + chr(97 + k) for k in [2, 4, 0, 7]}) >>> c = SetDict({k: 'C_' + chr(97 + k) for k in [2, 8, 3]}) >>> d = SetDict({k: 'D_' + chr(97 + k) for k in [9, 10, 11]}) >>> e = SetDict({k: 'E_' + chr(97 + k) for k in []}) >>> a | b {2: 'B_c', 3: 'A_d', 5: 'A_f', 7: 'B_h', 4: 'B_e', 0: 'B_a'} >>> a.union(b) >>> a | b | c >>> res = SetDict.union(a, b, c, d, e) >>> print(ub.repr2(res, sort=1, nl=0, si=1)) {0: B_a, 2: C_c, 3: C_d, 4: B_e, 5: A_f, 7: B_h, 8: C_i, 9: D_j, 10: D_k, 11: D_l} """ cls = self.__class__ args = it.chain([self], others) new = cls(it.chain.from_iterable(d.items() for d in args)) return new def intersection(self, *others): """ Return the key-wise intersection of two or more dictionaries. All items returned will be from the first dictionary for keys that exist in all other dictionaries / sets provided. Args: self (SetDict | dict): if called as a static method this must be provided. *others : other dictionary or set like objects that can be coerced into a set of keys. Returns: dict : whatever the dictionary type of the first argument is Example: >>> a = SetDict({k: 'A_' + chr(97 + k) for k in [2, 3, 5, 7]}) >>> b = SetDict({k: 'B_' + chr(97 + k) for k in [2, 4, 0, 7]}) >>> c = SetDict({k: 'C_' + chr(97 + k) for k in [2, 8, 3]}) >>> d = SetDict({k: 'D_' + chr(97 + k) for k in [9, 10, 11]}) >>> e = SetDict({k: 'E_' + chr(97 + k) for k in []}) >>> a & b {2: 'A_c', 7: 'A_h'} >>> a.intersection(b) >>> a & b & c >>> res = SetDict.intersection(a, b, c, d, e) >>> print(ub.repr2(res, sort=1, nl=0, si=1)) {} """ cls = self.__class__ isect_keys = set(self.keys()) for v in others: isect_keys.intersection_update(v) new = cls((k, self[k]) for k in self if k in isect_keys) return new def difference(self, *others): """ Return the key-wise difference between this dictionary and one or more other dictionary / keys. The returned items will be from the first dictionary, and will only contain keys that do not appear in any of the other dictionaries / sets. Args: self (SetDict | dict): if called as a static method this must be provided. *others : other dictionary or set like objects that can be coerced into a set of keys. 
Returns: dict : whatever the dictionary type of the first argument is Example: >>> a = SetDict({k: 'A_' + chr(97 + k) for k in [2, 3, 5, 7]}) >>> b = SetDict({k: 'B_' + chr(97 + k) for k in [2, 4, 0, 7]}) >>> c = SetDict({k: 'C_' + chr(97 + k) for k in [2, 8, 3]}) >>> d = SetDict({k: 'D_' + chr(97 + k) for k in [9, 10, 11]}) >>> e = SetDict({k: 'E_' + chr(97 + k) for k in []}) >>> a - b {3: 'A_d', 5: 'A_f'} >>> a.difference(b) >>> a - b - c >>> res = SetDict.difference(a, b, c, d, e) >>> print(ub.repr2(res, sort=1, nl=0, si=1)) {5: 'A_f'} """ cls = self.__class__ other_keys = set() for v in others: other_keys.update(v) # Looping over original keys is important to maintain partial order. new = cls((k, self[k]) for k in self.keys() if k not in other_keys) return new def symmetric_difference(self, *others): """ Return the key-wise symmetric difference between this dictionary and one or more other dictionaries. Returns items that are (key-wise) in an odd number of the given dictionaries. This is consistent with the standard n-ary definition of symmetric difference [WikiSymDiff]_ and corresponds with the xor operation. It is unclear if this is the best definition, and I'm open to modifying this API. See also [PySymDiff]_. Args: self (SetDict | dict): if called as a static method this must be provided. *others : other dictionary or set like objects that can be coerced into a set of keys. Returns: dict : whatever the dictionary type of the first argument is References: .. [PySymDiff] https://www.geeksforgeeks.org/python-symmetric-difference-of-dictionaries/ .. [WikiSymDiff] https://en.wikipedia.org/wiki/Symmetric_difference Example: >>> a = SetDict({k: 'A_' + chr(97 + k) for k in [2, 3, 5, 7]}) >>> b = SetDict({k: 'B_' + chr(97 + k) for k in [2, 4, 0, 7]}) >>> c = SetDict({k: 'C_' + chr(97 + k) for k in [2, 8, 3]}) >>> d = SetDict({k: 'D_' + chr(97 + k) for k in [9, 10, 11]}) >>> e = SetDict({k: 'E_' + chr(97 + k) for k in []}) >>> a ^ b {3: 'A_d', 5: 'A_f', 4: 'B_e', 0: 'B_a'} >>> a.symmetric_difference(b) >>> a ^ b ^ c >>> res = SetDict.symmetric_difference(a, b, c, d, e) >>> print(ub.repr2(res, sort=1, nl=0, si=1)) {0: B_a, 2: C_c, 4: B_e, 5: A_f, 8: C_i, 9: D_j, 10: D_k, 11: D_l} """ from collections import defaultdict cls = self.__class__ accum_count = defaultdict(lambda: 0) accum_refs = {} for d in it.chain([self], others): for k in d.keys(): accum_count[k] += 1 accum_refs[k] = d new = cls((k, accum_refs[k][k]) for k, count in accum_count.items() if count % 2 == 1) return new ### Extra set operations def subdict(self, keys, default=NoParam): """ Get a subset of a dictionary Args: self (Dict[KT, VT]): superset dictionary keys (Iterable[KT]): keys to take from ``dict_`` default (Optional[object] | NoParamType): if specified uses default if keys are missing.
Raises: KeyError : if a key does not exist and default is not specified Example: >>> a = SetDict({k: 'A_' + chr(97 + k) for k in [2, 3, 5, 7]}) >>> s = a.subdict({2, 5}) >>> print('s = {}'.format(ub.repr2(s, nl=0))) s = {2: 'A_c', 5: 'A_f'} >>> import pytest >>> with pytest.raises(KeyError): >>> s = a.subdict({2, 5, 100}) >>> s = a.subdict({2, 5, 100}, default='DEF') >>> print('s = {}'.format(ub.repr2(s, nl=0))) s = {2: 'A_c', 5: 'A_f', 100: 'DEF'} """ cls = self.__class__ if default is NoParam: new = cls([(k, self[k]) for k in keys]) else: new = cls([(k, self.get(k, default)) for k in keys]) return new sdict = SetDict class UbeltDict(SetDict): def map_keys(self, func): import ubelt as ub return ub.map_keys(func, self) def map_values(self, func): import ubelt as ub return ub.map_values(func, self) def invert(self, unique_vals=True): import ubelt as ub return ub.invert_dict(self, unique_vals=unique_vals) def intersection_method_bench(): """ Ignore: import random num_sets = 100 num_others = 100 num_core = 5 import ubelt import sys sys.path.append(ubelt.expandpath('~/code/ultrajson/json_benchmarks')) sys.path.append(ubelt.expandpath('~/code/ultrajson')) from json_benchmarks.benchmarker.benchmarker import * # NOQA import benchmarker def data_lut(num_sets=100, num_others=100, num_core=5): rng = random.Random(0) core = set(range(0, num_core)) datas = [] for _ in range(num_sets): nextset = core.copy() nextset.update({rng.randint(0, 1000) for _ in range(num_others)}) datas.append(nextset) return datas def method_loop_isect_update(datas): isect_keys = set(datas[0]) for v in datas[1:]: isect_keys.intersection_update(v) def method_isect_map_set(datas): set.intersection(*map(set, datas)) basis = { 'impl': ['method_loop_isect_update', 'method_isect_map_set'], 'num_sets': [1, 3, 10, 50, 100,] } impl_lut = vars() self = Benchmarker(name='set-isect', num=10000, bestof=30, basis=basis) for params in self.iter_params(): impl = impl_lut[params['impl']] datas = data_lut(**ub.compatible(params, data_lut)) for timer in self.measure(): with timer: impl(datas) print('self.result = {}'.format(ub.repr2(self.result.__json__(), sort=0, nl=2, precision=8))) dpath = ub.Path.appdir('benchmarker/demo').ensuredir() self.dump_in_dpath(dpath) results = self.result.to_result_list() metric_key = "mean_time" analysis = benchmarker.result_analysis.ResultAnalysis( results, metrics=[metric_key], params=["impl"], metric_objectives={ "min_time": "min", "mean_time": "min", "time": "min", }, ) import kwplot kwplot.autompl() analysis.analysis() xlabel = 'num_sets' metric_key = 'mean_time' group_labels = { # 'fig': ['u'], # 'col': ['y', 'v'], 'hue': ['impl'], } analysis.plot(xlabel, metric_key, group_labels) """ def bench(): """ Benchmark: import sys, ubelt sys.path.append(ubelt.expandpath('~/code/ubelt/dev')) from setdict import * # NOQA # pip install nprime import nprime size = 10 primes = SetDict({v: f'prime_{v}' for v in nprime.generate_primes(size)}) evens = SetDict({v: f'even_{v}' for v in range(0, size, 2)}) odds = SetDict({v: f'odd_{v}' for v in range(1, size, 2)}) ints = SetDict({v: f'int_{v}' for v in range(0, size)}) squares = SetDict({v: f'square_{v}' for _ in range(0, size) if (v:= _ ** 2) < size}) div3 = SetDict({v: f'div3_{v}' for v in range(0, size) if v % 3 == 0}) evens - squares odds & primes squares | primes odds ^ primes ints.intersection(odds, primes) evens.union(odds, primes) ints.difference(primes, squares) odds.symmetric_difference(primes, div3) ints.subset(primes.keys()) base_dicts = {} base_dicts['primes'] = 
SetDict({v: f'prime_{v}' for v in nprime.generate_primes(size)}) base_dicts['evens'] = SetDict({v: f'even_{v}' for v in range(0, size, 2)}) base_dicts['odds'] = SetDict({v: f'odd_{v}' for v in range(1, size, 2)}) base_dicts['ints'] = SetDict({v: f'int_{v}' for v in range(0, size)}) base_dicts['squares'] = SetDict({v: f'square_{v}' for _ in range(0, size) if (v:= _ ** 2) < size}) #### Benchmarks keysets = {k: set(v.keys()) for k, v in base_dicts.items()} many_keysets = {k: [{n} for n in v.keys()] for k, v in base_dicts.items()} import timerit ti = timerit.Timerit(10000, bestof=50, verbose=2) k1 = 'primes' k2 = 'odds' d1 = base_dicts[k1] d2 = base_dicts[k2] ks2 = keysets[k2] many_ks2 = many_keysets[k2] print('---') for timer in ti.reset(f'{k1}.union({k2})'): d1.union(d2) for timer in ti.reset(f'ubelt.dict_ {k1}.union({k2})'): ub.dict_union(d1, d2) # --- print('---') for timer in ti.reset(f'{k1}.intersection({k2})'): d1.intersection(d2) for timer in ti.reset(f'{k1}.intersection(keyset-{k2})'): d1.intersection(ks2) for timer in ti.reset(f'{k1}.intersection(many-keyset-{k2})'): d1.intersection(*many_ks2) for timer in ti.reset(f'ubelt.dict_ {k1}.intersection({k2})'): ub.dict_isect(d1, d2) for timer in ti.reset(f'ubelt.dict_ {k1}.intersection(keyset-{k2})'): ub.dict_isect(d1, ks2) for timer in ti.reset(f'ubelt.dict_ {k1}.intersection(many-keyset-{k2})'): ub.dict_isect(d1, *many_ks2) # --- print('---') for timer in ti.reset(f'{k1}.difference({k2})'): d1.difference(d2) for timer in ti.reset(f'{k1}.difference(keyset-{k2})'): d1.difference(ks2) for timer in ti.reset(f'{k1}.difference(many-keyset-{k2})'): d1.difference(*many_ks2) for timer in ti.reset(f'ubelt.dict_ {k1}.difference({k2})'): ub.dict_diff(d1, d2) for timer in ti.reset(f'ubelt.dict_ {k1}.difference(keyset-{k2})'): ub.dict_diff(d1, ks2) for timer in ti.reset(f'ubelt.dict_ {k1}.difference(many-keyset-{k2})'): ub.dict_diff(d1, *many_ks2) # Test builtin dictionary union op dict.__or__(evens, odds) self = evens others = (odds,) evens.difference(odds) evens.union(odds) evens.intersection(odds) odds.difference(primes) odds.intersection(primes) odds.union(primes) """ import ubelt as ub # NOQA class RorUDictType(type): cls = ub.UDict @classmethod def __ror__(mcls, obj): return mcls.cls(obj) @classmethod def __or__(mcls, obj): return mcls.cls(obj) class RorUDict(RorUDictType.cls, metaclass=RorUDictType): pass try: type({'10': 10} | RorUDict) except Exception as ex: print(f'ex={ex}') try: RorUDict | {'10': 10} except Exception as ex: print(f'ex={ex}') a = {1: 10, 2: 20, 3: 30, 5: 50, 7: 70, 11: 110} b = {1: 11, 2: 21, 3: 31, 5: 51, 8: 81, 13: 131} t1 = (RorUDict | a) t2 = (a | RorUDict) assert t1 == t2 assert t1 is not t2 assert isinstance(t1, RorUDictType.cls) assert isinstance(t2, RorUDictType.cls) (RorUDict | a) & b (a | RorUDict) & b ubelt-1.3.7/dev/gpg_owner_trust.enc000066400000000000000000000011421472470106000173310ustar00rootroot00000000000000U2FsdGVkX19qfnjMoQaRH8u5MQ6b6Mz5waKE+WvpCoHRU47ShBq3MaaugFNsWV9b p2VAVk3yu94s3n9LD3jc7eyxa00hVa0hnXpv1jFQTdUG6QycMxwsHZa2kVXq/Mrh +m6g85h62JZQKpewcVKoAAiNsPcKqclv/oBRC2W+rneLCc3GNaSucdN7tlz8qmAt 7Z9UzXrO8cFKXlRCUml82vKxug6jHSMW4tvr6bCMpCsjoYfXkMGufYgdg/McNbOM W/M3eDtud8l6ftQmlrO8ymYcbJ+i5Yn7dgW8tndDXmT+DSUONGIqa1DVma9+pMJr KI/oyUV+Ry8+kLJ1Tsb6mzQyHD6utA61vsIoRnsXsEzlNh01kspS8JV1Z96A82sn jbqOSPoQ9jFyiT35K6/Yaqc+n7ktWKMNcak8eA6XEuzeVEkNOEgFNewYRD9xLZbP H9HD4GXNLBYxDUDhlBiixNavYqMLYSyeyQqt0ZwAQbBK3VHHmJSQLcM1SWslJ9Jc s5VYtZxwLaJ+yOfwFf3ov1u963bP6m7V3fJfBKFLj013kXktahzvGEBX0S+w5IeD cYccwFRYtbhSnWo1DGBWPQ== 
ubelt-1.3.7/dev/maintain/000077500000000000000000000000001472470106000152145ustar00rootroot00000000000000ubelt-1.3.7/dev/maintain/count_usage_freq.py000077500000000000000000000071441472470106000211300ustar00rootroot00000000000000#!/usr/bin/env python # # REMOVE ME # import scriptconfig as scfg # class UsageConfig(scfg.Config): # default = { # 'print_packages': False, # 'remove_zeros': False, # 'hardcoded_ubelt_hack': True, # 'extra_modnames': [], # } # def count_package_usage(pkgname='ubelt'): # config = UsageConfig(cmdline=True) # import ubelt as ub # import glob # from os.path import join # names = [ # 'xdoctest', 'netharn', 'xdev', 'xinspect', 'xcookie', 'ndsampler', # 'kwarray', 'kwimage', 'kwplot', 'kwcoco', # 'scriptconfig', 'vimtk', # 'mkinit', 'futures_actors', 'graphid', # 'kwutil', 'git_well', 'line_profiler', 'delayed_image', 'simple_dvc', # 'pypogo', # 'ibeis', 'plottool_ibeis', 'guitool_ibeis', 'utool', 'dtool_ibeis', # 'vtool_ibeis', 'hesaff', 'torch_liberator', 'liberator', # ] + config['extra_modnames'] # code_repos = [ub.Path('~/code').expand() / name for name in names] # repo_dpaths = code_repos + [ # # ub.Path('~/local').expand(), # ub.Path('~/misc').expand(), # ] # all_fpaths = [] # for repo_dpath in repo_dpaths: # name = repo_dpath.stem # fpaths = glob.glob(join(repo_dpath, '**', '*.py'), recursive=True) # for fpath in fpaths: # all_fpaths.append((name, fpath)) # import re # pat = re.compile(r'\bub\.(?P[a-zA-Z_][A-Za-z_0-9]*)\b') # import ubelt as ub # pkg_to_hist = ub.ddict(lambda: ub.ddict(int)) # for name, fpath in ub.ProgIter(all_fpaths): # with open(fpath, 'r') as file: # text = file.read() # for match in pat.finditer(text): # attr = match.groupdict()['attr'] # if attr in ub.__all__: # pkg_to_hist[name][attr] += 1 # hist_iter = iter(pkg_to_hist.values()) # usage = next(hist_iter).copy() # for other in hist_iter: # for k, v in other.items(): # usage[k] += v # for attr in ub.__all__: # usage[attr] += 0 # for name in pkg_to_hist.keys(): # pkg_to_hist[name] = ub.odict(sorted(pkg_to_hist[name].items(), key=lambda t: t[1])[::-1]) # usage = ub.odict(sorted(usage.items(), key=lambda t: t[1])[::-1]) # if config['print_packages']: # print(ub.repr2(pkg_to_hist, nl=2)) # if config['remove_zeros']: # for k, v in list(usage.items()): # if v == 0: # usage.pop(k) # if config['hardcoded_ubelt_hack']: # blocklist = [ # 'progiter', 'timerit', 'orderedset', # ] # for k in list(usage): # if k in blocklist: # usage.pop(k, None) # elif k.startswith('util_'): # usage.pop(k, None) # elif k.startswith('_util_'): # usage.pop(k, None) # # ub._util_deprecated # # from ubelt import _util_deprecated # # if k in dir(_util_deprecated): # # usage.pop(k, None) # if 1: # # Renamed Aliases # usage['urepr'] += usage.pop('repr2') # usage['ReprExtensions'] += usage.pop('FormatterExtensions') # usage = ub.udict(usage).sorted_values(reverse=True) # print(ub.repr2(usage, nl=1)) # return usage # if __name__ == '__main__': # """ # For Me: # ~/internal/dev/pkg_usage_stats_update.sh # CommandLine: # python ~/code/ubelt/dev/maintain/count_usage_freq.py --help # python ~/code/ubelt/dev/maintain/count_usage_freq.py --remove_zeros=False --print_packages=True # """ # count_package_usage() ubelt-1.3.7/dev/maintain/gen_api_for_docs.py000077500000000000000000000205131472470106000210520ustar00rootroot00000000000000#!/usr/bin/env python import scriptconfig as scfg class UsageConfig(scfg.Config): default = { 'print_packages': False, 'remove_zeros': False, 'hardcoded_ubelt_hack': True, 'extra_modnames': [], } def 
count_package_usage(modname): import ubelt as ub import glob from os.path import join import re config = UsageConfig(cmdline=True) names = [ 'xdoctest', 'netharn', 'xdev', 'xinspect', 'xcookie', 'ndsampler', 'kwarray', 'kwimage', 'kwplot', 'kwcoco', 'scriptconfig', 'vimtk', 'mkinit', 'futures_actors', 'graphid', 'kwutil', 'git_well', 'line_profiler', 'delayed_image', 'simple_dvc', 'pypogo', 'ibeis', 'plottool_ibeis', 'guitool_ibeis', 'utool', 'dtool_ibeis', 'vtool_ibeis', 'hesaff', 'torch_liberator', 'liberator', ] + config['extra_modnames'] code_repos = [ub.Path('~/code').expand() / name for name in names] repo_dpaths = code_repos + [ # ub.Path('~/local').expand(), ub.Path('~/misc').expand(), ] all_fpaths = [] for repo_dpath in repo_dpaths: name = repo_dpath.stem fpaths = glob.glob(join(repo_dpath, '**', '*.py'), recursive=True) for fpath in fpaths: all_fpaths.append((name, fpath)) pat = re.compile(r'\bub\.(?P<attr>[a-zA-Z_][A-Za-z_0-9]*)\b') module = ub.import_module_from_name(modname) package_name = module.__name__ package_allvar = module.__all__ pat = re.compile(r'\b' + package_name + r'\.(?P<attr>[a-zA-Z_][A-Za-z_0-9]*)\b') pkg_to_hist = ub.ddict(lambda: ub.ddict(int)) for name, fpath in ub.ProgIter(all_fpaths): with open(fpath, 'r') as file: text = file.read() for match in pat.finditer(text): attr = match.groupdict()['attr'] if attr in package_allvar: pkg_to_hist[name][attr] += 1 hist_iter = iter(pkg_to_hist.values()) usage = next(hist_iter).copy() for other in hist_iter: for k, v in other.items(): usage[k] += v for attr in package_allvar: usage[attr] += 0 for name in pkg_to_hist.keys(): pkg_to_hist[name] = ub.odict(sorted(pkg_to_hist[name].items(), key=lambda t: t[1])[::-1]) usage = ub.odict(sorted(usage.items(), key=lambda t: t[1])[::-1]) if config['print_packages']: print(ub.repr2(pkg_to_hist, nl=2)) if config['remove_zeros']: for k, v in list(usage.items()): if v == 0: usage.pop(k) if config['hardcoded_ubelt_hack']: blocklist = [ 'progiter', 'timerit', 'orderedset', ] for k in list(usage): if k in blocklist: usage.pop(k, None) elif k.startswith('util_'): usage.pop(k, None) elif k.startswith('_util_'): usage.pop(k, None) # ub._util_deprecated # from ubelt import _util_deprecated # if k in dir(_util_deprecated): # usage.pop(k, None) if 1: # Renamed Aliases try: usage['urepr'] += usage.pop('repr2') usage['ReprExtensions'] += usage.pop('FormatterExtensions') except Exception: ... usage = ub.udict(usage).sorted_values(reverse=True) print(ub.repr2(usage, nl=1)) return usage def gen_api_for_docs(modname): """ import sys, ubelt sys.path.append(ubelt.expandpath('~/code/ubelt/dev/maintain')) from gen_api_for_docs import * # NOQA """ import ubelt as ub usage = count_package_usage(modname) module = ub.import_module_from_name(modname) attrnames = module.__all__ if hasattr(module, '__protected__'): # Hack for lazy imports for subattr in module.__protected__: submod = ub.import_module_from_name(modname + '.'
+ subattr) setattr(module, subattr, submod) attrnames += module.__protected__ # Reorganize data to contain more information rows = [] unseen = usage.copy() for attrname in attrnames: member = getattr(module, attrname) submembers = getattr(member, '__all__', None) if attrname.startswith('util_'): if not submembers: from mkinit.static_mkinit import _extract_attributes submembers = _extract_attributes(member.__file__) if submembers: for subname in submembers: parent_module = f'{modname}.{attrname}' short_name = '{modname}.{subname}'.format(**locals()) full_name = '{parent_module}.{subname}'.format(**locals()) url = 'https://{modname}.readthedocs.io/en/latest/{parent_module}.html#{full_name}'.format(**locals()) rst_ref = ':func:`{short_name}<{full_name}>`'.format(**locals()) url_ref = '`{short_name} <{url}>`__'.format(**locals()) rows.append({ 'attr': subname, 'parent_module': parent_module, 'usage': unseen.pop(subname, 0), 'short_name': short_name, 'full_name': full_name, 'url': url, 'rst_ref': rst_ref, 'url_ref': url_ref, }) attr_to_infos = ub.group_items(rows, lambda x: x['attr']) if 'urepr' in attr_to_infos: urepr2_infos = attr_to_infos['urepr'] cannon_urepr2_infos = [d for d in urepr2_infos if 'repr' in d['parent_module']] cannon_urepr2_info = cannon_urepr2_infos[0] attr_to_infos['urepr'] = [cannon_urepr2_info] import numpy as np import kwarray if ub.argflag('--url-mode'): ref_key = 'url_ref' else: ref_key = 'rst_ref' name_len = max(len(row[ref_key]) for row in rows) + 1 num_len = 16 guard = ('=' * name_len + ' ' + '=' * num_len) print(guard) column_fmt = '{:<' + str(name_len) + '} {:>' + str(num_len) + '}' print(column_fmt.format(' Function name ', 'Usefulness')) print(guard) for key, value in usage.items(): infos = attr_to_infos[key] if len(infos) == 0: print(column_fmt.format(f':func:`{modname}.' + key + '`', value)) else: if len(infos) != 1: print('infos = {}'.format(ub.urepr(infos, nl=1))) raise AssertionError info = infos[0] print(column_fmt.format(info[ref_key], value)) print(guard) raw_scores = np.array(list(usage.values())) print('\n..
code:: python\n') print(ub.indent('usage stats = ' + ub.repr2(kwarray.stats_dict( raw_scores, median=True, sum=True), nl=1))) for attrname in attrnames: member = getattr(module, attrname) submembers = getattr(member, '__all__', None) # if attrname.startswith('util_'): if not submembers: from mkinit.static_mkinit import _extract_attributes try: submembers = _extract_attributes(member.__file__) except AttributeError: pass if submembers: parent_module = f'{modname}.{attrname}' title = ':mod:`{}`'.format(parent_module) print('\n' + title) print('-' * len(title)) for subname in submembers: if not subname.startswith('_'): rst_ref = ( f':func:`<{modname}.{subname}><{parent_module}.{subname}>`' ) print(rst_ref) submembers = dir(member) if __name__ == '__main__': """ For Me: ~/internal/dev/ubelt_stats_update.sh ~/internal/dev/pkg_usage_stats_update.sh CommandLine: # For index.rst python ~/code/ubelt/dev/maintain/gen_api_for_docs.py # For README python ~/code/ubelt/dev/maintain/gen_api_for_docs.py --url-mode python ~/code/ubelt/dev/maintain/gen_api_for_docs.py --extra_modnames=bioharn,geowatch --remove_zeros=False --url-mode # First run and copy the table: python ~/code/ubelt/dev/maintain/count_usage_freq.py python ~/code/ubelt/dev/maintain/gen_api_for_docs.py --extra_modnames=bioharn,geowatch --remove_zeros=False # Then edit: TODO make less manual ~/code/ubelt/docs/source/function_usefulness.rst """ gen_api_for_docs('ubelt') ubelt-1.3.7/dev/maintain/gen_typed_stubs.py000077500000000000000000000517541472470106000210030ustar00rootroot00000000000000""" MOVED TO xdev: SEE xdev xdev doctypes ubelt ~/code/xdev/xdev/cli/docstr_stubgen.py Script for auto-generating pyi type extension files from google-style docstrings Requirements: pip install mypy autoflake yapf CommandLine: # Run script to parse google-style docstrings and write pyi files python ~/code/ubelt/dev/maintain/gen_typed_stubs.py # Run mypy to check that type annotations are correct mypy ubelt """ from mypy.stubgen import (StubGenerator, find_self_initializers, FUNC, EMPTY, METHODS_WITH_RETURN_VALUE,) import sys from typing import (List, Dict, Optional) from mypy.nodes import ( # Expression, IntExpr, UnaryExpr, StrExpr, BytesExpr, NameExpr, FloatExpr, MemberExpr, # TupleExpr, ListExpr, ComparisonExpr, CallExpr, IndexExpr, EllipsisExpr, # ClassDef, MypyFile, Decorator, AssignmentStmt, TypeInfo, # IfStmt, ImportAll, ImportFrom, Import, FuncDef, # FuncBase, Block, # Statement, OverloadedFuncDef, ARG_POS, ARG_STAR, ARG_STAR2, # ARG_NAMED, ) # from mypy.stubgenc import generate_stub_for_c_module # from mypy.stubutil import ( # default_py2_interpreter, CantImport, generate_guarded, # walk_packages, find_module_path_and_all_py2, find_module_path_and_all_py3, # report_missing, fail_missing, remove_misplaced_type_comments, common_dir_prefix # ) from mypy.types import ( # Type, TypeStrVisitor, CallableType, # UnboundType, NoneType, TupleType, TypeList, Instance, AnyType, get_proper_type ) from mypy.traverser import ( all_yield_expressions, has_return_statement, has_yield_expression ) def generate_typed_stubs(): """ Attempt to use google-style docstrings, xdoctest, and mypy to generate typed stub files. 
pyfile mypy.stubgen # Delete compiled versions so we can hack it # THIS DOES NOT WORK # MYPY_DPTH=$(python -c "import mypy, pathlib; print(pathlib.Path(mypy.__file__).parent)") # echo $MYPY_DPTH # ls $MYPY_DPTH/*.so # rm $MYPY_DPTH/*.so # ls $VIRTUAL_ENV/lib/*/site-packages/mypy/*.so # rm $VIRTUAL_ENV/lib/*/site-packages/mypy/*.so # rm ~/.pyenv/versions/3.8.6/envs/pyenv3.8.6/lib/python3.8/site-packages/mypy/*.cpython-38-x86_64-linux-gnu.so # This works I think? if [[ ! -e "$HOME/code/mypy" ]]; then git clone https://github.com/python/mypy.git $HOME/code/mypy fi (cd $HOME/code/mypy && git pull) pip install -e $HOME/code/mypy pip install MonkeyType monkeytype run run_tests.py monkeytype stub ubelt.util_dict from typing import TypeVar from mypy.applytype import get_target_type z = TypeVar('Iterable') get_target_type(z) from mypy.expandtype import expand_type expand_type(z, env={}) from mypy.types import get_proper_type get_proper_type(z) get_proper_type(dict) import typing get_proper_type(typing.Iterable) from mypy.types import deserialize_type, UnboundType import mypy.types as mypy_types z = UnboundType('Iterable') get_proper_type(dict) from mypy.fastparse import parse_type_string parse_type_string('dict', 'dict', 0, 0) z = parse_type_string('typing.Iterator', 'Any', 0, 0) get_proper_type(z) """ import pathlib import ubelt import os import autoflake import yapf from mypy import stubgen from mypy import defaults from xdoctest import static_analysis from os.path import dirname, join ubelt_dpath = dirname(ubelt.__file__) for p in pathlib.Path(ubelt_dpath).glob('*.pyi'): p.unlink() files = list(static_analysis.package_modpaths(ubelt_dpath, recursive=True, with_libs=1, with_pkg=0)) files = [f for f in files if 'deprecated' not in f] # files = [join(ubelt_dpath, 'util_dict.py')] options = stubgen.Options( pyversion=defaults.PYTHON3_VERSION, no_import=True, doc_dir='', search_path=[], interpreter=sys.executable, ignore_errors=False, parse_only=True, include_private=False, output_dir=dirname(ubelt_dpath), modules=[], packages=[], files=files, verbose=False, quiet=False, export_less=True) # generate_stubs(options) mypy_opts = stubgen.mypy_options(options) py_modules, c_modules = stubgen.collect_build_targets(options, mypy_opts) # Collect info from docs (if given): sigs = class_sigs = None # type: Optional[Dict[str, str]] if options.doc_dir: sigs, class_sigs = stubgen.collect_docs_signatures(options.doc_dir) # Use parsed sources to generate stubs for Python modules. 
stubgen.generate_asts_for_modules(py_modules, options.parse_only, mypy_opts, options.verbose) for mod in py_modules: assert mod.path is not None, "Not found module was not skipped" target = mod.module.replace('.', '/') if os.path.basename(mod.path) == '__init__.py': target += '/__init__.pyi' else: target += '.pyi' target = join(options.output_dir, target) files.append(target) with stubgen.generate_guarded(mod.module, target, options.ignore_errors, options.verbose): stubgen.generate_stub_from_ast(mod, target, options.parse_only, options.pyversion, options.include_private, options.export_less) gen = ExtendedStubGenerator(mod.runtime_all, pyversion=options.pyversion, include_private=options.include_private, analyzed=not options.parse_only, export_less=options.export_less) assert mod.ast is not None, "This function must be used only with analyzed modules" mod.ast.accept(gen) # print('gen.import_tracker.required_names = {!r}'.format(gen.import_tracker.required_names)) # print(gen.import_tracker.import_lines()) print('mod.path = {!r}'.format(mod.path)) known_one_letter_types = [ # 'T', 'K', 'A', 'B', 'C', 'V', 'DT', 'KT', 'VT', 'T' ] for type_var_name in sorted(set(gen.import_tracker.required_names) & set(known_one_letter_types)): gen.add_typing_import('TypeVar') # gen.add_import_line('from typing import {}\n'.format('TypeVar')) gen._output = ['{} = TypeVar("{}")\n'.format(type_var_name, type_var_name)] + gen._output custom_types = {'Hasher'} for type_var_name in sorted(set(gen.import_tracker.required_names) & set(custom_types)): gen.add_typing_import('TypeVar') # gen.add_import_line('from typing import {}\n'.format('TypeVar')) gen._output = ['{} = TypeVar("{}")\n'.format(type_var_name, type_var_name)] + gen._output # Hack for specific module # if mod.path.endswith('util_path.py'): # gen.add_typing_import('TypeVar') # # hack for variable inheritance # gen._output = ['import pathlib\nimport os\n', "_PathBase = pathlib.WindowsPath if os.name == 'nt' else pathlib.PosixPath\n"] + gen._output if mod.path.endswith('util_dict.py'): # hack for util_dict gen.add_import_line('import sys\n') text = ''.join(gen.output()) # Hack to remove lines caused by Py2 compat text = text.replace('Generator = object\n', '') text = text.replace('select = NotImplemented\n', '') text = text.replace('iteritems: Any\n', '') text = text.replace('text_type = str\n', '') text = text.replace('text_type: Any\n', '') text = text.replace('string_types: Any\n', '') text = text.replace('PY2: Any\n', '') text = text.replace('__win32_can_symlink__: Any\n', '') if 'DictBase' in text: # Hack for util_dict text = text.replace('DictBase = OrderedDict\n', '') text = text.replace('DictBase = dict\n', 'DictBase = OrderedDict if sys.version_info[0:2] <= (3, 6) else dict') # text = text.replace('odict = OrderedDict', '') # text = text.replace('ddict = defaultdict', '') if mod.path.endswith('util_path.py'): # hack for forward reference text = text.replace(' -> Path:', " -> 'Path':") text = text.replace('class Path(_PathBase)', "class Path") # Format the PYI file nicely text = autoflake.fix_code(text, remove_unused_variables=True, remove_all_unused_imports=True) # import autopep8 # text = autopep8.fix_code(text, options={ # 'aggressive': 0, # 'experimental': 0, # }) style = yapf.yapf_api.style.CreatePEP8Style() text, _ = yapf.yapf_api.FormatCode( text, filename='', style_config=style, lines=None, verify=False) # print(text) # Write output to file. 
subdir = dirname(target) if subdir and not os.path.isdir(subdir): os.makedirs(subdir) with open(target, 'w') as file: file.write(text) def hack_annotated_type_from_docstring(): pass class ExtendedStubGenerator(StubGenerator): def visit_func_def(self, o: FuncDef, is_abstract: bool = False, is_overload: bool = False) -> None: if (self.is_private_name(o.name, o.fullname) or self.is_not_in_all(o.name) or (self.is_recorded_name(o.name) and not is_overload)): self.clear_decorators() return if not self._indent and self._state not in (EMPTY, FUNC) and not o.is_awaitable_coroutine: self.add('\n') if not self.is_top_level(): self_inits = find_self_initializers(o) for init, value in self_inits: if init in self.method_names: # Can't have both an attribute and a method/property with the same name. continue init_code = self.get_init(init, value) if init_code: self.add(init_code) # dump decorators, just before "def ..." for s in self._decorators: self.add(s) self.clear_decorators() self.add("%s%sdef %s(" % (self._indent, 'async ' if o.is_coroutine else '', o.name)) self.record_name(o.name) # import ubelt as ub # if o.name == 'dzip': # import xdev # xdev.embed() def _hack_for_info(info): if info['type'] is None: return for typing_arg in ['Iterable', 'Callable', 'Dict', 'List', 'Union', 'Type', 'Mapping', 'Tuple', 'Optional', 'Sequence', 'Iterator', 'Set', 'Dict']: if typing_arg in info['type']: self.add_typing_import(typing_arg) self.add_import_line('from typing import {}\n'.format(typing_arg)) if 'io.' in info['type']: self.add_import_line('import io\n') if 'datetime.' in info['type']: self.add_import_line('import datetime\n') if '|' in info['type']: self.add_typing_import('Union') self.add_import_line('from typing import {}\n'.format('Union')) if 'ModuleType' in info['type']: self.add_import_line('from types import {}\n'.format('ModuleType')) # types.ModuleType if 'NoParamType' in info['type']: self.add_import_line('from ubelt.util_const import {}\n'.format('NoParamType')) if 'hashlib._hashlib' in info['type']: self.add_import_line('import hashlib._hashlib\n') if 'PathLike' in info['type']: self.add_import_line('from os import {}\n'.format('PathLike')) if 'concurrent.futures.Future' in info['type']: self.add_import_line('import concurrent.futures\n') if info['type'].startswith('callable'): # TODO: generalize, allow the "callable" func to be transformed # into the type if given in the docstring self.add_typing_import('Callable') info['type'] = info['type'].replace('callable', 'Callable') self.add_import_line('from typing import {}\n'.format(typing_arg)) name_to_parsed_docstr_info = {} return_parsed_docstr_info = None fullname = o.name if getattr(self, '_IN_CLASS', None) is not None: fullname = self._IN_CLASS + '.' 
+ o.name from ubelt import util_import curr = util_import.import_module_from_name(self.module) # curr = sys.modules.get(self.module) # print('o.name = {!r}'.format(o.name)) # print('fullname = {!r}'.format(fullname)) for part in fullname.split('.'): # print('part = {!r}'.format(part)) # print('curr = {!r}'.format(curr)) curr = getattr(curr, part, None) # print('curr = {!r}'.format(curr)) real_func = curr # print('real_func = {!r}'.format(real_func)) # if o.name == 'dict_union': # import xdev # xdev.embed() if real_func is not None and real_func.__doc__ is not None: from mypy import fastparse from xdoctest.docstr import docscrape_google parsed_args = None # parsed_ret = None blocks = docscrape_google.split_google_docblocks(real_func.__doc__) for key, block in blocks: lines = block[0] if key == 'Returns': for retdict in docscrape_google.parse_google_retblock(lines): _hack_for_info(retdict) return_parsed_docstr_info = (key, retdict['type']) if key == 'Yields': for retdict in docscrape_google.parse_google_retblock(lines): _hack_for_info(retdict) return_parsed_docstr_info = (key, retdict['type']) if key == 'Args': # hack for *args lines = '\n'.join([line.lstrip('*') for line in lines.split('\n')]) # print('lines = {!r}'.format(lines)) parsed_args = list(docscrape_google.parse_google_argblock(lines)) for info in parsed_args: _hack_for_info(info) name = info['name'].replace('*', '') name_to_parsed_docstr_info[name] = info parsed_rets = list(docscrape_google.parse_google_returns(real_func.__doc__)) ret_infos = [] for info in parsed_rets: try: got = fastparse.parse_type_string(info['type'], 'Any', 0, 0) ret_infos.append(got) except Exception: pass # print('o = {!r}'.format(o)) # print('o.arguments = {!r}'.format(o.arguments)) args: List[str] = [] for i, arg_ in enumerate(o.arguments): var = arg_.variable kind = arg_.kind name = var.name annotated_type = (o.unanalyzed_type.arg_types[i] if isinstance(o.unanalyzed_type, CallableType) else None) if annotated_type is None: if name in name_to_parsed_docstr_info: name = name.replace('*', '') doc_type_str = name_to_parsed_docstr_info[name].get('type', None) if doc_type_str is not None: doc_type_str = doc_type_str.split(', default')[0] # annotated_type = doc_type_str # import mypy.types as mypy_types from mypy import fastparse # globals_ = {**mypy_types.__dict__} try: # # got = mypy_types.deserialize_type(doc_type_str) # got = eval(doc_type_str, globals_) # got = mypy_types.get_proper_type(got) # got = mypy_types.Iterable got = fastparse.parse_type_string(doc_type_str, 'Any', 0, 0) except Exception as ex: print('ex = {!r}'.format(ex)) print('Failed to parse doc_type_str = {!r}'.format(doc_type_str)) else: annotated_type = got # print('PARSED: annotated_type = {!r}'.format(annotated_type)) # print('annotated_type = {!r}'.format(annotated_type)) # I think the name check is incorrect: there are libraries which # name their 0th argument other than self/cls is_self_arg = i == 0 and name == 'self' is_cls_arg = i == 0 and name == 'cls' annotation = "" if annotated_type and not is_self_arg and not is_cls_arg: # Luckily, an argument explicitly annotated with "Any" has # type "UnboundType" and will not match. if not isinstance(get_proper_type(annotated_type), AnyType): annotation = ": {}".format(self.print_annotation(annotated_type)) if arg_.initializer: if kind.is_named() and not any(arg.startswith('*') for arg in args): args.append('*') if not annotation: typename = self.get_str_type_of_node(arg_.initializer, True, False) if typename == '': annotation = '=...' 
else: annotation = ': {} = ...'.format(typename) else: annotation += ' = ...' arg = name + annotation elif kind == ARG_STAR: arg = '*%s%s' % (name, annotation) elif kind == ARG_STAR2: arg = '**%s%s' % (name, annotation) else: arg = name + annotation args.append(arg) retname = None if o.name != '__init__' and isinstance(o.unanalyzed_type, CallableType): if isinstance(get_proper_type(o.unanalyzed_type.ret_type), AnyType): # Luckily, a return type explicitly annotated with "Any" has # type "UnboundType" and will enter the else branch. retname = None # implicit Any else: retname = self.print_annotation(o.unanalyzed_type.ret_type) elif isinstance(o, FuncDef) and (o.is_abstract or o.name in METHODS_WITH_RETURN_VALUE): # Always assume abstract methods return Any unless explicitly annotated. Also # some dunder methods should not have a None return type. retname = None # implicit Any elif has_yield_expression(o): self.add_abc_import('Generator') yield_name = 'None' send_name = 'None' return_name = 'None' for expr, in_assignment in all_yield_expressions(o): if expr.expr is not None and not self.is_none_expr(expr.expr): self.add_typing_import('Any') yield_name = 'Any' if in_assignment: self.add_typing_import('Any') send_name = 'Any' if has_return_statement(o): self.add_typing_import('Any') return_name = 'Any' generator_name = self.typing_name('Generator') if return_parsed_docstr_info is not None: yield_name = return_parsed_docstr_info[1] retname = f'{generator_name}[{yield_name}, {send_name}, {return_name}]' # print('o.name = {}'.format(ub.repr2(o.name, nl=1))) # print('retname = {!r}'.format(retname)) # print('retfield = {!r}'.format(retfield)) elif not has_return_statement(o) and not is_abstract: retname = 'None' if retname is None: if return_parsed_docstr_info is not None: retname = return_parsed_docstr_info[1] retfield = '' if retname is not None: retfield = ' -> ' + retname self.add(', '.join(args)) self.add("){}: ...\n".format(retfield)) self._state = FUNC def visit_class_def(self, o) -> None: self._IN_CLASS = o.name # print('o.name = {!r}'.format(o.name)) ret = super().visit_class_def(o) self._IN_CLASS = None return ret if __name__ == '__main__': """ CommandLine: python ~/code/ubelt/dev/maintain/gen_typed_stubs.py """ generate_typed_stubs() ubelt-1.3.7/dev/maintain/port_progiter.py000066400000000000000000000030271472470106000204670ustar00rootroot00000000000000""" Vendor progiter into ubelt. 
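This syncs the vendored copy of progiter (and its tests) from a sibling
progiter checkout into ubelt, printing a diff and prompting before each
write. A sketch of the expected invocation (assuming both repos are checked
out under ~/code, as the hardcoded paths in ``main`` require)::

    python ~/code/ubelt/dev/maintain/port_progiter.py        # show diff, confirm writes
    python ~/code/ubelt/dev/maintain/port_progiter.py --yes  # write without prompting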
""" #!/usr/bin/env python3 import scriptconfig as scfg import ubelt as ub class PortProgiterConfig(scfg.DataConfig): yes = scfg.Value(False, isflag=True) def main(cmdline=1, **kwargs): """ Example: >>> # xdoctest: +SKIP >>> cmdline = 0 >>> kwargs = dict( >>> ) >>> main(cmdline=cmdline, **kwargs) """ import xdev config = PortProgiterConfig.cli(cmdline=cmdline, data=kwargs, strict=True) print('config = ' + ub.urepr(dict(config), nl=1)) fpath1 = ub.Path('~/code/progiter/progiter/progiter.py').expand() fpath2 = ub.Path('~/code/ubelt/ubelt/progiter.py').expand() text1 = fpath1.read_text() text2 = fpath2.read_text() print(xdev.difftext(text2, text1, colored=1)) import rich.prompt ans = config.yes or rich.prompt.Confirm.ask('do write?') if ans: fpath2.write_text(text1) fpath1 = ub.Path('~/code/progiter/tests/test_progiter.py').expand() fpath2 = ub.Path('~/code/ubelt/tests/test_progiter.py').expand() text1 = fpath1.read_text() text1 = text1.replace('from progiter import ProgIter', 'from ubelt import ProgIter') text2 = fpath2.read_text() print(xdev.difftext(text2, text1, colored=1)) import rich.prompt ans = config.yes or rich.prompt.Confirm.ask('do write?') if ans: fpath2.write_text(text1) if __name__ == '__main__': """ CommandLine: python ~/code/ubelt/dev/maintain/port_progiter.py python -m port_progiter """ main() ubelt-1.3.7/dev/make_docs.sh000077500000000000000000000005571472470106000157070ustar00rootroot00000000000000#!/bin/bash __heredoc__=""" Requirements: pip install -r docs/requirements.txt sphinx Notes: cd ~/code/ubelt/docs make html sphinx-apidoc -f -o ~/code/ubelt/docs/source ~/code/ubelt/ubelt --separate make html cd ~/code/sphinx github-add-fork source https://github.com/sphinx-doc/sphinx.git """ (cd ~/code/ubelt/docs && make html) ubelt-1.3.7/dev/make_strict_req.sh000077500000000000000000000006531472470106000171330ustar00rootroot00000000000000#!/bin/bash __doc__=""" Make a strict version of requirements ./dev/make_strict_req.sh """ mkdir -p requirements-strict sed 's/>=/==/' requirements/runtime.txt > requirements-strict/runtime.txt sed 's/>=/==/' requirements/optional.txt > requirements-strict/optional.txt sed 's/>=/==/' requirements/tests.txt > requirements-strict/tests.txt sed 's/requirements/requirements-strict/' requirements.txt > requirements-strict.txt ubelt-1.3.7/dev/notes/000077500000000000000000000000001472470106000145445ustar00rootroot00000000000000ubelt-1.3.7/dev/notes/odd_error.py000066400000000000000000000172461472470106000171070ustar00rootroot000000000000003.10 on ubuntu-latest, arch=auto with tests failed 2 hours ago in 30s Run # Find the path to the wheel Processing ./wheelhouse/ubelt-1.2.1-py3-none-any.whl Collecting coverage Downloading coverage-6.4.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 212.3/212.3 kB 10.6 MB/s eta 0:00:00 Collecting pytest Downloading pytest-7.1.2-py3-none-any.whl (297 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 297.0/297.0 kB 31.0 MB/s eta 0:00:00 Collecting pytest-cov Downloading pytest_cov-3.0.0-py3-none-any.whl (20 kB) Collecting codecov Downloading codecov-2.1.12-py2.py3-none-any.whl (16 kB) Collecting requests Downloading requests-2.28.1-py3-none-any.whl (62 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 62.8/62.8 kB 24.0 MB/s eta 0:00:00 Collecting xdoctest Downloading xdoctest-1.0.1-py3-none-any.whl (130 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 130.5/130.5 kB 43.5 MB/s eta 0:00:00 Collecting pytest-timeout Downloading 
pytest_timeout-2.1.0-py3-none-any.whl (12 kB) Collecting idna<4,>=2.5 Downloading idna-3.3-py3-none-any.whl (61 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 61.2/61.2 kB 25.4 MB/s eta 0:00:00 Collecting certifi>=2017.4.17 Downloading certifi-2022.6.15-py3-none-any.whl (160 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 160.2/160.2 kB 54.2 MB/s eta 0:00:00 Collecting charset-normalizer<3,>=2 Downloading charset_normalizer-2.1.0-py3-none-any.whl (39 kB) Collecting urllib3<1.27,>=1.21.1 Downloading urllib3-1.26.11-py2.py3-none-any.whl (139 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 139.9/139.9 kB 49.1 MB/s eta 0:00:00 Collecting py>=1.8.2 Downloading py-1.11.0-py2.py3-none-any.whl (98 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 98.7/98.7 kB 39.4 MB/s eta 0:00:00 Collecting iniconfig Downloading iniconfig-1.1.1-py2.py3-none-any.whl (5.0 kB) Requirement already satisfied: packaging in /opt/hostedtoolcache/Python/3.10.5/x64/lib/python3.10/site-packages (from pytest->ubelt==1.2.1) (21.3) Collecting attrs>=19.2.0 Downloading attrs-22.1.0-py2.py3-none-any.whl (58 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 58.8/58.8 kB 25.7 MB/s eta 0:00:00 Requirement already satisfied: tomli>=1.0.0 in /opt/hostedtoolcache/Python/3.10.5/x64/lib/python3.10/site-packages (from pytest->ubelt==1.2.1) (2.0.1) Collecting pluggy<2.0,>=0.12 Downloading pluggy-1.0.0-py2.py3-none-any.whl (13 kB) Collecting six Downloading six-1.16.0-py2.py3-none-any.whl (11 kB) Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/hostedtoolcache/Python/3.10.5/x64/lib/python3.10/site-packages (from packaging->pytest->ubelt==1.2.1) (3.0.9) Installing collected packages: iniconfig, urllib3, ubelt, six, py, pluggy, idna, coverage, charset-normalizer, certifi, attrs, xdoctest, requests, pytest, pytest-timeout, pytest-cov, codecov Successfully installed attrs-22.1.0 certifi-2022.6.15 charset-normalizer-2.1.0 codecov-2.1.12 coverage-6.4.2 idna-3.3 iniconfig-1.1.1 pluggy-1.0.0 py-1.11.0 pytest-7.1.2 pytest-cov-3.0.0 pytest-timeout-2.1.0 requests-2.28.1 six-1.16.0 ubelt-1.2.1 urllib3-1.26.11 xdoctest-1.0.1 MOD_DPATH = /opt/hostedtoolcache/Python/3.10.5/x64/lib/python3.10/site-packages/ubelt ============================= test session starts ============================== platform linux -- Python 3.10.5, pytest-7.1.2, pluggy-1.0.0 rootdir: /home/runner/work/ubelt/ubelt, configfile: pyproject.toml plugins: timeout-2.1.0, cov-3.0.0, xdoctest-1.0.1 collected 517 items =================================== FAILURES =================================== ________________________________ test_grabdata _________________________________ def test_grabdata(): import ubelt as ub import json import time # fname = 'foo.bar' # url = 'http://i.imgur.com/rqwaDag.png' # prefix1 = '944389a39dfb8fa9' url = _demo_url(128 * 11) prefix1 = 'b7fa848cd088ae842a89' fname = 'foo2.bar' # print('1. Download the file once') fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1, hasher='sha512') stat0 = ub.Path(fpath).stat() stamp_fpath = ub.Path(fpath).augment(tail='.stamp_sha512.json') assert json.loads(stamp_fpath.read_text())['hash'][0].startswith(prefix1) # print("2. 
Rerun and check that the download doesn't happen again") fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1) stat1 = ub.Path(fpath).stat() > assert stat0 == stat1, 'the file should not be modified' E AssertionError: the file should not be modified E assert os.stat_resul...me=1659755459) == os.stat_resul...me=1659755459) E At index 7 diff: 1659755459 != 1659755460 E Full diff: E - os.stat_result(st_mode=33152, st_ino=62343, st_dev=2049, st_nlink=1, st_uid=1001, st_gid=121, st_size=1408, st_atime=1659755460, st_mtime=1659755459, st_ctime=1659755459) E ? ^^ E + os.stat_result(st_mode=33152, st_ino=62343, st_dev=2049, st_nlink=1, st_uid=1001, st_gid=121, st_size=1408, st_atime=1659755459, st_mtime=1659755459, st_ctime=1659755459) E ? ^^ ../tests/test_download.py:573: AssertionError ----------------------------- Captured stdout call ----------------------------- 1. Download the file once [cacher] ... foo2.bar.stamp cache miss [cacher] stamp expired no_cert Downloading url='http://localhost:43031/file_1408_0.txt' to fpath='/home/runner/.cache/ubelt/foo2.bar' 0/1408... rate=0 Hz, eta=?, total=0:00:00 1408/1408... rate=16487312.50 Hz, eta=0:00:00, total=0:00:00 [cacher] ... foo2.bar.stamp cache save 2. Rerun and check that the download doesn't happen again =============================== warnings summary =============================== tests/test_color.py::test_global_color_disable tests/test_links.py::test_rel_dir_link tests/test_links.py::test_rel_file_link tests/test_links.py::test_delete_symlinks tests/test_links.py::test_broken_link tests/test_links.py::test_cant_overwrite_file_with_symlink tests/test_links.py::test_overwrite_symlink /home/runner/work/ubelt/ubelt/ubelt/util_colors.py:171: UserWarning: pygments is not installed, text will not be colored warnings.warn('pygments is not installed, text will not be colored') tests/test_color.py::test_global_color_disable /home/runner/work/ubelt/ubelt/ubelt/util_colors.py:98: UserWarning: pygments is not installed, code will not be highlighted warnings.warn('pygments is not installed, code will not be highlighted') -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html ---------- coverage: platform linux, python 3.10.5-final-0 ----------- Name Stmts Miss Branch BrPart Cover ----------------------------------------------------------------- ubelt-1.3.7/dev/oneoff/000077500000000000000000000000001472470106000146705ustar00rootroot00000000000000ubelt-1.3.7/dev/oneoff/remove_ancient_constructs.py000066400000000000000000000016671472470106000225410ustar00rootroot00000000000000def remove_old_python2_headers(): """ Helper to modernize the code """ import re import ubelt as ub from xdev import search_replace from xdev import patterns repo_dpath = ub.Path('.') # fpaths = set(ub.Path('~/code/watch/').expand().glob('**/*.py')) fpaths = set(repo_dpath.glob('**/*.py')) lines_to_remove = [ patterns.Pattern.from_regex(re.escape('from __future__ import absolute_import, ') + '.*', dotall=True), patterns.Pattern.from_regex(re.escape('# -*- coding: utf-8 -*-') + '.*', dotall=True), ] fpaths = {f for f in fpaths if 'remove_ancient_constructs' not in str(f)} # fpaths = fpaths - {ub.Path('~/code/ubelt/dev/remove_ancient_constructs.py').expand()} dry = 0 for fpath in fpaths: # x = fpath.read_text().split('\n')[0:1][0] for pat in lines_to_remove: search_replace.sedfile(fpath, regexpr=pat, repl='', dry=dry, verbose=3) 
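# A minimal usage sketch (an illustrative addition, not part of the original
# script). Like the other dev helpers in this repo, the cleanup can be invoked
# directly. Note that it globs every *.py file under the current working
# directory and, because ``dry`` is hardcoded to 0 above, it rewrites matching
# files in place.
if __name__ == '__main__':
    remove_old_python2_headers()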
ubelt-1.3.7/dev/public_gpg_key000066400000000000000000000000511472470106000163160ustar00rootroot0000000000000070858F4D01314BF21427676F3D568E6559A34380 ubelt-1.3.7/dev/secrets_configuration.sh000066400000000000000000000005721472470106000203530ustar00rootroot00000000000000export VARNAME_CI_SECRET="EROTEMIC_CI_SECRET" export VARNAME_TWINE_PASSWORD="EROTEMIC_PYPI_MASTER_TOKEN" export VARNAME_TEST_TWINE_PASSWORD="EROTEMIC_TEST_PYPI_MASTER_TOKEN" export VARNAME_TWINE_USERNAME="EROTEMIC_PYPI_MASTER_TOKEN_USERNAME" export VARNAME_TEST_TWINE_USERNAME="EROTEMIC_TEST_PYPI_MASTER_TOKEN_USERNAME" export GPG_IDENTIFIER="=Erotemic-CI " ubelt-1.3.7/dev/setup_secrets.sh000066400000000000000000000471461472470106000166540ustar00rootroot00000000000000#!/usr/bin/env bash __doc__=' ============================ SETUP CI SECRET INSTRUCTIONS ============================ TODO: These instructions are currently pieced together from old disparate instances, and are not yet fully organized. The original template file should be: ~/code/xcookie/dev/setup_secrets.sh Development script for updating secrets when they rotate The intent of this script is to help setup secrets for whichever of the following CI platforms is used: ../.github/workflows/tests.yml ../.gitlab-ci.yml ../.circleci/config.yml ========================= GITHUB ACTION INSTRUCTIONS ========================= * `PERSONAL_GITHUB_PUSH_TOKEN` - This is only needed if you want to automatically git-tag release branches. To make a API token go to: https://docs.github.com/en/free-pro-team@latest/github/authenticating-to-github/creating-a-personal-access-token ========================= GITLAB ACTION INSTRUCTIONS ========================= ```bash cat .setup_secrets.sh | \ sed "s|utils||g" | \ sed "s|xcookie||g" | \ sed "s|travis-ci-Erotemic||g" | \ sed "s|CI_SECRET||g" | \ sed "s|GITLAB_ORG_PUSH_TOKEN||g" | \ sed "s|gitlab.org.com|gitlab.your-instance.com|g" | \ tee /tmp/repl && colordiff .setup_secrets.sh /tmp/repl ``` * Make sure you add Runners to your project https://gitlab.org.com/utils/xcookie/-/settings/ci_cd in Runners-> Shared Runners and Runners-> Available specific runners * Ensure that you are auto-cancel redundant pipelines. Navigate to https://gitlab.kitware.com/utils/xcookie/-/settings/ci_cd and ensure "Auto-cancel redundant pipelines" is checked. More details are here https://docs.gitlab.com/ee/ci/pipelines/settings.html#auto-cancel-redundant-pipelines * TWINE_USERNAME - this is your pypi username twine info is only needed if you want to automatically publish to pypi * TWINE_PASSWORD - this is your pypi password * CI_SECRET - We will use this as a secret key to encrypt/decrypt gpg secrets This is only needed if you want to automatically sign published wheels with a gpg key. * GITLAB_ORG_PUSH_TOKEN - This is only needed if you want to automatically git-tag release branches. Create a new personal access token in User->Settings->Tokens, You can name the token GITLAB_ORG_PUSH_TOKEN_VALUE Give it api and write repository permissions SeeAlso: https://gitlab.org.com/profile/personal_access_tokens Take this variable and record its value somewhere safe. 
I put it in my secrets file as such: export GITLAB_ORG_PUSH_TOKEN_VALUE= I also create another variable with the prefix "git-push-token", which is necessary export GITLAB_ORG_PUSH_TOKEN=git-push-token:$GITLAB_ORG_PUSH_TOKEN_VALUE Then add this as a secret variable here: https://gitlab.org.com/groups/utils/-/settings/ci_cd Note the value of GITLAB_ORG_PUSH_TOKEN will look something like: "{token-name}:{token-password}" For instance it may look like this: "git-push-token:62zutpzqga6tvrhklkdjqm" References: https://stackoverflow.com/questions/51465858/how-do-you-push-to-a-gitlab-repo-using-a-gitlab-ci-job # ADD RELEVANT VARIABLES TO GITLAB SECRET VARIABLES # https://gitlab.kitware.com/computer-vision/kwcoco/-/settings/ci_cd # Note that it is important to make sure that these variables are # only decrypted on protected branches by selecting the protected # and masked option. Also make sure you have master and release # branches protected. # https://gitlab.kitware.com/computer-vision/kwcoco/-/settings/repository#js-protected-branches-settings ============================ Relevant CI Secret Locations ============================ https://github.com/pyutils/line_profiler/settings/secrets/actions https://app.circleci.com/settings/project/github/pyutils/line_profiler/environment-variables?return-to=https%3A%2F%2Fapp.circleci.com%2Fpipelines%2Fgithub%2Fpyutils%2Fline_profiler ' setup_package_environs(){ __doc__=" Setup environment variables specific for this project. The remainder of this script should ideally be general to any repo. These non-secret variables are written to disk and loaded by the script, such that the specific repo only needs to modify that configuration file. " echo "Choose an organization specific setting or make your own. This needs to be generalized more" } ### FIXME: Should be configurable for general use setup_package_environs_gitlab_kitware(){ echo ' export VARNAME_CI_SECRET="CI_KITWARE_SECRET" export VARNAME_TWINE_PASSWORD="EROTEMIC_PYPI_MASTER_TOKEN" export VARNAME_TEST_TWINE_PASSWORD="EROTEMIC_TEST_PYPI_MASTER_TOKEN" export VARNAME_PUSH_TOKEN="GITLAB_KITWARE_TOKEN" export VARNAME_TWINE_USERNAME="EROTEMIC_PYPI_MASTER_TOKEN_USERNAME" export VARNAME_TEST_TWINE_USERNAME="EROTEMIC_TEST_PYPI_MASTER_TOKEN_USERNAME" export GPG_IDENTIFIER="=Erotemic-CI " ' | python -c "import sys; from textwrap import dedent; print(dedent(sys.stdin.read()).strip(chr(10)))" > dev/secrets_configuration.sh git add dev/secrets_configuration.sh } setup_package_environs_github_erotemic(){ echo ' export VARNAME_CI_SECRET="EROTEMIC_CI_SECRET" export VARNAME_TWINE_PASSWORD="EROTEMIC_PYPI_MASTER_TOKEN" export VARNAME_TEST_TWINE_PASSWORD="EROTEMIC_TEST_PYPI_MASTER_TOKEN" export VARNAME_TWINE_USERNAME="EROTEMIC_PYPI_MASTER_TOKEN_USERNAME" export VARNAME_TEST_TWINE_USERNAME="EROTEMIC_TEST_PYPI_MASTER_TOKEN_USERNAME" export GPG_IDENTIFIER="=Erotemic-CI " ' | python -c "import sys; from textwrap import dedent; print(dedent(sys.stdin.read()).strip(chr(10)))" > dev/secrets_configuration.sh git add dev/secrets_configuration.sh } setup_package_environs_github_pyutils(){ echo ' export VARNAME_CI_SECRET="PYUTILS_CI_SECRET" export VARNAME_TWINE_PASSWORD="PYUTILS_PYPI_MASTER_TOKEN" export VARNAME_TEST_TWINE_PASSWORD="PYUTILS_TEST_PYPI_MASTER_TOKEN" export VARNAME_TWINE_USERNAME="PYUTILS_PYPI_MASTER_TOKEN_USERNAME" export VARNAME_TEST_TWINE_USERNAME="PYUTILS_TEST_PYPI_MASTER_TOKEN_USERNAME" export GPG_IDENTIFIER="=PyUtils-CI " ' | python -c "import sys; from textwrap import dedent;
print(dedent(sys.stdin.read()).strip(chr(10)))" > dev/secrets_configuration.sh git add dev/secrets_configuration.sh #echo ' #export VARNAME_CI_SECRET="PYUTILS_CI_SECRET" #export GPG_IDENTIFIER="=PyUtils-CI " #' | python -c "import sys; from textwrap import dedent; print(dedent(sys.stdin.read()).strip(chr(10)))" > dev/secrets_configuration.sh } upload_github_secrets(){ load_secrets unset GITHUB_TOKEN #printf "%s" "$GITHUB_TOKEN" | gh auth login --hostname Github.com --with-token if ! gh auth status ; then gh auth login fi source dev/secrets_configuration.sh gh secret set "TWINE_USERNAME" -b"${!VARNAME_TWINE_USERNAME}" gh secret set "TEST_TWINE_USERNAME" -b"${!VARNAME_TEST_TWINE_USERNAME}" toggle_setx_enter gh secret set "CI_SECRET" -b"${!VARNAME_CI_SECRET}" gh secret set "TWINE_PASSWORD" -b"${!VARNAME_TWINE_PASSWORD}" gh secret set "TEST_TWINE_PASSWORD" -b"${!VARNAME_TEST_TWINE_PASSWORD}" toggle_setx_exit } toggle_setx_enter(){ # Can we do something like a try/finally? # https://stackoverflow.com/questions/15656492/writing-try-catch-finally-in-shell echo "Enter sensitive area" if [[ -n "${-//[^x]/}" ]]; then __context_1_toggle_setx=1 else __context_1_toggle_setx=0 fi if [[ "$__context_1_toggle_setx" == "1" ]]; then echo "Setx was on, disable temporarily" set +x fi } toggle_setx_exit(){ echo "Exit sensitive area" # Can we guarantee this will happen? if [[ "$__context_1_toggle_setx" == "1" ]]; then set -x fi } upload_gitlab_group_secrets(){ __doc__=" Use the gitlab API to modify group-level secrets " # In Repo Directory load_secrets REMOTE=origin GROUP_NAME=$(git remote get-url $REMOTE | cut -d ":" -f 2 | cut -d "/" -f 1) HOST=https://$(git remote get-url $REMOTE | cut -d "/" -f 1 | cut -d "@" -f 2 | cut -d ":" -f 1) echo " * GROUP_NAME = $GROUP_NAME * HOST = $HOST " PRIVATE_GITLAB_TOKEN=$(git_token_for "$HOST") if [[ "$PRIVATE_GITLAB_TOKEN" == "ERROR" ]]; then echo "Failed to load authentication key" return 1 fi TMP_DIR=$(mktemp -d -t ci-XXXXXXXXXX) curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups" > "$TMP_DIR/all_group_info" GROUP_ID=$(< "$TMP_DIR/all_group_info" jq ". | map(select(.path==\"$GROUP_NAME\")) | .[0].id") echo "GROUP_ID = $GROUP_ID" curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID" > "$TMP_DIR/group_info" < "$TMP_DIR/group_info" jq # Get group-level secret variables curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID/variables" > "$TMP_DIR/group_vars" < "$TMP_DIR/group_vars" jq '.[] | .key' if [[ "$?" != "0" ]]; then echo "Failed to access group level variables. 
Probably a permission issue" fi source dev/secrets_configuration.sh SECRET_VARNAME_ARR=(VARNAME_CI_SECRET VARNAME_TWINE_PASSWORD VARNAME_TEST_TWINE_PASSWORD VARNAME_TWINE_USERNAME VARNAME_TEST_TWINE_USERNAME VARNAME_PUSH_TOKEN) for SECRET_VARNAME_PTR in "${SECRET_VARNAME_ARR[@]}"; do SECRET_VARNAME=${!SECRET_VARNAME_PTR} echo "" echo " ---- " LOCAL_VALUE=${!SECRET_VARNAME} REMOTE_VALUE=$(< "$TMP_DIR/group_vars" jq -r ".[] | select(.key==\"$SECRET_VARNAME\") | .value") # Print current local and remote value of a variable echo "SECRET_VARNAME_PTR = $SECRET_VARNAME_PTR" echo "SECRET_VARNAME = $SECRET_VARNAME" echo "(local) $SECRET_VARNAME = $LOCAL_VALUE" echo "(remote) $SECRET_VARNAME = $REMOTE_VALUE" #curl --request GET --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID/variables/SECRET_VARNAME" | jq -r .message if [[ "$REMOTE_VALUE" == "" ]]; then # New variable echo "Remove variable does not exist, posting" toggle_setx_enter curl --request POST --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID/variables" \ --form "key=${SECRET_VARNAME}" \ --form "value=${LOCAL_VALUE}" \ --form "protected=true" \ --form "masked=true" \ --form "environment_scope=*" \ --form "variable_type=env_var" toggle_setx_exit elif [[ "$REMOTE_VALUE" != "$LOCAL_VALUE" ]]; then echo "Remove variable does not agree, putting" # Update variable value toggle_setx_enter curl --request PUT --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID/variables/$SECRET_VARNAME" \ --form "value=${LOCAL_VALUE}" toggle_setx_exit else echo "Remote value agrees with local" fi done rm "$TMP_DIR/group_vars" } upload_gitlab_repo_secrets(){ __doc__=" Use the gitlab API to modify group-level secrets " # In Repo Directory load_secrets REMOTE=origin GROUP_NAME=$(git remote get-url $REMOTE | cut -d ":" -f 2 | cut -d "/" -f 1) PROJECT_NAME=$(git remote get-url $REMOTE | cut -d ":" -f 2 | cut -d "/" -f 2 | cut -d "." -f 1) HOST=https://$(git remote get-url $REMOTE | cut -d "/" -f 1 | cut -d "@" -f 2 | cut -d ":" -f 1) echo " * GROUP_NAME = $GROUP_NAME * PROJECT_NAME = $PROJECT_NAME * HOST = $HOST " PRIVATE_GITLAB_TOKEN=$(git_token_for "$HOST") if [[ "$PRIVATE_GITLAB_TOKEN" == "ERROR" ]]; then echo "Failed to load authentication key" return 1 fi TMP_DIR=$(mktemp -d -t ci-XXXXXXXXXX) toggle_setx_enter curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups" > "$TMP_DIR/all_group_info" toggle_setx_exit GROUP_ID=$(< "$TMP_DIR/all_group_info" jq ". | map(select(.path==\"$GROUP_NAME\")) | .[0].id") echo "GROUP_ID = $GROUP_ID" toggle_setx_enter curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID" > "$TMP_DIR/group_info" toggle_setx_exit GROUP_ID=$(< "$TMP_DIR/all_group_info" jq ". | map(select(.path==\"$GROUP_NAME\")) | .[0].id") < "$TMP_DIR/group_info" jq PROJECT_ID=$(< "$TMP_DIR/group_info" jq ".projects | map(select(.path==\"$PROJECT_NAME\")) | .[0].id") echo "PROJECT_ID = $PROJECT_ID" # Get group-level secret variables toggle_setx_enter curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/projects/$PROJECT_ID/variables" > "$TMP_DIR/project_vars" toggle_setx_exit < "$TMP_DIR/project_vars" jq '.[] | .key' if [[ "$?" != "0" ]]; then echo "Failed to access project level variables. 
Probably a permission issue" fi LIVE_MODE=1 source dev/secrets_configuration.sh SECRET_VARNAME_ARR=(VARNAME_CI_SECRET VARNAME_TWINE_PASSWORD VARNAME_TEST_TWINE_PASSWORD VARNAME_TWINE_USERNAME VARNAME_TEST_TWINE_USERNAME VARNAME_PUSH_TOKEN) for SECRET_VARNAME_PTR in "${SECRET_VARNAME_ARR[@]}"; do SECRET_VARNAME=${!SECRET_VARNAME_PTR} echo "" echo " ---- " LOCAL_VALUE=${!SECRET_VARNAME} REMOTE_VALUE=$(< "$TMP_DIR/project_vars" jq -r ".[] | select(.key==\"$SECRET_VARNAME\") | .value") # Print current local and remote value of a variable echo "SECRET_VARNAME_PTR = $SECRET_VARNAME_PTR" echo "SECRET_VARNAME = $SECRET_VARNAME" echo "(local) $SECRET_VARNAME = $LOCAL_VALUE" echo "(remote) $SECRET_VARNAME = $REMOTE_VALUE" #curl --request GET --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/projects/$PROJECT_ID/variables/SECRET_VARNAME" | jq -r .message if [[ "$REMOTE_VALUE" == "" ]]; then # New variable echo "Remove variable does not exist, posting" if [[ "$LIVE_MODE" == "1" ]]; then curl --request POST --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/projects/$PROJECT_ID/variables" \ --form "key=${SECRET_VARNAME}" \ --form "value=${LOCAL_VALUE}" \ --form "protected=true" \ --form "masked=true" \ --form "environment_scope=*" \ --form "variable_type=env_var" else echo "dry run, not posting" fi elif [[ "$REMOTE_VALUE" != "$LOCAL_VALUE" ]]; then echo "Remove variable does not agree, putting" # Update variable value if [[ "$LIVE_MODE" == "1" ]]; then curl --request PUT --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/projects/$PROJECT_ID/variables/$SECRET_VARNAME" \ --form "value=${LOCAL_VALUE}" else echo "dry run, not putting" fi else echo "Remote value agrees with local" fi done rm "$TMP_DIR/project_vars" } export_encrypted_code_signing_keys(){ # You will need to rerun this whenever the signkeys expire and are renewed # Load or generate secrets load_secrets source dev/secrets_configuration.sh CI_SECRET="${!VARNAME_CI_SECRET}" echo "VARNAME_CI_SECRET = $VARNAME_CI_SECRET" echo "CI_SECRET=$CI_SECRET" echo "GPG_IDENTIFIER=$GPG_IDENTIFIER" # ADD RELEVANT VARIABLES TO THE CI SECRET VARIABLES # HOW TO ENCRYPT YOUR SECRET GPG KEY # You need to have a known public gpg key for this to make any sense MAIN_GPG_KEYID=$(gpg --list-keys --keyid-format LONG "$GPG_IDENTIFIER" | head -n 2 | tail -n 1 | awk '{print $1}') GPG_SIGN_SUBKEY=$(gpg --list-keys --with-subkey-fingerprints "$GPG_IDENTIFIER" | grep "\[S\]" -A 1 | tail -n 1 | awk '{print $1}') # Careful, if you don't have a subkey, requesting it will export more than you want. # Export the main key instead (its better to have subkeys, but this is a lesser evil) if [[ "$GPG_SIGN_SUBKEY" == "" ]]; then # NOTE: if you get here this probably means your subkeys expired (and # wont even be visible), so we probably should check for that here and # thrown an error instead of using this hack, which likely wont work # anyway. GPG_SIGN_SUBKEY=$(gpg --list-keys --with-subkey-fingerprints "$GPG_IDENTIFIER" | grep "\[C\]" -A 1 | tail -n 1 | awk '{print $1}') fi echo "MAIN_GPG_KEYID = $MAIN_GPG_KEYID" echo "GPG_SIGN_SUBKEY = $GPG_SIGN_SUBKEY" # Only export the signing secret subkey # Export plaintext gpg public keys, private sign key, and trust info mkdir -p dev gpg --armor --export-options export-backup --export-secret-subkeys "${GPG_SIGN_SUBKEY}!" 
> dev/ci_secret_gpg_subkeys.pgp gpg --armor --export "${GPG_SIGN_SUBKEY}" > dev/ci_public_gpg_key.pgp gpg --export-ownertrust > dev/gpg_owner_trust # Encrypt gpg keys and trust with CI secret GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -e -a -in dev/ci_public_gpg_key.pgp > dev/ci_public_gpg_key.pgp.enc GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -e -a -in dev/ci_secret_gpg_subkeys.pgp > dev/ci_secret_gpg_subkeys.pgp.enc GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -e -a -in dev/gpg_owner_trust > dev/gpg_owner_trust.enc echo "$MAIN_GPG_KEYID" > dev/public_gpg_key # Test decrpyt GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -d -a -in dev/ci_public_gpg_key.pgp.enc | gpg --list-packets --verbose GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -d -a -in dev/ci_secret_gpg_subkeys.pgp.enc | gpg --list-packets --verbose GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -d -a -in dev/gpg_owner_trust.enc cat dev/public_gpg_key unload_secrets # Look at what we did, clean up, and add it to git ls dev/*.enc rm dev/*.pgp rm dev/gpg_owner_trust git status git add dev/*.enc git add dev/gpg_owner_trust git add dev/public_gpg_key } # See the xcookie module gitlab python API #gitlab_set_protected_branches(){ #} _test_gnu(){ # shellcheck disable=SC2155 export GNUPGHOME=$(mktemp -d -t) ls -al "$GNUPGHOME" chmod 700 -R "$GNUPGHOME" source dev/secrets_configuration.sh gpg -k load_secrets CI_SECRET="${!VARNAME_CI_SECRET}" echo "CI_SECRET = $CI_SECRET" cat dev/public_gpg_key GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -d -a -in dev/ci_public_gpg_key.pgp.enc GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -d -a -in dev/gpg_owner_trust.enc GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -d -a -in dev/ci_secret_gpg_subkeys.pgp.enc GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -d -a -in dev/ci_public_gpg_key.pgp.enc | gpg --import GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -d -a -in dev/gpg_owner_trust.enc | gpg --import-ownertrust GLKWS=$CI_SECRET openssl enc -aes-256-cbc -pbkdf2 -md SHA512 -pass env:GLKWS -d -a -in dev/ci_secret_gpg_subkeys.pgp.enc | gpg --import gpg -k # | gpg --import # | gpg --list-packets --verbose } ubelt-1.3.7/dev/spell_passlist.txt000066400000000000000000000000471472470106000172170ustar00rootroot00000000000000cant wont dont doesnt arent datas hist ubelt-1.3.7/dev/travis_public_gpg_key.pgp.enc000066400000000000000000000020201472470106000212350ustar00rootroot00000000000000U2FsdGVkX19ilPp/F7ecQPcOZsyE1363vd4NaGHD3gxO48qj/lcdTCuzo232zb9V iMAfJmatw8Lak/AjmOw4txOwM7nVZxwGfmrqWXHg++oAzf8zwnry//zxWGQNVcBz BmZk3lOLceqf29OsbWeBM8ESDVjuKF26nh/TmpSkcdqgSiaDjVNDGOKrpyvRfxDS Wj2kFU2DpeG6qSwMGdfowa5mZNFtJne9GFjoX/d/Zj4kiQHkomwBLwI7+fxASt9T t0iCAwLMH0OSgKYGwA+omdxWdicuBdskzUJkAkJMq4Q5KwkYOCiZFse9SgXG7HXn VtJcESiOUfC2EHa0uAZXmd0iBkjD1Zy7Ybw+PKnYWk70mqud7HGj+7PngImoxTeY dqqEc0eT5vrJbOpl5CT7oeuJicgi5NyUVUZRcW1xZ9jdfkVohI8HkSB30RvfPCmp 5vi8Zjj5ftJ/oO42czRzy8cZqyz+vgRSFFRJjgZ286nhQKGzl5cUgNDWoiyLU+57 ravH5ufPRD4ZhrPv3fMdJLFRoYOk21mhc6X9LkEKRg5DpnpOeRFH/W6ZCKg0Hxe3 PMtcolr8OCw7dkybLEyINVugFj6SNjcW62t804J5+O0J1DyeaPP7IPz8J1wPVS8M Ht8OscBK6+OX6CYbZrLvVGICW1OxYS07SfquLv0MCJdVpnP9Q5+nBnmaNUtrVugJ uQ0PlJWsGB6wa2T5vLnlyHWugmPPCWt7m96PRr3Y5mbedRPodLmZBXDSe2K3jBsr 
alrYiEJXpDpQ3Hd/1/VRJpus1zlphl6K4CjX8I63kWE+tcHJ9KJdn9/Z/30Q8BZZ vQLiLBoZDRUsG0QF45O4g5hSpd0U0nt0Ar+B18thDH9Aq7AO/iaUQzjUnSMTkLKw mjPo5iS7xEjgJS3Mq/AZXiztUg8Px49VtmRZeXdd/147AuJm2137mmCoArgJUV63 9YE9YiIzU0KXHu/q/gg3mKUcsib1ZGo2KTI0ICPXgjw9KZETcG4F+Ey5lJvuAE3d ubelt-1.3.7/dev/travis_secret_gpg_key.pgp.enc000066400000000000000000000022221472470106000212500ustar00rootroot00000000000000U2FsdGVkX1+t528A5yTzIwRMADmsjUzmps8Js3C0MRONYH/dxa1lfIFSmZOX5aWf PCtO+VbrU9Kbrl6SNXxyl0NcbLmP1xI7VjvGQEQIgMMo3H4b2JNCapRzz1oxzpR4 8aIUTTVBkayfCg4BKz49XJm+U9ey5eB42C1RQGncERnSKUEo12UyEfv1T4MJDAzP EMzvDPRKBLKecbR+xkWUKc9xfUl1e5YFPTSGF7QerqPwQnr0aY9hYNN/8twAFJjW Dvx5ul4ASBbU/9kcLyWLndLdH4LTUE3cjG9mRaE9gXOSDF4ny28/rhZJd7wFcVmW JQZxNdSatIhj5qxq151jUgAp/3mf70lkqirsVtRZWwYmsxCHWcuB94CF4yG+olt1 lXmBHczeoeeHEOAVRrdFXvdYi+IZBVk7JI/emeOJ8/2BusnDFSoimxcv8qq4F4t0 loAMTxYVkc4Gx2eo3+r4giXk0VaI16OgXwuDLA67H/kNBP48og1tKxt+n0vxxoO6 vKnzCmLAeliWuNHE/yOSW2OExNbI4LnflDrnx3FESLRxgwLSjAYWSH8zifk4M7BL 30YhpKL+/trjCJKqSgJIiJk6rMI4M1sOJ+L1ysMvrArOpYbzgUgF+P7t/x3YV56E XhP+JzGjpticof7x5Z9AkcuAEfBGtmB5Y6ITsixnncXNIuPBCXi6MKGVwXBOa0Oq k4pA/P2RR9K9lc7LY3sGtNIDSicd6PP3t7J9B+oh7ycNQuMiAYyyEPadTJHjj87e z4yLeVqK7y+814CWm4VzIF70WMZxkzsaHlRXBG7YYYR1oNBU6kZOWgj1G3MQsUgU zawjoLYitxcL9PNN6wJxr7AhwiUrrnaVkwmETtrwV5zZDidQhU7KqfTlbLnV+jjf 5By5SdggA/aWt6jKfMwWus62pEGPGxr9AgOlMJoAJC0mJR6CaHU9kKrXkVOTdvan ovoWhE7lHjiLbCCeJouX5FadSUI0svA7GE0GYATmqCLywzfis1EY8X3qD0wBvDZZ JWBidLeNhDzUO1O1amG8wHB7TnTdUTHdX0W2YQ/6U0U4wAAV+dnP4Xv9JOdfDomw 6+BxVl9l2cs9YnP+ZcomPAnMDJepJ2XU2y5BEnsRHTVo2Co7T7jWsvF8Zd9Tnq2L ubelt-1.3.7/docs/000077500000000000000000000000001472470106000135665ustar00rootroot00000000000000ubelt-1.3.7/docs/CODE_OF_CONDUCT.md000066400000000000000000000064251472470106000163740ustar00rootroot00000000000000# Contributor Covenant Code of Conduct ## Our Pledge In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socioeconomic status, nationality, personal appearance, race, religion, or sexual identity and orientation. ## Our Standards Examples of behavior that contributes to creating a positive environment include: * Using welcoming and inclusive language * Being respectful of differing viewpoints and experiences * Gracefully accepting constructive criticism * Focusing on what is best for the community * Showing empathy towards other community members Examples of unacceptable behavior by participants include: * The use of sexualized language or imagery and unwelcome sexual attention or advances * Trolling, insulting/derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or electronic address, without explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. ## Scope This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at erotemic@gmail.com. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html [homepage]: https://www.contributor-covenant.org For answers to common questions about this code of conduct, see https://www.contributor-covenant.org/faq ubelt-1.3.7/docs/CONTRIBUTING.md000066400000000000000000000052601472470106000160220ustar00rootroot00000000000000# Contributing to UBelt This is a first pass at documentation describing how you can contribute to the UBelt project. If you find an issue or bug, or have a proposal for new behavior, please submit an issue to the GitHub issue tracker: https://github.com/Erotemic/ubelt/issues If you have a fix for an issue, or would like to submit a design for a new feature, submit a pull request: https://github.com/Erotemic/ubelt/pulls ## Codebase structure The ubelt package is structured as a flat list of `util_*.py` submodules. All top-level APIs are exposed in the `__init__.py` file. Currently there are no subpackage directories, and I think it's best that ubelt remains this way for simplicity. ## Unit testing Ubelt is tested with a combination of unit tests and doctests. All tests can be run via `pytest` or using the `./run_tests.py` script. The doctests can be run using `./run_doctests.sh`. Some tests (like the ones that use the internet) are not enabled by default. These can be enabled by passing the `--network` flag to the above scripts. ## Documentation All documentation should be written in module-level, class-level, and function-level docstrings. Docstrings should follow [Google-style](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html). ## Code Style Code should follow PEP8 standards when possible. Any violations of PEP8 should be justified and have a ` # NOQA` tag to indicate that the linter should ignore an offending line.
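For example, a line that intentionally violates PEP8 can be annotated like this (a minimal sketch; the lambda assignment, which flake8 flags as E731, is illustrative only):

```python
# The NOQA tag tells flake8 to skip checks on this line entirely.
compose = lambda f, g: lambda x: f(g(x))  # NOQA
```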
By default I'm using `flake8` to lint code and I disable these error messages by default: ```python 'E126', # continuation line hanging-indent 'E127', # continuation line over-indented for visual indent 'E201', # whitespace after '(' 'E202', # whitespace before ']' 'E203', # whitespace before ', ' 'E221', # multiple spaces before operator 'E222', # multiple spaces after operator 'E241', # multiple spaces after , 'E265', # block comment should start with "# " 'E271', # multiple spaces after keyword 'E272', # multiple spaces before keyword 'E301', # expected 1 blank line, found 0 'E305', # expected 1 blank line after class / func 'E306', # expected 1 blank line before func 'E501', # line length > 79 'W602', # Old reraise syntax 'E266', # too many leading '#' for block comment 'N801', # function name should be lowercase [N806] 'N802', # function name should be lowercase [N806] 'N803', # argument should be lowercase [N806] 'N805', # first argument of a method should be named 'self' 'N806', # variable in function should be lowercase [N806] 'N811', # constant name imported as non constant 'N813', # camel case ``` ubelt-1.3.7/docs/Makefile000066400000000000000000000011761472470106000152330ustar00rootroot00000000000000# Minimal makefile for Sphinx documentation # # You can set these variables from the command line, and also # from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = source BUILDDIR = build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ubelt-1.3.7/docs/make.bat000066400000000000000000000014441472470106000151760ustar00rootroot00000000000000@ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=source set BUILDDIR=build if "%1" == "" goto help %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.https://www.sphinx-doc.org/ exit /b 1 ) %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% :end popd ubelt-1.3.7/docs/notebooks/000077500000000000000000000000001472470106000155715ustar00rootroot00000000000000ubelt-1.3.7/docs/notebooks/Ubelt Demo.ipynb000066400000000000000000000615301472470106000205610ustar00rootroot00000000000000{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Timing\n", "------\n", "\n", "Quickly time a single line." 
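, "\n", "\n", "For reference, below is a minimal sketch of the same measurement using only the standard library (the choice of ``time.perf_counter`` is an assumption for illustration; it is not necessarily the exact clock ``ub.Timer`` uses):\n", "\n", "```python\n", "import math\n", "import time\n", "start = time.perf_counter()  # record the starting timestamp\n", "math.factorial(100000)       # the block of code being timed\n", "elapsed = time.perf_counter() - start\n", "print('elapsed = {:.4f}s'.format(elapsed))\n", "```"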
] }, { "cell_type": "code", "execution_count": 154, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "tic('Timer demo!')\n", "...toc('Timer demo!')=0.0933s\n" ] } ], "source": [ "import math\n", "import ubelt as ub\n", "timer = ub.Timer('Timer demo!', verbose=1)\n", "with timer:\n", " math.factorial(100000)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Loop Progress\n", "-------------\n", "\n", "``ProgIter`` is a (mostly) drop-in alternative to\n", "```tqdm`` `__. \n", "*The advantage of ``ProgIter`` is that it does not use any python threading*,\n", "and therefore can be safer with code that makes heavy use of multiprocessing.\n", "\n", "Note: ProgIter is now a standalone module: ``pip intstall progiter``)" ] }, { "cell_type": "code", "execution_count": 155, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 7500/7500... rate=3321.68 Hz, eta=0:00:00, total=0:00:0200\n" ] } ], "source": [ "import ubelt as ub\n", "import math\n", "for n in ub.ProgIter(range(7500)):\n", " math.factorial(n)" ] }, { "cell_type": "code", "execution_count": 156, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 7500/7500... rate=3305.80 Hz, eta=0:00:00, total=0:00:0200\n" ] } ], "source": [ "import ubelt as ub\n", "import math\n", "for n in ub.ProgIter(range(7500), freq=1000, adjust=False):\n", " math.factorial(n)\n", " \n", "# Note that forcing freq=2 all the time comes at a performance cost\n", "# The default adjustment algorithm causes almost no overhead" ] }, { "cell_type": "code", "execution_count": 157, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0/1000... rate=0 Hz, eta=?, total=0:00:00\n", " 1/1000... rate=126214.91 Hz, eta=0:00:00, total=0:00:00\n", " 4/1000... rate=16279.95 Hz, eta=0:00:00, total=0:00:00\n", " 16/1000... rate=35893.59 Hz, eta=0:00:00, total=0:00:00\n", " 64/1000... rate=88745.17 Hz, eta=0:00:00, total=0:00:00\n", " 256/1000... rate=201950.56 Hz, eta=0:00:00, total=0:00:00\n", " 1000/1000... rate=183798.63 Hz, eta=0:00:00, total=0:00:00\n" ] } ], "source": [ ">>> import ubelt as ub\n", ">>> def is_prime(n):\n", "... return n >= 2 and not any(n % i == 0 for i in range(2, n))\n", ">>> for n in ub.ProgIter(range(1000), verbose=2):\n", ">>> # do some work\n", ">>> is_prime(n)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Caching\n", "-------\n", "\n", "Cache intermediate results from blocks of code inside a script with minimal\n", "boilerplate or modification to the original code. \n", "\n", "For direct caching of data, use the ``Cacher`` class. By default results will\n", "be written to the ubelt's appdir cache, but the exact location can be specified\n", "via ``dpath`` or the ``appname`` arguments. Additionally, process dependencies\n", "can be specified via the ``depends`` argument, which allows for implicit cache\n", "invalidation. As far as I can tell, this is the most concise way (4 lines of\n", "boilerplate) to cache a block of code with existing Python syntax (as of\n", "2022-06-03).\n" ] }, { "cell_type": "code", "execution_count": 158, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[cacher] tryload fname=test_process\n", "[cacher] ... cache does not exist: dpath=myapp fname=test_process cfgstr=66783cfd3e2c9799bb98f5eec57738915ec3777be02e395a0e10ad566c07f2c25876fd1edd4f4fc2280601cae3c09efe539f18f2c5a7bb954764786f5be4b72b\n", "[cacher] ... 
test_process cache miss\n", "[cacher] ... test_process cache save\n", "[cacher] tryload fname=test_process\n", "[cacher] ... test_process cache hit\n" ] } ], "source": [ "import ubelt as ub\n", "depends = ['config', {'of': 'params'}, 'that-uniquely-determine-the-process']\n", "cacher = ub.Cacher('test_process', depends=depends, appname='myapp', verbose=3)\n", "\n", "if 1:\n", " cacher.fpath.delete()\n", " \n", "for _ in range(2):\n", " data = cacher.tryload()\n", " if data is None:\n", " myvar1 = 'result of expensive process'\n", " myvar2 = 'another result'\n", " data = myvar1, myvar2\n", " cacher.save(data)\n", " myvar1, myvar2 = data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For indirect caching, use the ``CacheStamp`` class. This simply writes a\n", "\"stamp\" file that marks that a process has completed. Additionally you can\n", "specify criteria for when the stamp should expire. If you let ``CacheStamp``\n", "know about the expected \"product\", it will expire the stamp if that file has\n", "changed, which can be useful in situations where caches might become corrupt\n", "or need invalidation." ] }, { "cell_type": "code", "execution_count": 159, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[cacher] tryload fname=name\n", "[cacher] ... cache does not exist: dpath=cache fname=name cfgstr=4a166a5cbaa2926ccceb1620ee63fa1e3c4626229e887c7604b88b44e5f5df021e172437c359614dfdce1be2043909aa54194da54b6bd20b9e1f558b48756a26\n", "[cacher] ... name cache miss\n", "[cacher] stamp expired no_cert\n", "[cacher] ... name cache save\n", "[cacher] tryload fname=name\n", "[cacher] ... name cache hit\n" ] } ], "source": [ "import ubelt as ub\n", "dpath = ub.Path.appdir('ubelt/demo/cache').delete().ensuredir()\n", "params = {'params1': 1, 'param2': 2}\n", "expected_fpath = dpath / 'file.txt'\n", "stamp = ub.CacheStamp('name', dpath=dpath, depends=params,\n", " hasher='sha256', product=expected_fpath,\n", " expires='2101-01-01T000000Z', verbose=3)\n", "\n", "if 1:\n", " stamp.fpath.delete()\n", " \n", "for _ in range(2):\n", " if stamp.expired():\n", " expected_fpath.write_text('expensive process')\n", " stamp.renew()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Hashing\n", "-------\n", "\n", "The ``ub.hash_data`` function constructs a hash for common Python nested data\n", "structures. Extensions to allow it to hash custom types can be registered. By\n", "default it handles lists, dicts, sets, slices, uuids, and numpy arrays." ] }, { "cell_type": "code", "execution_count": 160, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'0d95771ff684756d7be7895b5594b8f8484adecef03b46002f97ebeb1155fb15'" ] }, "execution_count": 160, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import ubelt as ub\n", "data = [('arg1', 5), ('lr', .01), ('augmenters', ['flip', 'translate'])]\n", "ub.hash_data(data, hasher='sha256')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Support for torch tensors and pandas data frames is also included, but it needs to\n", "be explicitly enabled. There also exists a non-public plugin architecture to\n", "extend this function to arbitrary types. While not officially supported, it is\n", "usable and will become better integrated in the future. See\n", "``ubelt/util_hash.py`` for details."
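, "\n", "\n", "As a small illustration of the default capabilities listed above, each of these container types can be hashed out of the box (the exact hash value is omitted here because it depends on the data and ubelt version):\n", "\n", "```python\n", "import uuid\n", "import ubelt as ub\n", "# nested containers, a set, a slice, and UUIDs are all handled by default\n", "data = {'ids': [uuid.UUID(int=1), uuid.UUID(int=2)],\n", "        'window': slice(0, 10),\n", "        'tags': {'a', 'b'}}\n", "print(ub.hash_data(data, hasher='sha256'))\n", "```"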
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Command Line Interaction\n", "------------------------\n", "\n", "The builtin Python ``subprocess.Popen`` module is great, but it can be a\n", "bit clunky at times. The ``os.system`` command is easy to use, but it\n", "doesn't have much flexibility. The ``ub.cmd`` function aims to fix this.\n", "It is as simple to run as ``os.system``, but it returns a dictionary\n", "containing the return code, standard out, standard error, and the\n", "``Popen`` object used under the hood." ] }, { "cell_type": "code", "execution_count": 161, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "cmake version 3.22.1\n", "\n", "CMake suite maintained and supported by Kitware (kitware.com/cmake).\n", "\n" ] } ], "source": [ "import ubelt as ub\n", "info = ub.cmd('cmake --version')\n", "# Quickly inspect and parse output of a \n", "print(info['out'])" ] }, { "cell_type": "code", "execution_count": 162, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " 'command': 'cmake --version',\n", " 'cwd': None,\n", " 'err': '',\n", " 'proc': ,\n", " 'ret': 0,\n", "}\n" ] } ], "source": [ "# The info dict contains other useful data\n", "print(ub.repr2({k: v for k, v in info.items() if 'out' != k}))" ] }, { "cell_type": "code", "execution_count": 163, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "cmake version 3.22.1\n", "\n", "CMake suite maintained and supported by Kitware (kitware.com/cmake).\n" ] } ], "source": [ "# Also possible to simultaneously capture and display output in realtime\n", "info = ub.cmd('cmake --version', tee=1)" ] }, { "cell_type": "code", "execution_count": 164, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ubelt.cmd] joncrall@toothbrush:~/code/ubelt/docs/notebooks$ cmake --version\n", "cmake version 3.22.1\n", "\n", "CMake suite maintained and supported by Kitware (kitware.com/cmake).\n" ] } ], "source": [ "# tee=True is equivalent to using verbose=1, but there is also verbose=2\n", "info = ub.cmd('cmake --version', verbose=2)" ] }, { "cell_type": "code", "execution_count": 165, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "┌─── START CMD ───\n", "[ubelt.cmd] joncrall@toothbrush:~/code/ubelt/docs/notebooks$ cmake --version\n", "cmake version 3.22.1\n", "\n", "CMake suite maintained and supported by Kitware (kitware.com/cmake).\n", "└─── END CMD ───\n" ] } ], "source": [ "# and verbose=3\n", "info = ub.cmd('cmake --version', verbose=3)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Cross-Platform Config and Cache Directories\n", "-------------------------------------------\n", "\n", "If you have an application which writes configuration or cache files,\n", "the standard place to dump those files differs depending if you are on\n", "Windows, Linux, or Mac. Ubelt offers a unified functions for determining\n", "what these paths are.\n", "\n", "The ``ub.ensure_app_cache_dir`` and ``ub.ensure_app_config_dir``\n", "functions find the correct platform-specific location for these files\n", "and ensures that the directories exist. (Note: replacing \"ensure\" with\n", "\"get\" will simply return the path, but not ensure that it exists)\n", "\n", "The config root directory is ``~/AppData/Roaming`` on Windows,\n", "``~/.config`` on Linux and ``~/Library/Application Support`` on Mac. 
The\n", "cache root directory is ``~/AppData/Local`` on Windows, ``~/.config`` on\n", "Linux and ``~/Library/Caches`` on Mac.\n", "\n", "Example usage on Linux might look like this:" ] }, { "cell_type": "code", "execution_count": 166, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "~/.cache/my_app\n", "~/.config/my_app\n" ] } ], "source": [ "import ubelt as ub\n", "print(ub.shrinkuser(ub.ensure_app_cache_dir('my_app')))\n", "print(ub.shrinkuser(ub.ensure_app_config_dir('my_app')))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "New in version 1.0.0: the ``ub.Path.appdir`` classmethod provides a way to\n", "achieve the above with a chainable object oriented interface." ] }, { "cell_type": "code", "execution_count": 167, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "~/.cache/my_app\n", "~/.config/my_app\n" ] } ], "source": [ "import ubelt as ub\n", "print(ub.Path.appdir('my_app').ensuredir().shrinkuser())\n", "print(ub.Path.appdir('my_app', type='config').ensuredir().shrinkuser())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Downloading Files\n", "-----------------\n", "\n", "The function ``ub.download`` provides a simple interface to download a\n", "URL and save its data to a file.\n", "\n", "The function ``ub.grabdata`` works similarly to ``ub.download``, but\n", "whereas ``ub.download`` will always re-download the file,\n", "``ub.grabdata`` will check if the file exists and only re-download it if\n", "it needs to.\n", "\n", "New in version 0.4.0: both functions now accepts the ``hash_prefix`` keyword\n", "argument, which if specified will check that the hash of the file matches the\n", "provided value. The ``hasher`` keyword argument can be used to change which\n", "hashing algorithm is used (it defaults to ``\"sha512\"``)." ] }, { "cell_type": "code", "execution_count": 168, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "~/.cache/ubelt/rqwaDag.png\n" ] } ], "source": [ " >>> import ubelt as ub\n", " >>> url = 'http://i.imgur.com/rqwaDag.png'\n", " >>> fpath = ub.download(url, verbose=0)\n", " >>> print(ub.shrinkuser(fpath))" ] }, { "cell_type": "code", "execution_count": 169, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "~/.cache/ubelt/rqwaDag.png\n" ] } ], "source": [ " >>> import ubelt as ub\n", " >>> url = 'http://i.imgur.com/rqwaDag.png'\n", " >>> fpath = ub.grabdata(url, verbose=0, hash_prefix='944389a39')\n", " >>> print(ub.shrinkuser(fpath))" ] }, { "cell_type": "code", "execution_count": 170, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[cacher] tryload fname=rqwaDag.png.stamp\n", "[cacher] ... rqwaDag.png.stamp cache hit\n", "[cacher] tryload fname=rqwaDag.png.stamp\n", "[cacher] ... rqwaDag.png.stamp cache hit\n", "invalid hash prefix value (expected \"wrong-944389a39dfb8f\", got \"944389a39dfb8fa9e3d075bc25416d56782093d5dca88a1f84cac16bf515fa12aeebbbebf91f1e31e8beb59468a7a5f3a69ab12ac1e3c1d1581e1ad9688b766f\")\n", "invalid hash prefix value (expected \"wrong-944389a39dfb8f\", got \"944389a39dfb8fa9e3d075bc25416d56782093d5dca88a1f84cac16bf515fa12aeebbbebf91f1e31e8beb59468a7a5f3a69ab12ac1e3c1d1581e1ad9688b766f\")\n", "Downloading url='http://i.imgur.com/rqwaDag.png' to fpath='/home/joncrall/.cache/ubelt/rqwaDag.png'\n", " 1233/1233... 
rate=2669535.98 Hz, eta=0:00:00, total=0:00:00\n", "hash_prefix = 'wrong-944389a39dfb8f'\n", "got = '944389a39dfb8fa9e3d075bc25416d56782093d5dca88a1f84cac16bf515fa12aeebbbebf91f1e31e8beb59468a7a5f3a69ab12ac1e3c1d1581e1ad9688b766f'\n", "type(ex) = \n" ] } ], "source": [ "url = 'http://i.imgur.com/rqwaDag.png'\n", "ub.grabdata(url, verbose=3, hash_prefix='944389a39dfb8f')\n", "\n", "try:\n", " ub.grabdata(url, verbose=3, hash_prefix='wrong-944389a39dfb8f')\n", "except RuntimeError as ex:\n", " print('type(ex) = {!r}'.format(type(ex)))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Dictionary Tools" ] }, { "cell_type": "code", "execution_count": 171, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " 'dairy': ['cheese'],\n", " 'fruit': ['jam', 'bannana'],\n", " 'protein': ['ham', 'spam', 'eggs'],\n", "}\n" ] } ], "source": [ "import ubelt as ub\n", "items = ['ham', 'jam', 'spam', 'eggs', 'cheese', 'bannana']\n", "groupids = ['protein', 'fruit', 'protein', 'protein', 'dairy', 'fruit']\n", "groups = ub.group_items(items, groupids)\n", "print(ub.repr2(groups, nl=1))" ] }, { "cell_type": "code", "execution_count": 172, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{1: 1, 2: 4, 39: 1, 900: 3, 1232: 2}" ] }, "execution_count": 172, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import ubelt as ub\n", "items = [1, 2, 39, 900, 1232, 900, 1232, 2, 2, 2, 900]\n", "ub.dict_hist(items)" ] }, { "cell_type": "code", "execution_count": 173, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: [0, 1, 6], 2: [3, 8], 3: [4, 5]}" ] }, "execution_count": 173, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import ubelt as ub\n", "items = [0, 0, 1, 2, 3, 3, 0, 12, 2, 9]\n", "ub.find_duplicates(items, k=2)" ] }, { "cell_type": "code", "execution_count": 174, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "OrderedDict([('K', 3), ('dcvs_clip_max', 0.2)])\n" ] } ], "source": [ "import ubelt as ub\n", "dict_ = {'K': 3, 'dcvs_clip_max': 0.2, 'p': 0.1}\n", "subdict_ = ub.dict_subset(dict_, ['K', 'dcvs_clip_max'])\n", "print(subdict_)" ] }, { "cell_type": "code", "execution_count": 175, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['a', 'c', None, None]\n" ] } ], "source": [ "import ubelt as ub\n", "dict_ = {1: 'a', 2: 'b', 3: 'c'}\n", "print(list(ub.take(dict_, [1, 3, 4, 5], default=None)))" ] }, { "cell_type": "code", "execution_count": 176, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'a': 3, 'b': 0}\n" ] } ], "source": [ "import ubelt as ub\n", "dict_ = {'a': [1, 2, 3], 'b': []}\n", "newdict = ub.map_values(len, dict_)\n", "print(newdict)" ] }, { "cell_type": "code", "execution_count": 177, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'a': 0, 'b': 1, 'c': 2, 'd': 3}" ] }, "execution_count": 177, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import ubelt as ub\n", "mapping = {0: 'a', 1: 'b', 2: 'c', 3: 'd'}\n", "ub.invert_dict(mapping)" ] }, { "cell_type": "code", "execution_count": 178, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: {'A', 'a'}, 1: {'b'}, 2: {'C', 'c'}, 3: {'d'}}" ] }, "execution_count": 178, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import ubelt as ub\n", "mapping = {'a': 0, 'A': 0, 'b': 1, 'c': 2, 'C': 2, 'd': 3}\n", "ub.invert_dict(mapping, unique_vals=False)" ] }, { 
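"cell_type": "markdown", "metadata": {}, "source": [ "These dictionary tools compose naturally. A minimal sketch combining two of the functions shown above (the grocery data is illustrative only):\n", "\n", "```python\n", "import ubelt as ub\n", "items = ['ham', 'jam', 'spam', 'eggs', 'cheese', 'bannana']\n", "groupids = ['protein', 'fruit', 'protein', 'protein', 'dairy', 'fruit']\n", "# group the items, then count the size of each group\n", "groups = ub.group_items(items, groupids)\n", "sizes = ub.map_values(len, groups)\n", "print(sizes)  # {'protein': 3, 'fruit': 2, 'dairy': 1}\n", "```" ] }, {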
"cell_type": "markdown", "metadata": {}, "source": [ "AutoDict - Autovivification\n", "---------------------------\n", "\n", "While the ``collections.defaultdict`` is nice, it is sometimes more\n", "convenient to have an infinitely nested dictionary of dictionaries.\n", "\n", "(But be careful, you may start to write in Perl) " ] }, { "cell_type": "code", "execution_count": 179, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "auto = {}\n", "auto = {0: {10: {100: None}}}\n", "auto = {0: {10: {100: None}, 1: 'hello'}}\n" ] } ], "source": [ ">>> import ubelt as ub\n", ">>> auto = ub.AutoDict()\n", ">>> print('auto = {!r}'.format(auto))\n", ">>> auto[0][10][100] = None\n", ">>> print('auto = {!r}'.format(auto))\n", ">>> auto[0][1] = 'hello'\n", ">>> print('auto = {!r}'.format(auto))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "String-based imports\n", "--------------------\n", "\n", "Ubelt contains functions to import modules dynamically without using the\n", "python ``import`` statement. While ``importlib`` exists, the ``ubelt``\n", "implementation is simpler to user and does not have the disadvantage of\n", "breaking ``pytest``.\n", "\n", "Note ``ubelt`` simply provides an interface to this functionality, the\n", "core implementation is in ``xdoctest`` (over as of version ``0.7.0``, \n", "the code is statically copied into an autogenerated file such that ``ubelt``\n", "does not actually depend on ``xdoctest`` during runtime)." ] }, { "cell_type": "code", "execution_count": 180, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "module = \n", "module = \n", "ubelt.util_import\n" ] } ], "source": [ "import ubelt as ub\n", "try:\n", " # This is where I keep ubelt on my machine, so it is not expected to work elsewhere.\n", " module = ub.import_module_from_path(ub.expandpath('~/code/ubelt/ubelt'))\n", " print('module = {!r}'.format(module))\n", "except OSError:\n", " pass\n", " \n", "module = ub.import_module_from_name('ubelt')\n", "print('module = {!r}'.format(module))\n", "\n", "try:\n", " module = ub.import_module_from_name('does-not-exist')\n", " raise AssertionError\n", "except ModuleNotFoundError:\n", " pass\n", "\n", "modpath = ub.Path(ub.util_import.__file__)\n", "print(ub.modpath_to_modname(modpath))\n", "modname = ub.util_import.__name__\n", "assert ub.Path(ub.modname_to_modpath(modname)).resolve() == modpath.resolve()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Related to this functionality are the functions\n", "``ub.modpath_to_modname`` and ``ub.modname_to_modpath``, which\n", "*statically* transform (i.e. no code in the target modules is imported\n", "or executed) between module names (e.g. ``ubelt.util_import``) and\n", "module paths (e.g.\n", "``~/.local/conda/envs/cenv3/lib/python3.5/site-packages/ubelt/util_import.py``)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Horizontal String Concatenation\n", "-------------------------------\n", "\n", "Sometimes its just prettier to horizontally concatenate two blocks of\n", "text." 
] }, { "cell_type": "code", "execution_count": 181, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "A = [[1, 2], * [[5, 6],\n", " [3, 4]] [7, 8]]\n" ] } ], "source": [ " >>> import ubelt as ub\n", " >>> B = ub.repr2([[1, 2], [3, 4]], nl=1, cbr=True, trailsep=False)\n", " >>> C = ub.repr2([[5, 6], [7, 8]], nl=1, cbr=True, trailsep=False)\n", " >>> print(ub.hzcat(['A = ', B, ' * ', C]))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.9" } }, "nbformat": 4, "nbformat_minor": 2 } ubelt-1.3.7/docs/notebooks/demo_CacheStamp.ipynb000066400000000000000000000135521472470106000216560ustar00rootroot00000000000000{ "cells": [ { "cell_type": "markdown", "id": "f33b6dee", "metadata": {}, "source": [ "# Ubelt's CacheStamp class\n", "\n", "The `ubelt.CacheStamp` class is used to mark that a block of code has been run, and it's output has been written to disk. \n", "\n", "You set up a `CacheStamp` by giving it a name and letting it know what files we expect to already exist or that need to be written. Then you, check if the stamp is \"expired\". If it is, you need to recompute the data you wish to cache and \"renew\" the stamp. If it is not expired, then you can expect that:\n", "\n", "1. The file already exist on disk.\n", "2. The file has not been tampered with since you wrote it.\n", "\n", "Running renew records the size, modification time (mtime), and hash (checksum) of each file registered via product. Running expired checks checks that these attributes match with existing files on disk, which gives you the tamperproof guarantee. This mechanism is similar to how Makefiles and other build systems (e.g. CMake, redo) handle detecting when files are modified. (Note that it is possible to disable the hash checks by specifying `hasher=None` while still retaining size and mtime checks, this is useful when hashing files it too expensive)." ] }, { "cell_type": "code", "execution_count": 33, "id": "be0d94d8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[cacher] ... stamp-name cache miss\n", "[cacher] stamp expired no_cert\n", "[cacher] ... stamp-name cache save\n" ] } ], "source": [ "import ubelt as ub\n", "\n", "dpath = ub.Path.appdir('stamp-demo').delete().ensuredir()\n", "fpath1 = dpath / 'large-file1.txt'\n", "fpath2 = dpath / 'large-file2.txt'\n", "\n", "stamp = ub.CacheStamp('stamp-name', dpath=dpath, product=[fpath1, fpath2])\n", "\n", "# If the stamp is expired, we need to recompute the process\n", "if stamp.expired():\n", " fpath1.write_text('large-data1')\n", " fpath2.write_text('large-data2')\n", " # After the process is complete, renew the stamp\n", " stamp.renew()\n", "\n", "# Next time the code is run, the stamp will not be expired\n", "assert not stamp.expired()" ] }, { "cell_type": "markdown", "id": "541291b3", "metadata": {}, "source": [ "The 1.1.0 implementation of `CacheStamp` also contains other features. For instance, you can set an expiration duration or time for the file to expire. All properties can be updated via the constructor or by setting instance attributes. We can demo the expired property by reusing the above stamp." 
] }, { "cell_type": "code", "execution_count": 34, "id": "46c9dbbc", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[cacher] ... stamp-name cache save\n", "[cacher] stamp expired expired_cert\n" ] } ], "source": [ "import time\n", "\n", "# Tell the stamp it will expire 2 seconds, and renew it to set that property.\n", "stamp.expires = 2\n", "stamp.renew()\n", "\n", "assert not stamp.expired(), 'should not be expired yet'\n", "\n", "# Wait 2 seconds\n", "time.sleep(2.1)\n", "\n", "# The stamp is now expired\n", "assert stamp.expired(), 'the stamp should be expired'" ] }, { "cell_type": "markdown", "id": "18d2847b", "metadata": {}, "source": [ "You can also specify an expected hash prefix for each file, which is useful when you know what file will be produced a-priori (e.g. downloading a known file, in fact the `ubelt.grabdata` mechanism is now implemented with `ubelt.CacheStamp`). It works something like this:" ] }, { "cell_type": "code", "execution_count": 35, "id": "c49cadf9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[cacher] ... download-stamp cache miss\n", "[cacher] stamp expired no_cert\n", "Downloading url='https://github.com/Kitware/CMake/releases/download/v3.22.5/cmake-3.22.5.tar.gz' to fpath=Path('/home/joncrall/.cache/stamp-download-demo/cmake-3.22.5.tar.gz')\n", " 9785396/9785396... rate=19792642.19 Hz, eta=0:00:00, total=0:00:00\n", "[cacher] ... download-stamp cache save\n" ] } ], "source": [ "import ubelt as ub\n", "\n", "url = 'https://github.com/Kitware/CMake/releases/download/v3.22.5/cmake-3.22.5.tar.gz'\n", "dpath = ub.Path.appdir('stamp-download-demo').delete().ensuredir()\n", "\n", "fpath = dpath / 'cmake-3.22.5.tar.gz'\n", "stamp = ub.CacheStamp(\n", " 'download-stamp',\n", " dpath=dpath, \n", " product=fpath,\n", " hash_prefix='057d3d40d49fe1503edb62735a73de399d90c92c',\n", ")\n", "if stamp.expired():\n", " ub.download(url, fpath=fpath)\n", " stamp.renew()" ] }, { "cell_type": "markdown", "id": "ca1fd88b", "metadata": {}, "source": [ "The new features added in 1.1.0 were:\n", "\n", "* Supporting expiration time\n", "* Supporting mtime and size checks\n", "* Supporting an expected hash-prefix\n", "\n", "\n", "https://github.com/Erotemic/ubelt/releases/tag/v1.1.0" ] }, { "cell_type": "code", "execution_count": null, "id": "ba0d8805", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.9" } }, "nbformat": 4, "nbformat_minor": 5 } ubelt-1.3.7/docs/notebooks/demo_cmd.py.ipynb000066400000000000000000000611511472470106000210360ustar00rootroot00000000000000{ "cells": [ { "cell_type": "markdown", "id": "56d25c0b", "metadata": {}, "source": [ "# Ubelt's cmd method" ] }, { "cell_type": "code", "execution_count": 1, "id": "ea6f19d6", "metadata": {}, "outputs": [], "source": [ "import ubelt as ub" ] }, { "cell_type": "code", "execution_count": 14, "id": "98c7de74", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "dict_keys(['out', 'err', 'ret', 'proc', 'cwd', 'command'])\n", "{\n", " 'out': 'Python 3.9.9\\n',\n", " 'err': '',\n", " 'ret': 0,\n", " 'proc': ,\n", " 'cwd': None,\n", " 'command': 'python --version',\n", "}\n" ] } ], 
"source": [ "info = ub.cmd(['python', '--version'], shell=False)\n", "type(info)\n", "print(info.keys())\n", "print(ub.repr2(info, sort=0))" ] }, { "cell_type": "code", "execution_count": 39, "id": "39ba675b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2\n" ] } ], "source": [ "info = ub.cmd(['python', '--verssion'])\n", "print(info['ret'])" ] }, { "cell_type": "code", "execution_count": 36, "id": "afd95fc7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " Executes a command in a subprocess.\n", "\n", " The advantage of this wrapper around subprocess is that\n", " (1) you control if the subprocess prints to stdout,\n", " (2) the text written to stdout and stderr is returned for parsing,\n", " (3) cross platform behavior that lets you specify the command as a string\n", " or tuple regardless of whether or not shell=True.\n", " (4) ability to detach, return the process object and allow the process to\n", " run in the background (eventually we may return a Future object instead).\n", "\n", " Args:\n", " command (str | List[str]): bash-like command string or tuple of\n", " executable and args\n", "\n", " shell (bool, default=False): if True, process is run in shell.\n", "\n", " detach (bool, default=False):\n", " if True, process is detached and run in background.\n", "\n", " verbose (int, default=0): verbosity mode. Can be 0, 1, 2, or 3.\n", "\n", " tee (bool | None): if True, simultaneously writes to stdout while\n", " capturing output from the command. If not specified, defaults to\n", " True if verbose > 0. If detach is True, then this argument is\n", " ignored.\n", "\n", " cwd (str | PathLike | None):\n", " Path to run command. Defaults to current working directory if\n", " unspecified.\n", "\n", " env (Dict[str, str] | None): environment passed to Popen\n", "\n", " tee_backend (str, default='auto'): backend for tee output.\n", " Valid choices are: \"auto\", \"select\" (POSIX only), and \"thread\".\n", "\n", " check (bool, default=False): if True, check that the return code was\n", " zero before returning, otherwise raise a CalledProcessError.\n", " Does nothing if detach is True.\n", "\n", " system (bool, default=False): if True, most other considerations\n", " are dropped, and :func:`os.system` is used to execute the\n", " command in a platform dependent way. Other arguments such as\n", " env, tee, timeout, and shell are all ignored.\n", " (new in version 1.1.0)\n", "\n", " timeout (float):\n", " If the process does not complete in `timeout` seconds, raises a\n", " :class:`subprocess.TimeoutExpired`. (new in version 1.1.0)\n", "\n", " Returns:\n", " dict:\n", " info - information about command status.\n", " if detach is False ``info`` contains captured standard out,\n", " standard error, and the return code\n", " if detach is False ``info`` contains a reference to the process.\n", "\n", " Note:\n", " Inputs can either be text or tuple based. On UNIX we ensure conversion\n", " to text if shell=True, and to tuple if shell=False. On windows, the\n", " input is always text based. 
See [SO_33560364]_ for a potential\n", " cross-platform shlex solution for windows.\n", "\n", " When using the tee output, the stdout and stderr may be shuffled from\n", " what they would be on the command line.\n", "\n", " CommandLine:\n", " xdoctest -m ubelt.util_cmd cmd:6\n", " python -c \"import ubelt as ub; ub.cmd('ping localhost -c 2', verbose=2)\"\n", " pytest \"$(python -c 'import ubelt; print(ubelt.util_cmd.__file__)')\" -sv --xdoctest-verbose 2\n", "\n", " References:\n", " .. [SO_11495783] https://stackoverflow.com/questions/11495783/redirect-subprocess-stderr-to-stdout\n", " .. [SO_7729336] https://stackoverflow.com/questions/7729336/how-can-i-print-and-display-subprocess-stdout-and-stderr-output-without-distorti\n", " .. [SO_33560364] https://stackoverflow.com/questions/33560364/python-windows-parsing-command-lines-with-shlex\n", "\n", " Example:\n", " >>> import ubelt as ub\n", " >>> info = ub.cmd(('echo', 'simple cmdline interface'), verbose=1)\n", " simple cmdline interface\n", " >>> assert info['ret'] == 0\n", " >>> assert info['out'].strip() == 'simple cmdline interface'\n", " >>> assert info['err'].strip() == ''\n", "\n", " Example:\n", " >>> import ubelt as ub\n", " >>> info = ub.cmd('echo str noshell', verbose=0)\n", " >>> assert info['out'].strip() == 'str noshell'\n", "\n", " Example:\n", " >>> # windows echo will output extra single quotes\n", " >>> import ubelt as ub\n", " >>> info = ub.cmd(('echo', 'tuple noshell'), verbose=0)\n", " >>> assert info['out'].strip().strip(\"'\") == 'tuple noshell'\n", "\n", " Example:\n", " >>> # Note this command is formatted to work on win32 and unix\n", " >>> import ubelt as ub\n", " >>> info = ub.cmd('echo str&&echo shell', verbose=0, shell=True)\n", " >>> assert info['out'].strip() == 'str' + chr(10) + 'shell'\n", "\n", " Example:\n", " >>> import ubelt as ub\n", " >>> info = ub.cmd(('echo', 'tuple shell'), verbose=0, shell=True)\n", " >>> assert info['out'].strip().strip(\"'\") == 'tuple shell'\n", "\n", " Example:\n", " >>> import pytest\n", " >>> import ubelt as ub\n", " >>> info = ub.cmd('echo hi', check=True)\n", " >>> import subprocess\n", " >>> with pytest.raises(subprocess.CalledProcessError):\n", " >>> ub.cmd('exit 1', check=True, shell=True)\n", "\n", " Example:\n", " >>> import ubelt as ub\n", " >>> from os.path import join, exists\n", " >>> fpath1 = join(ub.get_app_cache_dir('ubelt'), 'cmdout1.txt')\n", " >>> fpath2 = join(ub.get_app_cache_dir('ubelt'), 'cmdout2.txt')\n", " >>> ub.delete(fpath1)\n", " >>> ub.delete(fpath2)\n", " >>> # Start up two processes that run simultaneously in the background\n", " >>> info1 = ub.cmd(('touch', fpath1), detach=True)\n", " >>> info2 = ub.cmd('echo writing2 > ' + fpath2, shell=True, detach=True)\n", " >>> # Detached processes are running in the background\n", " >>> # We can run other code while we wait for them.\n", " >>> while not exists(fpath1):\n", " ... pass\n", " >>> while not exists(fpath2):\n", " ... 
pass\n", " >>> # communicate with the process before you finish\n", " >>> # (otherwise you may leak a text wrapper)\n", " >>> info1['proc'].communicate()\n", " >>> info2['proc'].communicate()\n", " >>> # Check that the process actually did finish\n", " >>> assert (info1['proc'].wait()) == 0\n", " >>> assert (info2['proc'].wait()) == 0\n", " >>> # Check that the process did what we expect\n", " >>> assert ub.readfrom(fpath1) == ''\n", " >>> assert ub.readfrom(fpath2).strip() == 'writing2'\n", "\n", " Example:\n", " >>> # Can also use ub.cmd to call os.system\n", " >>> import pytest\n", " >>> import ubelt as ub\n", " >>> import subprocess\n", " >>> info = ub.cmd('echo hi', check=True, system=True)\n", " >>> with pytest.raises(subprocess.CalledProcessError):\n", " >>> ub.cmd('exit 1', check=True, shell=True)\n", " \n" ] } ], "source": [ "print(ub.cmd.__doc__)" ] }, { "cell_type": "code", "execution_count": 43, "id": "681a1c8b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "┌─── START CMD ───\n", "[ubelt.cmd] joncrall@toothbrush:~/code/ubelt/docs/notebooks$ echo hello world && ping localhost\n", "hello world\n", "PING localhost (127.0.0.1) 56(84) bytes of data.\n", "64 bytes from localhost (127.0.0.1): icmp_seq=1 ttl=64 time=0.042 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=2 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=3 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=4 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=5 ttl=64 time=0.018 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=6 ttl=64 time=0.022 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=7 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=8 ttl=64 time=0.026 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=9 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=10 ttl=64 time=0.019 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=11 ttl=64 time=0.019 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=12 ttl=64 time=0.021 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=13 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=14 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=15 ttl=64 time=0.036 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=16 ttl=64 time=0.018 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=17 ttl=64 time=0.019 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=18 ttl=64 time=0.019 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=19 ttl=64 time=0.041 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=20 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=21 ttl=64 time=0.021 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=22 ttl=64 time=0.068 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=23 ttl=64 time=0.019 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=24 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=25 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=26 ttl=64 time=0.040 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=27 ttl=64 time=0.021 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=28 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=29 ttl=64 time=0.019 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=30 ttl=64 time=0.019 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=31 ttl=64 time=0.018 ms\n", "64 bytes from 
localhost (127.0.0.1): icmp_seq=32 ttl=64 time=0.019 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=33 ttl=64 time=0.034 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=34 ttl=64 time=0.018 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=35 ttl=64 time=0.019 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=36 ttl=64 time=0.018 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=37 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=38 ttl=64 time=0.018 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=39 ttl=64 time=0.017 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=40 ttl=64 time=0.038 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=41 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=42 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=43 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=44 ttl=64 time=0.037 ms\n" ] }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "Input \u001b[0;32mIn [43]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[100mub\u001b[49m\u001b[38;5;241;100m.\u001b[39;49m\u001b[100mcmd\u001b[49m\u001b[100m(\u001b[49m\u001b[38;5;124;100m'\u001b[39;49m\u001b[38;5;124;100mecho hello world && ping localhost\u001b[39;49m\u001b[38;5;124;100m'\u001b[39;49m\u001b[100m,\u001b[49m\u001b[100m \u001b[49m\u001b[100mverbose\u001b[49m\u001b[38;5;241;100m=\u001b[39;49m\u001b[38;5;241;100m3\u001b[39;49m\u001b[100m,\u001b[49m\u001b[100m \u001b[49m\u001b[100mshell\u001b[49m\u001b[38;5;241;100m=\u001b[39;49m\u001b[38;5;28;100;01mTrue\u001b[39;49;00m\u001b[100m)\u001b[49m\n", "File \u001b[0;32m~/code/ubelt/ubelt/util_cmd.py:425\u001b[0m, in \u001b[0;36mcmd\u001b[0;34m(command, shell, detach, verbose, tee, cwd, env, tee_backend, check, system, timeout)\u001b[0m\n\u001b[1;32m 423\u001b[0m stderr \u001b[38;5;241m=\u001b[39m sys\u001b[38;5;241m.\u001b[39mstderr\n\u001b[1;32m 424\u001b[0m proc \u001b[38;5;241m=\u001b[39m make_proc()\n\u001b[0;32m--> 425\u001b[0m proc, logged_out, logged_err \u001b[38;5;241m=\u001b[39m \u001b[100m_tee_output\u001b[49m\u001b[100m(\u001b[49m\u001b[100mproc\u001b[49m\u001b[100m,\u001b[49m\u001b[100m \u001b[49m\u001b[100mstdout\u001b[49m\u001b[100m,\u001b[49m\u001b[100m \u001b[49m\u001b[100mstderr\u001b[49m\u001b[100m,\u001b[49m\n\u001b[1;32m 426\u001b[0m \u001b[100m \u001b[49m\u001b[100mbackend\u001b[49m\u001b[38;5;241;100m=\u001b[39;49m\u001b[100mtee_backend\u001b[49m\u001b[100m,\u001b[49m\n\u001b[1;32m 427\u001b[0m \u001b[100m \u001b[49m\u001b[100mtimeout\u001b[49m\u001b[38;5;241;100m=\u001b[39;49m\u001b[100mtimeout\u001b[49m\u001b[100m)\u001b[49m\n\u001b[1;32m 428\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 429\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(logged_out)\n", "File \u001b[0;32m~/code/ubelt/ubelt/util_cmd.py:186\u001b[0m, in \u001b[0;36m_tee_output\u001b[0;34m(proc, stdout, stderr, backend, timeout)\u001b[0m\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAssertionError\u001b[39;00m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mValidate \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbackend\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m 
before creating the proc\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 184\u001b[0m \u001b[38;5;66;03m# TODO: handle timeout\u001b[39;00m\n\u001b[0;32m--> 186\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m oline, eline \u001b[38;5;129;01min\u001b[39;00m _proc_iteroutput(proc):\n\u001b[1;32m 187\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m oline:\n\u001b[1;32m 188\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m stdout: \u001b[38;5;66;03m# pragma: nobranch\u001b[39;00m\n", "File \u001b[0;32m~/code/ubelt/ubelt/util_cmd.py:113\u001b[0m, in \u001b[0;36m_proc_iteroutput_thread\u001b[0;34m(proc)\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m stdout_live: \u001b[38;5;66;03m# pragma: nobranch\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 113\u001b[0m oline \u001b[38;5;241m=\u001b[39m \u001b[100mstdout_queue\u001b[49m\u001b[38;5;241;100m.\u001b[39;49m\u001b[100mget_nowait\u001b[49m\u001b[100m(\u001b[49m\u001b[100m)\u001b[49m\n\u001b[1;32m 114\u001b[0m stdout_live \u001b[38;5;241m=\u001b[39m oline \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m queue\u001b[38;5;241m.\u001b[39mEmpty:\n", "File \u001b[0;32m~/.pyenv/versions/3.9.9/lib/python3.9/queue.py:199\u001b[0m, in \u001b[0;36mQueue.get_nowait\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 193\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_nowait\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 194\u001b[0m \u001b[38;5;124;03m'''Remove and return an item from the queue without blocking.\u001b[39;00m\n\u001b[1;32m 195\u001b[0m \n\u001b[1;32m 196\u001b[0m \u001b[38;5;124;03m Only get an item if one is immediately available. 
Otherwise\u001b[39;00m\n\u001b[1;32m 197\u001b[0m \u001b[38;5;124;03m raise the Empty exception.\u001b[39;00m\n\u001b[1;32m 198\u001b[0m \u001b[38;5;124;03m '''\u001b[39;00m\n\u001b[0;32m--> 199\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;100mself\u001b[39;49m\u001b[38;5;241;100m.\u001b[39;49m\u001b[100mget\u001b[49m\u001b[100m(\u001b[49m\u001b[100mblock\u001b[49m\u001b[38;5;241;100m=\u001b[39;49m\u001b[38;5;28;100;01mFalse\u001b[39;49;00m\u001b[100m)\u001b[49m\n", "File \u001b[0;32m~/.pyenv/versions/3.9.9/lib/python3.9/queue.py:165\u001b[0m, in \u001b[0;36mQueue.get\u001b[0;34m(self, block, timeout)\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget\u001b[39m(\u001b[38;5;28mself\u001b[39m, block\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[1;32m 155\u001b[0m \u001b[38;5;124;03m'''Remove and return an item from the queue.\u001b[39;00m\n\u001b[1;32m 156\u001b[0m \n\u001b[1;32m 157\u001b[0m \u001b[38;5;124;03m If optional args 'block' is true and 'timeout' is None (the default),\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 163\u001b[0m \u001b[38;5;124;03m in that case).\u001b[39;00m\n\u001b[1;32m 164\u001b[0m \u001b[38;5;124;03m '''\u001b[39;00m\n\u001b[0;32m--> 165\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnot_empty:\n\u001b[1;32m 166\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m block:\n\u001b[1;32m 167\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_qsize():\n", "File \u001b[0;32m~/.pyenv/versions/3.9.9/lib/python3.9/threading.py:256\u001b[0m, in \u001b[0;36mCondition.__enter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 253\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lock\u001b[38;5;241m.\u001b[39m_at_fork_reinit()\n\u001b[1;32m 254\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_waiters\u001b[38;5;241m.\u001b[39mclear()\n\u001b[0;32m--> 256\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__enter__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 257\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lock\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__enter__\u001b[39m()\n\u001b[1;32m 259\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__exit__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs):\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "ub.cmd('echo hello world && ping localhost', verbose=3, shell=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "007b0098", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "┌─── START CMD ───\n", "[ubelt.cmd] joncrall@toothbrush:~/code/ubelt/docs/notebooks$ ping localhost\n", "PING localhost (127.0.0.1) 56(84) bytes of data.\n", "64 bytes from localhost (127.0.0.1): icmp_seq=1 ttl=64 time=0.021 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=2 ttl=64 time=0.025 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=3 ttl=64 time=0.018 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=4 ttl=64 time=0.017 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=5 ttl=64 time=0.025 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=6 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): 
icmp_seq=7 ttl=64 time=0.021 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=8 ttl=64 time=0.022 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=9 ttl=64 time=0.019 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=10 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=11 ttl=64 time=0.018 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=12 ttl=64 time=0.037 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=13 ttl=64 time=0.018 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=14 ttl=64 time=0.014 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=15 ttl=64 time=0.019 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=16 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=17 ttl=64 time=0.021 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=18 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=19 ttl=64 time=0.037 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=20 ttl=64 time=0.018 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=21 ttl=64 time=0.032 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=22 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=23 ttl=64 time=0.036 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=24 ttl=64 time=0.015 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=25 ttl=64 time=0.024 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=26 ttl=64 time=0.019 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=27 ttl=64 time=0.037 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=28 ttl=64 time=0.021 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=29 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=30 ttl=64 time=0.044 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=31 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=32 ttl=64 time=0.020 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=33 ttl=64 time=0.018 ms\n", "64 bytes from localhost (127.0.0.1): icmp_seq=34 ttl=64 time=0.023 ms\n" ] } ], "source": [ " ub.cmd('ping localhost', verbose=3, shell=True, timeout=10)" ] }, { "cell_type": "code", "execution_count": null, "id": "ddc880d5", "metadata": {}, "outputs": [], "source": [ " ub.cmd('ping localhost', verbose=3, shell=False, timeout=10)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.9" } }, "nbformat": 4, "nbformat_minor": 5 } ubelt-1.3.7/docs/source/000077500000000000000000000000001472470106000150665ustar00rootroot00000000000000ubelt-1.3.7/docs/source/auto/000077500000000000000000000000001472470106000160365ustar00rootroot00000000000000ubelt-1.3.7/docs/source/auto/modules.rst000066400000000000000000000000641472470106000202400ustar00rootroot00000000000000ubelt ===== .. toctree:: :maxdepth: 4 ubelt ubelt-1.3.7/docs/source/auto/ubelt.__main__.rst000066400000000000000000000002371472470106000214240ustar00rootroot00000000000000ubelt.\_\_main\_\_ module ========================= .. automodule:: ubelt.__main__ :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt._win32_links.rst000066400000000000000000000002471472470106000222060ustar00rootroot00000000000000ubelt.\_win32\_links module =========================== .. 
automodule:: ubelt._win32_links :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.orderedset.rst000066400000000000000000000002351472470106000220420ustar00rootroot00000000000000ubelt.orderedset module ======================= .. automodule:: ubelt.orderedset :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.progiter.rst000066400000000000000000000002271472470106000215360ustar00rootroot00000000000000ubelt.progiter module ===================== .. automodule:: ubelt.progiter :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.rst000066400000000000000000000015171472470106000177070ustar00rootroot00000000000000ubelt package ============= Submodules ---------- .. toctree:: :maxdepth: 4 ubelt.__main__ ubelt._win32_links ubelt.orderedset ubelt.progiter ubelt.util_arg ubelt.util_cache ubelt.util_cmd ubelt.util_colors ubelt.util_const ubelt.util_deprecate ubelt.util_dict ubelt.util_download ubelt.util_download_manager ubelt.util_format ubelt.util_func ubelt.util_futures ubelt.util_hash ubelt.util_import ubelt.util_indexable ubelt.util_io ubelt.util_links ubelt.util_list ubelt.util_memoize ubelt.util_mixins ubelt.util_path ubelt.util_platform ubelt.util_repr ubelt.util_str ubelt.util_stream ubelt.util_time ubelt.util_zip Module contents --------------- .. automodule:: ubelt :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_arg.rst000066400000000000000000000002311472470106000215040ustar00rootroot00000000000000ubelt.util\_arg module ====================== .. automodule:: ubelt.util_arg :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_cache.rst000066400000000000000000000002371472470106000220040ustar00rootroot00000000000000ubelt.util\_cache module ======================== .. automodule:: ubelt.util_cache :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_cmd.rst000066400000000000000000000002311472470106000214760ustar00rootroot00000000000000ubelt.util\_cmd module ====================== .. automodule:: ubelt.util_cmd :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_colors.rst000066400000000000000000000002421472470106000222360ustar00rootroot00000000000000ubelt.util\_colors module ========================= .. automodule:: ubelt.util_colors :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_const.rst000066400000000000000000000002371472470106000220670ustar00rootroot00000000000000ubelt.util\_const module ======================== .. automodule:: ubelt.util_const :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_deprecate.rst000066400000000000000000000002531472470106000226730ustar00rootroot00000000000000ubelt.util\_deprecate module ============================ .. automodule:: ubelt.util_deprecate :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_dict.rst000066400000000000000000000002341472470106000216610ustar00rootroot00000000000000ubelt.util\_dict module ======================= .. 
automodule:: ubelt.util_dict :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_download.rst000066400000000000000000000002501472470106000225430ustar00rootroot00000000000000ubelt.util\_download module =========================== .. automodule:: ubelt.util_download :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_download_manager.rst000066400000000000000000000003021472470106000242330ustar00rootroot00000000000000ubelt.util\_download\_manager module ==================================== .. automodule:: ubelt.util_download_manager :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_format.rst000066400000000000000000000002421472470106000222250ustar00rootroot00000000000000ubelt.util\_format module ========================= .. automodule:: ubelt.util_format :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_func.rst000066400000000000000000000002341472470106000216710ustar00rootroot00000000000000ubelt.util\_func module ======================= .. automodule:: ubelt.util_func :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_futures.rst000066400000000000000000000002451472470106000224350ustar00rootroot00000000000000ubelt.util\_futures module ========================== .. automodule:: ubelt.util_futures :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_hash.rst000066400000000000000000000002341472470106000216610ustar00rootroot00000000000000ubelt.util\_hash module ======================= .. automodule:: ubelt.util_hash :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_import.rst000066400000000000000000000002421472470106000222470ustar00rootroot00000000000000ubelt.util\_import module ========================= .. automodule:: ubelt.util_import :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_indexable.rst000066400000000000000000000002531472470106000226720ustar00rootroot00000000000000ubelt.util\_indexable module ============================ .. automodule:: ubelt.util_indexable :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_io.rst000066400000000000000000000002261472470106000213460ustar00rootroot00000000000000ubelt.util\_io module ===================== .. automodule:: ubelt.util_io :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_links.rst000066400000000000000000000002371472470106000220610ustar00rootroot00000000000000ubelt.util\_links module ======================== .. automodule:: ubelt.util_links :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_list.rst000066400000000000000000000002341472470106000217110ustar00rootroot00000000000000ubelt.util\_list module ======================= .. automodule:: ubelt.util_list :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_memoize.rst000066400000000000000000000002451472470106000224050ustar00rootroot00000000000000ubelt.util\_memoize module ========================== .. 
automodule:: ubelt.util_memoize :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_mixins.rst000066400000000000000000000002421472470106000222440ustar00rootroot00000000000000ubelt.util\_mixins module ========================= .. automodule:: ubelt.util_mixins :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_path.rst000066400000000000000000000002341472470106000216720ustar00rootroot00000000000000ubelt.util\_path module ======================= .. automodule:: ubelt.util_path :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_platform.rst000066400000000000000000000002501472470106000225600ustar00rootroot00000000000000ubelt.util\_platform module =========================== .. automodule:: ubelt.util_platform :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_repr.rst000066400000000000000000000002341472470106000217060ustar00rootroot00000000000000ubelt.util\_repr module ======================= .. automodule:: ubelt.util_repr :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_str.rst000066400000000000000000000002311472470106000215430ustar00rootroot00000000000000ubelt.util\_str module ====================== .. automodule:: ubelt.util_str :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_stream.rst000066400000000000000000000002421472470106000222300ustar00rootroot00000000000000ubelt.util\_stream module ========================= .. automodule:: ubelt.util_stream :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_time.rst000066400000000000000000000002341472470106000216740ustar00rootroot00000000000000ubelt.util\_time module ======================= .. automodule:: ubelt.util_time :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/auto/ubelt.util_zip.rst000066400000000000000000000002311472470106000215350ustar00rootroot00000000000000ubelt.util\_zip module ====================== .. automodule:: ubelt.util_zip :members: :undoc-members: :show-inheritance: :private-members: ubelt-1.3.7/docs/source/conf.py000066400000000000000000001053051472470106000163710ustar00rootroot00000000000000""" Notes: Based on template code in: ~/code/xcookie/xcookie/builders/docs.py ~/code/xcookie/xcookie/rc/conf_ext.py http://docs.readthedocs.io/en/latest/getting_started.html pip install sphinx sphinx-autobuild sphinx_rtd_theme sphinxcontrib-napoleon cd ~/code/ubelt mkdir -p docs cd docs sphinx-quickstart # need to edit the conf.py cd ~/code/ubelt/docs sphinx-apidoc --private --separate --force --output-dir ~/code/ubelt/docs/source/auto ~/code/ubelt/ubelt # Note: the module should be importable before running this # (e.g. install it in developer mode or munge the PYTHONPATH) make html git add source/auto/*.rst Also: To turn on PR checks https://docs.readthedocs.io/en/stable/guides/autobuild-docs-for-pull-requests.html https://readthedocs.org/dashboard/ubelt/advanced/ ensure your github account is connected to readthedocs https://readthedocs.org/accounts/social/connections/ ### For gitlab To enable the read-the-docs go to https://readthedocs.org/dashboard/ and log in. The user will need to enable the repo on their readthedocs account: https://readthedocs.org/dashboard/import/manual/?
Enter the following information: Set the Repository NAME: ubelt Set the Repository URL: https://github.com/Erotemic/ubelt Make sure you have a .readthedocs.yml file For gitlab you also need to set up an integration. Navigate to: https://readthedocs.org/dashboard/ubelt/integrations/create/ Then add gitlab incoming webhook and copy the URL (make sure you copy the real url, including the https scheme, and not just the displayed text), specifically: In the "Integration type:" dropdown menu, select "Gitlab incoming webhook" Click "Add integration" Copy the text in the "Webhook URL" box to be used later. Copy the text in the "Secret" box to be used later. Then go to https://github.com/Erotemic/ubelt/hooks Click "Add new webhook". Copy the text previously saved from the "Webhook URL" box in the readthedocs form into the "URL" box in the gitlab form. Copy the text previously saved from the "Secret" box in the readthedocs form into the "Secret token" box in the gitlab form. For trigger permissions select the following checkboxes: push events, tag push events, merge request events Click the "Add webhook" button. See Docs for more details https://docs.readthedocs.io/en/stable/integrations.html You will also need to activate the main branch: https://readthedocs.org/projects/ubelt/versions/ """ # # Configuration file for the Sphinx documentation builder. # # This file only contains a selection of the most common options. For a # full list see the documentation: # http://www.sphinx-doc.org/en/stable/config # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # # import os # import sys # sys.path.insert(0, os.path.abspath('.')) # -- Project information ----------------------------------------------------- import sphinx_rtd_theme from os.path import exists from os.path import dirname from os.path import join def parse_version(fpath): """ Statically parse the version number from a python file """ import ast if not exists(fpath): raise ValueError('fpath={!r} does not exist'.format(fpath)) with open(fpath, 'r') as file_: sourcecode = file_.read() pt = ast.parse(sourcecode) class VersionVisitor(ast.NodeVisitor): def visit_Assign(self, node): for target in node.targets: if getattr(target, 'id', None) == '__version__': self.version = node.value.s visitor = VersionVisitor() visitor.visit(pt) return visitor.version project = 'ubelt' copyright = '2024, Jon Crall' author = 'Jon Crall' modname = 'ubelt' repo_dpath = dirname(dirname(dirname(__file__))) mod_dpath = join(repo_dpath, 'ubelt') src_dpath = dirname(mod_dpath) modpath = join(mod_dpath, '__init__.py') release = parse_version(modpath) version = '.'.join(release.split('.')[0:2]) # Hack to ensure the module is importable # sys.path.insert(0, os.path.abspath(src_dpath)) # -- General configuration --------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones.
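# As a quick sanity check, the static version parser defined above can be
# exercised directly (hypothetical interactive usage, not part of the
# sphinx build):
#   >>> parse_version(modpath)   # -> '1.3.7' for this release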
extensions = [ # 'autoapi.extension', 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'sphinx.ext.intersphinx', 'sphinx.ext.napoleon', 'sphinx.ext.todo', 'sphinx.ext.viewcode', 'myst_parser', # For markdown docs 'sphinx.ext.imgconverter', # For building latexpdf 'sphinx.ext.githubpages', # 'sphinxcontrib.redirects', 'sphinx_reredirects', ] todo_include_todos = True napoleon_google_docstring = True napoleon_use_param = False napoleon_use_ivar = True #autoapi_type = 'python' #autoapi_dirs = [mod_dpath] autodoc_inherit_docstrings = False # Hack for geowatch, todo configure autosummary_mock_imports = [ 'geowatch.utils.lightning_ext._jsonargparse_ext_ge_4_24_and_lt_4_xx', 'geowatch.utils.lightning_ext._jsonargparse_ext_ge_4_22_and_lt_4_24', 'geowatch.utils.lightning_ext._jsonargparse_ext_ge_4_21_and_lt_4_22', 'geowatch.tasks.fusion.datamodules.temporal_sampling.affinity_sampling', 'geowatch.tasks.depth_pcd.model', 'geowatch.tasks.cold.export_change_map', ] autodoc_member_order = 'bysource' autoclass_content = 'both' # autodoc_mock_imports = ['torch', 'torchvision', 'visdom'] # autoapi_modules = { # modname: { # 'override': False, # 'output': 'auto' # } # } # autoapi_dirs = [f'../../src/{modname}'] # autoapi_keep_files = True # References: # https://stackoverflow.com/questions/21538983/specifying-targets-for-intersphinx-links-to-numpy-scipy-and-matplotlib intersphinx_mapping = { # 'pytorch': ('http://pytorch.org/docs/master/', None), 'python': ('https://docs.python.org/3', None), 'click': ('https://click.palletsprojects.com/', None), # 'xxhash': ('https://pypi.org/project/xxhash/', None), # 'pygments': ('https://pygments.org/docs/', None), # 'tqdm': ('https://tqdm.github.io/', None), # Requires that the repo have objects.inv 'kwarray': ('https://kwarray.readthedocs.io/en/latest/', None), 'kwimage': ('https://kwimage.readthedocs.io/en/latest/', None), # 'kwplot': ('https://kwplot.readthedocs.io/en/latest/', None), 'ndsampler': ('https://ndsampler.readthedocs.io/en/latest/', None), 'ubelt': ('https://ubelt.readthedocs.io/en/latest/', None), 'xdoctest': ('https://xdoctest.readthedocs.io/en/latest/', None), 'networkx': ('https://networkx.org/documentation/stable/', None), 'scriptconfig': ('https://scriptconfig.readthedocs.io/en/latest/', None), 'rich': ('https://rich.readthedocs.io/en/latest/', None), 'numpy': ('https://numpy.org/doc/stable/', None), 'sympy': ('https://docs.sympy.org/latest/', None), 'scikit-learn': ('https://scikit-learn.org/stable/', None), 'pandas': ('https://pandas.pydata.org/docs/', None), 'matplotlib': ('https://matplotlib.org/stable/', None), 'pytest': ('https://docs.pytest.org/en/latest/', None), 'platformdirs': ('https://platformdirs.readthedocs.io/en/latest/', None), 'timerit': ('https://timerit.readthedocs.io/en/latest/', None), 'progiter': ('https://progiter.readthedocs.io/en/latest/', None), 'dateutil': ('https://dateutil.readthedocs.io/en/latest/', None), # 'pytest._pytest.doctest': ('https://docs.pytest.org/en/latest/_modules/_pytest/doctest.html', None), # 'colorama': ('https://pypi.org/project/colorama/', None), # 'cv2' : ('http://docs.opencv.org/2.4/', None), # 'h5py' : ('http://docs.h5py.org/en/latest/', None) } __dev_note__ = """ python -m sphinx.ext.intersphinx https://docs.python.org/3/objects.inv python -m sphinx.ext.intersphinx https://kwcoco.readthedocs.io/en/latest/objects.inv python -m sphinx.ext.intersphinx https://networkx.org/documentation/stable/objects.inv python -m sphinx.ext.intersphinx https://kwarray.readthedocs.io/en/latest/objects.inv python -m 
sphinx.ext.intersphinx https://kwimage.readthedocs.io/en/latest/objects.inv python -m sphinx.ext.intersphinx https://ubelt.readthedocs.io/en/latest/objects.inv python -m sphinx.ext.intersphinx https://networkx.org/documentation/stable/objects.inv sphobjinv suggest -t 90 -u https://readthedocs.org/projects/pytest/reference/objects.inv "signal.convolve2d" python -m sphinx.ext.intersphinx https://pygments-doc.readthedocs.io/en/latest/objects.inv """ # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # source_suffix = ['.rst', '.md'] # The master toctree document. master_doc = 'index' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = 'en' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path . exclude_patterns = [] # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = 'sphinx_rtd_theme' html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = { 'collapse_navigation': False, 'display_version': True, 'navigation_depth': -1, # 'logo_only': True, } # html_logo = '.static/ubelt.svg' # html_favicon = '.static/ubelt.ico' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. # # The default sidebars (for documents that don't match any pattern) are # defined by theme itself. Builtin themes are using these templates by # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', # 'searchbox.html']``. # # html_sidebars = {} # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. htmlhelp_basename = project + 'doc' # -- Options for LaTeX output ------------------------------------------------ # References: # https://tex.stackexchange.com/questions/546246/centos-8-the-font-freeserif-cannot-be-found """ # https://www.sphinx-doc.org/en/master/usage/builders/index.html#sphinx.builders.latex.LaTeXBuilder # https://tex.stackexchange.com/a/570691/83399 sudo apt install fonts-freefont-otf texlive-luatex texlive-latex-extra texlive-fonts-recommended texlive-latex-recommended tex-gyre latexmk make latexpdf LATEXMKOPTS="-shell-escape --synctex=-1 -src-specials -interaction=nonstopmode" make latexpdf LATEXMKOPTS="-lualatex -interaction=nonstopmode" make LATEXMKOPTS="-lualatex -interaction=nonstopmode" """ # latex_engine = 'lualatex' # latex_engine = 'xelatex' latex_elements = { # The paper size ('letterpaper' or 'a4paper'). 
# # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'ubelt.tex', 'ubelt Documentation', 'Jon Crall', 'manual'), ] # -- Options for manual page output ------------------------------------------ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, 'ubelt', 'ubelt Documentation', [author], 1) ] # -- Options for Texinfo output ---------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'ubelt', 'ubelt Documentation', author, 'ubelt', 'One line description of project.', 'Miscellaneous'), ] # -- Extension configuration ------------------------------------------------- from sphinx.domains.python import PythonDomain # NOQA # from sphinx.application import Sphinx # NOQA from typing import Any, List # NOQA # HACK TO PREVENT EXCESSIVE TIME. # TODO: FIXME FOR REAL MAX_TIME_MINUTES = None if MAX_TIME_MINUTES: import ubelt # NOQA TIMER = ubelt.Timer() TIMER.tic() class PatchedPythonDomain(PythonDomain): """ References: https://github.com/sphinx-doc/sphinx/issues/3866 """ def resolve_xref(self, env, fromdocname, builder, typ, target, node, contnode): """ Helps to resolve cross-references """ if target.startswith('ub.'): target = 'ubelt.' + target[3:] # strip the 'ub.' prefix if target.startswith('xdoc.'): target = 'xdoctest.' + target[5:] # strip the 'xdoc.' prefix return_value = super(PatchedPythonDomain, self).resolve_xref( env, fromdocname, builder, typ, target, node, contnode) return return_value class GoogleStyleDocstringProcessor: """ A small extension that runs after napoleon and reformats erotemic-flavored google-style docstrings for sphinx. """ def __init__(self, autobuild=1): self.debug = 0 self.registry = {} if autobuild: self._register_builtins() def register_section(self, tag, alias=None): """ Decorator that adds a custom processing function for a non-standard google style tag. The decorated function should accept a list of docstring lines, where the first one will be the google-style tag that likely needs to be replaced, and then return the appropriate sphinx format (TODO what is the name? Is it just RST?). """ alias = [] if alias is None else alias alias = [alias] if not isinstance(alias, (list, tuple, set)) else alias alias.append(tag) alias = tuple(alias) # TODO: better tag patterns def _wrap(func): self.registry[tag] = { 'tag': tag, 'alias': alias, 'func': func, } return func return _wrap def _register_builtins(self): """ Adds definitions I like of CommandLine, TextArt, and Ignore """ @self.register_section(tag='CommandLine') def commandline(lines): new_lines = [] new_lines.append('.. rubric:: CommandLine') new_lines.append('') new_lines.append('.. code-block:: bash') new_lines.append('') new_lines.extend(lines[1:]) return new_lines @self.register_section(tag='SpecialExample', alias=['Benchmark', 'Sympy', 'Doctest']) def benchmark(lines): import textwrap new_lines = [] tag = lines[0].replace(':', '').strip() # new_lines.append(lines[0]) # TODO: it would be nice to change the tagline.
# new_lines.append('') new_lines.append('.. rubric:: {}'.format(tag)) new_lines.append('') new_text = textwrap.dedent('\n'.join(lines[1:])) redone = new_text.split('\n') new_lines.extend(redone) # import ubelt as ub # print('new_lines = {}'.format(ub.urepr(new_lines, nl=1))) # new_lines.append('') return new_lines @self.register_section(tag='TextArt', alias=['Ascii']) def text_art(lines): new_lines = [] new_lines.append('.. rubric:: TextArt') new_lines.append('') new_lines.append('.. code-block:: bash') new_lines.append('') new_lines.extend(lines[1:]) return new_lines # @self.register_section(tag='TODO', alias=['.. todo::']) # def todo_section(lines): # """ # Fixup todo sections # """ # import xdev # xdev.embed() # import ubelt as ub # print('lines = {}'.format(ub.urepr(lines, nl=1))) # return new_lines @self.register_section(tag='Ignore') def ignore(lines): return [] def process(self, lines): """ Example: >>> import ubelt as ub >>> self = GoogleStyleDocstringProcessor() >>> lines = ['Hello world', >>> '', >>> 'CommandLine:', >>> ' hi', >>> '', >>> 'CommandLine:', >>> '', >>> ' bye', >>> '', >>> 'TextArt:', >>> '', >>> ' 1', >>> ' 2', >>> '', >>> ' 345', >>> '', >>> 'Foobar:', >>> '', >>> 'TextArt:'] >>> new_lines = self.process(lines[:]) >>> print(chr(10).join(new_lines)) """ orig_lines = lines[:] new_lines = [] curr_mode = '__doc__' accum = [] def accept(): """ called when we finish reading a section """ if curr_mode == '__doc__': # Keep the lines as-is new_lines.extend(accum) else: # Process this section with the given function regitem = self.registry[curr_mode] func = regitem['func'] fixed = func(accum) new_lines.extend(fixed) # Reset the accumulator for the next section accum[:] = [] for line in orig_lines: found = None for regitem in self.registry.values(): if line.startswith(regitem['alias']): found = regitem['tag'] break if not found and line and not line.startswith(' '): # if the line startswith anything but a space, we are no longer # in the previous nested scope. NOTE: This assumption may not # be general, but it works for my code. found = '__doc__' if found: # New section is found, accept the previous one and start # accumulating the new one. 
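# (accept() flushes the accumulator into new_lines; note that it runs
# once more after the loop so the trailing section is not dropped)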
accept() curr_mode = found accum.append(line) # Finalize the last section accept() lines[:] = new_lines # make sure there is a blank line at the end if lines and lines[-1]: lines.append('') return lines def process_docstring_callback(self, app, what_: str, name: str, obj: Any, options: Any, lines: List[str]) -> None: """ Callback to be registered to autodoc-process-docstring Custom process to transform docstring lines Remove "Ignore" blocks Args: app (sphinx.application.Sphinx): the Sphinx application object what (str): the type of the object which the docstring belongs to (one of "module", "class", "exception", "function", "method", "attribute") name (str): the fully qualified name of the object obj: the object itself options: the options given to the directive: an object with attributes inherited_members, undoc_members, show_inheritance and noindex that are true if the flag option of same name was given to the auto directive lines (List[str]): the lines of the docstring, see above References: https://www.sphinx-doc.org/en/1.5.1/_modules/sphinx/ext/autodoc.html https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html """ if self.debug: print(f'ProcessDocstring: name={name}, what_={what_}, num_lines={len(lines)}') # print('BEFORE:') # import ubelt as ub # print('lines = {}'.format(ub.urepr(lines, nl=1))) self.process(lines) # docstr = '\n'.join(lines) # if 'Convert the Mask' in docstr: # import xdev # xdev.embed() # if 'keys in this dictionary ' in docstr: # import xdev # xdev.embed() render_doc_images = 0 if MAX_TIME_MINUTES and TIMER.toc() > (60 * MAX_TIME_MINUTES): render_doc_images = False # FIXME too slow on RTD if render_doc_images: # DEVELOPING if any('REQUIRES(--show)' in line for line in lines): # import xdev # xdev.embed() create_doctest_figure(app, obj, name, lines) FIX_EXAMPLE_FORMATTING = 1 if FIX_EXAMPLE_FORMATTING: for idx, line in enumerate(lines): if line == "Example:": lines[idx] = "**Example:**" lines.insert(idx + 1, "") REFORMAT_SECTIONS = 0 if REFORMAT_SECTIONS: REFORMAT_RETURNS = 0 REFORMAT_PARAMS = 0 docstr = SphinxDocstring(lines) if REFORMAT_PARAMS: for found in docstr.find_tagged_lines('Parameters'): print(found['text']) edit_slice = found['edit_slice'] # TODO: figure out how to do this. # # file = 'foo.rst' # import rstparse # rst = rstparse.Parser() # import io # rst.read(io.StringIO(found['text'])) # rst.parse() # for line in rst.lines: # print(line) # # found['text'] # import docutils # settings = docutils.frontend.OptionParser( # components=(docutils.parsers.rst.Parser,) # ).get_default_values() # document = docutils.utils.new_document('', settings) # from docutils.parsers import rst # rst.Parser().parse(found['text'], document) if REFORMAT_RETURNS: for found in docstr.find_tagged_lines('returns'): # FIXME: account for new slice with -2 offset edit_slice = found['edit_slice'] text = found['text'] new_lines = [] for para in text.split('\n\n'): indent = para[:len(para) - len(para.lstrip())] new_paragraph = indent + paragraph(para) new_lines.append(new_paragraph) new_lines.append('') new_lines = new_lines[:-1] lines[edit_slice] = new_lines # print('AFTER:') # print('lines = {}'.format(ub.urepr(lines, nl=1))) # if name == 'kwimage.Affine.translate': # import sys # sys.exit(1) class SphinxDocstring: """ Helper to parse and modify sphinx docstrings """ def __init__(docstr, lines): docstr.lines = lines # FORMAT THE RETURNS SECTION A BIT NICER import re tag_pat = re.compile(r'^:(\w*):') directive_pat = re.compile(r'^.. 
(\w*)::\s*(\w*)') # Split by sphinx types, mark the line offset where they start / stop sphinx_parts = [] for idx, line in enumerate(lines): tag_match = tag_pat.search(line) directive_match = directive_pat.search(line) if tag_match: tag = tag_match.groups()[0] sphinx_parts.append({ 'tag': tag, 'start_offset': idx, 'type': 'tag', }) elif directive_match: tag = directive_match.groups()[0] sphinx_parts.append({ 'tag': tag, 'start_offset': idx, 'type': 'directive', }) prev_offset = len(lines) for part in sphinx_parts[::-1]: part['end_offset'] = prev_offset prev_offset = part['start_offset'] docstr.sphinx_parts = sphinx_parts if 0: for line in lines: print(line) def find_tagged_lines(docstr, tag): for part in docstr.sphinx_parts[::-1]: if part['tag'] == tag: edit_slice = slice(part['start_offset'], part['end_offset']) return_section = docstr.lines[edit_slice] text = '\n'.join(return_section) found = { 'edit_slice': edit_slice, 'text': text, } yield found def paragraph(text): r""" Wraps multi-line strings and restructures the text to remove all newlines and collapse leading, trailing, and double spaces. Useful for writing log messages Args: text (str): typically a multiline string Returns: str: the reduced text block """ import re out = re.sub(r'\s\s*', ' ', text).strip() return out def create_doctest_figure(app, obj, name, lines): """ The idea is that each doctest that produces a figure should generate it, and that figure should then be included in the docs. """ import xdoctest import sys import types if isinstance(obj, types.ModuleType): module = obj else: module = sys.modules[obj.__module__] # TODO: read settings from pyproject.toml? if '--show' not in sys.argv: sys.argv.append('--show') if '--nointeract' not in sys.argv: sys.argv.append('--nointeract') modpath = module.__file__ # print(doctest.format_src()) import pathlib # HACK: write to the srcdir doc_outdir = pathlib.Path(app.outdir) doc_srcdir = pathlib.Path(app.srcdir) doc_static_outdir = doc_outdir / '_static' doc_static_srcdir = doc_srcdir / '_static' src_fig_dpath = (doc_static_srcdir / 'images') src_fig_dpath.mkdir(exist_ok=True, parents=True) out_fig_dpath = (doc_static_outdir / 'images') out_fig_dpath.mkdir(exist_ok=True, parents=True) # fig_dpath = (doc_outdir / 'autofigs' / name).mkdir(exist_ok=True) fig_num = 1 import kwplot kwplot.autompl(force='agg') plt = kwplot.autoplt() docstr = '\n'.join(lines) # TODO: The freeform parser does not work correctly here. # We need to parse out the sphinx (epdoc)? individual examples # so we can get different figures. But we can hack it for now. import re split_parts = re.split('({}\\s*\n)'.format(re.escape('.. rubric:: Example')), docstr) # split_parts = docstr.split('..
rubric:: Example') # import xdev # xdev.embed() def doctest_line_offsets(doctest): # Where the doctest starts and ends relative to the file start_line_offset = doctest.lineno - 1 last_part = doctest._parts[-1] last_line_offset = start_line_offset + last_part.line_offset + last_part.n_lines - 1 offsets = { 'start': start_line_offset, 'end': last_line_offset, 'stop': last_line_offset + 1, } return offsets # from xdoctest import utils # part_lines = utils.add_line_numbers(docstr.split('\n'), n_digits=3, start=0) # print('\n'.join(part_lines)) to_insert_fpaths = [] curr_line_offset = 0 for part in split_parts: num_lines = part.count('\n') doctests = list(xdoctest.core.parse_docstr_examples( part, modpath=modpath, callname=name, # style='google' )) # print(doctests) # doctests = list(xdoctest.core.parse_docstr_examples( # docstr, modpath=modpath, callname=name)) for doctest in doctests: if '--show' in part: ... # print('-- SHOW TEST---') # kwplot.close_figures() try: import pytest # NOQA except ImportError: pass try: from xdoctest.exceptions import Skipped except ImportError: # nocover # Define dummy skipped exception if pytest is not available class Skipped(Exception): pass try: doctest.mode = 'native' doctest.run(verbose=0, on_error='raise') ... except Skipped: print(f'Skip doctest={doctest}') except Exception as ex: print(f'ex={ex}') print(f'Error in doctest={doctest}') offsets = doctest_line_offsets(doctest) doctest_line_end = curr_line_offset + offsets['stop'] insert_line_index = doctest_line_end figures = kwplot.all_figures() for fig in figures: fig_num += 1 # path_name = path_sanatize(name) path_name = (name).replace('.', '_') fig_fpath = src_fig_dpath / f'fig_{path_name}_{fig_num:03d}.jpeg' fig.savefig(fig_fpath) print(f'Wrote figure: {fig_fpath}') to_insert_fpaths.append({ 'insert_line_index': insert_line_index, 'fpath': fig_fpath, }) for fig in figures: plt.close(fig) # kwplot.close_figures(figures) curr_line_offset += (num_lines) # if len(doctests) > 1: # doctests # import xdev # xdev.embed() INSERT_AT = 'end' INSERT_AT = 'inline' end_index = len(lines) # Reverse order for inserts import shutil for info in to_insert_fpaths[::-1]: src_abs_fpath = info['fpath'] rel_to_static_fpath = src_abs_fpath.relative_to(doc_static_srcdir) # dst_abs_fpath = doc_static_outdir / rel_to_static_fpath # dst_abs_fpath.parent.mkdir(parents=True, exist_ok=True) rel_to_root_fpath = src_abs_fpath.relative_to(doc_srcdir) dst_abs_fpath1 = doc_outdir / rel_to_root_fpath dst_abs_fpath1.parent.mkdir(parents=True, exist_ok=True) shutil.copy(src_abs_fpath, dst_abs_fpath1) dst_abs_fpath2 = doc_outdir / rel_to_static_fpath dst_abs_fpath2.parent.mkdir(parents=True, exist_ok=True) shutil.copy(src_abs_fpath, dst_abs_fpath2) dst_abs_fpath3 = doc_srcdir / rel_to_static_fpath dst_abs_fpath3.parent.mkdir(parents=True, exist_ok=True) shutil.copy(src_abs_fpath, dst_abs_fpath3) if INSERT_AT == 'inline': # Try to insert after test insert_index = info['insert_line_index'] elif INSERT_AT == 'end': insert_index = end_index else: raise KeyError(INSERT_AT) lines.insert(insert_index, '.. image:: {}'.format('..' / rel_to_root_fpath)) # lines.insert(insert_index, '.. image:: {}'.format(rel_to_root_fpath)) # lines.insert(insert_index, '.. image:: {}'.format(rel_to_static_fpath)) lines.insert(insert_index, '') def postprocess_hyperlinks(app, doctree, docname): """ Extension to fixup hyperlinks. This should be connected to the Sphinx application's "doctree-resolved" event.
""" # Your hyperlink postprocessing logic here from docutils import nodes import pathlib for node in doctree.traverse(nodes.reference): if 'refuri' in node.attributes: refuri = node.attributes['refuri'] if '.rst' in refuri: if 'source' in node.document: fpath = pathlib.Path(node.document['source']) parent_dpath = fpath.parent if (parent_dpath / refuri).exists(): node.attributes['refuri'] = refuri.replace('.rst', '.html') else: raise AssertionError def fix_rst_todo_section(lines): new_lines = [] for line in lines: ... ... def setup(app): import sphinx app : sphinx.application.Sphinx = app app.add_domain(PatchedPythonDomain, override=True) app.connect("doctree-resolved", postprocess_hyperlinks) docstring_processor = GoogleStyleDocstringProcessor() # https://stackoverflow.com/questions/26534184/can-sphinx-ignore-certain-tags-in-python-docstrings app.connect('autodoc-process-docstring', docstring_processor.process_docstring_callback) def copy(src, dst): import shutil print(f'Copy {src} -> {dst}') assert src.exists() if not dst.parent.exists(): dst.parent.mkdir() shutil.copy(src, dst) ### Hack for kwcoco: TODO: figure out a way for the user to configure this. HACK_FOR_KWCOCO = 0 if HACK_FOR_KWCOCO: import pathlib doc_outdir = pathlib.Path(app.outdir) / 'auto' doc_srcdir = pathlib.Path(app.srcdir) / 'auto' mod_dpath = doc_srcdir / '../../../kwcoco' src_fpath = (mod_dpath / 'coco_schema.json') copy(src_fpath, doc_outdir / src_fpath.name) copy(src_fpath, doc_srcdir / src_fpath.name) src_fpath = (mod_dpath / 'coco_schema_informal.rst') copy(src_fpath, doc_outdir / src_fpath.name) copy(src_fpath, doc_srcdir / src_fpath.name) return app ubelt-1.3.7/docs/source/index.rst000066400000000000000000000017031472470106000167300ustar00rootroot00000000000000:github_url: https://github.com/Erotemic/ubelt .. The large version wont work because github strips rst image rescaling. https://i.imgur.com/AcWVroL.png # TODO: Add a logo .. image:: https://i.imgur.com/PoYIsWE.png :height: 100px :align: left .. Autogenerated by templates in /home/joncrall/code/xcookie/xcookie/builders/docs.py .. The large version wont work because github strips rst image rescaling. https://i.imgur.com/AcWVroL.png .. image:: https://i.imgur.com/PoYIsWE.png :height: 100px :align: left UBelt documentation =================== .. The __init__ files contains the top-level documentation overview .. automodule:: ubelt.__init__ :show-inheritance: .. # Computed function usefulness .. include:: manual/function_usefulness.rst .. toctree:: :maxdepth: 8 :caption: Package Layout auto/ubelt auto/modules Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` ubelt-1.3.7/docs/source/manual/000077500000000000000000000000001472470106000163435ustar00rootroot00000000000000ubelt-1.3.7/docs/source/manual/function_usefulness.rst000066400000000000000000000504451472470106000232060ustar00rootroot00000000000000The API by usefulness ===================== .. to help generate python ~/code/ubelt/dev/maintain/gen_api_for_docs.py --extra_modname=bioharn,watch --remove_zeros=False Perhaps the most useful way to learn this API is to sort by "usefulness". I measure usefulness as the number of times I've used a particular function in my own code (excluding ubelt itself). 
================================================================================= ================ Function name Usefulness ================================================================================= ================ :func:`ubelt.urepr` 4327 :func:`ubelt.Path` 2125 :func:`ubelt.paragraph` 1349 :func:`ubelt.ProgIter` 747 :func:`ubelt.cmd` 657 :func:`ubelt.codeblock` 611 :func:`ubelt.udict` 603 :func:`ubelt.expandpath` 508 :func:`ubelt.take` 462 :func:`ubelt.oset` 342 :func:`ubelt.ddict` 341 :func:`ubelt.iterable` 313 :func:`ubelt.flatten` 303 :func:`ubelt.group_items` 287 :func:`ubelt.NiceRepr` 270 :func:`ubelt.ensuredir` 267 :func:`ubelt.map_vals` 265 :func:`ubelt.peek` 262 :func:`ubelt.NoParam` 248 :func:`ubelt.dzip` 239 :func:`ubelt.odict` 236 :func:`ubelt.hash_data` 200 :func:`ubelt.argflag` 184 :func:`ubelt.grabdata` 161 :func:`ubelt.dict_hist` 156 :func:`ubelt.identity` 156 :func:`ubelt.dict_isect` 152 :func:`ubelt.Timer` 145 :func:`ubelt.memoize` 142 :func:`ubelt.argval` 134 :func:`ubelt.allsame` 133 :func:`ubelt.color_text` 129 :func:`ubelt.schedule_deprecation` 123 :func:`ubelt.augpath` 120 :func:`ubelt.dict_diff` 117 :func:`ubelt.IndexableWalker` 116 :func:`ubelt.compress` 116 :func:`ubelt.JobPool` 107 :func:`ubelt.named_product` 104 :func:`ubelt.hzcat` 90 :func:`ubelt.delete` 88 :func:`ubelt.unique` 84 :func:`ubelt.WIN32` 78 :func:`ubelt.dict_union` 76 :func:`ubelt.symlink` 76 :func:`ubelt.indent` 69 :func:`ubelt.ensure_app_cache_dir` 67 :func:`ubelt.iter_window` 62 :func:`ubelt.invert_dict` 58 :func:`ubelt.memoize_property` 57 :func:`ubelt.import_module_from_name` 56 :func:`ubelt.argsort` 55 :func:`ubelt.timestamp` 54 :func:`ubelt.modname_to_modpath` 53 :func:`ubelt.find_duplicates` 53 :func:`ubelt.hash_file` 51 :func:`ubelt.find_exe` 50 :func:`ubelt.map_keys` 50 :func:`ubelt.dict_subset` 50 :func:`ubelt.Cacher` 49 :func:`ubelt.chunks` 47 :func:`ubelt.sorted_vals` 40 :func:`ubelt.CacheStamp` 38 :func:`ubelt.highlight_code` 37 :func:`ubelt.argmax` 36 :func:`ubelt.writeto` 36 :func:`ubelt.ensure_unicode` 32 :func:`ubelt.sorted_keys` 30 :func:`ubelt.memoize_method` 29 :func:`ubelt.compatible` 24 :func:`ubelt.import_module_from_path` 24 :func:`ubelt.Executor` 23 :func:`ubelt.readfrom` 23 :func:`ubelt.modpath_to_modname` 17 :func:`ubelt.AutoDict` 17 :func:`ubelt.touch` 17 :func:`ubelt.inject_method` 14 :func:`ubelt.timeparse` 13 :func:`ubelt.ChDir` 11 :func:`ubelt.shrinkuser` 11 :func:`ubelt.argmin` 10 :func:`ubelt.varied_values` 9 :func:`ubelt.split_modpath` 8 :func:`ubelt.LINUX` 8 :func:`ubelt.download` 7 :func:`ubelt.NO_COLOR` 7 :func:`ubelt.OrderedSet` 6 :func:`ubelt.zopen` 6 :func:`ubelt.CaptureStdout` 6 :func:`ubelt.DARWIN` 5 :func:`ubelt.boolmask` 4 :func:`ubelt.find_path` 4 :func:`ubelt.get_app_cache_dir` 4 :func:`ubelt.indexable_allclose` 3 :func:`ubelt.UDict` 3 :func:`ubelt.SetDict` 2 :func:`ubelt.AutoOrderedDict` 2 :func:`ubelt.argunique` 2 :func:`ubelt.map_values` 1 :func:`ubelt.unique_flags` 1 :func:`ubelt.userhome` 0 :func:`ubelt.split_archive` 0 :func:`ubelt.sorted_values` 0 :func:`ubelt.sdict` 0 :func:`ubelt.platform_data_dir` 0 :func:`ubelt.platform_config_dir` 0 :func:`ubelt.platform_cache_dir` 0 :func:`ubelt.get_app_data_dir` 0 :func:`ubelt.get_app_config_dir` 0 :func:`ubelt.ensure_app_data_dir` 0 :func:`ubelt.ensure_app_config_dir` 0 :func:`ubelt.TempDir` 0 :func:`ubelt.TeeStringIO` 0 :func:`ubelt.ReprExtensions` 0 :func:`ubelt.POSIX` 0 :func:`ubelt.DownloadManager` 0 :func:`ubelt.CaptureStream` 0 
================================================================================= ================ .. code:: python usage stats = { 'mean': 164.10257, 'std': 467.12064, 'min': 0.0, 'max': 4327.0, 'q_0.25': 6.0, 'q_0.50': 50.0, 'q_0.75': 134.0, 'med': 50.0, 'sum': 19200, 'shape': (117,), } .. The per-module sections below keep only the module headings; the per-module function references were empty in the generated source. See the table above for the full set of function links. :mod:`ubelt.orderedset` ----------------------- :mod:`ubelt.progiter` --------------------- :mod:`ubelt.util_arg` --------------------- :mod:`ubelt.util_cache` ----------------------- :mod:`ubelt.util_cmd` --------------------- :mod:`ubelt.util_colors` ------------------------ :mod:`ubelt.util_const` ----------------------- :mod:`ubelt.util_deprecate` --------------------------- :mod:`ubelt.util_dict` ---------------------- :mod:`ubelt.util_download` -------------------------- :mod:`ubelt.util_download_manager` ---------------------------------- :mod:`ubelt.util_format` ------------------------ :mod:`ubelt.util_func` ---------------------- :mod:`ubelt.util_futures` ------------------------- :mod:`ubelt.util_hash` ---------------------- :mod:`ubelt.util_import` ------------------------ :mod:`ubelt.util_indexable` --------------------------- :mod:`ubelt.util_io` -------------------- :mod:`ubelt.util_links` ----------------------- :mod:`ubelt.util_list` ---------------------- :mod:`ubelt.util_memoize` ------------------------- :mod:`ubelt.util_mixins` ------------------------ :mod:`ubelt.util_path` ---------------------- :mod:`ubelt.util_platform` -------------------------- :mod:`ubelt.util_repr` ---------------------- :mod:`ubelt.util_str` --------------------- :mod:`ubelt.util_stream` ------------------------ :mod:`ubelt.util_time` ---------------------- :mod:`ubelt.util_zip` --------------------- ubelt-1.3.7/docs/sphinxserver.py000077500000000000000000000017001472470106000167010ustar00rootroot00000000000000#!/usr/bin/env python """ This module is designed to be used with livereload_ to make it a little easier to write Sphinx documentation. Simply run the command:: python sphinxserver.py and browse to http://localhost:5500 .. _livereload: https://pypi.python.org/pypi/livereload """ import os from livereload import Server, shell rebuild_cmd = shell('make html', cwd='.') watch_dirs = [ '.', 'source', 'source/manual', 'source/auto', ] watch_globs = [ '*.rst', '*.ipynb' ] server = Server() server.watch('conf.py', rebuild_cmd) # Cover above configured watch dirs and globs matrix.
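# (i.e. one watch entry is registered per (directory, glob) pair,
# such as 'source/auto/*.rst')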
for d in watch_dirs: for g in watch_globs: server.watch(os.path.join(d, g), rebuild_cmd) # Watch source python files. for dirpath, dirnames, filenames in os.walk('../ubelt'): server.watch(os.path.join(dirpath, '*.py'), rebuild_cmd) # Optionally change to host="0.0.0.0" to make available outside localhost. server.serve(root='build/html') ubelt-1.3.7/publish.sh000077500000000000000000000370351472470106000146510ustar00rootroot00000000000000#!/usr/bin/env bash __doc__=' Script to publish a new version of this library on PyPI. If your script has binary dependencies then we assume that you have built a proper binary wheel with auditwheel and it exists in the wheelhouse directory. Otherwise, for source tarballs and wheels this script runs the setup.py script to create the wheels as well. Running this script with the default arguments will perform any builds and gpg signing, but nothing will be uploaded to pypi unless the user explicitly sets DO_UPLOAD=True or answers yes to the prompts. Args: TWINE_USERNAME (str) : username for pypi. This must be set if uploading to pypi. Defaults to "". TWINE_PASSWORD (str) : password for pypi. This must be set if uploading to pypi. Defaults to "". DO_GPG (bool) : If True, sign the packages with a GPG key specified by `GPG_KEYID`. defaults to auto. DO_OTS (bool) : If True, make an opentimestamp for the package and signature (if available) DO_UPLOAD (bool) : If True, upload the packages to the pypi server specified by `TWINE_REPOSITORY_URL`. DO_BUILD (bool) : If True, will execute the setup.py build script, which is expected to use setuptools. In the future we may add support for other build systems. If False, this script will expect the pre-built packages to exist in "wheelhouse/{NAME}-{VERSION}-{SUFFIX}.{EXT}". Defaults to "auto". DO_TAG (bool) : if True, will "git tag" the current HEAD with a version tag (v{VERSION}) and push it to DEPLOY_REMOTE. TWINE_REPOSITORY_URL (url) : The URL of the pypi server to upload to. Defaults to "auto": on the release branch this defaults to the live pypi server `https://upload.pypi.org/legacy`, otherwise it defaults to the test.pypi server: `https://test.pypi.org/legacy` GPG_KEYID (str) : The keyid of the gpg key to sign with. (if DO_GPG=True). Defaults to the local git config user.signingkey DEPLOY_REMOTE (str) : The git remote to push any tags to. Defaults to "origin" GPG_EXECUTABLE (path) : Path to the GPG executable. Defaults to "auto", which chooses "gpg2" if it exists, otherwise "gpg". MODE (str): Can be pure, binary, or all. Defaults to pure unless a CMakeLists.txt exists in which case it defaults to binary. Requirements: twine >= 1.13.0 gpg2 >= 2.2.4 OpenSSL >= 1.1.1c Notes: # NEW API TO UPLOAD TO PYPI # https://docs.travis-ci.com/user/deployment/pypi/ # https://packaging.python.org/tutorials/distributing-packages/ # https://stackoverflow.com/questions/45188811/how-to-gpg-sign-a-file-that-is-built-by-travis-ci Based on template in # github.com/Erotemic/xcookie/ ~/code/xcookie/publish.sh Usage: load_secrets # TODO: set a trap to unload secrets?
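# (load_secrets is assumed to be an external shell helper from the
# author's environment that exports the TWINE_* credentials; it is
# not defined in this repo)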
cd # Set your variables or load your secrets export TWINE_USERNAME= export TWINE_PASSWORD= TWINE_REPOSITORY_URL="https://test.pypi.org/legacy/" ' DEBUG=${DEBUG:=''} if [[ "${DEBUG}" != "" ]]; then set -x fi check_variable(){ KEY=$1 HIDE=$2 VAL=${!KEY} if [[ "$HIDE" == "" ]]; then echo "[DEBUG] CHECK VARIABLE: $KEY=\"$VAL\"" else echo "[DEBUG] CHECK VARIABLE: $KEY=" fi if [[ "$VAL" == "" ]]; then echo "[ERROR] UNSET VARIABLE: $KEY=\"$VAL\"" exit 1; fi } normalize_boolean(){ ARG=$1 ARG=$(echo "$ARG" | awk '{print tolower($0)}') if [ "$ARG" = "true" ] || [ "$ARG" = "1" ] || [ "$ARG" = "yes" ] || [ "$ARG" = "y" ] || [ "$ARG" = "on" ]; then echo "True" elif [ "$ARG" = "false" ] || [ "$ARG" = "0" ] || [ "$ARG" = "no" ] || [ "$ARG" = "n" ] || [ "$ARG" = "off" ]; then echo "False" else echo "$ARG" fi } #### # Parameters ### # Options DEPLOY_REMOTE=${DEPLOY_REMOTE:=origin} NAME=${NAME:=$(python -c "import setup; print(setup.NAME)")} VERSION=$(python -c "import setup; print(setup.VERSION)") check_variable DEPLOY_REMOTE ARG_1=$1 DO_UPLOAD=${DO_UPLOAD:=$ARG_1} DO_TAG=${DO_TAG:=$ARG_1} DO_GPG=${DO_GPG:="auto"} if [ "$DO_GPG" == "auto" ]; then DO_GPG="True" fi DO_OTS=${DO_OTS:="auto"} if [ "$DO_OTS" == "auto" ]; then # Do opentimestamp if it is available # python -m pip install opentimestamps-client if type ots ; then DO_OTS="True" else DO_OTS="False" fi fi DO_BUILD=${DO_BUILD:="auto"} # Verify that we want to build if [ "$DO_BUILD" == "auto" ]; then DO_BUILD="True" fi DO_GPG=$(normalize_boolean "$DO_GPG") DO_OTS=$(normalize_boolean "$DO_OTS") DO_BUILD=$(normalize_boolean "$DO_BUILD") DO_UPLOAD=$(normalize_boolean "$DO_UPLOAD") DO_TAG=$(normalize_boolean "$DO_TAG") TWINE_USERNAME=${TWINE_USERNAME:=""} TWINE_PASSWORD=${TWINE_PASSWORD:=""} DEFAULT_TEST_TWINE_REPO_URL="https://test.pypi.org/legacy/" DEFAULT_LIVE_TWINE_REPO_URL="https://upload.pypi.org/legacy/" TWINE_REPOSITORY_URL=${TWINE_REPOSITORY_URL:="auto"} if [[ "${TWINE_REPOSITORY_URL}" == "auto" ]]; then #if [[ "$(cat .git/HEAD)" != "ref: refs/heads/release" ]]; then # # If we are not on release, then default to the test pypi upload repo # TWINE_REPOSITORY_URL=${TWINE_REPOSITORY_URL:="https://test.pypi.org/legacy/"} #else if [[ "$DEBUG" == "" ]]; then TWINE_REPOSITORY_URL="live" else TWINE_REPOSITORY_URL="test" fi fi if [[ "${TWINE_REPOSITORY_URL}" == "live" ]]; then TWINE_REPOSITORY_URL=$DEFAULT_LIVE_TWINE_REPO_URL elif [[ "${TWINE_REPOSITORY_URL}" == "test" ]]; then TWINE_REPOSITORY_URL=$DEFAULT_TEST_TWINE_REPO_URL fi GPG_EXECUTABLE=${GPG_EXECUTABLE:="auto"} if [[ "$GPG_EXECUTABLE" == "auto" ]]; then if [[ "$(which gpg2)" != "" ]]; then GPG_EXECUTABLE="gpg2" else GPG_EXECUTABLE="gpg" fi fi GPG_KEYID=${GPG_KEYID:="auto"} if [[ "$GPG_KEYID" == "auto" ]]; then GPG_KEYID=$(git config --local user.signingkey) if [[ "$GPG_KEYID" == "" ]]; then GPG_KEYID=$(git config --global user.signingkey) fi fi if [ -f CMakeLists.txt ] ; then DEFAULT_MODE="binary" else DEFAULT_MODE="pure" fi # TODO: parameterize # The default should change depending on the application MODE=${MODE:=$DEFAULT_MODE} if [[ "$MODE" == "all" ]]; then MODE_LIST=("sdist" "native" "bdist") elif [[ "$MODE" == "pure" ]]; then MODE_LIST=("sdist" "native") elif [[ "$MODE" == "binary" ]]; then MODE_LIST=("sdist" "bdist") else MODE_LIST=("$MODE") fi MODE_LIST_STR=$(printf '"%s" ' "${MODE_LIST[@]}") #echo "MODE_LIST_STR = $MODE_LIST_STR" #### # Logic ### WAS_INTERACTION="False" echo " === PYPI BUILDING SCRIPT == NAME='$NAME' VERSION='$VERSION' TWINE_USERNAME='$TWINE_USERNAME' TWINE_REPOSITORY_URL = 
$TWINE_REPOSITORY_URL GPG_KEYID = '$GPG_KEYID' DO_UPLOAD=${DO_UPLOAD} DO_TAG=${DO_TAG} DO_GPG=${DO_GPG} DO_OTS=${DO_OTS} DO_BUILD=${DO_BUILD} MODE_LIST_STR=${MODE_LIST_STR} " # Verify that we want to tag if [[ "$DO_TAG" == "True" ]]; then echo "About to tag VERSION='$VERSION'" else if [[ "$DO_TAG" == "False" ]]; then echo "We are NOT about to tag VERSION='$VERSION'" else # shellcheck disable=SC2162 read -p "Do you want to git tag and push version='$VERSION'? (input 'yes' to confirm)" ANS echo "ANS = $ANS" WAS_INTERACTION="True" DO_TAG="$ANS" DO_TAG=$(normalize_boolean "$DO_TAG") if [ "$DO_BUILD" == "auto" ]; then DO_BUILD="" DO_GPG="" fi fi fi if [[ "$DO_BUILD" == "True" ]]; then echo "About to build wheels" else if [[ "$DO_BUILD" == "False" ]]; then echo "We are NOT about to build wheels" else # shellcheck disable=SC2162 read -p "Do you need to build wheels? (input 'yes' to confirm)" ANS echo "ANS = $ANS" WAS_INTERACTION="True" DO_BUILD="$ANS" DO_BUILD=$(normalize_boolean "$DO_BUILD") fi fi # Verify that we want to publish if [[ "$DO_UPLOAD" == "True" ]]; then echo "About to directly publish VERSION='$VERSION'" else if [[ "$DO_UPLOAD" == "False" ]]; then echo "We are NOT about to directly publish VERSION='$VERSION'" else # shellcheck disable=SC2162 read -p "Are you ready to directly publish version='$VERSION'? ('yes' will twine upload)" ANS echo "ANS = $ANS" WAS_INTERACTION="True" DO_UPLOAD="$ANS" DO_UPLOAD=$(normalize_boolean "$DO_UPLOAD") fi fi if [[ "$WAS_INTERACTION" == "True" ]]; then echo " === PYPI BUILDING SCRIPT == VERSION='$VERSION' TWINE_USERNAME='$TWINE_USERNAME' TWINE_REPOSITORY_URL = $TWINE_REPOSITORY_URL GPG_KEYID = '$GPG_KEYID' DO_UPLOAD=${DO_UPLOAD} DO_TAG=${DO_TAG} DO_GPG=${DO_GPG} DO_BUILD=${DO_BUILD} MODE_LIST_STR='${MODE_LIST_STR}' " # shellcheck disable=SC2162 read -p "Look good? Ready to build? 
Enter any text to continue" ANS fi if [ "$DO_BUILD" == "True" ]; then echo " === === " echo "LIVE BUILDING" # Build wheel and source distribution for _MODE in "${MODE_LIST[@]}" do echo "_MODE = $_MODE" if [[ "$_MODE" == "sdist" ]]; then python setup.py sdist || { echo 'failed to build sdist wheel' ; exit 1; } elif [[ "$_MODE" == "native" ]]; then python setup.py bdist_wheel || { echo 'failed to build native wheel' ; exit 1; } elif [[ "$_MODE" == "bdist" ]]; then echo "Assume wheel has already been built" else echo "ERROR: bad mode" exit 1 fi done echo " === === " else echo "DO_BUILD=False, Skipping build" fi ls_array(){ __doc__=' Read the results of a glob pattern into an array Args: arr_name glob_pattern Example: arr_name="myarray" glob_pattern="*" pass ' local arr_name="$1" local glob_pattern="$2" shopt -s nullglob # shellcheck disable=SC2206 array=($glob_pattern) shopt -u nullglob # Turn off nullglob to make sure it doesn't interfere with anything later # FIXME; for some reason this does not always work properly # Copy the array into the dynamically named variable # shellcheck disable=SC2086 readarray -t $arr_name < <(printf '%s\n' "${array[@]}") } WHEEL_FPATHS=() for _MODE in "${MODE_LIST[@]}" do if [[ "$_MODE" == "sdist" ]]; then ls_array "_NEW_WHEEL_PATHS" "dist/${NAME}-${VERSION}*.tar.gz" elif [[ "$_MODE" == "native" ]]; then ls_array "_NEW_WHEEL_PATHS" "dist/${NAME}-${VERSION}*.whl" elif [[ "$_MODE" == "bdist" ]]; then ls_array "_NEW_WHEEL_PATHS" "wheelhouse/${NAME}-${VERSION}-*.whl" else echo "ERROR: bad mode" exit 1 fi # hacky CONCAT because for some reason ls_array will return # something that looks empty but has one empty element for new_item in "${_NEW_WHEEL_PATHS[@]}" do if [[ "$new_item" != "" ]]; then WHEEL_FPATHS+=("$new_item") fi done done # Dedup the paths readarray -t WHEEL_FPATHS < <(printf '%s\n' "${WHEEL_FPATHS[@]}" | sort -u) WHEEL_PATHS_STR=$(printf '"%s" ' "${WHEEL_FPATHS[@]}") echo "WHEEL_PATHS_STR = $WHEEL_PATHS_STR" echo " MODE=$MODE VERSION='$VERSION' WHEEL_FPATHS='$WHEEL_PATHS_STR' " WHEEL_SIGNATURE_FPATHS=() if [ "$DO_GPG" == "True" ]; then echo " === === " for WHEEL_FPATH in "${WHEEL_FPATHS[@]}" do echo "WHEEL_FPATH = $WHEEL_FPATH" check_variable WHEEL_FPATH # https://stackoverflow.com/questions/45188811/how-to-gpg-sign-a-file-that-is-built-by-travis-ci # secure gpg --export-secret-keys > all.gpg # REQUIRES GPG >= 2.2 check_variable GPG_EXECUTABLE || { echo 'failed no gpg exe' ; exit 1; } check_variable GPG_KEYID || { echo 'failed no gpg key' ; exit 1; } echo "Signing wheels" GPG_SIGN_CMD="$GPG_EXECUTABLE --batch --yes --detach-sign --armor --local-user $GPG_KEYID" echo "GPG_SIGN_CMD = $GPG_SIGN_CMD" $GPG_SIGN_CMD --output "$WHEEL_FPATH".asc "$WHEEL_FPATH" echo "Checking wheels" twine check "$WHEEL_FPATH".asc "$WHEEL_FPATH" || { echo 'could not check wheels' ; exit 1; } echo "Verifying wheels" $GPG_EXECUTABLE --verify "$WHEEL_FPATH".asc "$WHEEL_FPATH" || { echo 'could not verify wheels' ; exit 1; } WHEEL_SIGNATURE_FPATHS+=("$WHEEL_FPATH".asc) done echo " === === " else echo "DO_GPG=False, Skipping GPG sign" fi if [ "$DO_OTS" == "True" ]; then echo " === === " if [ "$DO_GPG" == "True" ]; then # Stamp the wheels and the signatures ots stamp "${WHEEL_FPATHS[@]}" "${WHEEL_SIGNATURE_FPATHS[@]}" else # Stamp only the wheels ots stamp "${WHEEL_FPATHS[@]}" fi echo " === === " else echo "DO_OTS=False, Skipping OTS sign" fi if [[ "$DO_TAG" == "True" ]]; then TAG_NAME="v${VERSION}" # if we messed up we can delete the tag # git push origin :refs/tags/$TAG_NAME # and 
then tag with -f # git tag "$TAG_NAME" -m "tarball tag $VERSION" git push --tags "$DEPLOY_REMOTE" echo "Should also do a: git push $DEPLOY_REMOTE main:release" echo "For github should draft a new release: https://github.com/PyUtils/line_profiler/releases/new" else echo "Not tagging" fi if [[ "$DO_UPLOAD" == "True" ]]; then check_variable TWINE_USERNAME check_variable TWINE_PASSWORD "hide" for WHEEL_FPATH in "${WHEEL_FPATHS[@]}" do twine upload --username "$TWINE_USERNAME" "--password=$TWINE_PASSWORD" \ --repository-url "$TWINE_REPOSITORY_URL" \ "$WHEEL_FPATH" --skip-existing --verbose || { echo 'failed to twine upload' ; exit 1; } done echo """ !!! FINISH: LIVE RUN !!! """ else echo """ DRY RUN ... Skipping upload DEPLOY_REMOTE = '$DEPLOY_REMOTE' DO_UPLOAD = '$DO_UPLOAD' WHEEL_FPATH = '$WHEEL_FPATH' WHEEL_PATHS_STR = '$WHEEL_PATHS_STR' MODE_LIST_STR = '$MODE_LIST_STR' VERSION='$VERSION' NAME='$NAME' TWINE_USERNAME='$TWINE_USERNAME' GPG_KEYID = '$GPG_KEYID' To do live run set DO_UPLOAD=1 and ensure deploy and current branch are the same !!! FINISH: DRY RUN !!! """ fi __devel__=' # Checking to see how easy it is to upload packages to gitlab. # This logic should go in the CI script, not sure if it belongs here. export HOST=https://gitlab.kitware.com export GROUP_NAME=computer-vision export PROJECT_NAME=geowatch PROJECT_VERSION=$(geowatch --version) echo "$PROJECT_VERSION" load_secrets export PRIVATE_GITLAB_TOKEN=$(git_token_for "$HOST") TMP_DIR=$(mktemp -d -t ci-XXXXXXXXXX) curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups" > "$TMP_DIR/all_group_info" GROUP_ID=$(cat "$TMP_DIR/all_group_info" | jq ". | map(select(.name==\"$GROUP_NAME\")) | .[0].id") echo "GROUP_ID = $GROUP_ID" curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" "$HOST/api/v4/groups/$GROUP_ID" > "$TMP_DIR/group_info" PROJ_ID=$(cat "$TMP_DIR/group_info" | jq ".projects | map(select(.name==\"$PROJECT_NAME\")) | .[0].id") echo "PROJ_ID = $PROJ_ID" ls_array DIST_FPATHS "dist/*" for FPATH in "${DIST_FPATHS[@]}" do FNAME=$(basename $FPATH) echo $FNAME curl --header "PRIVATE-TOKEN: $PRIVATE_GITLAB_TOKEN" \ --upload-file $FPATH \ "https://gitlab.kitware.com/api/v4/projects/$PROJ_ID/packages/generic/$PROJECT_NAME/$PROJECT_VERSION/$FNAME" done ' ubelt-1.3.7/pyproject.toml000066400000000000000000000044031472470106000155530ustar00rootroot00000000000000[build-system] requires = [ "setuptools>=41.0.1",] build-backend = "setuptools.build_meta" [tool.mypy] ignore_missing_imports = true [tool.xcookie] tags = [ "purepy", "erotemic", "github",] mod_name = "ubelt" repo_name = "ubelt" rel_mod_parent_dpath = "." 
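# NOTE: this table is read by the xcookie template engine referenced in
# publish.sh and conf.py (github.com/Erotemic/xcookie), which regenerates
# the repo boilerplate from these fields.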
os = ["all"] min_python = 3.6 author = "Jon Crall" typed = "partial" author_email = "erotemic@gmail.com" description = "A Python utility belt containing simple tools, a stdlib like feel, and extra batteries" #ci_cpython_versions=["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13.0-beta.2"] license = "Apache 2" dev_status = "stable" classifiers = [ # List of classifiers available at: # https://pypi.python.org/pypi?%3Aaction=list_classifiers 'Development Status :: 5 - Production/Stable', 'Intended Audience :: Developers', 'Topic :: Software Development :: Libraries :: Python Modules', 'Topic :: Utilities', 'Operating System :: Microsoft :: Windows', 'Operating System :: MacOS', 'Operating System :: POSIX :: Linux', 'Typing :: Stubs Only', ] [tool.xcookie.setuptools] keywords = ["utility", "python", "hashing", "caching", "stdlib", "path", "pathlib", "dictionary", "download"] [tool.pytest.ini_options] addopts = "-p no:doctest --xdoctest --xdoctest-style=google --ignore-glob=setup.py --ignore-glob=docs" norecursedirs = ".git ignore build __pycache__ dev _skbuild docs" filterwarnings = [ "default", "ignore:.*No cfgstr given in Cacher constructor or call.*:Warning", "ignore:.*Define the __nice__ method for.*:Warning", "ignore:.*private pytest class or function.*:Warning", ] [tool.coverage.run] branch = true [tool.coverage.report] exclude_lines = [ "pragma: no cover", ".* # pragma: no cover", ".* # nocover", "def __repr__", "raise AssertionError", "raise NotImplementedError", "if 0:", "if trace is not None", "verbose = .*", "^ *raise", "^ *pass *$", "if _debug:", "if __name__ == .__main__.:", ".*if six.PY2:", ] omit = [ "ubelt/__main__.py", "ubelt/_win32_links.py", "*/setup.py", ] [tool.codespell] skip = ['./docs/build', './*.egg-info', './build', './htmlcov'] count = true quiet-level = 3 ignore-words-list = ['wont', 'cant', 'ANS', 'doesnt', 'arent', 'ans', 'thats', 'datas', 'isnt'] ubelt-1.3.7/requirements.txt000066400000000000000000000001231472470106000161160ustar00rootroot00000000000000-r requirements/runtime.txt -r requirements/tests.txt -r requirements/optional.txt ubelt-1.3.7/requirements/000077500000000000000000000000001472470106000153615ustar00rootroot00000000000000ubelt-1.3.7/requirements/docs.txt000066400000000000000000000003411472470106000170500ustar00rootroot00000000000000sphinx >= 4.3.2 sphinx-autobuild >= 2021.3.14 sphinx_rtd_theme >= 1.0.0 sphinxcontrib-napoleon >= 0.7 sphinx-autoapi >= 1.8.4 Pygments >= 2.9.0 # pytest # is this necessary? 
myst_parser >= 0.16.1 sphinx-reredirects >= 0.0.1 ubelt-1.3.7/requirements/optional.txt000066400000000000000000000045651472470106000177610ustar00rootroot00000000000000# xdev availpkg colorama # xdev availpkg numpy # xdev availpkg xxhash # xdev availpkg numpy # 1.19.2 was important for some versions of tensorflow numpy>=2.1.0 ; python_version < '4.0' and python_version >= '3.13' and platform_python_implementation == "CPython" # Python 3.13+ numpy>=1.26.0 ; python_version < '3.13' and python_version >= '3.12' and platform_python_implementation == "CPython" # Python 3.12 numpy>=1.23.2 ; python_version < '3.12' and python_version >= '3.11' and platform_python_implementation == "CPython" # Python 3.11 numpy>=1.21.1 ; python_version < '3.11' and python_version >= '3.10' and platform_python_implementation == "CPython" # Python 3.10 numpy>=1.19.3 ; python_version < '3.10' and python_version >= '3.9' and platform_python_implementation == "CPython" # Python 3.9 numpy>=1.19.2 ; python_version < '3.9' and python_version >= '3.8' and platform_python_implementation == "CPython" # Python 3.8 numpy>=1.14.5,<2.0.0 ; python_version < '3.8' and python_version >= '3.7' and platform_python_implementation == "CPython" # Python 3.7 numpy>=1.12.0,<2.0.0 ; python_version < '3.7' and python_version >= '3.6' and platform_python_implementation == "CPython" # Python 3.6 xxhash>=3.5.0 ; python_version < '4.0' and python_version >= '3.13' # Python 3.13+ xxhash>=3.4.1 ; python_version < '3.13' and python_version >= '3.12' # Python 3.12 xxhash>=3.2.0 ; python_version < '3.12' and python_version >= '3.11' # Python 3.11 xxhash>=3.0.0 ; python_version < '3.11' and python_version >= '3.10' # Python 3.10 xxhash>=2.0.2 ; python_version < '3.10' and python_version >= '3.9' # Python 3.9 xxhash>=1.4.3 ; python_version < '3.9' and python_version >= '3.8' # Python 3.8 xxhash>=1.3.0 ; python_version < '3.8' and python_version >= '3.7' # Python 3.7 xxhash>=1.3.0 ; python_version < '3.7' and python_version >= '3.6' # Python 3.6 Pygments>=2.13.0 ; python_version < '4.0' and python_version >= '3.12' # Python 3.12+ Pygments>=2.2.0 ; python_version < '3.12' # Python 3.12- colorama>=0.4.3;platform_system=="Windows" python_dateutil>=2.8.1 packaging>=21.0 jaraco.windows>=3.9.1;platform_system=="Windows" # Transative dependency from pydantic>=1.9.1->inflect->jaraco.text->jaraco.windows->ubelt pydantic<2.0;platform_system=="Windows" and platform_python_implementation == "PyPy" ubelt-1.3.7/requirements/runtime.txt000066400000000000000000000000001472470106000175730ustar00rootroot00000000000000ubelt-1.3.7/requirements/tests.txt000066400000000000000000000025641472470106000172730ustar00rootroot00000000000000# Pin maximum pytest versions for older python versions # TODO: determine what the actual minimum and maximum acceptable versions of # pytest (that are also compatible with xdoctest) are for each legacy python # major.minor version. 
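# Each pin below is gated by a PEP 508 environment marker, e.g.
#   pytest>=6.2.5 ; python_version < '3.11' and python_version >= '3.10'
# applies that minimum bound only on Python 3.10 interpreters.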
# xdev availpkg pytest-timeout # xdev availpkg xdoctest # xdev availpkg coverage xdoctest >= 1.1.5 pytest>=8.1.1 ; python_version < '4.0' and python_version >= '3.13' # Python 3.13+ pytest>=8.1.1 ; python_version < '3.13' and python_version >= '3.12' # Python 3.12 pytest>=8.1.1 ; python_version < '3.12' and python_version >= '3.11' # Python 3.11 pytest>=6.2.5 ; python_version < '3.11' and python_version >= '3.10' # Python 3.10 pytest>=4.6.0 ; python_version < '3.10.0' and python_version >= '3.7.0' # Python 3.7-3.9 pytest-cov>=3.0.0 pytest_timeout>=2.3.1 ; python_version < '4.0' and python_version >= '3.12' # Python 3.13+ pytest_timeout>=1.4.2 ; python_version < '3.12' # Python 3.11- coverage>=7.3.0 ; python_version < '4.0' and python_version >= '3.12' # Python 3.12 coverage>=6.1.1 ; python_version < '3.12' and python_version >= '3.10' # Python 3.10-3.11 coverage>=5.3.1 ; python_version < '3.10' and python_version >= '3.9' # Python 3.9 coverage>=6.1.1 ; python_version < '3.9' and python_version >= '3.8' # Python 3.8 requests>=2.25.1 ubelt-1.3.7/requirements/types.txt000066400000000000000000000000451472470106000172650ustar00rootroot00000000000000mypy autoflake >= 1.4 yapf >= 0.32.0 ubelt-1.3.7/run_developer_setup.sh000077500000000000000000000002331472470106000172640ustar00rootroot00000000000000#!/bin/bash # Install dependency packages pip install -r requirements.txt # new pep makes this not always work # pip install -e . python setup.py develop ubelt-1.3.7/run_doctests.sh000077500000000000000000000000721472470106000157100ustar00rootroot00000000000000#!/usr/bin/env bash xdoctest ubelt --style=google all "$@"ubelt-1.3.7/run_linter.sh000077500000000000000000000002331472470106000153540ustar00rootroot00000000000000#!/bin/bash flake8 --count --select=E9,F63,F7,F82 --show-source --statistics ubelt flake8 --count --select=E9,F63,F7,F82 --show-source --statistics ./testsubelt-1.3.7/run_tests.py000077500000000000000000000023361472470106000152450ustar00rootroot00000000000000#!/usr/bin/env python import sys def get_this_script_fpath(): import pathlib try: fpath = pathlib.Path(__file__) except NameError: # This is not being run from a script, thus the developer is doing some # IPython hacking, so we will assume a path on the developer machine. 
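        # (`__file__` is undefined when this code is pasted into a REPL,
        # which is what raises the NameError handled above; the fallback
        # path below is only meaningful on a developer checkout.)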
fpath = pathlib.Path('~/code/ubelt/run_tests.py').expanduser() if not fpath.exists(): raise Exception( 'Unable to determine the file path that this script ' 'should correspond to') return fpath def main(): import pytest import os repo_dpath = get_this_script_fpath().parent package_name = 'ubelt' mod_dpath = repo_dpath / 'ubelt' test_dpath = repo_dpath / 'tests' config_fpath = repo_dpath / 'pyproject.toml' pytest_args = [ '--cov-config', os.fspath(config_fpath), '--cov-report', 'html', '--cov-report', 'term', '--durations', '100', '--xdoctest', '--cov=' + package_name, os.fspath(mod_dpath), os.fspath(test_dpath) ] pytest_args = pytest_args + sys.argv[1:] ret = pytest.main(pytest_args) return ret if __name__ == '__main__': sys.exit(main()) ubelt-1.3.7/setup.py #!/usr/bin/env python # Generated by ~/code/xcookie/xcookie/builders/setup.py # based in part on ~/code/xcookie/xcookie/rc/setup.py.in import sys import re from os.path import exists, dirname, join from setuptools import find_packages from setuptools import setup def parse_version(fpath): """ Statically parse the version number from a Python file """ value = static_parse("__version__", fpath) return value def static_parse(varname, fpath): """ Statically parse a constant variable from a Python file """ import ast if not exists(fpath): raise ValueError("fpath={!r} does not exist".format(fpath)) with open(fpath, "r") as file_: sourcecode = file_.read() pt = ast.parse(sourcecode) class StaticVisitor(ast.NodeVisitor): def visit_Assign(self, node): for target in node.targets: if getattr(target, "id", None) == varname: self.static_value = node.value.s visitor = StaticVisitor() visitor.visit(pt) try: value = visitor.static_value except AttributeError: import warnings value = "Unknown {}".format(varname) warnings.warn(value) return value def parse_description(): """ Parse the description in the README file CommandLine: pandoc --from=markdown --to=rst --output=README.rst README.md python -c "import setup; print(setup.parse_description())" """ readme_fpath = join(dirname(__file__), "README.rst") # This breaks on pip install, so check that it exists. if exists(readme_fpath): with open(readme_fpath, "r") as f: text = f.read() return text return "" def parse_requirements(fname="requirements.txt", versions=False): """ Parse the package dependencies listed in a requirements file, stripping specific versioning information by default. Args: fname (str): path to requirements file versions (bool | str, default=False): If true, include version specs. If strict, then pin to the minimum version.
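            For example (illustrative values): ``versions=False`` reduces a
            line like ``coverage>=5.3.1`` to ``coverage``, ``versions=True``
            keeps ``coverage>=5.3.1``, and ``versions='strict'`` rewrites it
            to ``coverage==5.3.1``.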
Returns: List[str]: list of requirements items CommandLine: python -c "import setup, ubelt; print(ubelt.urepr(setup.parse_requirements()))" """ require_fpath = fname def parse_line(line, dpath=""): """ Parse information from a line in a requirements text file line = 'git+https://a.com/somedep@sometag#egg=SomeDep' line = '-e git+https://a.com/somedep@sometag#egg=SomeDep' """ # Remove inline comments comment_pos = line.find(" #") if comment_pos > -1: line = line[:comment_pos] if line.startswith("-r "): # Allow specifying requirements in other files target = join(dpath, line.split(" ")[1]) for info in parse_require_file(target): yield info else: # See: https://www.python.org/dev/peps/pep-0508/ info = {"line": line} if line.startswith("-e "): info["package"] = line.split("#egg=")[1] else: if "--find-links" in line: # setuptools doesnt seem to handle find links line = line.split("--find-links")[0] if ";" in line: pkgpart, platpart = line.split(";") # Handle platform specific dependencies # setuptools.readthedocs.io/en/latest/setuptools.html # #declaring-platform-specific-dependencies plat_deps = platpart.strip() info["platform_deps"] = plat_deps else: pkgpart = line platpart = None # Remove versioning from the package pat = "(" + "|".join([">=", "==", ">"]) + ")" parts = re.split(pat, pkgpart, maxsplit=1) parts = [p.strip() for p in parts] info["package"] = parts[0] if len(parts) > 1: op, rest = parts[1:] version = rest # NOQA info["version"] = (op, version) yield info def parse_require_file(fpath): dpath = dirname(fpath) with open(fpath, "r") as f: for line in f.readlines(): line = line.strip() if line and not line.startswith("#"): for info in parse_line(line, dpath=dpath): yield info def gen_packages_items(): if exists(require_fpath): for info in parse_require_file(require_fpath): parts = [info["package"]] if versions and "version" in info: if versions == "strict": # In strict mode, we pin to the minimum version if info["version"]: # Only replace the first >= instance verstr = "".join(info["version"]).replace(">=", "==", 1) parts.append(verstr) else: parts.extend(info["version"]) if not sys.version.startswith("3.4"): # apparently package_deps are broken in 3.4 plat_deps = info.get("platform_deps") if plat_deps is not None: parts.append(";" + plat_deps) item = "".join(parts) yield item packages = list(gen_packages_items()) return packages # # Maybe use in the future? But has private deps # def parse_requirements_alt(fpath='requirements.txt', versions='loose'): # """ # Args: # versions (str): can be # False or "free" - remove all constraints # True or "loose" - use the greater or equal (>=) in the req file # strict - replace all greater equal with equals # """ # # Note: different versions of pip might have different internals. # # This may need to be fixed. 
# from pip._internal.req import parse_requirements # from pip._internal.network.session import PipSession # requirements = [] # for req in parse_requirements(fpath, session=PipSession()): # if not versions or versions == 'free': # req_name = req.requirement.split(' ')[0] # requirements.append(req_name) # elif versions == 'loose' or versions is True: # requirements.append(req.requirement) # elif versions == 'strict': # part1, *rest = req.requirement.split(';') # strict_req = ';'.join([part1.replace('>=', '==')] + rest) # requirements.append(strict_req) # else: # raise KeyError(versions) # requirements = [r.replace(' ', '') for r in requirements] # return requirements NAME = "ubelt" INIT_PATH = "ubelt/__init__.py" VERSION = parse_version(INIT_PATH) if __name__ == "__main__": setupkw = {} setupkw["install_requires"] = parse_requirements( "requirements/runtime.txt", versions="loose" ) setupkw["extras_require"] = { "all" : parse_requirements("requirements.txt", versions="loose"), "all-strict" : parse_requirements("requirements.txt", versions="strict"), "docs" : parse_requirements("requirements/docs.txt", versions="loose"), "docs-strict" : parse_requirements("requirements/docs.txt", versions="strict"), "optional" : parse_requirements("requirements/optional.txt", versions="loose"), "optional-strict" : parse_requirements("requirements/optional.txt", versions="strict"), "runtime" : parse_requirements("requirements/runtime.txt", versions="loose"), "runtime-strict" : parse_requirements("requirements/runtime.txt", versions="strict"), "tests" : parse_requirements("requirements/tests.txt", versions="loose"), "tests-strict" : parse_requirements("requirements/tests.txt", versions="strict"), "types" : parse_requirements("requirements/types.txt", versions="loose"), "types-strict" : parse_requirements("requirements/types.txt", versions="strict"), } setupkw["name"] = NAME setupkw["version"] = VERSION setupkw["author"] = "Jon Crall" setupkw["author_email"] = "erotemic@gmail.com" setupkw["url"] = "https://github.com/Erotemic/ubelt" setupkw[ "description" ] = "A Python utility belt containing simple tools, a stdlib like feel, and extra batteries" setupkw["long_description"] = parse_description() setupkw["long_description_content_type"] = "text/x-rst" setupkw["license"] = "Apache 2" setupkw["packages"] = find_packages(".") setupkw["python_requires"] = ">=3.6" setupkw["classifiers"] = [ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Utilities", "License :: OSI Approved :: Apache Software License", "Operating System :: Microsoft :: Windows", "Operating System :: MacOS", "Operating System :: POSIX :: Linux", "Typing :: Stubs Only", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", ] setupkw["package_data"] = {"ubelt": ["py.typed", "*.pyi"]} setupkw["keywords"] = [ "utility", "python", "hashing", "caching", "stdlib", "path", "pathlib", "dictionary", "download", ] setup(**setupkw) ubelt-1.3.7/tests/000077500000000000000000000000001472470106000140005ustar00rootroot00000000000000ubelt-1.3.7/tests/test_cache.py000066400000000000000000000137211472470106000164600ustar00rootroot00000000000000from os.path import exists import ubelt as ub 
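# A minimal usage sketch of the ub.Cacher pattern exercised by the tests in
# this file (assumes only the public ubelt API; this helper is illustrative
# and is not used by the test suite itself):
def _demo_cacher_roundtrip():
    cacher = ub.Cacher('demo_name', depends='demo_params')
    data = cacher.tryload()           # returns None on a cache miss
    if data is None:
        data = 'expensive result'     # stand-in for a costly computation
        cacher.save(data)             # cached for subsequent calls
    return data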
import pytest def test_noexist_meta_clear(): """ Check that no errors happen when an external process removes the meta file """ def func(): return 'expensive result' cacher = ub.Cacher('name', 'params', verbose=10) cacher.clear() cacher.ensure(func) data_fpath = cacher.get_fpath() meta_fpath = data_fpath + '.meta' assert exists(data_fpath) assert exists(meta_fpath) ub.delete(meta_fpath) assert not exists(meta_fpath) cacher.clear() assert not exists(meta_fpath) assert not exists(data_fpath) def test_clear_quiet(): """ Check that repeatedly clearing a cache with verbose=0 raises no errors """ def func(): return 'expensive result' cacher = ub.Cacher('name', 'params', verbose=0) cacher.clear() cacher.clear() cacher.ensure(func) cacher.clear() def test_corrupt(): """ Check that corrupted cache files are handled gracefully python ubelt/tests/test_cache.py test_corrupt """ def func(): return ['expensive result'] cacher = ub.Cacher('name', 'params', verbose=10) cacher.clear() data = cacher.ensure(func) data2 = cacher.tryload() assert data2 == data # Overwrite the data with junk with open(cacher.get_fpath(), 'wb') as file: file.write(''.encode('utf8')) assert cacher.tryload() is None with pytest.raises(IOError): cacher.load() assert cacher.tryload() is None with open(cacher.get_fpath(), 'wb') as file: file.write(':junkdata:'.encode('utf8')) with pytest.raises(Exception): cacher.load() def _setup_corrupt_cacher(verbose=0): def func(): return ['expensive result'] cacher = ub.Cacher('name', 'params', verbose=verbose) cacher.clear() cacher.ensure(func) # Write junk data that will cause a non-IO error with open(cacher.get_fpath(), 'wb') as file: file.write(':junkdata:'.encode('utf8')) with pytest.raises(Exception): assert cacher.tryload(on_error='raise') assert exists(cacher.get_fpath()) return cacher def test_onerror_clear(): cacher = _setup_corrupt_cacher() assert cacher.tryload(on_error='clear') is None assert not exists(cacher.get_fpath()) cacher.clear() def test_onerror_raise(): cacher = _setup_corrupt_cacher(verbose=1) with pytest.raises(Exception): assert cacher.tryload(on_error='raise') assert exists(cacher.get_fpath()) cacher.clear() def test_onerror_bad_method(): cacher = _setup_corrupt_cacher() assert exists(cacher.get_fpath()) with pytest.raises(KeyError): assert cacher.tryload(on_error='doesnt exist') assert exists(cacher.get_fpath()) cacher.clear() def test_cache_hit(): cacher = ub.Cacher('name', 'params', verbose=2) cacher.clear() assert not cacher.exists() cacher.save(['some', 'data']) assert cacher.exists() data = cacher.load() assert data == ['some', 'data'] def test_disable(): """ Check that a disabled cacher never reads or writes cached data """ nonlocal_var = [0] def func(): nonlocal_var[0] += 1 return ['expensive result'] cacher = ub.Cacher('name', 'params', verbose=10, enabled=False) assert nonlocal_var[0] == 0 cacher.ensure(func) assert nonlocal_var[0] == 1 cacher.ensure(func) assert nonlocal_var[0] == 2 cacher.ensure(func) with pytest.raises(IOError): cacher.load() assert cacher.tryload(func) is None def test_disabled_cache_stamp(): stamp = ub.CacheStamp('foo', 'bar', enabled=False) assert stamp.expired() == 'disabled', 'disabled cache stamps are always expired' def test_cache_depends(): """ Check that the depends argument produces a deterministic cfgstr """ cacher = ub.Cacher('name', depends=['a', 'b', 'c'], verbose=10, enabled=False) cfgstr = cacher._rectify_cfgstr() assert cfgstr.startswith('8a82eef87cb905220841f95') def test_cache_cfgstr(): """ TODO: remove when cfgstr is removed """ # with
pytest.warns(DeprecationWarning): with pytest.raises(RuntimeError): cacher1 = ub.Cacher('name', cfgstr='abc') cacher1 # assert cacher1.depends == 'abc' def test_cache_stamp_with_hash(): dpath = ub.Path.appdir('ubelt/tests/test-cache-stamp-with-hash') for verbose in [0, 1]: dpath.delete().ensuredir() fpath = dpath / 'result.txt' fpath.write_text('hello') expected_hash = ub.hash_file(fpath, hasher='sha256') unexpected_hash = 'fdsfdsafds' stamp = ub.CacheStamp( 'foo.stamp', dpath=dpath, product=[fpath], depends='nodep', hash_prefix=unexpected_hash, verbose=verbose, hasher='sha256', ext='.json') assert not exists(stamp.cacher.get_fpath()) with pytest.raises(RuntimeError): stamp.renew() assert not exists(stamp.cacher.get_fpath()) # Fix the expected hash and now renew should work stamp.hash_prefix = expected_hash stamp.renew() assert exists(stamp.cacher.get_fpath()) assert not stamp.expired() # But change it back, and we will be expired stamp.hash_prefix = unexpected_hash assert stamp.expired() == 'hash_prefix_mismatch' # Then change it back, and we are ok stamp.hash_prefix = expected_hash assert not stamp.expired() # Corrupt the file and check for hash diff # (need to disable mtime check for this to always work) stamp._expire_checks['mtime'] = False fpath.write_text('jello') assert stamp.expired() == 'hash_diff' # Disabling the hash check makes us rely on size / mtime, but is faster stamp._expire_checks['hash'] = False assert not stamp.expired() if __name__ == '__main__': r""" CommandLine: pytest ubelt/tests/test_cache.py """ import xdoctest xdoctest.doctest_module(__file__) ubelt-1.3.7/tests/test_cache_stamp.py000066400000000000000000000104571472470106000176670ustar00rootroot00000000000000import ubelt as ub def test_cache_stamp(): # stamp the computation of expensive-to-compute.txt dpath = ub.Path.appdir('ubelt/tests', 'test-cache-stamp').ensuredir() ub.delete(dpath) ub.ensuredir(dpath) product = dpath / 'expensive-to-compute.txt' self = ub.CacheStamp('test1', dpath=dpath, depends='test1', product=product, hasher=None) if self.expired(): product.write_text('very expensive') self.renew() assert not self.expired() # corrupting the output WILL expire in non-robust mode if the size is # different. product.write_text('corrupted') assert self.expired() self.hasher = 'sha1' # but it will expire if we are in robust mode, even if the size is not # different assert self.expired() # deleting the product will cause expiration in any mode self.hasher = None ub.delete(product) assert self.expired() def test_cache_stamp_corrupt_product_nohasher(): dpath = ub.Path.appdir('ubelt/tests', 'test-cache-stamp').ensuredir() name = 'corrupt_product_nohasher' ub.delete(dpath) ub.ensuredir(dpath) product = dpath / (name + '.txt') self = ub.CacheStamp(name, dpath=dpath, depends=name, product=product, hasher=None) # Disable the new (as of 1.1.0) size and mtime checks # note: as of version 1.1.0 we also have to disable the new size and # mtime checks to get a non-robust mode. 
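    # (The private `_expire_checks` dict toggles the individual size /
    # mtime / hash consistency checks that `expired()` performs; disabling
    # them here restores the pre-1.1.0 non-robust behavior.)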
self._expire_checks['size'] = False self._expire_checks['mtime'] = False if self.expired(): product.write_text('very expensive') self.renew() assert not self.expired() # corrupting the output will not expire in non-robust mode product.write_text('corrupted') assert not self.expired() def test_not_time_expired(): # stamp the computation of expensive-to-compute.txt dpath = ub.Path.appdir('ubelt/tests', 'test-cache-stamp').ensuredir() ub.delete(dpath) ub.ensuredir(dpath) self = ub.CacheStamp('test1', dpath=dpath, depends='test1', expires=10000, hasher=None) self.renew() assert not self.expired() def test_time_expired(): # stamp the computation of expensive-to-compute.txt dpath = ub.Path.appdir('ubelt/tests', 'test-cache-stamp').ensuredir() ub.delete(dpath) ub.ensuredir(dpath) self = ub.CacheStamp('test1', dpath=dpath, depends='test1', expires=-10000, hasher=None) self.renew() assert self.expired() == 'expired_cert' def test_cache_stamp_corrupt_product_hasher(): dpath = ub.Path.appdir('ubelt/tests', 'test-cache-stamp').ensuredir() name = 'corrupt_product_hasher' ub.delete(dpath) ub.ensuredir(dpath) product = dpath / (name + '.txt') self = ub.CacheStamp(name, dpath=dpath, depends=name, product=product, hasher='sha1') if self.expired(): product.write_text('very expensive') self.renew() assert not self.expired() # corrupting the output will not expire in non-robust mode product.write_text('corrupted') assert self.expired() def test_cache_stamp_multiproduct(): import os # stamp the computation of expensive-to-compute.txt dpath = ub.Path.appdir('ubelt/tests', 'test-cache-stamp').ensuredir() ub.delete(dpath) ub.ensuredir(dpath) product = [ dpath / 'product1.txt', os.fspath(dpath / 'product2.txt'), dpath / 'product3.txt', ] self = ub.CacheStamp('somedata', dpath=dpath, depends='someconfig', product=product) if self.expired(): for fpath in product: ub.Path(fpath).write_text('very expensive') self.renew() assert not self.expired() ub.Path(product[1]).write_text('corrupted') assert self.expired() def test_cache_stamp_noproduct(): # stamp the computation of expensive-to-compute.txt dpath = ub.Path.appdir('ubelt/tests', 'test-cache-stamp').ensuredir() ub.delete(dpath) ub.ensuredir(dpath) name = 'noproduct' product = dpath / (name + '.txt') self = ub.CacheStamp('somedata', dpath=dpath, depends='someconfig', product=None) if self.expired(): product.write_text('very expensive') self.renew() assert not self.expired() product.write_text('corrupted') assert not self.expired() ubelt-1.3.7/tests/test_cmd.py000066400000000000000000000446411472470106000161650ustar00rootroot00000000000000import pytest import sys import ubelt as ub # import shlex # PYEXE = shlex.quote(sys.executable) PYEXE = sys.executable def test_cmd_stdout(): """ Debug: # Issues on windows python -c "import ubelt; ubelt.cmd('echo hello stdout')" python -c "import subprocess; subprocess.call(['echo', 'hi'])" proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell, universal_newlines=True, cwd=cwd, env=env) """ with ub.CaptureStdout() as cap: result = ub.cmd('echo hello stdout', verbose=True) assert result['out'].strip() == 'hello stdout' assert cap.text.strip() == 'hello stdout' def test_cmd_veryverbose(): with ub.CaptureStdout() as cap: result = ub.cmd('echo hello stdout', verbose=3) assert result['out'].strip() == 'hello stdout' print(cap.text) # assert cap.text.strip() == 'hello stdout' def test_tee_false(): with ub.CaptureStdout() as cap: result = ub.cmd('echo hello stdout', verbose=3, tee=False) assert 
result['out'].strip() == 'hello stdout' assert 'hello world' not in cap.text print(cap.text) def test_cmd_stdout_quiet(): with ub.CaptureStdout() as cap: result = ub.cmd('echo hello stdout', verbose=False) assert result['out'].strip() == 'hello stdout', 'should still capture internally' assert cap.text.strip() == '', 'nothing should print to stdout' def test_cmd_stderr(): result = ub.cmd('echo hello stderr 1>&2', shell=True, verbose=True) assert result['err'].strip() == 'hello stderr' def test_cmd_with_list_of_pathlib(): """ ub.cmd can accept a pathlib.Path in a list of its arguments. """ if not ub.POSIX: pytest.skip('posix only') fpath = ub.Path(ub.__file__) result = ub.cmd(['ls', fpath]) assert str(fpath) in result['out'] def test_cmd_with_single_pathlib(): """ ub.cmd can accept a pathlib.Path as its single argument """ if not ub.POSIX: pytest.skip('posix only') ls_exe = ub.Path(ub.find_exe('ls')) result = ub.cmd(ls_exe) result.check_returncode() def test_cmd_tee_auto(): """ pytest ubelt/tests/test_cmd.py -k tee_backend pytest ubelt/tests/test_cmd.py """ command = '{pyexe} -c "for i in range(100): print(str(i))"'.format(pyexe=PYEXE) result = ub.cmd(command, verbose=0, tee_backend='auto') assert result['out'] == '\n'.join(list(map(str, range(100)))) + '\n' def test_cmd_tee_thread(): """ CommandLine: pytest ubelt/tests/test_cmd.py::test_cmd_tee_thread -s python ubelt/tests/test_cmd.py test_cmd_tee_thread """ if 'tqdm' in sys.modules: if tuple(map(int, sys.modules['tqdm'].__version__.split('.'))) < (4, 19): pytest.skip('threads cause issues with early tqdms') import threading # check which threads currently exist (ideally 1) existing_threads = list(threading.enumerate()) print('existing_threads = {!r}'.format(existing_threads)) if ub.WIN32: # Windows cant break apart commands consistently command = [PYEXE, '-c', "for i in range(10): print(str(i))"] else: command = '{pyexe} -c "for i in range(10): print(str(i))"'.format(pyexe=PYEXE) result = ub.cmd(command, verbose=0, tee_backend='thread') assert result['out'] == '\n'.join(list(map(str, range(10)))) + '\n' after_threads = list(threading.enumerate()) print('after_threads = {!r}'.format(after_threads)) assert len(existing_threads) <= len(after_threads), ( 'we should be cleaning up our threads') @pytest.mark.skipif(sys.platform == 'win32', reason='not available on win32') def test_cmd_tee_select(): command = '{pyexe} -c "for i in range(100): print(str(i))"'.format(pyexe=PYEXE) result = ub.cmd(command, verbose=1, tee_backend='select') assert result['out'] == '\n'.join(list(map(str, range(100)))) + '\n' command = '{pyexe} -c "for i in range(100): print(str(i))"'.format(pyexe=PYEXE) result = ub.cmd(command, verbose=0, tee_backend='select') assert result['out'] == '\n'.join(list(map(str, range(100)))) + '\n' @pytest.mark.skipif(sys.platform == 'win32', reason='not available on win32') def test_cmd_tee_badmethod(): """ pytest tests/test_cmd.py::test_cmd_tee_badmethod """ command = '{pyexe} -c "for i in range(100): print(str(i))"'.format(pyexe=PYEXE) with pytest.raises(ValueError): ub.cmd(command, verbose=2, tee_backend='bad tee backend') def test_cmd_multiline_stdout(): """ python ubelt/tests/test_cmd.py test_cmd_multiline_stdout pytest ubelt/tests/test_cmd.py::test_cmd_multiline_stdout """ if ub.WIN32: # Windows cant break apart commands consistently command = [PYEXE, '-c', "for i in range(10): print(str(i))"] else: command = '{pyexe} -c "for i in range(10): print(str(i))"'.format(pyexe=PYEXE) result = ub.cmd(command, verbose=0) assert 
result['out'] == '\n'.join(list(map(str, range(10)))) + '\n' @pytest.mark.skipif(sys.platform == 'win32', reason='does not run on win32') def test_cmd_interleaved_streams_sh(): """ A test that ``Crosses the Streams'' of stdout and stderr pytest ubelt/tests/test_cmd.py::test_cmd_interleaved_streams_sh """ if False: sh_script = ub.codeblock( r''' for i in `seq 0 29`; do sleep .001 >&1 echo "O$i" if [ "$(($i % 5))" = "0" ]; then >&2 echo "!E$i" fi done ''').lstrip() result = ub.cmd(sh_script, shell=True, verbose=0) assert result['out'] == 'O0\nO1\nO2\nO3\nO4\nO5\nO6\nO7\nO8\nO9\nO10\nO11\nO12\nO13\nO14\nO15\nO16\nO17\nO18\nO19\nO20\nO21\nO22\nO23\nO24\nO25\nO26\nO27\nO28\nO29\n' assert result['err'] == '!E0\n!E5\n!E10\n!E15\n!E20\n!E25\n' else: sh_script = ub.codeblock( r''' for i in `seq 0 15`; do sleep .000001 >&1 echo "O$i" if [ "$(($i % 5))" = "0" ]; then >&2 echo "!E$i" fi done ''').lstrip() result = ub.cmd(sh_script, shell=True, verbose=0) assert result['out'] == 'O0\nO1\nO2\nO3\nO4\nO5\nO6\nO7\nO8\nO9\nO10\nO11\nO12\nO13\nO14\nO15\n' assert result['err'] == '!E0\n!E5\n!E10\n!E15\n' @pytest.mark.skipif(sys.platform == 'win32', reason='does not run on win32') def test_cmd_interleaved_streams_py(): # apparently multiline quotes dont work on win32 if False: # slow mode py_script = ub.codeblock( r''' python -c " import sys import time for i in range(30): time.sleep(.001) sys.stdout.write('O{}\n'.format(i)) sys.stdout.flush() if i % 5 == 0: sys.stderr.write('!E{}\n'.format(i)) sys.stderr.flush() " ''').lstrip().format(pyexe=PYEXE) result = ub.cmd(py_script, verbose=0) assert result['out'] == 'O0\nO1\nO2\nO3\nO4\nO5\nO6\nO7\nO8\nO9\nO10\nO11\nO12\nO13\nO14\nO15\nO16\nO17\nO18\nO19\nO20\nO21\nO22\nO23\nO24\nO25\nO26\nO27\nO28\nO29\n' assert result['err'] == '!E0\n!E5\n!E10\n!E15\n!E20\n!E25\n' else: # faster mode py_script = PYEXE + ' ' + ub.codeblock( r''' -c " import sys import time for i in range(15): time.sleep(.000001) sys.stdout.write('O{}\n'.format(i)) sys.stdout.flush() if i % 5 == 0: sys.stderr.write('!E{}\n'.format(i)) sys.stderr.flush() " ''').lstrip() result = ub.cmd(py_script, verbose=0) assert result['out'] == 'O0\nO1\nO2\nO3\nO4\nO5\nO6\nO7\nO8\nO9\nO10\nO11\nO12\nO13\nO14\n' assert result['err'] == '!E0\n!E5\n!E10\n' def test_cwd(): """ CommandLine: python ~/code/ubelt/ubelt/tests/test_cmd.py test_cwd """ import sys import os import ubelt as ub if not sys.platform.startswith('win32'): dpath = ub.Path.appdir('ubelt/tests').ensuredir() dpath = os.path.realpath(dpath) info = ub.cmd('pwd', cwd=dpath, shell=True) print('info = {}'.format(ub.urepr(info, nl=1))) print('dpath = {!r}'.format(dpath)) assert info['out'].strip() == dpath def test_env(): import sys import ubelt as ub import os if not sys.platform.startswith('win32'): env = os.environ.copy() env.update({'UBELT_TEST_ENV': '42'}) info = ub.cmd('echo $UBELT_TEST_ENV', env=env, shell=True) print(info['out']) assert info['out'].strip() == env['UBELT_TEST_ENV'] # @pytest.mark.skipif(sys.platform == 'win32', reason='does not run on win32') def test_timeout(): """ xdoctest ~/code/ubelt/tests/test_cmd.py test_timeout """ import subprocess import pytest # Infinite script py_script = ub.codeblock( r''' {pyexe} -c " while True: pass " ''').lstrip().format(pyexe=PYEXE) # if ub.WIN32: # # Windows cant break apart commands consistently # py_script = [PYEXE, '-c', ub.codeblock( # r''' # " # while True: # pass # " # ''').lstrip()] initial_grid = list(ub.named_product({ 'tee': [0, 1], 'capture': [0, 1], 'timeout': [0, 0.001, 0.01], })) 
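    # ub.named_product expands the value lists above into their cartesian
    # product, one dict per combination, e.g.
    # {'tee': 0, 'capture': 0, 'timeout': 0}, ... (2 * 2 * 3 = 12 dicts).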
expanded_grid = [] for kw in initial_grid: kw = ub.udict(kw) if kw['tee']: if not ub.WIN32: expanded_grid.append(kw | {'tee_backend': 'select'}) expanded_grid.append(kw | {'tee_backend': 'thread'}) else: expanded_grid.append(kw) for kw in expanded_grid: print('kw = {}'.format(ub.urepr(kw, nl=0))) with pytest.raises(subprocess.TimeoutExpired): ub.cmd(py_script, **kw) return def test_subprocess_compatability(): import subprocess import ubelt as ub def check_compatability(command, common_kwargs): ub_out = ub.cmd(command, verbose=1, capture=False, **common_kwargs) sp_out = subprocess.run(command, **common_kwargs) assert sp_out.stderr == ub_out.stderr assert sp_out.stdout == ub_out.stdout assert sp_out.returncode == ub_out.returncode assert sp_out.args == ub_out.args assert ub_out.check_returncode() == sp_out.check_returncode() if sys.version_info[0:2] >= (3, 11): ub_out = ub.cmd(command, verbose=0, capture=True, **common_kwargs) sp_out = subprocess.run(command, capture_output=True, universal_newlines=True, **common_kwargs) assert sp_out.stderr == ub_out.stderr assert sp_out.stdout == ub_out.stdout assert sp_out.returncode == ub_out.returncode assert sp_out.args == ub_out.args assert ub_out.check_returncode() == sp_out.check_returncode() ub_out = ub.cmd(command, verbose=0, capture=False, **common_kwargs) sp_out = subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, **common_kwargs) assert sp_out.stderr == ub_out.stderr assert sp_out.stdout == ub_out.stdout assert sp_out.returncode == ub_out.returncode assert sp_out.args == ub_out.args assert ub_out.check_returncode() == sp_out.check_returncode() command = ['echo', 'hello world'] common_kwargs = {'shell': False} check_compatability(command, common_kwargs) command = 'echo hello world' common_kwargs = {'shell': True} check_compatability(command, common_kwargs) if not ub.WIN32: command = ['ls', '-l'] common_kwargs = {'shell': False} check_compatability(command, common_kwargs) command = 'ls -l' common_kwargs = {'shell': True} check_compatability(command, common_kwargs) def test_failing_subprocess_compatability(): import subprocess import pytest import ubelt as ub def check_failing_compatability(command, common_kwargs): ub_out = ub.cmd(command, verbose=1, capture=False, **common_kwargs) sp_out = subprocess.run(command, **common_kwargs) assert sp_out.stderr == ub_out.stderr assert sp_out.stdout == ub_out.stdout assert sp_out.returncode == ub_out.returncode assert sp_out.args == ub_out.args with pytest.raises(subprocess.CalledProcessError): ub_out.check_returncode() with pytest.raises(subprocess.CalledProcessError): sp_out.check_returncode() if sys.version_info[0:2] >= (3, 11): ub_out = ub.cmd(command, verbose=0, capture=True, **common_kwargs) sp_out = subprocess.run(command, capture_output=True, universal_newlines=True, **common_kwargs) assert sp_out.stderr == ub_out.stderr assert sp_out.stdout == ub_out.stdout assert sp_out.returncode == ub_out.returncode assert sp_out.args == ub_out.args with pytest.raises(subprocess.CalledProcessError): ub_out.check_returncode() with pytest.raises(subprocess.CalledProcessError): sp_out.check_returncode() ub_out = ub.cmd(command, verbose=0, capture=False, **common_kwargs) sp_out = subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, **common_kwargs) assert sp_out.stderr == ub_out.stderr assert sp_out.stdout == ub_out.stdout assert sp_out.returncode == ub_out.returncode assert sp_out.args == ub_out.args with pytest.raises(subprocess.CalledProcessError): 
ub_out.check_returncode() with pytest.raises(subprocess.CalledProcessError): sp_out.check_returncode() if not ub.WIN32: command = ['ls', '-l', 'does not exist'] common_kwargs = {'shell': False} check_failing_compatability(command, common_kwargs) command = 'ls -l does not exist' common_kwargs = {'shell': True} check_failing_compatability(command, common_kwargs) def test_cmdoutput_object_with_non_subprocess_backends(): import ubelt as ub import pytest info = ub.cmd('echo hello world', verbose=1) assert info.stdout.strip() == 'hello world' assert info.stderr.strip() == '' info.check_returncode() # In this case, when tee=0 the user can still capture the output info = ub.cmd('echo hello world', detach=True, capture=True, tee=0) with pytest.raises(KeyError): info.stdout with pytest.raises(KeyError): info.stderr assert info['proc'].communicate()[0] is not None # In this case, when tee=0 and capture=False, the user cannot capture the output info = ub.cmd('echo hello world', detach=True, capture=False, tee=0) with pytest.raises(KeyError): info.stdout with pytest.raises(KeyError): info.stderr assert info['proc'].communicate()[0] is None # In this case when tee=1, a detached process will show its output but # capturing will not be possible. info = ub.cmd('echo hello world', detach=True, capture=False, tee=1) with pytest.raises(KeyError): info.stdout with pytest.raises(KeyError): info.stderr info['proc'].communicate() info.args with pytest.raises(KeyError): info.check_returncode() # Check attributes when system=True info = ub.cmd('echo hello world', system=True) assert info.stdout is None assert info.stderr is None assert info.args == 'echo hello world' info.check_returncode() info = ub.cmd(['echo', 'hello', 'world'], system=True, shell=True) assert info.stdout is None assert info.stderr is None assert info.args == 'echo hello world' info.check_returncode() def _dev_debug_timeouts(): """ Notes used when implementing timeout Ignore: # For debugging and development import sys from ubelt.util_cmd import ( _textio_iterlines, _proc_async_iter_stream, _proc_iteroutput_thread, _proc_iteroutput_select, _tee_output, logger) import logging logging.basicConfig( format='[%(asctime)s %(threadName)s %(levelname)s] %(message)s', level=logging.DEBUG, force=True ) logging.info('hi') logging.debug('hi') import subprocess args = ['ping', 'localhost', '-c', '1000'] args = ['python', '-c', "{}".format(chr(10) + ub.codeblock( ''' import sys import time import random import ubelt as ub wait_times = [0.5, 1.0, 2.0] def pseudo_sleep(sec): timer = ub.Timer().tic() while timer.toc() < sec: ... 
print('Starting a process') while True: sec = random.choice(wait_times) print('Sleep for {} seconds'.format(sec)) #pseudo_sleep(sec) #time.sleep(sec) time.sleep(0.05) print('Slept for {} seconds'.format(sec)) ''') + chr(10))] args = ['python', '-c', "{}".format(chr(10) + ub.codeblock( ''' import sys import ubelt as ub import time print('Starting a process') for i in range(4): print('Step {} {}'.format(i, ub.timestamp(precision=4))) time.sleep(1.0) ''') + chr(10))] lines = [] log = lines.append info = ub.cmd(args, verbose=3, shell=True, tee_backend='thread', timeout=10) try: ub.cmd(args, verbose=0, shell=True, tee_backend='thread', timeout=2) except subprocess.TimeoutExpired as e: std_ex = e try: ub.cmd(args, verbose=3, shell=True, tee_backend='thread', timeout=2) except subprocess.TimeoutExpired as e: verb_ex = e print('verb_ex.__dict__ = {}'.format(ub.urepr(verb_ex.__dict__, nl=1))) print('std_ex.__dict__ = {}'.format(ub.urepr(std_ex.__dict__, nl=1))) """ if __name__ == '__main__': """ pytest ubelt/tests/test_cmd.py -s python ~/code/ubelt/ubelt/tests/test_cmd.py test_cmd_veryverbose """ import xdoctest xdoctest.doctest_module(__file__) ubelt-1.3.7/tests/test_color.py000066400000000000000000000024221472470106000165270ustar00rootroot00000000000000def test_unable_to_find_color(): import ubelt as ub import pytest if ub.util_colors.NO_COLOR: pytest.skip() with pytest.warns(UserWarning): text = ub.color_text('text', 'wizbang') assert text == 'text', 'bad colors should pass the text back' def test_global_color_disable(): """ CommandLine: xdoctest -m /home/joncrall/code/ubelt/tests/test_color.py test_global_color_disable """ import ubelt as ub import pytest if ub.util_colors.NO_COLOR: pytest.skip() text = 'text = "hi"' has_color = ub.color_text(text, 'red') != ub.color_text(text, None) text1a = ub.color_text(text, 'red') text1b = ub.highlight_code(text) if has_color: assert text != text1a assert text != text1b # Force colors to be disabled prev = ub.util_colors.NO_COLOR try: ub.util_colors.NO_COLOR = True text2a = ub.color_text(text, 'red') text2b = ub.highlight_code(text) assert text == text2a assert text == text2b finally: # Re-enable coloration ub.util_colors.NO_COLOR = prev text3a = ub.color_text(text, 'red') text3b = ub.highlight_code(text) if has_color: assert text != text3a assert text != text3b ubelt-1.3.7/tests/test_dict.py000066400000000000000000000102401472470106000163310ustar00rootroot00000000000000import ubelt as ub import pytest def test_auto_dict(): auto = ub.AutoDict() assert 0 not in auto auto[0][10][100] = None assert 0 in auto assert isinstance(auto[0], ub.AutoDict) def test_auto_dict_to_dict(): from ubelt.util_dict import AutoDict auto = AutoDict() auto[1] = 1 auto['n1'] = AutoDict() auto['n1']['n2'] = AutoDict() auto['n1']['n2'][2] = 2 auto['n1']['n2'][3] = 3 auto['dict'] = {} auto['dict']['n3'] = AutoDict() auto['dict']['n3']['n4'] = AutoDict() print('auto = {!r}'.format(auto)) static = auto.to_dict() print('static = {!r}'.format(static)) assert not isinstance(static, AutoDict), '{}'.format(type(static)) assert not isinstance(static['n1'], AutoDict), '{}'.format(type(static['n1'])) assert not isinstance(static['n1']['n2'], AutoDict) assert isinstance(static['dict']['n3'], AutoDict) assert isinstance(static['dict']['n3']['n4'], AutoDict) def test_auto_dict_ordered(): # To Dict should respect ordering from ubelt.util_dict import AutoOrderedDict, AutoDict auto = AutoOrderedDict() auto[0][3] = 3 auto[0][2] = 2 auto[0][1] = 1 auto[0][4] = AutoDict() assert isinstance(auto, 
AutoDict) print('auto = {!r}'.format(auto)) static = auto.to_dict() print('static = {!r}'.format(static)) assert not isinstance(static, AutoDict), 'bad cast {}'.format(type(static)) assert not isinstance(static[0][4], AutoDict), 'bad cast {}'.format(type(static[0][4])) assert list(auto[0].values())[0:3] == [3, 2, 1], 'maintain order' def test_dzip_errors(): with pytest.raises(TypeError): ub.dzip([1], 2) with pytest.raises(TypeError): ub.dzip(1, [2]) with pytest.raises(ValueError): ub.dzip([1, 2, 3], []) with pytest.raises(ValueError): ub.dzip([], [4, 5, 6]) with pytest.raises(ValueError): ub.dzip([1, 2, 3], [4, 5]) def test_group_items_callable(): pairs = [ ('ham', 'protein'), ('jam', 'fruit'), ('spam', 'protein'), ('eggs', 'protein'), ('cheese', 'dairy'), ('banana', 'fruit'), ] items, groupids = zip(*pairs) lut = dict(zip(items, groupids)) result1 = ub.group_items(items, groupids) result2 = ub.group_items(items, lut.__getitem__) result1 = ub.map_values(set, result1) result2 = ub.map_values(set, result2) assert result1 == result2 def test_dict_hist_ordered(): import random import string import ubelt as ub rng = random.Random(0) items = [rng.choice(string.ascii_letters) for _ in range(100)] # Ensure that the ordered=True bug is fixed a = sorted(ub.dict_hist(items, ordered=True).items()) b = sorted(ub.dict_hist(items, ordered=False).items()) assert a == b def test_dict_subset_iterable(): """ CommandLine: xdoctest -m ~/code/ubelt/tests/test_dict.py test_dict_subset_iterable """ # There was a bug in 0.7.0 where iterable keys would be exhausted too soon keys_list = list(range(10)) dict_ = {k: k for k in keys_list} got = ub.dict_subset(dict_, iter(keys_list)) assert dict(got) == dict_ # def _benchmark_groupid_sorted(): # import random # import ubelt as ub # ydata = ub.ddict(list) # xdata = [] # ti = ub.Timerit(100, bestof=10, verbose=True) # num = 10 # for gamma in [0.01, .1, .5]: # for num in [10, 100, 1000, 10000, 100000]: # items = [random.random() for _ in range(num)] # groupids = [random.randint(0, int(num ** gamma)) for _ in range(num)] # xdata.append(num) # for timer in ti.reset(label='sort_g{}'.format(gamma)): # with timer: # ub.group_items(items, groupids, sorted_=True) # ydata[ti.label].append(ti.min()) # for timer in ti.reset(label='nosort_g{}'.format(gamma)): # with timer: # ub.group_items(items, groupids, sorted_=False) # ydata[ti.label].append(ti.min()) # ydata = ub.odict(sorted(ydata.items(), key=lambda t: t[1][-1])[::-1]) # import netharn as nh # nh.util.autompl() # nh.util.multi_plot(xdata, ydata) ubelt-1.3.7/tests/test_download.py000066400000000000000000000556111472470106000172300ustar00rootroot00000000000000import ubelt as ub import os import pytest import sys from os.path import basename, join, exists import platform IS_PYPY = platform.python_implementation() == 'PyPy' IS_WIN32 = sys.platform.startswith('win32') TIMEOUT = (15 if IS_PYPY else 5) * 30 @pytest.mark.timeout(TIMEOUT) def test_download_no_fpath(): # url = 'http://i.imgur.com/rqwaDag.png' # if not ub.argflag('--network'): # pytest.skip('not running network tests') url = _demo_url() dpath = ub.Path.appdir('ubelt/tests/test_download').ensuredir() fname = basename(url) fpath = join(dpath, fname) ub.delete(fpath) assert not exists(fpath) got_fpath = ub.download(url, appname='ubelt/tests/test_download') assert got_fpath == fpath assert exists(fpath) @pytest.mark.timeout(TIMEOUT) def test_download_with_fpath(): # url = 'http://i.imgur.com/rqwaDag.png' # if not ub.argflag('--network'): # pytest.skip('not running network 
tests') url = _demo_url(1201) dpath = ub.Path.appdir('ubelt/tests/test_download').ensuredir() fname = basename(url) fpath = join(dpath, fname) ub.delete(fpath) assert not exists(fpath) got_fpath = ub.download(url, fpath=fpath, appname='ubelt/tests/test_download') assert got_fpath == fpath assert exists(fpath) with open(got_fpath, 'rb') as file: data = file.read() assert len(data) > 1200, 'should have downloaded some bytes' @pytest.mark.timeout(TIMEOUT) def test_download_chunksize(): # url = 'https://www.dropbox.com/s/jl506apezj42zjz/ibeis-win32-setup-ymd_hm-2015-08-01_16-28.exe?dl=1' # url = 'http://i.imgur.com/rqwaDag.png' # if not ub.argflag('--network'): # pytest.skip('not running network tests') url = _demo_url() dpath = ub.Path.appdir('ubelt/tests/test_download').ensuredir() fname = basename(url) fpath = join(dpath, fname) ub.delete(fpath) assert not exists(fpath) got_fpath = ub.download(url, chunksize=2, appname='ubelt/tests/test_download') assert got_fpath == fpath assert exists(fpath) @pytest.mark.timeout(TIMEOUT) def test_download_cover_hashers(): # url = 'https://www.dropbox.com/s/jl506apezj42zjz/ibeis-win32-setup-ymd_hm-2015-08-01_16-28.exe?dl=1' # url = 'http://i.imgur.com/rqwaDag.png' # if not ub.argflag('--network'): # pytest.skip('not running network tests') url = _demo_url() dpath = ub.Path.appdir('ubelt/tests/test_download').ensuredir() fname = basename(url) # add coverage for different hashers ub.download(url, hasher='md5', hash_prefix='e09c80c42fda55f9d992e59ca6b33', dpath=dpath, fname=fname) ub.download(url, hasher='sha256', hash_prefix='bf2cb58a68f684d95a3b78ef8f', dpath=dpath, fname=fname) @pytest.mark.timeout(TIMEOUT) def test_download_hashalgo(): # url = 'https://www.dropbox.com/s/jl506apezj42zjz/ibeis-win32-setup-ymd_hm-2015-08-01_16-28.exe?dl=1' import hashlib # url = 'http://i.imgur.com/rqwaDag.png' # if not ub.argflag('--network'): # pytest.skip('not running network tests') url = _demo_url() dpath = ub.Path.appdir('ubelt/tests/test_download').ensuredir() fname = basename(url) fpath = join(dpath, fname) ub.delete(fpath) assert not exists(fpath) got_fpath = ub.download(url, hash_prefix='e09c80c42fda55f9d992e59ca6b3307d', appname='ubelt/tests/test_download', hasher=hashlib.md5()) assert got_fpath == fpath assert exists(fpath) @pytest.mark.timeout(TIMEOUT) def test_grabdata_cache(): """ Check where the url is downloaded to when fpath is not specified. """ # url = 'http://i.imgur.com/rqwaDag.png' # if not ub.argflag('--network'): # pytest.skip('not running network tests') url = _demo_url() dpath = ub.Path.appdir('ubelt/tests/test_download').ensuredir() fname = basename(url) fpath = join(dpath, fname) got_fpath = ub.grabdata(url, appname='ubelt/tests/test_download') assert got_fpath == fpath assert exists(fpath) ub.delete(fpath) assert not exists(fpath) ub.grabdata(url, appname='ubelt/tests/test_download') assert exists(fpath) @pytest.mark.timeout(TIMEOUT) def test_grabdata_nohash(): """ Check where the url is downloaded to when fpath is not specified. """ url = _demo_url() dpath = ub.Path.appdir('ubelt/tests/test_download/test-grabdata-nohash').ensuredir() fname = basename(url) fpath = (dpath / fname).delete() assert not fpath.exists() ub.grabdata(url, fpath=fpath, hasher=None, verbose=10) assert fpath.exists() # Even without the hasher, if the size of the data changes at all # we should be able to detect and correct it. 
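    # (Even with hasher=None the grabdata stamp seems to track basic file
    # attributes such as size, which is what lets a content-length change
    # trigger the redownload verified below.)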
orig_text = fpath.read_text() fpath.write_text('corrupted') ub.grabdata(url, fpath=fpath, hasher=None, verbose=10) assert fpath.read_text() == orig_text @pytest.mark.timeout(TIMEOUT) def test_grabdata_url_only(): """ Check where the url is downloaded to when fpath is not specified. """ # url = 'http://i.imgur.com/rqwaDag.png' # if not ub.argflag('--network'): # pytest.skip('not running network tests') url = _demo_url() dpath = ub.Path.appdir('ubelt') fname = basename(url) fpath = os.fspath(dpath / fname) got_fpath = ub.grabdata(url) assert got_fpath == fpath assert exists(fpath) @pytest.mark.timeout(TIMEOUT) def test_grabdata_with_fpath(): """ Check where the url is downloaded to when fpath is not specified. """ # url = 'http://i.imgur.com/rqwaDag.png' # if not ub.argflag('--network'): # pytest.skip('not running network tests') url = _demo_url() dpath = ub.Path.appdir('ubelt/tests/test_download').ensuredir() fname = basename(url) fpath = join(dpath, fname) got_fpath = ub.grabdata(url, fpath=fpath, verbose=3) assert got_fpath == fpath assert exists(fpath) ub.delete(fpath) assert not exists(fpath) ub.grabdata(url, fpath=fpath, verbose=3) assert exists(fpath) def test_grabdata_value_error(): """ Check where the url is downloaded to when fpath is not specified. """ # url = 'http://i.imgur.com/rqwaDag.png' # if not ub.argflag('--network'): # pytest.skip('not running network tests') url = _demo_url() dpath = ub.Path.appdir('ubelt/tests/test_download').ensuredir() fname = basename(url) fpath = join(dpath, fname) with pytest.raises(ValueError): ub.grabdata(url, fname=fname, fpath=fpath, dpath=dpath) with pytest.raises(ValueError): ub.grabdata(url, fname=fname, fpath=fpath) with pytest.raises(ValueError): ub.grabdata(url, dpath=dpath, fpath=fpath) with pytest.raises(ValueError): ub.grabdata(url, fpath=fpath, appname='foobar') with pytest.raises(ValueError): ub.grabdata(url, dpath=dpath, appname='foobar') @pytest.mark.timeout(TIMEOUT * 2) def test_download_bad_url(): """ Check that we error when the url is bad Notes: For some reason this can take a long time to realize there is no URL, even if the timeout is specified and fairly low. 
CommandLine: python tests/test_download.py test_download_bad_url --verbose """ import pytest pytest.skip('This takes a long time to timeout and I dont understand why') url = 'http://www.a-very-incorrect-url.gov/does_not_exist.txt' # if not ub.argflag('--network'): # pytest.skip('not running network tests') # Ensure the opener exist # import urllib.request as urllib_x from urllib.error import URLError # NOQA # if urllib_x._opener is None: # urllib_x.install_opener(urllib_x.build_opener()) dpath = ub.Path.appdir('ubelt/tests/test_download').ensuredir() fname = basename(url) fpath = join(dpath, fname) ub.delete(fpath) assert not exists(fpath) with pytest.raises(URLError): ub.download(url, fpath=fpath, verbose=1, timeout=1.0) @pytest.mark.timeout(TIMEOUT) def test_grabdata_fname_only(): # url = 'http://i.imgur.com/rqwaDag.png' # if not ub.argflag('--network'): # pytest.skip('not running network tests') # fname = 'mario.png' url = _demo_url() dpath = ub.Path.appdir('ubelt') fname = 'custom_text.txt' fpath = os.fspath(dpath / fname) got_fpath = ub.grabdata(url, fname=fname) assert got_fpath == fpath assert exists(fpath) @pytest.mark.timeout(TIMEOUT) def test_grabdata_dpath_only(): # url = 'http://i.imgur.com/rqwaDag.png' # if not ub.argflag('--network'): # pytest.skip('not running network tests') url = _demo_url() dpath = ub.Path.appdir('ubelt/tests/test_download').ensuredir() fname = basename(url) fpath = join(dpath, fname) got_fpath = ub.grabdata(url, dpath=dpath) assert got_fpath == fpath assert exists(fpath) @pytest.mark.timeout(TIMEOUT) def test_grabdata_fpath_and_dpath(): # url = 'http://i.imgur.com/rqwaDag.png' # if not ub.argflag('--network'): # pytest.skip('not running network tests') url = _demo_url() with pytest.raises(ValueError): ub.grabdata(url, fpath='foo', dpath='bar') # @pytest.mark.timeout(TIMEOUT) def test_grabdata_hash_typo(): """ CommandLine: pytest ~/code/ubelt/tests/test_download.py -k test_grabdata_hash_typo --network -s xdoctest ~/code/ubelt/tests/test_download.py test_grabdata_hash_typo --network """ # url = 'https://www.dropbox.com/s/jl506apezj42zjz/ibeis-win32-setup-ymd_hm-2015-08-01_16-28.exe?dl=1' # url = 'http://i.imgur.com/rqwaDag.png' # if not ub.argflag('--network'): # pytest.skip('not running network tests') url = _demo_url() dpath = ub.Path.appdir('ubelt/tests/test_download') fname = basename(url) fpath = dpath / fname stamp_fpath = fpath.augment(tail='.stamp_md5.json') for verbose in [5]: fpath.delete() stamp_fpath.delete() assert not exists(fpath) print('[STEP1] Downloading file, but we have a typo in the hash') with pytest.raises(RuntimeError): got_fpath = ub.grabdata( url, hash_prefix='e09c80c42fda5-typo-5f9d992e59ca6b3307d', hasher='md5', verbose=verbose, appname='ubelt/tests/test_download') assert fpath.exists() real_hash = ub.hash_file(fpath, hasher='md5') real_hash print('[STEP2] Fixing the typo recomputes the hash, but does not redownload the file') got_fpath = ub.grabdata(url, hash_prefix='e09c80c42fda55f9d992e59ca6b3307d', hasher='md5', verbose=verbose, appname='ubelt/tests/test_download') assert ub.Path(got_fpath).resolve() == fpath.resolve() assert fpath.exists() # If we delete the .hash file we will simply recompute stamp_fpath.delete() print('[STEP3] Deleting the hash file recomputes the hash') got_fpath = ub.grabdata(url, fpath=fpath, hash_prefix='e09c80c42fda55f9d992e59ca6b3307d', hasher='md5', verbose=verbose) assert stamp_fpath.exists() def test_deprecated_grabdata_args(): # with pytest.warns(DeprecationWarning): with 
pytest.raises(RuntimeError): import hashlib url = _demo_url() # dpath = ub.Path.appdir('ubelt/tests/test_download').ensuredir() # fname = basename(url) # fpath = join(dpath, fname) got_fpath = ub.grabdata( url, hash_prefix='e09c80c42fda55f9d992e59ca6b3307d', hasher=hashlib.md5()) got_fpath def _devcheck_progres_download_bar(): """ import sys, ubelt sys.path.append(ubelt.expandpath('~/remote/toothbrush/code/ubelt/tests')) from test_download import SingletonTestServer import ubelt as ub """ self = SingletonTestServer.instance() urls = self.write_file(filebytes=85717624) import time class DummyIO: def write(self, msg): time.sleep(0.0005) ... file = DummyIO() url = urls[0] dl_file = ub.download(url, fpath=file, progkw=dict(desc='dling')) dl_file class SingletonTestServer(ub.NiceRepr): """ A singleton class used for testing. This could be done via a pytest fixture, but... I don't want to use fixtures until its easy and clear how to make an instance of them in an independent IPython session. CommandLine: xdoctest -m tests/test_download.py SingletonTestServer Note: We rely on python process close mechanisms to clean this server up. Might need to re-investigate this in the future. Ignore: >>> self = SingletonTestServer.instance() >>> print('self = {!r}'.format(self)) >>> url = self.urls[0] >>> print('url = {!r}'.format(url)) >>> dl_file = ub.download(url) """ _instance = None @classmethod def instance(cls): if cls._instance is not None: self = cls._instance else: self = cls() cls._instance = self return self def close(self): if self.proc.poll() is None: self.proc.terminate() self.proc.wait() self.__class__.instance = None def __nice__(self): return '{} - {}'.format(self.root_url, self.proc.returncode) def __init__(self): import requests import time import sys import ubelt as ub import socket from contextlib import closing def find_free_port(): """ References: .. 
[SO1365265] https://stackoverflow.com/questions/1365265/on-localhost-how-do-i-pick-a-free-port-number """ with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: s.bind(('', 0)) s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) return s.getsockname()[1] # Find an open port port = find_free_port() print('port = {!r}'.format(port)) dpath = ub.Path.appdir('ubelt/tests/test_download/simple_server').ensuredir() if sys.platform.startswith('win32'): pyexe = 'python' else: pyexe = sys.executable server_cmd = [ pyexe, '-m', 'http.server', str(port) ] info = ub.cmd(server_cmd, detach=True, cwd=dpath) proc = info['proc'] self.proc = proc self.dpath = dpath self.root_url = 'http://localhost:{}'.format(port) if IS_PYPY and IS_WIN32: # not sure why import pytest pytest.skip('not sure why download tests are failing on pypy win32') init_sleeptime = 0.5 fail_sleeptime = 0.3 timeout = 10 else: init_sleeptime = 0.002 fail_sleeptime = 0.01 timeout = 1 time.sleep(init_sleeptime) # Wait for the server to be alive status_code = None max_tries = 300 for _ in range(max_tries): try: resp = requests.get(self.root_url, timeout=timeout) except requests.exceptions.ConnectionError: time.sleep(fail_sleeptime) else: status_code = resp.status_code if status_code == 200: break poll_ret = self.proc.poll() if poll_ret is not None: print('poll_ret = {!r}'.format(poll_ret)) print(self.proc.communicate()) raise AssertionError('Simple server did not start {}'.format(poll_ret)) self.urls = [] self.write_file() def write_file(self, filebytes=10, num_files=1): fnames = ['file_{}_{}.txt'.format(filebytes, i) for i in range(num_files)] for fname in fnames: # data = ''.join(random.choices(string.ascii_letters, k=filebytes)) data = 'a' * filebytes fpath = join(self.dpath, fname) with open(fpath, 'w') as file: file.write(data) urls = ['{}/{}'.format(self.root_url, fname) for fname in fnames] self.urls.extend(urls) return urls def test_local_download(): server = SingletonTestServer.instance() url = server.write_file(filebytes=int(10 * 2 ** 20))[0] # also test with a timeout for lazy coverage ub.download(url, timeout=3000) def _demo_url(num_bytes=None): REAL_URL = False if REAL_URL: url = 'http://i.imgur.com/rqwaDag.png' if not ub.argflag('--network'): pytest.skip('not running network tests') else: if num_bytes is None: url = SingletonTestServer.instance().urls[0] else: url = SingletonTestServer.instance().write_file(num_bytes)[0] return url @pytest.mark.timeout(TIMEOUT) def test_download_with_progkw(): """ Test that progkw is properly passed through to ub.download """ url = _demo_url(128 * 10) dpath = ub.Path.appdir('ubelt/tests/test_download').ensuredir() fname = basename(url) fpath = join(dpath, fname) with ub.CaptureStdout() as cap: ub.download(url, fpath=fpath, progkw={'verbose': 3, 'freq': 1, 'adjust': False, 'time_thresh': 0}, chunksize=128) assert len(cap.text.split('\n')) > 10 @pytest.mark.timeout(TIMEOUT) def test_download_with_filesize(): """ Test that progkw is properly passed through to ub.download """ url = _demo_url(128 * 10) dpath = ub.Path.appdir('ubelt/tests/test_download').ensuredir() fname = basename(url) fpath = join(dpath, fname) with ub.CaptureStdout() as cap: ub.download(url, filesize=11, fpath=fpath, progkw={'verbose': 3, 'freq': 1, 'adjust': False, 'time_thresh': 0}, chunksize=128) import re assert re.search(r'\d\d\d\d\.\d\d%', cap.text), 'should report over 100%' def make_stat_dict(stat_obj): # Convert the stat tuple to a dict we can manipulate # and ignore access time ignore_keys = 
{'st_atime', 'st_atime_ns'} return { k: getattr(stat_obj, k) for k in dir(stat_obj) if k.startswith('st_') and k not in ignore_keys} def test_grabdata(): import ubelt as ub import json import time # fname = 'foo.bar' # url = 'http://i.imgur.com/rqwaDag.png' # prefix1 = '944389a39dfb8fa9' url = _demo_url(128 * 11) prefix1 = 'b7fa848cd088ae842a89' fname = 'foo2.bar' # print('1. Download the file once') fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1, hasher='sha512') stat0 = make_stat_dict(ub.Path(fpath).stat()) stamp_fpath = ub.Path(fpath).augment(tail='.stamp_sha512.json') assert json.loads(stamp_fpath.read_text())['hash'][0].startswith(prefix1) # print("2. Rerun and check that the download doesn't happen again") fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1) stat1 = make_stat_dict(ub.Path(fpath).stat()) assert stat0 == stat1, 'the file should not be modified' # print('3. Set redo=True, which should force a redownload') sleep_time = 0.1 num_tries = 60 for _ in range(num_tries): fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1, redo=True, hasher='sha512') stat2 = make_stat_dict(ub.Path(fpath).stat()) # Note: the precision of mtime is too low for this test work reliably # https://apenwarr.ca/log/20181113 if stat2 != stat1: break print('... Sometimes the redownload happens so fast we need to ' 'wait to notice the file is actually different') time.sleep(sleep_time) else: raise AssertionError( 'the file stat should be modified, we waited over {}s.'.format( sleep_time * num_tries)) # print('4. Check that a redownload occurs when the stamp is changed') stamp_fpath.write_text('corrupt-stamp') fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1, hasher='sha512') assert json.loads(stamp_fpath.read_text())['hash'][0].startswith(prefix1) # print('5. Check that a redownload occurs when the stamp is removed') ub.delete(stamp_fpath) fpath = ub.Path(fpath) fpath.write_text('corrupt-stamp') assert not ub.hash_file(fpath, base='hex', hasher='sha512').startswith(prefix1) fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1, hasher='sha512') assert ub.hash_file(fpath, base='hex', hasher='sha512').startswith(prefix1) def test_grabdata_same_fpath_different_url(): url1 = _demo_url(128 * 11) url2 = _demo_url(128 * 12) url3 = _demo_url(128 * 13) fname = 'foobar' fpath1 = ub.grabdata(url1, fname=fname, hash_prefix='b7fa848cd088ae842a89ef', hasher='sha512', verbose=100) stat1 = make_stat_dict(ub.Path(fpath1).stat()) # Should requesting a new url, even with the same fpath, cause redownload? 
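    # Note: with hash_prefix=None there is no content hash tying the
    # on-disk file to url1, so grabbing url2 into the same fname rewrites
    # the file. The asserts below pin down this (current) behavior.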
fpath2 = ub.grabdata(url2, fname=fname, hash_prefix=None, hasher='sha512', verbose=100) stat2 = make_stat_dict(ub.Path(fpath2).stat()) fpath3 = ub.grabdata(url3, fname=fname, hash_prefix=None, hasher='sha512', verbose=100) stat3 = make_stat_dict(ub.Path(fpath3).stat()) assert stat1 != stat2, 'the stats will change because we did not specify a hash prefix' assert stat2 == stat3, 'we may change this behavior in the future' fpath3 = ub.grabdata(url2, fname=fname, hash_prefix='43f92597d7eb08b57c88b6', hasher='sha512', verbose=100) stat3 = make_stat_dict(ub.Path(fpath3).stat()) assert stat1 != stat3, 'if we do specify a new hash, we should get a new download' assert url1 != url2, 'urls should be different' assert ub.allsame([fpath1, fpath2, fpath3]), 'all fpaths should be the same' def test_grabdata_delete_hash_stamp(): import ubelt as ub fname = 'foo3.bar' url = _demo_url(128 * 12) prefix1 = '43f92597d7eb08b57c88b636' fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1) stamp_fpath = ub.Path(fpath + '.stamp_sha512.json') ub.delete(stamp_fpath) fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1) def test_download_with_io(): import ubelt as ub import io url = _demo_url(128 * 3) file = io.BytesIO() fpath = ub.download(url, file) assert fpath is file file.seek(0) data = file.read() hashstr = ub.hash_data(data, hasher='sha1') assert hashstr.startswith('45a5c851bf12d1') def test_download_with_sha1_hasher(): import ubelt as ub url = _demo_url(128 * 4) ub.download(url, hasher='sha1', hash_prefix='164557facb7392') def setup_module(module): """ setup any state specific to the execution of the given module.""" SingletonTestServer.instance() def teardown_module(module): """ teardown any state that was previously setup with a setup_module method. """ SingletonTestServer.instance().close() if __name__ == '__main__': """ CommandLine: pytest ubelt/tests/test_download.py """ import xdoctest xdoctest.doctest_module(__file__) ubelt-1.3.7/tests/test_editable_modules.py000066400000000000000000000460221472470106000207160ustar00rootroot00000000000000""" This is a specialized set of tests for the util_import module on editable installs, specifically with the new setuptools editable hooks (v64.0.0). https://setuptools.pypa.io/en/latest/userguide/development_mode.html https://setuptools.pypa.io/en/latest/history.html#id37 Running the setup and teardown for this test is very expensive wrt how long this test takes versus others in this library. We should look into if there is a cheaper way to emulate it. What we could do is run the expensive test once, and serialize the outputs it produces so we can simply reconstruct the environment. """ import os import sys class ProjectStructure(): """ Method to help setup and teardown a demo package installed in editable mode. 
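
    Typical lifecycle (a sketch; the dpath and module name here are
    illustrative):

        self = ProjectStructure(dpath, mod_name='demopkg_mwe')
        self.setup()     # generate() then install() (pip install -e)
        ...
        self.teardown()  # uninstall() then delete()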
Ignore: import ubelt as ub import sys, ubelt import xdev sys.path.append(ubelt.expandpath('~/code/ubelt/tests')) from test_editable_modules import * # NOQA dpath = ub.Path.appdir('ubelt/tests/demo_project').ensuredir() self = ProjectStructure(dpath, mod_name='demopkg_mwe', use_src=False) self.generate() self.analyze() self.install() """ def __init__(self, repo_dpath='.', mod_name='demopkg_mwe', use_src=True): import ubelt as ub self.root = ub.Path(repo_dpath) self.mod_name = mod_name self.use_src = use_src if use_src: self.python_relpath = ub.Path('src', 'python') else: self.python_relpath = ub.Path('.') self.cxx_relpath = ub.Path('src', 'cxx') self.cxx_path = (self.root / self.cxx_relpath) self.python_path = (self.root / self.python_relpath) self.mod_dpath = (self.python_path / self.mod_name) def setup(self): self.generate() self.install() def teardown(self): self.uninstall() self.delete() def install(self): import sys import ubelt as ub ub.cmd([sys.executable, '-m', 'pip', 'install', '-e', self.root], verbose=3, check=True) def delete(self): self.root.delete() def uninstall(self): import sys import ubelt as ub ub.cmd([sys.executable, '-m', 'pip', 'uninstall', self.mod_name, '-y'], verbose=3, check=True) def generate(self, with_cxx=0): import ubelt as ub self.mod_dpath.delete().ensuredir() self.cxx_path.delete() (self.root / 'CMakeLists.txt').delete() (self.mod_dpath / '__init__.py').write_text('__version__ = "1.0.0"') if self.use_src: package_dir_line = ub.codeblock( f''' package_dir={{'': '{self.python_relpath}'}}, ''') else: package_dir_line = '' # Give the MWE a CXX extension WITH_CXX = with_cxx if WITH_CXX: (self.root / 'pyproject.toml').write_text(ub.codeblock( ''' [build-system] requires = ["setuptools>=41.0.1", "scikit-build>=0.11.1", "numpy", "ninja>=1.10.2", "cmake>=3.21.2", "cython>=0.29.24",] ''')) (self.root / 'setup.py').write_text(ub.codeblock( f''' if __name__ == '__main__': from skbuild import setup from setuptools import find_packages packages = find_packages('./{self.python_relpath}') setup( {package_dir_line} install_requires=['packaging'], name='{self.mod_name}', version="1.0.0", description='MWE of a binpy project', packages=packages, include_package_data=True, ) ''')) self.cxx_path.ensuredir() (self.root / 'CMakeLists.txt').write_text(ub.codeblock( rf''' cmake_minimum_required(VERSION 3.13.0) project({self.mod_name} LANGUAGES C Fortran) find_package(PythonInterp REQUIRED) find_package(PythonLibs REQUIRED) ### # Private helper function to execute `python -c ""` # # Runs a python command and populates an outvar with the result of stdout. # Be careful of indentation if `cmd` is multiline. 
# function(pycmd outvar cmd) execute_process( COMMAND "${{PYTHON_EXECUTABLE}}" -c "${{{{cmd}}}}" RESULT_VARIABLE _exitcode OUTPUT_VARIABLE _output) if(NOT ${{_exitcode}} EQUAL 0) message(ERROR "Failed when running python code: \"\"\" ${{cmd}}\"\"\"") message(FATAL_ERROR "Python command failed with error code: ${{_exitcode}}") endif() # Remove supurflous newlines (artifacts of print) string(STRIP "${{_output}}" _output) set(${{outvar}} "${{_output}}" PARENT_SCOPE) endfunction() ### # Find scikit-build and include its cmake resource scripts # if (NOT SKBUILD) pycmd(skbuild_location "import os, skbuild; print(os.path.dirname(skbuild.__file__))") set(skbuild_cmake_dir "${{skbuild_location}}/resources/cmake") # If skbuild is not the driver, then we need to include its utilities in our CMAKE_MODULE_PATH list(APPEND CMAKE_MODULE_PATH ${{skbuild_cmake_dir}}) endif() find_package(PythonExtensions REQUIRED) find_package(Cython REQUIRED) find_package(NumPy REQUIRED) # Backend C library add_subdirectory("src/cxx") # Cython library add_subdirectory("src/python/{self.mod_name}") ''')) (self.cxx_path / 'myalgo.h').write_text(ub.codeblock( ''' #ifndef MYALGO_H #define MYALGO_H int myalgo(long *arr1, long *arr2, size_t num); #endif MYALGO_H ''')) (self.cxx_path / 'myalgo.c').write_text(ub.codeblock( r''' #include long myalgo(long *arr1, long *arr2, size_t num) { for (int i = 0 ; i < num ; i++ ) { arr2[i] = arr1[i] + arr2[i]; } return 1; } ''')) cmake_list_cxx = self.cxx_path / 'CMakeLists.txt' cmake_list_cxx.write_text(ub.codeblock( ''' set(MYALGO_MODULE_NAME "myalgo") list(APPEND MYALGO_SOURCES "myalgo.h" "myalgo.c") add_library(${MYALGO_MODULE_NAME} STATIC ${MYALGO_SOURCES}) ''')) (self.mod_dpath / 'myalgo_cython.pyx').write_text(ub.codeblock( ''' import numpy as np cimport numpy as np cdef extern from "../../cxx/myalgo.h": cdef int myalgo(long *arr1, long *arr2, size_t num); def call_myalgo(): """ This is a docstring """ cdef int result; cdef np.ndarray[np.int64_t, ndim=1] arr1 cdef np.ndarray[np.int64_t, ndim=1] arr2 arr1 = np.array([1, 2, 3], dtype=np.int64) arr2 = np.array([4, 6, 9], dtype=np.int64) cdef long [:] arr1_view = arr1 cdef long [:] arr2_view = arr2 cdef size_t num = len(arr1) print(f'arr1={arr1}') print(f'arr2={arr2}') print('calling my algo') result = myalgo(&arr1_view[0], &arr2_view[0], num) print(f'arr1={arr1}') print(f'arr2={arr2}') return result ''')) (self.mod_dpath / 'CMakeLists.txt').write_text(ub.codeblock( ''' set(cython_source "myalgo_cython.pyx") set(PYMYALGO_MODULE_NAME "myalgo_cython") # Translate Cython into C/C++ add_cython_target(${PYMYALGO_MODULE_NAME} "${cython_source}" C OUTPUT_VAR sources) # Add other C sources list(APPEND sources ) # Create C++ library. Specify include dirs and link libs as normal add_library(${PYMYALGO_MODULE_NAME} MODULE ${sources}) target_include_directories( ${PYMYALGO_MODULE_NAME} PUBLIC ${NumPy_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ) # TODO: not sure why this isn't set in the global scope? # Hack around it: just hard code the module name set(MYALGO_MODULE_NAME "myalgo") # TODO: linking to the MYALGO shared object isn't working 100% yet. 
target_link_libraries(${PYMYALGO_MODULE_NAME} ${MYALGO_MODULE_NAME}) target_compile_definitions(${PYMYALGO_MODULE_NAME} PUBLIC "NPY_NO_DEPRECATED_API" #"NPY_1_7_API_VERSION=0x00000007" ) # Transform the C++ library into an importable python module python_extension_module(${PYMYALGO_MODULE_NAME}) # Install the C++ module to the correct relative location # (this will be an inplace build if you use `pip install -e`) #file(RELATIVE_PATH pymyalgo_install_dest "${CMAKE_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}") # My "normal" method of setting install targets does not seem to work here. Hacking it. # NOTE: skbuild *seems* to place libraries in a data dir *unless* the install destination # corresponds exactly to the / specified implicitly in setup.py set(pymyalgo_install_dest "src/python/{self.mod_name}") #install(TARGETS ${MYALGO_MODULE_NAME} LIBRARY DESTINATION "${pymyalgo_install_dest}") install(TARGETS ${PYMYALGO_MODULE_NAME} LIBRARY DESTINATION "${pymyalgo_install_dest}") ''' )) else: # Pure Python # TODO: Might want to test with different build backends. (self.root / 'pyproject.toml').write_text(ub.codeblock( ''' [build-system] requires = ["setuptools>=41.0.1", "wheel"] build-backend = "setuptools.build_meta" ''')) (self.root / 'setup.py').write_text(ub.codeblock( f''' if __name__ == '__main__': from setuptools import setup from setuptools import find_packages packages = find_packages('./{self.python_relpath}') setup( {package_dir_line} package_data={{ '{self.mod_name}': ['py.typed', '*.pyi'], }}, install_requires=['packaging'], name='{self.mod_name}', version="1.0.0", description='MWE of a purepy project', packages=packages, include_package_data=True, ) ''')) (self.mod_dpath / 'py.typed').write_text('') (self.mod_dpath / 'submod.py').write_text('A = 1') (self.mod_dpath / 'submod.pyi').write_text('A: int') def analyze(self): """ For debugging and development only, don't run in the tests Requires: rich, xdev """ from rich.console import Console from rich.panel import Panel from rich.syntax import Syntax from rich.table import Table import distutils.sysconfig import ubelt as ub import xdev console = Console() def rich_file_content(fpath, lexer='bash'): import os text = fpath.read_text() return Panel(Syntax(text, lexer), title=os.fspath(fpath)) def print_egg_path_content(egg_info_dpath, color='blue'): blocklist = {'requires.txt'} fpaths = egg_info_dpath.ls() table = Table(f'[{color}]' + str(egg_info_dpath)) for fpath in fpaths: if fpath.name not in blocklist: panel = rich_file_content(fpath) table.add_row(panel) console.print(table) print('\n') print('Repo Structure:') directory_blocklist = ['.*', '.git', 'dist', '_skbuild', 'dev'] xdev.tree_repr(self.root, max_files=None, dirblocklist=directory_blocklist) seems_installed = 0 print('\n') print('Content of the EGG Link:') site_dpath = ub.Path(distutils.sysconfig.get_python_lib()) egg_link_fpaths = list(site_dpath.glob(self.mod_name.replace('_', '*') + '*.egg-link')) if len(egg_link_fpaths) == 0: console.print('[red] No egg link') seems_installed = 0 else: assert len(egg_link_fpaths) == 1 egg_link_fpath = egg_link_fpaths[0] console.print(rich_file_content(egg_link_fpath)) seems_installed = 1 # Note: (recently 2022-08-ish) python switched to a new type of # This is not present in setuptools==63.2.0 but is in 65.3.0 # editable install. TODO: incomporate this. 
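        # Setuptools >= 64 implements editable installs via
        # "__editable__*.pth" (and "__editable___*_finder.py") shims in
        # site-packages instead of egg-links, so also glob for those.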
editable_fpaths = list(site_dpath.glob('__editable__*' + self.mod_name.replace('_', '*') + '*')) print(f'editable_fpaths={editable_fpaths}') print('\n') print('Check easy-install.pth') easy_install_fpath = site_dpath / 'easy-install.pth' assert easy_install_fpath.exists() easy_install_text = easy_install_fpath.read_text() abs_path = self.mod_dpath.absolute().parent print(f'abs_path={abs_path}') if str(abs_path) in easy_install_text: console.print('[green] Easy install dpath is good') else: console.print('[red] Easy install does not contain this package') # console.print(rich_file_content(easy_install_fpath)) expected_egg_info_dpath = self.python_path / f'{self.mod_name}.egg-info' all_egg_infos = [ub.Path(e).resolve() for e in xdev.find('*.egg-info', dpath=self.root, dirblocklist=directory_blocklist)] other_egg_infos = set(all_egg_infos) - {expected_egg_info_dpath.resolve()} print('expected_egg_info_dpath = {}'.format(ub.repr2(expected_egg_info_dpath, nl=1))) if expected_egg_info_dpath.exists(): console.print('[green] Egg info exists in expected location') egg_info_dpath = expected_egg_info_dpath print_egg_path_content(egg_info_dpath, color='green') else: console.print('[red] Egg info does not exist in expected location') print(f'other_egg_infos={other_egg_infos}') if other_egg_infos: console.print('[red] THERE ARE UNEXPECTED EGG INFOS') for egg_info_dpath in other_egg_infos: print_egg_path_content(egg_info_dpath, color='red') if seems_installed: print('\n') print('Test to ensure we can import the module') command = f'python -c "import {self.mod_name}; print({self.mod_name})"' info = ub.cmd(command, verbose=3) if info['ret'] != 0: raise Exception('failed to import') assert str(self.mod_dpath) in info['out'] else: console.print('[yellow] Package does not seem installed, so skipping import test') def serialize_install(self): # TODO: serialize this step to make it fast import distutils.sysconfig import ubelt as ub site_dpath = ub.Path(distutils.sysconfig.get_python_lib()) egg_link_fpaths = list(site_dpath.glob(self.mod_name.replace('_', '*') + '*.egg-link')) editable_fpaths = list(site_dpath.glob('__editable__*' + self.mod_name.replace('_', '*') + '*')) easy_install_fpath = site_dpath / 'easy-install.pth' # NOQA print(f'egg_link_fpaths={egg_link_fpaths}') print(f'editable_fpaths={editable_fpaths}') GLOBAL_PROJECTS = [] def _check_skip_editable_module_tests(): UBELT_DO_EDITABLE_TESTS = os.environ.get('UBELT_DO_EDITABLE_TESTS', '') if not UBELT_DO_EDITABLE_TESTS: import pytest pytest.skip('UBELT_DO_EDITABLE_TESTS is not enabled') if sys.platform.startswith('win32'): import pytest pytest.skip('skip editable module tests on Win32') if sys.platform.startswith('freebsd'): import pytest pytest.skip('skip editable module tests on FreeBSD') def setup_module(module): """ setup any state specific to the execution of the given module.""" import uuid import ubelt as ub _check_skip_editable_module_tests() suffix = ub.hash_data(uuid.uuid4(), base='abc')[0:8] dpath = ub.Path.appdir('ubelt/tests/demo_packages').ensuredir() # Define pure python module with ./src/python structure mod_name = 'purepy_src_demo_pkg_' + suffix PUREPY_SRC_PROJECT = ProjectStructure(repo_dpath=dpath / mod_name, mod_name=mod_name, use_src=True) PUREPY_SRC_PROJECT.setup() GLOBAL_PROJECTS.append(PUREPY_SRC_PROJECT) if 0: self = PUREPY_SRC_PROJECT self.serialize() # Define pure python module with the package at root level mod_name = 'purepy_root_demo_pkg_' + suffix PUREPY_SRC_PROJECT = ProjectStructure(repo_dpath=dpath / mod_name, 
mod_name=mod_name, use_src=False) PUREPY_SRC_PROJECT.setup() GLOBAL_PROJECTS.append(PUREPY_SRC_PROJECT) if 0: for proj in GLOBAL_PROJECTS: proj.analyze() def teardown_module(module): """ teardown any state that was previously setup with a setup_module method. """ _check_skip_editable_module_tests() for PROJ in GLOBAL_PROJECTS: PROJ.teardown() def test_import_of_editable_install(): _check_skip_editable_module_tests() print('Testing editable installs') import ubelt as ub for PROJ in GLOBAL_PROJECTS: result = ub.modname_to_modpath(PROJ.mod_name) print(f'result={result}') assert result is not None assert PROJ.mod_dpath == ub.Path(result) if __name__ == '__main__': """ CommandLine: UBELT_DO_EDITABLE_TESTS=1 python ~/code/ubelt/tests/test_editable_modules.py """ setup_module(None) test_import_of_editable_install() teardown_module(None) ubelt-1.3.7/tests/test_func.py000066400000000000000000000025101472470106000163420ustar00rootroot00000000000000 def test_compatible_keywords(): import ubelt as ub def func(a, e, f, *args, **kwargs): return a * e * f config = { 'a': 2, 'b': 3, 'c': 7, 'd': 11, 'e': 13, 'f': 17, } assert ub.compatible(config, func, keywords=True) is config assert ub.compatible(config, func, keywords=1) is config assert ub.compatible(config, func, keywords='truthy') is config assert ub.compatible(config, func, keywords=['iterable']) is not config assert ub.compatible(config, func, keywords=0) is not config assert ub.compatible(config, func, keywords={'b'}) == {'a': 2, 'e': 13, 'f': 17, 'b': 3} def test_positional_only_args(): import ubelt as ub import sys import pytest if sys.version_info[0:2] <= (3, 7): pytest.skip('position only arguments syntax requires Python >= 3.8') # Define via an exec, so this test does not raise a syntax error # in other versions of python and skips gracefully pos_only_code = ub.codeblock( ''' import ubelt as ub def func(a, e, /, f): return a * e * f ''') ns = {} exec(pos_only_code, ns, ns) func = ns['func'] config = { 'a': 2, 'b': 3, 'c': 7, 'd': 11, 'e': 13, 'f': 17, } pos_only = ub.compatible(config, func) assert sorted(pos_only) == ['f'] ubelt-1.3.7/tests/test_futures.py000066400000000000000000000223441472470106000171130ustar00rootroot00000000000000def test_job_pool_context_manager(): import ubelt as ub def worker(data): return data + 1 pool = ub.JobPool('thread', max_workers=16) with pool: for data in ub.ProgIter(range(10), desc='submit jobs'): pool.submit(worker, data) final = [] for job in pool.as_completed(desc='collect jobs'): info = job.result() final.append(info) def test_job_pool_as_completed_prog_args(): import ubelt as ub def worker(data): return data + 1 pool = ub.JobPool('thread', max_workers=1) for data in ub.ProgIter(range(10), desc='submit jobs'): pool.submit(worker, data) with ub.CaptureStdout() as cap: final = list(pool.as_completed(desc='collect jobs', progkw={'verbose': 3, 'time_thresh': 0})) print(f'cap.text={cap.text}') num_lines = len(cap.text.split('\n')) num_jobs = len(pool.jobs) assert num_lines > num_jobs print('final = {!r}'.format(final)) pool.shutdown() def test_executor_timeout(): import pytest pytest.skip( 'long test, demos that timeout does not work with SerialExecutor') import ubelt as ub import time from concurrent.futures import TimeoutError def long_job(n, t): for i in range(n): time.sleep(t) for mode in ['thread', 'process', 'serial']: executor = ub.Executor(mode=mode, max_workers=1) with executor: job = executor.submit(long_job, 10, 0.05) with ub.Timer() as timer: try: job_result = job.result(timeout=0.01) except 
TimeoutError as ex: ex_ = ex else: print('job_result = {!r}'.format(job_result)) print('timer.elapsed = {!r}'.format(timer.elapsed)) print('ex_ = {!r}'.format(ex_)) def test_job_pool_clear_completed(): import weakref import gc import ubelt as ub is_deleted = {} weak_futures = {} jobs = ub.JobPool(mode='process', max_workers=4) def make_finalizer(jobid): def _finalizer(): is_deleted[jobid] = True return _finalizer def debug_referrers(): if 0: referrers = ub.udict({}) for jobid, ref in weak_futures.items(): fs = ref() referrers[jobid] = 0 if fs is None else len(gc.get_referrers(fs)) print('is_deleted = {}'.format(ub.urepr(is_deleted, nl=1))) print('referrers = {}'.format(ub.urepr(referrers, nl=1))) for jobid in range(10): fs = jobs.submit(simple_worker, jobid) weak_futures[jobid] = weakref.ref(fs) is_deleted[jobid] = False weakref.finalize(fs, make_finalizer(jobid)) del fs debug_referrers() assert not any(is_deleted.values()) for fs in jobs.as_completed(): fs.result() debug_referrers() assert not any(is_deleted.values()) jobs._clear_completed() debug_referrers() import platform if 'pypy' not in platform.python_implementation().lower(): if not any(is_deleted.values()): raise AssertionError fs = None if 'pypy' not in platform.python_implementation().lower(): if not all(is_deleted.values()): raise AssertionError def simple_worker(jobid): return jobid def test_job_pool_transient(): import weakref import ubelt as ub is_deleted = {} weak_futures = {} jobs = ub.JobPool(mode='process', max_workers=4, transient=True) def make_finalizer(jobid): def _finalizer(): is_deleted[jobid] = True return _finalizer for jobid in range(10): fs = jobs.submit(simple_worker, jobid) weak_futures[jobid] = weakref.ref(fs) is_deleted[jobid] = False weakref.finalize(fs, make_finalizer(jobid)) if any(is_deleted.values()): raise AssertionError for fs in jobs.as_completed(): fs.result() # For 3.6, pytest has an AST issue if and assert statements are used. # raising regular AssertionErrors to handle that. import platform if 'pypy' not in platform.python_implementation().lower(): if not any(is_deleted.values()): raise AssertionError fs = None if 'pypy' not in platform.python_implementation().lower(): if not all(is_deleted.values()): raise AssertionError def test_backends(): import platform import sys # The process backend breaks pyp3 when using coverage if 'pypy' in platform.python_implementation().lower(): import pytest pytest.skip('not testing process on pypy') if sys.platform.startswith('win32'): import pytest pytest.skip('not running this test on win32 for now') import ubelt as ub # Fork before threading! 
# https://pybay.com/site_media/slides/raymond2017-keynote/combo.html self1 = ub.Executor(mode='serial', max_workers=0) self1.__enter__() self2 = ub.Executor(mode='process', max_workers=2) self2.__enter__() self3 = ub.Executor(mode='thread', max_workers=2) self3.__enter__() jobs = [] jobs.append(self1.submit(sum, [1, 2, 3])) jobs.append(self1.submit(sum, [1, 2, 3])) jobs.append(self2.submit(sum, [10, 20, 30])) jobs.append(self2.submit(sum, [10, 20, 30])) jobs.append(self3.submit(sum, [4, 5, 5])) jobs.append(self3.submit(sum, [4, 5, 5])) for job in jobs: result = job.result() print('result = {!r}'.format(result)) self1.__exit__(None, None, None) self2.__exit__(None, None, None) self3.__exit__(None, None, None) def test_done_callback(): import ubelt as ub self1 = ub.Executor(mode='serial', max_workers=0) with self1: jobs = [] for i in range(10): jobs.append(self1.submit(sum, [i + 1, i])) for job in jobs: job.add_done_callback(lambda x: print('done callback got x = {}'.format(x))) result = job.result() print('result = {!r}'.format(result)) def _killable_worker(kill_fpath): """ An infinite loop that we can kill by writing a sentinel value to disk """ import ubelt as ub timer = ub.Timer().tic() while True: # Don't want for too long if timer.toc() > 10: return if kill_fpath.exists(): return def _sleepy_worker(seconds, loops=100): """ An infinite loop that we can kill by writing a sentinel value to disk """ import time start_time = time.monotonic() while True: time.sleep(seconds / loops) elapsed = time.monotonic() - start_time if elapsed > seconds: return elapsed def test_as_completed_timeout(): """ xdoctest ~/code/ubelt/tests/test_futures.py test_as_completed_timeout """ from concurrent.futures import TimeoutError import ubelt as ub import uuid kill_fname = str(uuid.uuid4()) + '.signal' # modes = ['thread', 'process', 'serial'] modes = ['thread', 'process'] timeout = 0.1 dpath = ub.Path.appdir('ubelt', 'tests', 'futures', 'timeout').ensuredir() kill_fpath = dpath / kill_fname for mode in modes: jobs = ub.JobPool(mode=mode, max_workers=2) with jobs: print('Submitting') timer = ub.Timer().tic() jobs.submit(_sleepy_worker, seconds=1e-1) print('Submit job: ' + str(timer.toc())) jobs.submit(_sleepy_worker, seconds=2e-1) print('Submit job: ' + str(timer.toc())) jobs.submit(_sleepy_worker, seconds=3e-1) print('Submit job: ' + str(timer.toc())) jobs.submit(_killable_worker, kill_fpath) print('Submit job: ' + str(timer.toc())) jobs.submit(_killable_worker, kill_fpath) print('Submit job: ' + str(timer.toc())) jobs.submit(_killable_worker, kill_fpath) print('Submit job: ' + str(timer.toc())) jobs.submit(_killable_worker, kill_fpath) print('Submit job: ' + str(timer.toc())) print('Finished submit') timer.tic() try: completed_iter = jobs.as_completed(timeout=timeout) timer.tic() for job in completed_iter: print('Collect job: ' + str(timer.toc())) try: job.result() except Exception as ex: print(f'ex={ex}') ... # print('job = {}'.format(ub.urepr(job, nl=1))) timer.tic() ... 
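            # If as_completed times out we land in the handler below; the
            # _killable_worker jobs keep running until the sentinel file
            # is touched after this loop.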
except TimeoutError as ex: print(f'We got a timeout ex={ex}') print('Handled timeout: ' + str(timer.toc())) print('We cant escape this context until the jobs finish') print([j._state for j in jobs.jobs]) kill_fpath.touch() print([j._state for j in jobs.jobs]) print([j._state for j in jobs.jobs]) print('Cleanup') kill_fpath.delete() print('End of function') if __name__ == '__main__': """ CommandLine: python ~/code/ubelt/tests/test_futures.py """ test_as_completed_timeout() # import xdoctest # xdoctest.doctest_module(__file__) ubelt-1.3.7/tests/test_hash.py000066400000000000000000000674421472470106000163510ustar00rootroot00000000000000import ubelt as ub import itertools as it import uuid import pytest from ubelt.util_hash import _convert_hexstr_base, _ALPHABET_16 from ubelt.util_hash import _hashable_sequence from ubelt.util_hash import _rectify_hasher try: import numpy as np except ImportError: np = None def _benchmark(): """ On 64-bit processors sha512 may be faster than sha256 References: .. [SE26336] https://crypto.stackexchange.com/questions/26336/sha512-faster-than-sha256 """ result = ub.AutoOrderedDict() algos = ['sha1', 'sha256', 'sha512'] for n in ub.ProgIter([1, 10, 100, 1000, 10000, 100000], desc='time'): # for key in hashlib.algorithms_guaranteed: for key in algos: hashtype = _rectify_hasher(key) t1 = ub.Timerit(100, bestof=10, label=key, verbose=0) for timer in t1: data = b'8' * n with timer: hasher = hashtype() hasher.update(data) result[key][n] = t1.min() import pandas as pd print(pd.DataFrame(result)) result = ub.AutoOrderedDict() for n in ub.ProgIter([1, 10, 100, 1000, 10000, 100000], desc='time'): # for key in hashlib.algorithms_guaranteed: for key in algos: hashtype = _rectify_hasher(key) t1 = ub.Timerit(100, bestof=10, label=key, verbose=0) for timer in t1: data = b'8' * n hasher = hashtype() hasher.update(data) with timer: hasher.hexdigest() result[key][n] = t1.min() import pandas as pd print(pd.DataFrame(result)) """ CommandLine: python -m test_hash _benchmark Example: >>> # DISABLE_DOCTEST >>> from test_hash import * # NOQA >>> result = _benchmark() >>> print(result) %timeit hashlib.sha256().update(b'8' * 1000) 3.62 µs per loop %timeit hashlib.sha512().update(b'8' * 1000) 2.5 µs per loop %timeit hashlib.sha256().update(b'8' * 1) 318 ns %timeit hashlib.sha512().update(b'8' * 1) 342 ns %timeit hashlib.sha256().update(b'8' * 100000) 306 µs %timeit hashlib.sha512().update(b'8' * 100000) 213 µs """ def test_hash_data_with_types(): if np is None: pytest.skip('requires numpy') counter = [0] failed = [] def check_hash(want, input_): count = counter[0] = counter[0] + 1 got = ub.hash_data(input_, hasher='sha512', base='abc', types=True) got = got[0:32] # assert got.startswith(want), 'want={}, got={}'.format(want, got) print('check_hash({!r}, {!r})'.format(got, input_)) if want is not None and not got.startswith(want): item = (got, input_, count, want) failed.append(item) check_hash('egexcbwgdtmjrzafljtjwqpgfhmfetjs', '1') check_hash('hjvebphzylxgtxncyphclsjglvmstsbq', ['1']) check_hash('hjvebphzylxgtxncyphclsjglvmstsbq', tuple(['1'])) check_hash('ftzqivzayzivmobwymodjnnzzxzrvvjz', b'12') check_hash('jiwjkgkffldfoysfqblsemzkailyridf', [b'1', b'2']) check_hash('foevisahdffoxfasicvyklrmuuwqnfcc', [b'1', b'2', b'3']) check_hash('foevisahdffoxfasicvyklrmuuwqnfcc', ['1', '2', '3']) check_hash('rkcnfxkjwkrfejhbpcpopmyubhbvonkt', ['1', np.array([1, 2, 3], dtype=np.int64), '3']) check_hash('lxssoxdkstvccsyqaybaokehclyctgmn', '123') check_hash('fpvptydigvgjimbzadztgpvjpqrevwcq', zip([1, 
2, 3], [4, 5, 6])) print(ub.urepr(failed, nl=1)) assert len(failed) == 0 def test_hash_data_without_types(): if np is None: pytest.skip('requires numpy') counter = [0] failed = [] def check_hash(want, input_): count = counter[0] = counter[0] + 1 got = ub.hash_data(input_, hasher='sha1', base='hex', types=False) # assert got.startswith(want), 'want={}, got={}'.format(want, got) print('check_hash({!r}, {!r})'.format(got, input_)) if want is not None and not got.startswith(want): item = (got, input_, count, want) failed.append(item) check_hash('356a192b7913b04c54574d18c28d46e6395428ab', '1') check_hash('d3bcc889aced30afd8e66ae45b310239d79be3df', ['1']) check_hash('d3bcc889aced30afd8e66ae45b310239d79be3df', ('1',)) check_hash('7b52009b64fd0a2a49e6d8a939753077792b0554', b'12') check_hash('6bcab1cebcb44fc5c69faacc0ed661b19eff9fef', [b'1', b'2']) check_hash('d6d265a904bc7df97bd54a8c2ff4546e211c3cd8', [b'1', b'2', b'3']) check_hash('d6d265a904bc7df97bd54a8c2ff4546e211c3cd8', ['1', '2', '3']) check_hash('eff59c7c787bd223a680c9d625f54756be4fdf5b', ['1', np.array([1, 2, 3], dtype=np.int64), '3']) check_hash('40bd001563085fc35165329ea1ff5c5ecbdbbeef', '123') check_hash('1ba3c4e7f5af2a5f38d624047f422553ead2b5ae', zip([1, 2, 3], [4, 5, 6])) print(ub.urepr(failed, nl=1)) assert len(failed) == 0 def test_available(): assert 'sha1' in ub.util_hash._HASHERS.available() def test_idempotency(): # When we disable types and join sequence items, the hashable # sequence should be idempotent nested_data = ['fds', [3, 2, 3], {3: 2, '3': [3, 2, {3}]}, {1, 2, 3}] hashable1 = b''.join(_hashable_sequence(nested_data)) hashable2 = b''.join(_hashable_sequence(hashable1, types=False)) assert hashable1 == hashable2 def test_special_floats(): # Tests a fix from version 0.10.3 for inf/nan floats # standard_floats = [0.0, 0.1, 0.2] data = [ float('inf'), float('nan'), float('-inf'), -0., 0., -1., 1., 0.3, 0.1 + 0.2, ] expected_encoding = [ b'_[_', b'FLTinf_,_', b'FLTnan_,_', b'FLT-inf_,_', b'FLT\x00/\x01_,_', b'FLT\x00/\x01_,_', b'FLT\xff/\x01_,_', b'FLT\x01/\x01_,_', b'FLT\x13333333/@\x00\x00\x00\x00\x00\x00_,_', b'FLT\x04\xcc\xcc\xcc\xcc\xcc\xcd/\x10\x00\x00\x00\x00\x00\x00_,_', b'_]_'] exepcted_prefix = '3196f80e17de93565f0fc57d98922a44' hasher = 'sha512' encoded = _hashable_sequence(data, types=True) hashed = ub.hash_data(data, hasher=hasher, types=True)[0:32] print('expected_encoding = {!r}'.format(expected_encoding)) print('encoded = {!r}'.format(encoded)) print('hashed = {!r}'.format(hashed)) print('exepcted_prefix = {!r}'.format(exepcted_prefix)) assert encoded == expected_encoding assert hashed == exepcted_prefix _sanity_check(data) def test_hashable_sequence_sanity(): data = [1, 2, [3.2, 5]] # data = [1] _sanity_check(data) def _sanity_check(data): hasher_code = 'sha512' hasher_type = ub.util_hash._rectify_hasher(hasher_code) encoded_seq = _hashable_sequence(data, types=False) encoded_byt = b''.join(encoded_seq) hashed = ub.hash_data(data, hasher=hasher_code, types=False) rehashed = ub.hash_data(encoded_byt, hasher=hasher_code, types=False) hash_obj1 = hasher_type() hash_obj1.update(encoded_byt) hashed1 = hash_obj1.hexdigest() hash_obj2 = hasher_type() for item in encoded_seq: hash_obj2.update(item) hashed2 = hash_obj2.hexdigest() print('encoded_seq = {!r}'.format(encoded_seq)) print('encoded_byt = {!r}'.format(encoded_byt)) print('hashed = {!r}'.format(hashed)) print('rehashed = {!r}'.format(rehashed)) print('hashed1 = {!r}'.format(hashed1)) print('hashed2 = {!r}'.format(hashed2)) # Sanity check 
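    # Bytes encode to themselves when types=False (e.g.
    # b''.join(_hashable_sequence(b'abc', types=False)) == b'abc'), so
    # re-encoding encoded_byt must be the identity; seq2 checks this.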
ub.hash_data(encoded_seq, hasher=hasher_code, types=False) seq2 = b''.join(_hashable_sequence(encoded_byt, types=False)) assert encoded_byt == seq2 tracer1 = ub.util_hash._HashTracer() ub.hash_data(encoded_byt, types=False, hasher=tracer1) traced_bytes1 = tracer1.hexdigest() print('traced_bytes1 = {!r}'.format(traced_bytes1)) assert traced_bytes1 == encoded_byt tracer2 = ub.util_hash._HashTracer() ub.hash_data(encoded_byt, types=False, hasher=tracer2) traced_bytes2 = tracer1.hexdigest() print('traced_bytes2 = {!r}'.format(traced_bytes2)) assert traced_bytes2 == traced_bytes1 def test_numpy_object_array(): """ _HASHABLE_EXTENSIONS = ub.util_hash._HASHABLE_EXTENSIONS """ if np is None: pytest.skip('requires numpy') # An object array should have the same repr as a list of a tuple of data data = np.array([1, 2, 3], dtype=object) objhash = ub.hash_data(data) assert ub.hash_data([1, 2, 3]) == objhash assert ub.hash_data((1, 2, 3)) == objhash # Ensure this works when the object array is nested data = [np.array([1, 2, 3], dtype=object)] objhash = ub.hash_data(data) assert ub.hash_data([[1, 2, 3]]) == objhash assert ub.hash_data([(1, 2, 3)]) == objhash assert ub.hash_data(([1, 2, 3],)) == objhash def test_ndarray_int_object_convert(): if np is None: pytest.skip('requires numpy') data_list = [[1, 2, 3], [4, 5, 6]] data = np.array(data_list, dtype=np.int64) s1 = b''.join(_hashable_sequence(data.astype(object))) s2 = b''.join(_hashable_sequence(data_list)) s3 = b''.join(_hashable_sequence(data.tolist())) s4 = b''.join(_hashable_sequence(data.astype(np.uint8).astype(object))) assert s1 == s4 assert s2 == s4 assert s3 == s4 def test_ndarray_zeros(): if np is None: pytest.skip('requires numpy') data = np.zeros((3, 3), dtype=np.int64) hashid = ub.hash_data(data) assert hashid != ub.hash_data(data.ravel()), ( 'shape should influence data') assert hashid != ub.hash_data(data.astype(np.float32)) assert hashid != ub.hash_data(data.astype(np.int32)) assert hashid != ub.hash_data(data.astype(np.int8)) def test_nesting(): assert _hashable_sequence([1, 1, 1]) != _hashable_sequence([[1], 1, 1]) assert _hashable_sequence([[1], 1]) != _hashable_sequence([[1, 1]]) assert _hashable_sequence([1, [1]]) != _hashable_sequence([[1, 1]]) assert _hashable_sequence([[[1]]]) != _hashable_sequence([[1]]) def test_numpy_int(): if np is None: pytest.skip('requires numpy') assert _hashable_sequence(np.int8(3)) == _hashable_sequence(3) assert _hashable_sequence(np.int16(3)) == _hashable_sequence(3) assert _hashable_sequence(np.int32(3)) == _hashable_sequence(3) assert _hashable_sequence(np.int64(3)) == _hashable_sequence(3) assert _hashable_sequence(np.uint8(3)) == _hashable_sequence(3) assert _hashable_sequence(np.uint16(3)) == _hashable_sequence(3) assert _hashable_sequence(np.uint32(3)) == _hashable_sequence(3) assert _hashable_sequence(np.uint64(3)) == _hashable_sequence(3) def test_numpy_float(): if np is None: pytest.skip('requires numpy') assert _hashable_sequence(np.float16(3.0)) == _hashable_sequence(3.0) assert _hashable_sequence(np.float32(3.0)) == _hashable_sequence(3.0) assert _hashable_sequence(np.float64(3.0)) == _hashable_sequence(3.0) try: assert _hashable_sequence(np.float128(3.0)) == _hashable_sequence(3.0) except AttributeError: pass def test_numpy_random_state(): if np is None: pytest.skip('requires numpy') data = np.random.RandomState(0) assert ub.hash_data(data, hasher='sha512', types=True, base='abc').startswith('snkngbxghabesvowzalqtvdvjtvslmxve') def test_uuid(): data = 
uuid.UUID('12345678-1234-1234-1234-123456789abc') sequence = b''.join(_hashable_sequence(data, types=True)) assert sequence == b'UUID\x124Vx\x124\x124\x124\x124Vx\x9a\xbc' assert ub.hash_data(data, types=True, base='abc', hasher='sha512').startswith('nkklelnjzqbi') assert ub.hash_data(data.bytes, types=True) != ub.hash_data(data, types=True), ( 'the fact that it is a UUID should reflect in the hash') assert ub.hash_data(data.bytes, types=False) == ub.hash_data(data, types=False), ( 'the hash should be equal when ignoring types') def test_decimal(): import decimal data = decimal.Decimal('3.1415') sequence = b''.join(_hashable_sequence(data, types=True)) assert sequence == b'DECIMAL_[_INT\x00_,__[_INT\x03_,_INT\x01_,_INT\x04_,_INT\x01_,_INT\x05_,__]_INT\xfc_,__]_' assert ub.hash_data(data, types=True, base='abc', hasher='sha512').startswith('oquwtvtrsytm') assert ub.hash_data(data.as_tuple(), types=True) != ub.hash_data(data, types=True), ( 'the fact that it is a Decimal should reflect in the hash') assert ub.hash_data(data.as_tuple(), types=True) == ub.hash_data(data, types=False), ( 'it is a quirk of our hashable extensions that an a typed decimal ' 'tuple will be the same as an untyped decimal. ' 'It is ok to break this test if we refactor to fix issues in ' 'hashable extensions' ) sequence1 = b''.join(_hashable_sequence(data, types=True)) sequence2 = b''.join(_hashable_sequence(data, types=False)) sequence3 = b''.join(_hashable_sequence(data.as_tuple(), types=True)) sequence4 = b''.join(_hashable_sequence(data.as_tuple(), types=False)) assert sequence1 != sequence2, 'quirky test' assert sequence2 == sequence3, 'quirky test' assert sequence4 != sequence3, 'quirky test' def test_datetime(): import datetime as datetime_mod data = datetime_mod.datetime(2101, 1, 1) sequence = b''.join(_hashable_sequence(data, types=True)) assert sequence == b'DATETIME_[_INT\x085_,_INT\x01_,_INT\x01_,_INT\x00_,_INT\x00_,_INT\x00_,_INT\x05_,_INT\x01_,_INT\xff_,__]_' assert ub.hash_data(data, types=True, base='abc', hasher='sha512').startswith('fwjyfdtgcdasv') assert ub.hash_data(data.timetuple(), types=True) != ub.hash_data(data, types=True), ( 'the fact that it is a Decimal should reflect in the hash') assert ub.hash_data(data.timetuple(), types=True) == ub.hash_data(data, types=False), ( 'it is a quirk of our hashable extensions that an a typed datetime ' 'tuple will be the same as an untyped decimal. ' 'It is ok to break this test if we refactor to fix issues in ' 'hashable extensions' ) def test_date(): import datetime as datetime_mod data = datetime_mod.date(2101, 1, 1) sequence = b''.join(_hashable_sequence(data, types=True)) assert sequence == b'DATE_[_INT\x085_,_INT\x01_,_INT\x01_,_INT\x00_,_INT\x00_,_INT\x00_,_INT\x05_,_INT\x01_,_INT\xff_,__]_' assert ub.hash_data(data, types=True, base='abc', hasher='sha512').startswith('dlahlcoqypecc') assert ub.hash_data(data.timetuple(), types=True) != ub.hash_data(data, types=True), ( 'the fact that it is a Decimal should reflect in the hash') assert ub.hash_data(data.timetuple(), types=True) == ub.hash_data(data, types=False), ( 'it is a quirk of our hashable extensions that an a typed date' 'tuple will be the same as an untyped decimal. 
' 'It is ok to break this test if we refactor to fix issues in ' 'hashable extensions' ) def test_hash_data_custom_base(): data = 1 # A larger base means the string can be shorter hashid_26 = ub.hash_data(data, base='abc', hasher='sha512', types=True) assert len(hashid_26) == 109 # assert hashid_26.startswith('lejivmqndqzp') assert hashid_26.startswith('rfsmlqsjsuzllgp') hashid_16 = ub.hash_data(data, base='hex', hasher='sha512', types=True) # assert hashid_16.startswith('8bf2a1f4dbea6e59e5c2ec4077498c44') assert hashid_16.startswith('d7c9cea9373eb7ba20444ec65e0186b') assert len(hashid_16) == 128 # Binary should have len 512 because the default hasher is sha512 hashid_2 = ub.hash_data(data, base=['0', '1'], hasher='sha512', types=True) assert len(hashid_2) == 512 assert hashid_2.startswith('110101111100100111001110101010010') def test_hash_file(): fpath = ub.Path.appdir('ubelt/tests').ensuredir() / 'tmp.txt' fpath.write_text('foobar') hashid1_a = ub.hash_file(fpath, hasher='sha512', stride=1, blocksize=1) hashid2_a = ub.hash_file(fpath, hasher='sha512', stride=2, blocksize=1) hashid1_b = ub.hash_file(fpath, hasher='sha512', stride=1, blocksize=10) hashid2_b = ub.hash_file(fpath, hasher='sha512', stride=2, blocksize=10) assert hashid1_a == hashid1_b assert hashid2_a != hashid2_b, 'blocksize matters when stride is > 1' assert hashid1_a != hashid2_a hashid3_c = ub.hash_file(fpath, hasher='sha512', stride=2, blocksize=10, maxbytes=1000) assert hashid3_c == hashid2_b def test_empty_hash_file(): fpath = ub.Path.appdir('ubelt/tests').ensuredir() / 'tmp.txt' fpath.write_bytes(b'') a = ub.hash_file(fpath, hasher='sha512', stride=1, blocksize=1) b = ub.hash_file(fpath, hasher='sha512', stride=4, blocksize=4) c = ub.hash_file(fpath, hasher='sha512', stride=4, blocksize=4, maxbytes=1) d = ub.hash_file(fpath, hasher='sha512', stride=1, blocksize=4, maxbytes=0) assert a == b == c == d def test_convert_base_hex(): # Test that hex values are unchanged for i in it.chain(range(-10, 10), range(-1000, 1000, 7)): text = hex(i).replace('0x', '') assert _convert_hexstr_base(text, _ALPHABET_16) == text, ( 'should not change hex') def test_convert_base_decimal(): base_10 = list(map(str, range(10))) # Test that decimal values agree with python conversion for i in it.chain(range(-10, 10), range(-1000, 1000, 7)): text_16 = hex(i).replace('0x', '') text_10 = _convert_hexstr_base(text_16, base_10) assert int(text_16, 16) == int(text_10, 10) def test_convert_base_simple(): # Quick one-of tests assert _convert_hexstr_base('aaa0111', _ALPHABET_16) == 'aaa0111' assert _convert_hexstr_base('aaa0111', list('01')) == '1010101010100000000100010001' assert _convert_hexstr_base('aaa0111', list('012')) == '110110122202020220' assert _convert_hexstr_base('aaa0111', list('0123')) == '22222200010101' base_10 = list(map(str, range(10))) assert _convert_hexstr_base('aaa0111', base_10) == '178913553' def test_no_prefix(): full = b''.join(_hashable_sequence(1, types=True)) part = b''.join(_hashable_sequence(1, types=False)) # assert full == b'INT\x00\x00\x00\x01' # assert part == b'\x00\x00\x00\x01' assert full == b'INT\x01' assert part == b'\x01' def _test_int_bytes(): assert ub.util_hash._int_to_bytes(0) == b'\x00' assert ub.util_hash._int_to_bytes(1) == b'\x01' assert ub.util_hash._int_to_bytes(2) == b'\x02' assert ub.util_hash._int_to_bytes(-1) == b'\xff' assert ub.util_hash._int_to_bytes(-2) == b'\xfe' assert ub.util_hash._int_to_bytes(600) == b'\x02X' assert ub.util_hash._int_to_bytes(-600) == b'\xfd\xa8' assert 
ub.util_hash._int_to_bytes(2 ** 256) == b'\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' assert ub.util_hash._int_to_bytes(-2 ** 256) == b'\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' def test_xxhash(): if 'xxh64' in ub.util_hash._HASHERS.available(): assert ub.hash_data('foo', hasher='xxh64') == '33bf00a859c4ba3f' else: pytest.skip('xxhash is not available') def test_blake3(): if 'blake3' in ub.util_hash._HASHERS.available(): assert ub.hash_data('foo', hasher='b3') == '04e0bb39f30b1a3feb89f536c93be15055482df748674b00d26e5a75777702e9' else: pytest.skip('blake3 is not available') def test_base32(): hashstr = ub.hash_data('abc', hasher='sha1', base=32, types=False) print(f'hashstr={hashstr}') assert hashstr == 'VGMT4NSHA2AWVOR6EVYXQUGCNSONBWE5' def test_compatible_hash_bases(): """ Ubelt ~1.2.3 has a ~bug~ incompatibility with non-hex hash bases. Depending on leftover amount of data in the byte stream, our hex reencoding may be incorrect. It is still correct when the input has correct lengths, but in general it can produce issues if you were expecting hashes to conform to RFC standards. FIXME: THIS ISSUE IS NOT RESOLVED YET. NEED A WAY OF GETTING COMPATIBLE BEHAVIOR WITH STANDARD ENCODINGS. THIS ULTIMATELY REQUIRES PROCESSING DATA WITH PADDING AND VIA BYTE FORM, NOT INTEGER FORM. References: .. [SO43920799] https://stackoverflow.com/questions/43920799/convert-byte-to-base64-and-ascii-in-python .. [MultiBase] https://github.com/multiformats/multibase .. [SO6916805] https://stackoverflow.com/questions/6916805/why-does-a-base64-encoded-string-have-an-sign-at-the-end .. [SementeBaseConv] https://github.com/semente/python-baseconv """ import pytest pytest.skip('FIXME. 
THE HASH PADDING ISSUE IS NOT RESOLVED YET.') if not ub.LINUX: pytest.skip('only runs on linux') required_programs = [ 'sha256sum', 'cut', 'xxd', 'base32', ] HAS_PROGS = all(ub.find_exe(p) for p in required_programs) if not HAS_PROGS: pytest.skip('only runs if required programs exist') hasher = 'sha1' hasher = 'sha256' # hasher = 'sha512' text = 'foobar' trace = ub.hash_data(text, hasher=ub.util_hash._HashTracer(), types=False) print(f'text={text}') print(f'trace={trace}') print(f'hasher={hasher}') hasher_obj = ub.util_hash._rectify_hasher(hasher)() hasher_obj.update(trace) raw_bytes = hasher_obj.digest() print(f'raw_bytes={raw_bytes}') import base64 realb32_encode = base64.b32encode(raw_bytes) # base64.b32decode(realb32_encode) print(f'realb32_encode=\n{realb32_encode}') _ = ub.cmd(fr'printf "{text}" | {hasher}sum | cut -f1 -d\ | xxd -r -p', shell=True, system=True) # _ = ub.cmd(fr'printf "{text}" | {hasher}sum | cut -f1 -d\ | xxd -r', shell=True, verbose=3) std_result = ub.cmd(fr'printf "{text}" | {hasher}sum', shell=True, verbose=3)['out'].split(' ')[0] our_result = ub.hash_data(text, hasher=hasher, types=False) print(f'std_result={std_result}') print(f'our_result={our_result}') assert our_result == std_result std_result = ub.cmd(fr'printf "{text}" | {hasher}sum | cut -f1 -d\ | xxd -r -p | base32', shell=True, verbose=3)['out'].strip().replace('\n', '') our_result = ub.hash_data(text, hasher=hasher, types=False, base=32) std_result_16 = ub.cmd(fr'printf "{text}" | {hasher}sum | cut -f1 -d\ ', shell=True, verbose=3)['out'].strip().replace('\n', '') our_result_16 = ub.hash_data(text, hasher=hasher, types=False, base=16) print(f'std_result_16={std_result_16}') print(f'our_result_16={our_result_16}') raw_result = base64.b16decode(our_result_16.upper()) fix_result = base64.b32encode(raw_result).decode() print(f'fix_result={fix_result}') print(f'std_result={std_result}') print(f'our_result={our_result}') assert our_result == std_result if 1: hexstr = our_result_16 base = ub.util_hash._ALPHABET_32 baselen = len(base) # Experimental solution for _convert_hexstr_base # The alternate code has a bug, but it is concistent so we can't change # it. Work towards correct logic is here, which we will eventually # introduce as an opt-in change. import base64 raw_bytes = base64.b16decode(hexstr.upper()) # leftover = len(raw_bytes) % 5 # # Pad the last quantum with zero bits if necessary # if leftover: # raw_bytes = raw_bytes + b'\0' * (5 - leftover) # Don't use += ! 
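        # Converting through a big integer is lossy at the edges: leading
        # zero bytes vanish (int.from_bytes(b'\x00\xff', 'big') ==
        # int.from_bytes(b'\xff', 'big') == 255) and nothing pads to the
        # 40-bit quanta RFC 4648 base32 expects, hence the incompatibility
        # described in the docstring above.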
x = int.from_bytes(raw_bytes, 'big', signed=False) r = 0 digits = [] while x: x, r = divmod(x, baselen) digits.append(base[r]) print(r) digits.reverse() newbase_str = ''.join(digits) print(newbase_str) import baseconv base32_digits = ''.join(ub.util_hash._ALPHABET_32) base16_digits = ''.join(ub.util_hash._ALPHABET_16) class MyHexConvertor(baseconv.BaseConverter): decimal_digits = base16_digits co = MyHexConvertor(base32_digits) print(f'hexstr={hexstr}') got = co.encode(hexstr) print(f'got={got}') co = MyHexConvertor(base16_digits) co.decimal_digits = base32_digits redid = co.encode(got) print(f'redid={redid}') r""" echo "foobar" > test.txt ipfs add --only-hash test.txt --cid-version=1 # https://github.com/multiformats/py-multibase pip install py-multibase from multibase import encode, decode hasher_obj = ub.util_hash._rectify_hasher('sha256')() hasher_obj.update(b'foobar') raw_bytes = hasher_obj.digest() raw_bytes = b'\xc3\xab\x8f\xf17 \xe8\xad\x90G\xdd9Fk<\x89t\xe5\x92\xc2\xfa8=J9`qL\xae\xf0\xc4\xf2' encode('base32', raw_bytes).upper() encode('base32upper', raw_bytes).upper() """ if base == list(base64._b32alphabet.decode()): # NOTE: This code has an incompatibility with standard base encodings # because it does not pad the bytes. I.e. for base 64 3 bytes are # converted into 4 characters, so we need a input string divisible by # 3. For base32 5 bytes are converted into 2 characters. # in general we have to find lowest N and M such that # # N = number of characters in the encoding # M = number of bytes in the input # # Usually N > M # # ** N == (2 ** 8) ** M # or # ** N == (2 ** (8 * M)) # # e.g. For base=64 # 64 ** 4 == (2 ** 8) ** 3 # # e.g. For base=32 # 32 ** 8 == (2 ** 8) ** 5 # # In general need find integer solutions for: # M = log(B**N)/(8*log(2)) # or # N = log(256 ** M)/log(B) if 0: import sympy N, M, B = sympy.symbols('N, M, B') eqn = sympy.Eq((B ** N), ((2 ** 8) ** M)) solutions = sympy.solve(eqn, N) print('solutions = {}'.format(ub.urepr(solutions, nl=1))) b = 64 for soln in solutions: for m in range(1, 10): ans = soln.subs({M: m, B: b}).evalf() real, imag = ans.as_real_imag() if abs(imag) < 1e-8: fracs = real - int(real) if fracs < 1e-8: print(f'n={m}') print(soln) print(ans) raise Exception # There is no integer solution for base 26 base_size = 26 import math for i in range(0, 100): num_input_bytes = i num_output_symbols = math.log(256 ** num_input_bytes, base_size) print(f'{num_input_bytes} > {num_output_symbols}') # check # alphabet = base64._b32alphabet # s = raw_bytes # desired = base64.b32encode(raw_bytes) # print(f'desired={desired}') # print(f'newbase_str={newbase_str}') # leftover = len(s) % 5 # # Pad the last quantum with zero bits if necessary # if leftover: # s = s + b'\0' * (5 - leftover) # Don't use += ! 
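        # The commented-out reference code here is adapted from CPython's
        # internal base64._b32encode (note the _b32tab2 lookup table); it
        # is kept as a sketch for a future byte-quantum implementation.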
# encoded = bytearray() # from_bytes = int.from_bytes # b32tab2 = base64._b32tab2[alphabet] # for i in range(0, len(s), 5): # if 1: # i = 0 # c = from_bytes(s[i: i + 5], 'big') # first = (b32tab2[c >> 30] + # bits 1 - 10 # b32tab2[(c >> 20) & 0x3ff] + # bits 11 - 20 # b32tab2[(c >> 10) & 0x3ff] + # bits 21 - 30 # b32tab2[c & 0x3ff] # bits 31 - 40 # ) if __name__ == '__main__': r""" CommandLine: python ~/code/ubelt/ubelt/tests/test_hash.py pytest ~/code/ubelt/ubelt/tests/test_hash.py """ import xdoctest xdoctest.doctest_module(__file__) ubelt-1.3.7/tests/test_import.py000066400000000000000000000503241472470106000167270ustar00rootroot00000000000000import os import sys import pytest import ubelt as ub import itertools as it from os.path import join from ubelt.util_import import PythonPathContext def test_import_modpath_basic(): assert 'testmod' not in sys.modules with pytest.warns(DeprecationWarning): temp = ub.TempDir() with temp: modpath = ub.Path(temp.dpath) / 'testmod.py' text = ub.codeblock( ''' a = 'value' ''') modpath.write_text(text) assert temp.dpath not in sys.path module = ub.import_module_from_path(modpath) assert temp.dpath not in sys.path, 'pythonpath should remain clean' assert module.a == 'value' assert module.__file__ == os.fspath(modpath) assert module.__name__ == 'testmod' assert 'testmod' in sys.modules def test_import_modpath_package(): assert '_tmproot373.sub1.sub2.testmod' not in sys.modules with pytest.warns(DeprecationWarning): temp = ub.TempDir().start() # with ub.TempDir() as temp: if True: dpath = ub.Path(temp.dpath) # Create a dummy package hierarchy root = (dpath / '_tmproot373').ensuredir() sub1 = (root / 'sub1').ensuredir() sub2 = (sub1 / 'sub2').ensuredir() (root / '__init__.py').touch() (sub1 / '__init__.py').touch() (sub2 / '__init__.py').touch() modpath = sub2 / 'testmod.py' text = ub.codeblock( ''' a = 'value' ''') modpath.write_text(text) assert temp.dpath not in sys.path module = ub.import_module_from_path(modpath) assert temp.dpath not in sys.path, 'pythonpath should remain clean' assert module.a == 'value' assert module.__file__ == os.fspath(modpath) assert module.__name__ == '_tmproot373.sub1.sub2.testmod' assert '_tmproot373.sub1.sub2.testmod' in sys.modules assert '_tmproot373.sub1.sub2' in sys.modules assert '_tmproot373' in sys.modules def test_import_modname_builtin(): module = ub.import_module_from_name('ast') import ast assert module is ast def _static_modname_to_modpath(modname, **kwargs): # Calls ub.modname_to_modpath with checks had = modname in sys.modules try: modpath = ub.modname_to_modpath(modname, **kwargs) except ValueError: modpath = None if not had: assert modname not in sys.modules, ( '{} should not be imported'.format(modname)) return modpath def test_modname_to_modpath_single(): with pytest.warns(DeprecationWarning): temp = ub.TempDir() with temp: dpath = temp.dpath # Single module single = ub.touch(join(dpath, '_tmpsingle.py')) single_main = ub.touch(join(dpath, '__main__.py')) with PythonPathContext(dpath): assert single == _static_modname_to_modpath('_tmpsingle') assert single == _static_modname_to_modpath('_tmpsingle', hide_init=True, hide_main=False) assert single == _static_modname_to_modpath('_tmpsingle', hide_init=False, hide_main=False) assert single == _static_modname_to_modpath('_tmpsingle', hide_init=False, hide_main=True) # Weird module named main not in a package assert _static_modname_to_modpath('__main__') == single_main assert _static_modname_to_modpath('__main__', hide_init=True, hide_main=False) == single_main 
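            # hide_init / hide_main make no difference for a bare
            # __main__.py that is not inside a package; each combination
            # resolves to the same single file.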
assert _static_modname_to_modpath('__main__', hide_init=False, hide_main=False) == single_main assert _static_modname_to_modpath('__main__', hide_init=False, hide_main=True) == single_main def test_modname_to_modpath_package(): """ CommandLine: pytest testing/test_static.py::test_modname_to_modpath_package Ignore: import sys sys.path.append('/home/joncrall/code/xdoctest/testing') from test_static import * temp = ub.TempDir() temp.__enter__() sys.path.append(temp.dpath) temp.__exit__(None, None, None) """ with pytest.warns(DeprecationWarning): temp = ub.TempDir() with temp: dpath = temp.dpath # Create a dummy package hierarchy root = ub.ensuredir((dpath, '_tmproot927')) sub1 = ub.ensuredir((root, 'sub1')) sub2 = ub.ensuredir((sub1, 'sub2')) root_init = ub.touch(join(root, '__init__.py')) sub1_init = ub.touch(join(sub1, '__init__.py')) sub2_init = ub.touch(join(sub2, '__init__.py')) mod0 = ub.touch(join(root, 'mod0.py')) mod1 = ub.touch(join(sub1, 'mod1.py')) mod2 = ub.touch(join(sub2, 'mod2.py')) root_main = ub.touch(join(root, '__main__.py')) sub2_main = ub.touch(join(sub2, '__main__.py')) bad1 = ub.ensuredir((root, 'bad1')) bad2 = ub.ensuredir((sub1, 'bad2')) ub.touch(join(bad1, 'b0.py')) ub.touch(join(bad2, 'b0.py')) with PythonPathContext(dpath): # Bad module directories should return None assert _static_modname_to_modpath('_tmproot927.bad1') is None assert _static_modname_to_modpath('_tmproot927.sub1.bad1') is None assert _static_modname_to_modpath('_tmproot927.bad1.b0') is None assert _static_modname_to_modpath('_tmproot927.sub1.bad1.b1') is None assert _static_modname_to_modpath('_tmproot927.bad1') is None # package modules are accessible by the full path assert root == _static_modname_to_modpath('_tmproot927') assert sub1 == _static_modname_to_modpath('_tmproot927.sub1') assert sub2 == _static_modname_to_modpath('_tmproot927.sub1.sub2') assert mod0 == _static_modname_to_modpath('_tmproot927.mod0') assert mod1 == _static_modname_to_modpath('_tmproot927.sub1.mod1') assert mod2 == _static_modname_to_modpath('_tmproot927.sub1.sub2.mod2') # specifying a suffix will not work assert _static_modname_to_modpath('sub1') is None assert _static_modname_to_modpath('sub1.sub2') is None assert _static_modname_to_modpath('mod0') is None assert _static_modname_to_modpath('sub1.mod1') is None assert _static_modname_to_modpath('sub1.sub2.mod2') is None # Specify init if available assert root_init == _static_modname_to_modpath('_tmproot927', hide_init=False) if 1: # Test init assert _static_modname_to_modpath('_tmproot927', hide_init=False) == root_init assert _static_modname_to_modpath('_tmproot927.__init__', hide_init=False) == root_init assert _static_modname_to_modpath('_tmproot927.__main__', hide_init=False, hide_main=True) == root # Test main assert _static_modname_to_modpath('_tmproot927', hide_main=False) == root assert _static_modname_to_modpath('_tmproot927.__init__', hide_main=False) == root assert _static_modname_to_modpath('_tmproot927.__main__', hide_main=False) == root_main # Test init and main both false assert _static_modname_to_modpath('_tmproot927.__init__') == root assert _static_modname_to_modpath('_tmproot927.__main__', hide_main=True) == root # Test init and main both true assert _static_modname_to_modpath('_tmproot927', hide_init=False, hide_main=False) == root_init assert _static_modname_to_modpath('_tmproot927.__init__', hide_init=False, hide_main=False) == root_init assert _static_modname_to_modpath('_tmproot927.__main__', hide_init=False, hide_main=False) == root_main if 2: 
# Test in a nested directory # Test init assert _static_modname_to_modpath('_tmproot927.sub1.sub2', hide_init=False) == sub2_init assert _static_modname_to_modpath('_tmproot927.sub1.sub2.__init__', hide_init=False) == sub2_init assert _static_modname_to_modpath('_tmproot927.sub1.sub2.__main__', hide_init=False, hide_main=True) == sub2 # Test main assert _static_modname_to_modpath('_tmproot927.sub1.sub2', hide_main=False) == sub2 assert _static_modname_to_modpath('_tmproot927.sub1.sub2.__main__', hide_main=False) == sub2_main assert _static_modname_to_modpath('_tmproot927.sub1.sub2.__init__', hide_main=False) == sub2 # Test init and main both false assert _static_modname_to_modpath('_tmproot927.sub1.sub2.__init__', hide_main=True) == sub2 assert _static_modname_to_modpath('_tmproot927.sub1.sub2.__main__', hide_main=True) == sub2 # Test init and main both true assert _static_modname_to_modpath('_tmproot927.sub1.sub2', hide_init=False, hide_main=False) == sub2_init assert _static_modname_to_modpath('_tmproot927.sub1.sub2.__init__', hide_init=False, hide_main=False) == sub2_init assert _static_modname_to_modpath('_tmproot927.sub1.sub2.__main__', hide_init=False, hide_main=False) == sub2_main if 3: # Test in a nested directory with __init__ but no __main__ # Test init assert _static_modname_to_modpath('_tmproot927.sub1', hide_init=False) == sub1_init assert _static_modname_to_modpath('_tmproot927.sub1.__init__', hide_init=False) == sub1_init assert _static_modname_to_modpath('_tmproot927.sub1.__main__', hide_init=False) is None # Test main assert _static_modname_to_modpath('_tmproot927.sub1', hide_main=False) == sub1 assert _static_modname_to_modpath('_tmproot927.sub1.__main__', hide_main=False) is None assert _static_modname_to_modpath('_tmproot927.sub1.__init__', hide_main=False) == sub1 # Test init and main both false assert _static_modname_to_modpath('_tmproot927.sub1.__init__') == sub1 assert _static_modname_to_modpath('_tmproot927.sub1.__main__') is None # Test init and main both true assert _static_modname_to_modpath('_tmproot927.sub1', hide_init=False, hide_main=False) == sub1_init assert _static_modname_to_modpath('_tmproot927.sub1.__init__', hide_init=False, hide_main=False) == sub1_init assert _static_modname_to_modpath('_tmproot927.sub1.__main__', hide_init=False, hide_main=False) is None assert '_tmproot927' not in sys.modules assert '_tmproot927.mod0' not in sys.modules assert '_tmproot927.sub1' not in sys.modules assert '_tmproot927.sub1.mod1' not in sys.modules assert '_tmproot927.sub1.sub2' not in sys.modules assert '_tmproot927.sub1.mod2.mod2' not in sys.modules def test_modname_to_modpath_namespace(): """ Ignore: import sys sys.path.append('/home/joncrall/code/xdoctest/testing') from test_static import * temp = ub.TempDir() temp.__enter__() sys.path.append(temp.dpath) temp.__exit__(None, None, None) %timeit _syspath_modname_to_modpath('xdoctest.static_analysis') %timeit _pkgutil_modname_to_modpath('xdoctest.static_analysis') """ with pytest.warns(DeprecationWarning): temp = ub.TempDir() with temp: dpath = temp.dpath # Some "bad" non-module directories tmpbad = ub.ensuredir((dpath, '_tmpbad')) # Make a submodule of a bad directory, look good. 
sub_bad = ub.ensuredir((tmpbad, 'sub_bad')) ub.touch(join(tmpbad, '_inbad.py')) subbad = ub.touch(join(sub_bad, '__init__.py')) # NOQA b0 = ub.touch(join(sub_bad, 'b0.py')) # NOQA with PythonPathContext(dpath): assert _static_modname_to_modpath('_tmpbad') is None # Tricky case, these modules look good outside of _tmpbad WOW, you # can actually import this and it works, but pkgloader still # returns None so we should too. assert _static_modname_to_modpath('_tmpbad.sub_bad') is None assert _static_modname_to_modpath('_tmpbad.sub_bad.b0') is None # We should be able to statically find all of the good module # directories. # this should all be static import sys assert '_tmpsingle' not in sys.modules assert '_tmpbad' not in sys.modules def test_package_submodules(): """ CommandLine: pytest testing/test_static.py::test_package_submodules -s xdoctest -m ~/code/ubelt/tests/test_import.py test_package_submodules pass Ignore: import sys sys.path.append('/home/joncrall/code/xdoctest/testing') from test_static import * temp = ub.TempDir() temp.__enter__() sys.path.append(temp.dpath) temp.__exit__(None, None, None) """ from xdoctest import static_analysis as static with pytest.warns(DeprecationWarning): temp = ub.TempDir() with temp: dpath = temp.dpath # Create a dummy package hierarchy root = ub.ensuredir((dpath, '_tmproot927')) sub1 = ub.ensuredir((root, 'sub1')) sub2 = ub.ensuredir((sub1, 'sub2')) root_init = ub.touch(join(root, '__init__.py')) sub1_init = ub.touch(join(sub1, '__init__.py')) sub2_init = ub.touch(join(sub2, '__init__.py')) mod0 = ub.touch(join(root, 'mod0.py')) mod1 = ub.touch(join(sub1, 'mod1.py')) mod2 = ub.touch(join(sub2, 'mod2.py')) root_main = ub.touch(join(root, '__main__.py')) sub2_main = ub.touch(join(sub2, '__main__.py')) bad1 = ub.ensuredir((root, 'bad1')) bad2 = ub.ensuredir((sub1, 'bad2')) b0 = ub.touch(join(bad1, 'b0.py')) b1 = ub.touch(join(bad2, 'b1.py')) with PythonPathContext(dpath): subpaths = sorted(static.package_modpaths(root, with_pkg=True)) # should only return files not directories assert root_init in subpaths assert sub1_init in subpaths assert sub2_init in subpaths assert root not in subpaths assert sub1 not in subpaths assert sub2 not in subpaths assert root_main in subpaths assert sub2_main in subpaths assert mod0 in subpaths assert mod1 in subpaths assert mod2 in subpaths assert bad1 not in subpaths assert b0 not in subpaths assert b1 not in subpaths assert '_tmproot927' not in sys.modules assert '_tmproot927.mod0' not in sys.modules assert '_tmproot927.sub1' not in sys.modules assert '_tmproot927.sub1.mod1' not in sys.modules assert '_tmproot927.sub1.sub2' not in sys.modules assert '_tmproot927.sub1.mod2.mod2' not in sys.modules def test_modpath_to_modname(): """ CommandLine: pytest testing/test_static.py::test_modpath_to_modname -s python testing/test_static.py test_modpath_to_modname """ with pytest.warns(DeprecationWarning): temp = ub.TempDir() with temp: dpath = temp.dpath # Create a dummy package hierarchy root = ub.ensuredir((dpath, '_tmproot927')) sub1 = ub.ensuredir((root, 'sub1')) sub2 = ub.ensuredir((sub1, 'sub2')) root_init = ub.touch(join(root, '__init__.py')) sub1_init = ub.touch(join(sub1, '__init__.py')) sub2_init = ub.touch(join(sub2, '__init__.py')) mod0 = ub.touch(join(root, 'mod0.py')) mod1 = ub.touch(join(sub1, 'mod1.py')) mod2 = ub.touch(join(sub2, 'mod2.py')) root_main = ub.touch(join(root, '__main__.py')) sub2_main = ub.touch(join(sub2, '__main__.py')) bad1 = ub.ensuredir((root, 'bad1')) bad2 = ub.ensuredir((sub1, 'bad2')) b0 = 
ub.touch(join(bad1, 'b0.py')) b1 = ub.touch(join(bad2, 'b1.py')) import os ub.modpath_to_modname(root, relativeto=os.path.dirname(dpath)) # TODO: assert correct output with PythonPathContext(dpath): assert ub.modpath_to_modname(root) == '_tmproot927' assert ub.modpath_to_modname(sub1) == '_tmproot927.sub1' assert ub.modpath_to_modname(sub2) == '_tmproot927.sub1.sub2' assert ub.modpath_to_modname(mod0) == '_tmproot927.mod0' assert ub.modpath_to_modname(mod1) == '_tmproot927.sub1.mod1' assert ub.modpath_to_modname(mod2) == '_tmproot927.sub1.sub2.mod2' assert ub.modpath_to_modname(root_init) == '_tmproot927' assert ub.modpath_to_modname(sub1_init) == '_tmproot927.sub1' assert ub.modpath_to_modname(sub2_init) == '_tmproot927.sub1.sub2' assert ub.modpath_to_modname(root_init, hide_init=False) == '_tmproot927.__init__' assert ub.modpath_to_modname(sub1_init, hide_init=False) == '_tmproot927.sub1.__init__' assert ub.modpath_to_modname(sub2_init, hide_init=False) == '_tmproot927.sub1.sub2.__init__' assert ub.modpath_to_modname(root, hide_main=True, hide_init=False) == '_tmproot927.__init__' assert ub.modpath_to_modname(sub1, hide_main=True, hide_init=False) == '_tmproot927.sub1.__init__' assert ub.modpath_to_modname(sub2, hide_main=True, hide_init=False) == '_tmproot927.sub1.sub2.__init__' assert ub.modpath_to_modname(root, hide_main=False, hide_init=False) == '_tmproot927.__init__' assert ub.modpath_to_modname(sub1, hide_main=False, hide_init=False) == '_tmproot927.sub1.__init__' assert ub.modpath_to_modname(sub2, hide_main=False, hide_init=False) == '_tmproot927.sub1.sub2.__init__' assert ub.modpath_to_modname(root, hide_main=False, hide_init=True) == '_tmproot927' assert ub.modpath_to_modname(sub1, hide_main=False, hide_init=True) == '_tmproot927.sub1' assert ub.modpath_to_modname(sub2, hide_main=False, hide_init=True) == '_tmproot927.sub1.sub2' assert ub.modpath_to_modname(root_main, hide_main=False, hide_init=True) == '_tmproot927.__main__' assert ub.modpath_to_modname(sub2_main, hide_main=False, hide_init=True) == '_tmproot927.sub1.sub2.__main__' assert ub.modpath_to_modname(root_main, hide_main=False, hide_init=True) == '_tmproot927.__main__' assert ub.modpath_to_modname(sub2_main, hide_main=False, hide_init=True) == '_tmproot927.sub1.sub2.__main__' assert ub.modpath_to_modname(root_main, hide_main=True, hide_init=True) == '_tmproot927' assert ub.modpath_to_modname(sub2_main, hide_main=True, hide_init=True) == '_tmproot927.sub1.sub2' assert ub.modpath_to_modname(root_main, hide_main=True, hide_init=False) == '_tmproot927' assert ub.modpath_to_modname(sub2_main, hide_main=True, hide_init=False) == '_tmproot927.sub1.sub2' # Non-existent / invalid modules should always be None for a, b in it.product([True, False], [True, False]): with pytest.raises(ValueError): ub.modpath_to_modname(join(sub1, '__main__.py'), hide_main=a, hide_init=b) assert ub.modpath_to_modname(b0, hide_main=a, hide_init=b) == 'b0' assert ub.modpath_to_modname(b1, hide_main=a, hide_init=b) == 'b1' with pytest.raises(ValueError): ub.modpath_to_modname(bad1, hide_main=a, hide_init=b) with pytest.raises(ValueError): ub.modpath_to_modname(bad2, hide_main=a, hide_init=b) assert '_tmproot927' not in sys.modules assert '_tmproot927.mod0' not in sys.modules assert '_tmproot927.sub1' not in sys.modules assert '_tmproot927.sub1.mod1' not in sys.modules assert '_tmproot927.sub1.sub2' not in sys.modules assert '_tmproot927.sub1.mod2.mod2' not in sys.modules def test_splitmodpath(): with pytest.raises(ValueError): 
ub.split_modpath('does/not/exists/module.py') ub.split_modpath('does/not/exists/module.py', check=False) if __name__ == '__main__': r""" CommandLine: pytest ubelt/tests/test_import.py """ import xdoctest xdoctest.doctest_module(__file__) ubelt-1.3.7/tests/test_indexable.py000066400000000000000000000211451472470106000173470ustar00rootroot00000000000000import ubelt as ub def _indexable_walker_map_v1(self, func): # This is one pattern for "mapping" a function over nested data and # preserving the structure. mapped = ub.AutoDict() mapped_walker = ub.IndexableWalker(mapped) for path, value in self: if isinstance(value, self.list_cls): mapped_walker[path] = [ub.AutoDict()] * len(value) elif not isinstance(value, self.indexable_cls): mapped_walker[path] = func(value) return mapped def _indexable_walker_map_v2(self, func): # TODO: might be reasonable to add a map attribute to the indexable walker. # This is another pattern for "mapping" a function over nested data and # preserving the structure. if isinstance(self.data, self.dict_cls): mapped = {} elif isinstance(self.data, self.list_cls): mapped = [] else: raise NotImplementedError mapped_walker = ub.IndexableWalker(mapped) for path, value in self: if isinstance(value, self.dict_cls): mapped_walker[path] = {} elif isinstance(value, self.list_cls): mapped_walker[path] = [None] * len(value) else: mapped_walker[path] = func(value) return mapped def _map_vals3(self, func): """ Defines the underlying generator used by IndexableWalker Yields: Tuple[List, object] | None: path (List) - a "path" through the nested data structure value (object) - the value indexed by that "path". Can also yield None in the case that `send` is called on the generator. Example: data = { '1': [2, 3, {4: 5}], '3': { 'foo': 'bar', 'baz': [1, 2, ['biz']], } } self = ub.IndexableWalker(data) import sys, ubelt sys.path.append(ubelt.expandpath('~/code/ubelt/tests')) from test_indexable import * # NOQA from test_indexable import _indexable_walker_map_v1, _indexable_walker_map_v2, _map_vals3 _map_vals3(self, type) _map_vals3(self, str) """ data = self.data if isinstance(data, self.dict_cls): mapped = {} elif isinstance(data, self.list_cls): mapped = [] else: raise NotImplementedError stack = [(data, mapped)] while stack: _data, _parent = stack.pop() # Create an items iterable depending on the indexable data type if isinstance(_data, self.list_cls): items = enumerate(_data) elif isinstance(_data, self.dict_cls): items = _data.items() else: raise TypeError(type(_data)) for key, value in items: if isinstance(value, self.indexable_cls): if isinstance(value, self.dict_cls): new = _parent[key] = {} elif isinstance(value, self.list_cls): new = _parent[key] = [None] * len(value) else: raise TypeError(type(value)) stack.append((value, new)) else: _parent[key] = func(value) return mapped # def _map_vals4(self, func): # for key, value, _data in _walk2(self.data): # pass # def _walk2(self, data=None, mapped=None): # """ # Defines the underlying generator used by IndexableWalker # Yields: # Tuple[List, object] | None: # path (List) - a "path" through the nested data structure # value (object) - the value indexed by that "path". # Can also yield None in the case that `send` is called on the # generator. 
# Example: # data = { # '1': [2, 3, {4: 5}], # '3': { # 'foo': 'bar', # 'baz': [1, 2, ['biz']], # } # } # self = ub.IndexableWalker(data) # list(_walk2(self)) # self = ub.IndexableWalker(data) # for key, value, _data, _prefix in _walk2(self): # print('key = {!r}'.format(key)) # print('value = {!r}'.format(value)) # print('_prefix = {!r}'.format(_prefix)) # print('_data = {!r}'.format(_data)) # print('---') # """ # if data is None: # pragma: nobranch # data = self.data # key = None # if mapped is None: # mapped = {None: } # stack = [(data, key, mapped)] # while stack: # _data, _prefix, _mapped = stack.pop() # # Create an items iterable of depending on the indexable data type # if isinstance(_data, self.list_cls): # items = enumerate(_data) # elif isinstance(_data, self.dict_cls): # items = _data.items() # else: # raise TypeError(type(_data)) # for key, value in items: # # Yield the full path to this position and its value # path = _prefix + [key] # message = yield path, key, value, _data, _prefix # # If the value at this path is also indexable, then continue # # the traversal, unless the False message was explicitly sent # # by the caller. # if message is False: # # Because the `send` method will return the next value, # # we yield a dummy value so we don't clobber the next # # item in the traversal. # yield None # else: # if isinstance(value, self.indexable_cls): # stack.append((value, key, _mapped[key])) def test_indexable_walker_map_patterns(): """ Check that we can walk through an indexable and make a deep copy """ data = { '1': [2, 3, {4: 5}], '3': { 'foo': 'bar', 'baz': [1, 2, ['biz']], } } self = ub.IndexableWalker(data) func = type mapped_v1 = _indexable_walker_map_v1(self, func) mapped_v2 = _indexable_walker_map_v2(self, func) print('data = {}'.format(ub.urepr(data, nl=1))) print('mapped_v1 = {}'.format(ub.urepr(mapped_v1, nl=1))) print('mapped_v2 = {}'.format(ub.urepr(mapped_v2, nl=1))) import pytest with pytest.warns(Warning): assert ub.indexable_allclose(mapped_v1, mapped_v2) self = ub.IndexableWalker(data) # import timerit # ti = timerit.Timerit(10, bestof=2, verbose=2) # for timer in ti.reset('time'): # with timer: self_v1 = _indexable_walker_map_v1(self, ub.identity) # for timer in ti.reset('time'): # with timer: self_v2 = _indexable_walker_map_v2(self, ub.identity) # for timer in ti.reset('time'): # with timer: self_v3 = _map_vals3(self, ub.identity) # NOQA # change auto-dict into lists when appropriate fixup = ub.IndexableWalker(self_v1) for path, value in fixup: if isinstance(value, dict) and isinstance(self[path], list): fixup[path] = [v for k, v in sorted(value.items())] import pytest with pytest.warns(Warning): assert ub.indexable_allclose(self.data, self_v2) with pytest.warns(Warning): assert ub.indexable_allclose(self.data, self_v1) with pytest.warns(Warning): assert not ub.indexable_allclose(self.data, mapped_v1) def test_walk_iter_gen_behavior(): from itertools import count import ubelt as ub # from functools import cache counter = count() @ub.memoize def tree(b, d): if d == 1: return [next(counter) for i in range(b)] else: return [tree(b, d - 1) for i in range(b)] data = tree(3, 3) # Order of operations does matter walker = ub.IndexableWalker(data) # Should use self-iter item1 = next(walker) item2 = next(walker) item3 = next(walker) print('item1 = {!r}'.format(item1)) print('item2 = {!r}'.format(item2)) print('item3 = {!r}'.format(item3)) # Should make new iters, and clobber existing ones assert list(walker) == list(walker) import pytest # Exhausting the current 
iterator will cause StopIteration list(walker) with pytest.raises(StopIteration): item4 = next(walker) # NOQA walker = ub.IndexableWalker(data) # Should make new iters, and clobber existing ones item1 = next(walker) iter(walker) item2 = next(walker) assert item1 == item2 assert item1 != next(walker) # Should make new iters walker = ub.IndexableWalker(data) c = 0 for _ in walker: try: next(walker) except StopIteration: pass c += 1 walker = ub.IndexableWalker(data) d = 0 for _ in walker: d += 1 assert d == len(list(walker)) assert d != c ubelt-1.3.7/tests/test_io.py000066400000000000000000000014431472470106000160220ustar00rootroot00000000000000from __future__ import unicode_literals from os.path import os def test_touch(): import ubelt as ub dpath = ub.Path.appdir('ubelt', 'tests').ensuredir() fpath = dpath / 'touch_file' assert not fpath.exists() ub.touch(fpath, verbose=True) assert fpath.exists() os.unlink(fpath) def test_readwrite(): import pytest import ubelt as ub dpath = ub.Path.appdir('ubelt', 'tests').ensuredir() fpath = dpath / 'testwrite.txt' if fpath.exists(): os.remove(fpath) to_write = 'utf-8 symbols Δ, Й, ק, م, ๗, あ, 叶, 葉, and 말.' with pytest.warns(DeprecationWarning): ub.writeto(fpath, to_write, verbose=True) with pytest.warns(DeprecationWarning): read_ = ub.readfrom(fpath, verbose=True) assert read_ == to_write ubelt-1.3.7/tests/test_links.py000066400000000000000000000437331472470106000165430ustar00rootroot00000000000000""" TODO: test _can_symlink=False variants on systems that can symlink. """ from os.path import isdir from os.path import isfile from os.path import islink from os.path import join, exists, relpath, dirname import ubelt as ub import pytest import os from ubelt import util_links import sys if sys.platform.startswith('win32'): try: import jaraco.windows.filesystem as jwfs except ImportError: jwfs = None def test_rel_dir_link(): """ xdoctest ~/code/ubelt/tests/test_links.py test_rel_dir_link """ import pytest import ubelt as ub if ub.WIN32 and jwfs is None: pytest.skip() # hack for windows for now. dpath = ub.Path.appdir('ubelt/tests/test_links', 'test_rel_dir_link').ensuredir() ub.delete(dpath, verbose=2) ub.ensuredir(dpath, verbose=2) real_dpath = join((dpath / 'dir1').ensuredir(), 'real') link_dpath = join((dpath / 'dir2').ensuredir(), 'link') ub.ensuredir(real_dpath) orig = os.getcwd() try: os.chdir(dpath) real_path = relpath(real_dpath, dpath) link_path = relpath(link_dpath, dpath) link = ub.symlink(real_path, link_path) # Note: on windows this is hacked. 
pointed = ub.util_links._readlink(link) resolved = os.path.realpath(ub.expandpath(join(dirname(link), pointed))) final_real_dpath = os.path.realpath(ub.expandpath(real_dpath)) if final_real_dpath != resolved: raise AssertionError(f'{final_real_dpath} != {resolved}') # assert os.path.realpath(ub.expandpath(real_dpath)) == resolved except Exception: util_links._dirstats(dpath) util_links._dirstats(join(dpath, 'dir1')) util_links._dirstats(join(dpath, 'dir2')) print('TEST FAILED: test_rel_link') print('real_dpath = {!r}'.format(real_dpath)) print('link_dpath = {!r}'.format(link_dpath)) print('real_path = {!r}'.format(real_path)) print('link_path = {!r}'.format(link_path)) try: if 'link' in vars(): print('link = {!r}'.format(link)) if 'pointed' in vars(): print('pointed = {!r}'.format(pointed)) if 'resolved' in vars(): print('resolved = {!r}'.format(resolved)) except Exception: print('...rest of the names are not available') raise finally: util_links._dirstats(dpath) util_links._dirstats(join(dpath, 'dir1')) util_links._dirstats(join(dpath, 'dir2')) os.chdir(orig) def test_rel_file_link(): import pytest import ubelt as ub if ub.WIN32 and jwfs is None: pytest.skip() # hack for windows for now. dpath = ub.Path.appdir('ubelt/tests/test_links', 'test_rel_file_link').ensuredir() ub.delete(dpath, verbose=2) ub.ensuredir(dpath, verbose=2) real_fpath = join(ub.ensuredir((dpath, 'dir1')), 'real') link_fpath = join(ub.ensuredir((dpath, 'dir2')), 'link') ub.touch(real_fpath) orig = os.getcwd() try: os.chdir(dpath) real_path = relpath(real_fpath, dpath) link_path = relpath(link_fpath, dpath) link = ub.symlink(real_path, link_path) import sys if sys.platform.startswith('win32') and isfile(link): # Note: if windows hard links the file there is no way we can # tell that it was a symlink. Just verify it exists. from ubelt import _win32_links assert _win32_links._win32_is_hardlinked(real_fpath, link_fpath) else: pointed = ub.util_links._readlink(link) resolved = os.path.realpath(ub.expandpath(join(dirname(link), pointed))) assert os.path.realpath(ub.expandpath(real_fpath)) == resolved except Exception: util_links._dirstats(dpath) util_links._dirstats(join(dpath, 'dir1')) util_links._dirstats(join(dpath, 'dir2')) print('TEST FAILED: test_rel_link') print('real_fpath = {!r}'.format(real_fpath)) print('link_fpath = {!r}'.format(link_fpath)) print('real_path = {!r}'.format(real_path)) print('link_path = {!r}'.format(link_path)) try: if 'link' in vars(): print('link = {!r}'.format(link)) if 'pointed' in vars(): print('pointed = {!r}'.format(pointed)) if 'resolved' in vars(): print('resolved = {!r}'.format(resolved)) except Exception: print('...rest of the names are not available') raise finally: util_links._dirstats(dpath) util_links._dirstats(join(dpath, 'dir1')) util_links._dirstats(join(dpath, 'dir2')) os.chdir(orig) def test_delete_symlinks(): """ CommandLine: python -m ubelt.tests.test_links test_delete_symlinks """ import pytest import ubelt as ub if ub.WIN32 and jwfs is None: pytest.skip() # hack for windows for now. 
# TODO: test that we handle broken links dpath = ub.Path.appdir('ubelt/tests/test_links', 'test_delete_links').ensuredir() happy_dpath = join(dpath, 'happy_dpath') happy_dlink = join(dpath, 'happy_dlink') happy_fpath = join(dpath, 'happy_fpath.txt') happy_flink = join(dpath, 'happy_flink.txt') broken_dpath = join(dpath, 'broken_dpath') broken_dlink = join(dpath, 'broken_dlink') broken_fpath = join(dpath, 'broken_fpath.txt') broken_flink = join(dpath, 'broken_flink.txt') def check_path_condition(path, positive, want, msg): if not want: positive = not positive msg = 'not ' + msg if not positive: util_links._dirstats(dpath) print('About to raise error: {}'.format(msg)) print('path = {!r}'.format(path)) print('exists(path) = {!r}'.format(exists(path))) print('islink(path) = {!r}'.format(islink(path))) print('isdir(path) = {!r}'.format(isdir(path))) print('isfile(path) = {!r}'.format(isfile(path))) raise AssertionError('path={} {}'.format(path, msg)) def assert_sometrace(path, want=True): # Either exists or is a broken link positive = exists(path) or islink(path) check_path_condition(path, positive, want, 'has trace') def assert_broken_link(path, want=True): if util_links._can_symlink(): print('path={} should{} be a broken link'.format( path, ' ' if want else ' not')) positive = not exists(path) and islink(path) check_path_condition(path, positive, want, 'broken link') else: # TODO: we can test this # positive = util_links._win32_is_junction(path) print('path={} should{} be a broken link (junction)'.format( path, ' ' if want else ' not')) print('cannot check this yet') # We wont be able to differentiate links and nonlinks for junctions # positive = exists(path) # check_path_condition(path, positive, want, 'broken link') util_links._dirstats(dpath) ub.delete(dpath, verbose=2) ub.ensuredir(dpath, verbose=2) util_links._dirstats(dpath) ub.ensuredir(happy_dpath, verbose=2) ub.ensuredir(broken_dpath, verbose=2) ub.touch(happy_fpath, verbose=2) ub.touch(broken_fpath, verbose=2) util_links._dirstats(dpath) ub.symlink(broken_fpath, broken_flink, verbose=2) ub.symlink(broken_dpath, broken_dlink, verbose=2) ub.symlink(happy_fpath, happy_flink, verbose=2) ub.symlink(happy_dpath, happy_dlink, verbose=2) util_links._dirstats(dpath) # Deleting the files should not delete the symlinks (windows) ub.delete(broken_fpath, verbose=2) util_links._dirstats(dpath) ub.delete(broken_dpath, verbose=2) util_links._dirstats(dpath) assert_broken_link(broken_flink, 1) assert_broken_link(broken_dlink, 1) assert_sometrace(broken_fpath, 0) assert_sometrace(broken_dpath, 0) assert_broken_link(happy_flink, 0) assert_broken_link(happy_dlink, 0) assert_sometrace(happy_fpath, 1) assert_sometrace(happy_dpath, 1) # broken symlinks no longer exist after they are deleted ub.delete(broken_dlink, verbose=2) util_links._dirstats(dpath) assert_sometrace(broken_dlink, 0) ub.delete(broken_flink, verbose=2) util_links._dirstats(dpath) assert_sometrace(broken_flink, 0) # real symlinks no longer exist after they are deleted # but the original data is fine ub.delete(happy_dlink, verbose=2) util_links._dirstats(dpath) assert_sometrace(happy_dlink, 0) assert_sometrace(happy_dpath, 1) ub.delete(happy_flink, verbose=2) util_links._dirstats(dpath) assert_sometrace(happy_flink, 0) assert_sometrace(happy_fpath, 1) def test_modify_directory_symlinks(): import pytest import ubelt as ub if ub.WIN32 and jwfs is None: pytest.skip() # hack for windows for now. 
dpath = ub.Path.appdir('ubelt/tests/test_links', 'test_modify_symlinks').ensuredir() ub.delete(dpath, verbose=2) ub.ensuredir(dpath, verbose=2) happy_dpath = dpath / 'happy_dpath' happy_dlink = dpath / 'happy_dlink' ub.ensuredir(happy_dpath, verbose=2) ub.symlink(happy_dpath, happy_dlink, verbose=2) # Test file inside directory symlink file_path1 = happy_dpath / 'file.txt' file_path2 = happy_dlink / 'file.txt' ub.touch(file_path1, verbose=2) assert file_path1.exists() assert file_path2.exists() file_path1.write_text('foo') assert file_path1.read_text() == 'foo' assert file_path2.read_text() == 'foo' file_path2.write_text('bar') assert file_path1.read_text() == 'bar' assert file_path2.read_text() == 'bar' ub.delete(file_path2, verbose=2) assert not file_path1.exists() assert not file_path2.exists() # Test directory inside directory symlink dir_path1 = happy_dpath / 'dir' dir_path2 = happy_dlink / 'dir' ub.ensuredir(dir_path1, verbose=2) assert dir_path1.exists() assert dir_path2.exists() subfile_path1 = dir_path1 / 'subfile.txt' subfile_path2 = dir_path2 / 'subfile.txt' subfile_path1.write_text('foo') assert subfile_path1.read_text() == 'foo' assert subfile_path2.read_text() == 'foo' subfile_path1.write_text('bar') assert subfile_path1.read_text() == 'bar' assert subfile_path2.read_text() == 'bar' ub.delete(dir_path1, verbose=2) assert not dir_path1.exists() assert not dir_path2.exists() def test_modify_file_symlinks(): """ CommandLine: python -m ubelt.tests.test_links test_modify_symlinks """ import pytest import ubelt as ub if ub.WIN32 and jwfs is None: pytest.skip() # hack for windows for now. # TODO: test that we handle broken links dpath = ub.Path.appdir('ubelt/tests/test_links', 'test_modify_symlinks').ensuredir() happy_fpath = dpath / 'happy_fpath.txt' happy_flink = dpath / 'happy_flink.txt' ub.touch(happy_fpath, verbose=2) ub.symlink(happy_fpath, happy_flink, verbose=2) # Test file symlink happy_fpath.write_text('foo') assert happy_fpath.read_text() == 'foo' assert happy_flink.read_text() == 'foo' happy_flink.write_text('bar') assert happy_fpath.read_text() == 'bar' assert happy_flink.read_text() == 'bar' def test_broken_link(): """ CommandLine: python -m ubelt.tests.test_links test_broken_link """ import pytest import ubelt as ub if ub.WIN32 and jwfs is None: pytest.skip() # hack for windows for now. dpath = ub.Path.appdir('ubelt/tests/test_links', 'test_broken_link').ensuredir() ub.delete(dpath, verbose=2) ub.ensuredir(dpath, verbose=2) util_links._dirstats(dpath) broken_fpath = join(dpath, 'broken_fpath.txt') broken_flink = join(dpath, 'broken_flink.txt') ub.touch(broken_fpath, verbose=2) util_links._dirstats(dpath) ub.symlink(real_path=broken_fpath, link_path=broken_flink, verbose=2) util_links._dirstats(dpath) ub.delete(broken_fpath, verbose=2) util_links._dirstats(dpath) # make sure I am sane that this is the correct check. can_symlink = util_links._can_symlink() print('can_symlink = {!r}'.format(can_symlink)) if can_symlink: # normal behavior assert islink(broken_flink) assert not exists(broken_flink) else: # on windows hard links are essentially the same file. # there is no trace that it was actually a link. 
assert exists(broken_flink) def test_cant_overwrite_file_with_symlink(): if ub.WIN32: # Can't distinguish this case on windows pytest.skip() dpath = ub.Path.appdir('ubelt/tests/test_links', 'test_cant_overwrite_file_with_symlink').ensuredir() ub.delete(dpath, verbose=2) ub.ensuredir(dpath, verbose=2) happy_fpath = join(dpath, 'happy_fpath.txt') happy_flink = join(dpath, 'happy_flink.txt') for verbose in [2, 1, 0]: print('=======') print('verbose = {!r}'.format(verbose)) ub.delete(dpath, verbose=verbose) ub.ensuredir(dpath, verbose=verbose) ub.touch(happy_fpath, verbose=verbose) ub.touch(happy_flink) # create a file where a link should be util_links._dirstats(dpath) with pytest.raises(FileExistsError): # file exists error ub.symlink(happy_fpath, happy_flink, overwrite=False, verbose=verbose) with pytest.raises(FileExistsError): # file exists error ub.symlink(happy_fpath, happy_flink, overwrite=True, verbose=verbose) def test_overwrite_symlink(): """ CommandLine: python ~/code/ubelt/tests/test_links.py test_overwrite_symlink """ import pytest import ubelt as ub if ub.WIN32 and jwfs is None: pytest.skip() # hack for windows for now. # TODO: test that we handle broken links dpath = ub.Path.appdir('ubelt/tests/test_links', 'test_overwrite_symlink').ensuredir() ub.delete(dpath, verbose=2) ub.ensuredir(dpath, verbose=2) happy_fpath = join(dpath, 'happy_fpath.txt') other_fpath = join(dpath, 'other_fpath.txt') happy_flink = join(dpath, 'happy_flink.txt') for verbose in [2, 1, 0]: print('@==========@') print('verbose = {!r}'.format(verbose)) print('[test] Setup') ub.delete(dpath, verbose=verbose) ub.ensuredir(dpath, verbose=verbose) ub.touch(happy_fpath, verbose=verbose) ub.touch(other_fpath, verbose=verbose) print('[test] Dirstats dpath') util_links._dirstats(dpath) print('[test] Create initial link (to happy)') ub.symlink(happy_fpath, happy_flink, verbose=verbose) print('[test] Dirstats dpath') util_links._dirstats(dpath) # Creating a duplicate link print('[test] Create a duplicate link (to happy)') ub.symlink(happy_fpath, happy_flink, verbose=verbose) print('[test] Dirstats dpath') util_links._dirstats(dpath) print('[test] Create an unauthorized overwrite link (to other)') with pytest.raises(Exception) as exc_info: # file exists error ub.symlink(other_fpath, happy_flink, verbose=verbose) print(' * exc_info = {!r}'.format(exc_info)) print('[test] Create an authorized overwrite link (to other)') ub.symlink(other_fpath, happy_flink, verbose=verbose, overwrite=True) print('[test] Dirstats dpath') ub.delete(other_fpath, verbose=verbose) print('[test] Create an unauthorized overwrite link (back to happy)') with pytest.raises(Exception) as exc_info: # file exists error ub.symlink(happy_fpath, happy_flink, verbose=verbose) print(' * exc_info = {!r}'.format(exc_info)) print('[test] Create an authorized overwrite link (back to happy)') ub.symlink(happy_fpath, happy_flink, verbose=verbose, overwrite=True) def _force_junction(func): from functools import wraps @wraps(func) def _wrap(*args): if not ub.WIN32: pytest.skip() from ubelt import _win32_links _win32_links.__win32_can_symlink__ = False func(*args) _win32_links.__win32_can_symlink__ = None return _wrap def test_symlink_to_rel_symlink(): """ Test a case with an absolute link to a relative link to a real path. 
""" import ubelt as ub if ub.WIN32: import pytest pytest.skip('dont try on windows') dpath = ub.Path.appdir('ubelt/tests/links/sym-to-relsym') dpath.delete().ensuredir() level1 = (dpath / 'level1').ensuredir() real = dpath / 'real' link1 = level1 / 'link1' real.touch() print('Should create') # rel_link1_to_real = os.path.relpath(real, link1.parent) # FIXME: This ub.symlink behavior seems broken link1.symlink_to(os.path.relpath(real, link1.parent)) # ub.symlink(real_path=rel_link1_to_real, link_path=link1, verbose=3) # ub.symlink(real_path=rel_link1_to_real, link_path=link2, verbose=3) """ At this point we have: ├── level1 │   ├── level2 │   │   └── link2 -> /home/joncrall/.cache/ubelt/tests/links/sym-to-relsym/level1/link1 │   └── link1 -> ../real └── real """ # _ = ub.cmd(f'tree {dpath}', verbose=3) import pytest with pytest.raises(FileExistsError): ub.symlink(real_path=real, link_path=link1, verbose=3) # ub.symlink(real_path=link1, link_path=link2, verbose=1) # class TestSymlinksForceJunction(object): fj_test_delete_symlinks = _force_junction(test_delete_symlinks) fj_test_modify_directory_symlinks = _force_junction(test_modify_directory_symlinks) fj_test_modify_file_symlinks = _force_junction(test_modify_file_symlinks) fj_test_broken_link = _force_junction(test_broken_link) fj_test_overwrite_symlink = _force_junction(test_overwrite_symlink) if __name__ == '__main__': r""" CommandLine: set PYTHONPATH=%PYTHONPATH%;C:/Users/erote/code/ubelt/ubelt/tests pytest ubelt/tests/test_links.py pytest ubelt/tests/test_links.py -s """ import xdoctest xdoctest.doctest_module(__file__) ubelt-1.3.7/tests/test_list.py000066400000000000000000000013751472470106000163720ustar00rootroot00000000000000import pytest import ubelt as ub def test_chunk_errors(): with pytest.raises(ValueError): ub.chunks(range(9)) with pytest.raises(ValueError): ub.chunks(range(9), chunksize=2, nchunks=2) with pytest.raises(ValueError): len(ub.chunks((_ for _ in range(2)), nchunks=2)) def test_chunk_total_chunksize(): gen = ub.chunks([], total=10, chunksize=4) assert len(gen) == 3 def test_chunk_total_nchunks(): gen = ub.chunks([], total=10, nchunks=4) assert len(gen) == 4 def test_chunk_len(): gen = ub.chunks([1] * 6, chunksize=3) assert len(gen) == 2 if __name__ == '__main__': r""" CommandLine: pytest tests/test_list.py """ import xdoctest xdoctest.doctest_module(__file__) ubelt-1.3.7/tests/test_orderedset.py000066400000000000000000000254651472470106000175650ustar00rootroot00000000000000import pickle import pytest import collections import sys import operator import itertools as it import random from ubelt import OrderedSet def test_pickle(): set1 = OrderedSet('abracadabra') roundtrip = pickle.loads(pickle.dumps(set1)) assert roundtrip == set1 def test_empty_pickle(): empty_oset = OrderedSet() empty_roundtrip = pickle.loads(pickle.dumps(empty_oset)) assert empty_roundtrip == empty_oset def test_order(): set1 = OrderedSet('abracadabra') assert len(set1) == 5 assert set1 == OrderedSet(['a', 'b', 'r', 'c', 'd']) assert list(reversed(set1)) == ['d', 'c', 'r', 'b', 'a'] def test_binary_operations(): set1 = OrderedSet('abracadabra') set2 = OrderedSet('simsalabim') assert set1 != set2 assert set1 & set2 == OrderedSet(['a', 'b']) assert set1 | set2 == OrderedSet(['a', 'b', 'r', 'c', 'd', 's', 'i', 'm', 'l']) assert set1 - set2 == OrderedSet(['r', 'c', 'd']) def test_indexing(): set1 = OrderedSet('abracadabra') assert set1[:] == set1 assert set1.copy() == set1 assert set1 is set1 assert set1[:] is not set1 assert set1.copy() is not set1 
assert set1[[1, 2]] == OrderedSet(['b', 'r']) assert set1[1:3] == OrderedSet(['b', 'r']) assert set1.index('b') == 1 assert set1.index(['b', 'r']) == [1, 2] with pytest.raises(KeyError): set1.index('br') class FancyIndexTester: """ Make sure we can index by a NumPy ndarray, without having to import NumPy. """ def __init__(self, indices): self.indices = indices def __iter__(self): return iter(self.indices) def __index__(self): raise TypeError("NumPy arrays have weird __index__ methods") def __eq__(self, other): # Emulate NumPy being fussy about the == operator raise TypeError def test_fancy_index_class(): set1 = OrderedSet('abracadabra') indexer = FancyIndexTester([1, 0, 4, 3, 0, 2]) assert ''.join(set1[indexer]) == 'badcar' def test_pandas_compat(): set1 = OrderedSet('abracadabra') assert set1.get_loc('b') == 1 assert set1.get_indexer(['b', 'r']) == [1, 2] def test_tuples(): set1 = OrderedSet() tup = ('tuple', 1) set1.add(tup) assert set1.index(tup) == 0 assert set1[0] == tup def test_remove(): set1 = OrderedSet('abracadabra') set1.remove('a') set1.remove('b') assert set1 == OrderedSet('rcd') assert set1[0] == 'r' assert set1[1] == 'c' assert set1[2] == 'd' assert set1.index('r') == 0 assert set1.index('c') == 1 assert set1.index('d') == 2 assert 'a' not in set1 assert 'b' not in set1 assert 'r' in set1 # Make sure we can .discard() something that's already gone, plus # something that was never there set1.discard('a') set1.discard('a') def test_remove_error(): # If we .remove() an element that's not there, we get a KeyError set1 = OrderedSet('abracadabra') with pytest.raises(KeyError): set1.remove('z') def test_clear(): set1 = OrderedSet('abracadabra') set1.clear() assert len(set1) == 0 assert set1 == OrderedSet() def test_update(): set1 = OrderedSet('abcd') result = set1.update('efgh') assert result == 7 assert len(set1) == 8 assert ''.join(set1) == 'abcdefgh' set2 = OrderedSet('abcd') result = set2.update('cdef') assert result == 5 assert len(set2) == 6 assert ''.join(set2) == 'abcdef' def test_pop(): set1 = OrderedSet('ab') elem = set1.pop() assert elem == 'b' elem = set1.pop() assert elem == 'a' pytest.raises(KeyError, set1.pop) def test_getitem_type_error(): set1 = OrderedSet('ab') with pytest.raises(TypeError): set1['a'] def test_update_value_error(): set1 = OrderedSet('ab') with pytest.raises(ValueError): # noinspection PyTypeChecker set1.update(3) def test_empty_repr(): set1 = OrderedSet() assert repr(set1) == 'OrderedSet()' def test_eq_wrong_type(): set1 = OrderedSet() assert set1 != 2 def test_ordered_equality(): # Ordered set checks order against sequences. assert OrderedSet([1, 2]) == OrderedSet([1, 2]) assert OrderedSet([1, 2]) == [1, 2] assert OrderedSet([1, 2]) == (1, 2) assert OrderedSet([1, 2]) == collections.deque([1, 2]) def test_ordered_inequality(): # Ordered set checks order against sequences. assert OrderedSet([1, 2]) != OrderedSet([2, 1]) assert OrderedSet([1, 2]) != [2, 1] assert OrderedSet([1, 2]) != [2, 1, 1] assert OrderedSet([1, 2]) != (2, 1) assert OrderedSet([1, 2]) != (2, 1, 1) # Note: in Python 2.7 deque does not inherit from Sequence, but __eq__ # contains an explicit check for this case for python 2/3 compatibility. assert OrderedSet([1, 2]) != collections.deque([2, 1]) assert OrderedSet([1, 2]) != collections.deque([2, 2, 1]) def test_comparisons(): # Comparison operators on sets actually test for subset and superset. 
assert OrderedSet([1, 2]) < OrderedSet([1, 2, 3]) assert OrderedSet([1, 2]) > OrderedSet([1]) # MutableSet subclasses aren't comparable to set on 3.3. assert OrderedSet([1, 2]) > {1} def test_unordered_equality(): # Unordered set checks order against non-sequences. assert OrderedSet([1, 2]) == {1, 2} assert OrderedSet([1, 2]) == frozenset([2, 1]) assert OrderedSet([1, 2]) == {1: 'a', 2: 'b'} assert OrderedSet([1, 2]) == {1: 1, 2: 2}.keys() assert OrderedSet([1, 2]) == {1: 1, 2: 2}.values() # Corner case: OrderedDict is not a Sequence, so we don't check for order, # even though it does have the concept of order. assert OrderedSet([1, 2]) == collections.OrderedDict([(2, 2), (1, 1)]) # Corner case: We have to treat iterators as unordered because there # is nothing to distinguish an ordered and unordered iterator assert OrderedSet([1, 2]) == iter([1, 2]) assert OrderedSet([1, 2]) == iter([2, 1]) assert OrderedSet([1, 2]) == iter([2, 1, 1]) def test_unordered_inequality(): assert OrderedSet([1, 2]) != set([]) assert OrderedSet([1, 2]) != frozenset([2, 1, 3]) assert OrderedSet([1, 2]) != {2: 'b'} assert OrderedSet([1, 2]) != {1: 1, 4: 2}.keys() assert OrderedSet([1, 2]) != {1: 1, 2: 3}.values() # Corner case: OrderedDict is not a Sequence, so we don't check for order, # even though it does have the concept of order. assert OrderedSet([1, 2]) != collections.OrderedDict([(2, 2), (3, 1)]) def allsame_(iterable, eq=operator.eq): """ returns True of all items in iterable equal each other """ iter_ = iter(iterable) try: first = next(iter_) except StopIteration: return True return all(eq(first, item) for item in iter_) def check_results_(results, datas, name): """ helper for binary operator tests. check that all results have the same value, but are different items. data and name are used to indicate what sort of tests is run. """ if not allsame_(results): raise AssertionError( 'Not all same {} for {} with datas={}'.format(results, name, datas) ) for a, b in it.combinations(results, 2): if not isinstance(a, (bool, int)): assert a is not b, name + ' should all be different items' def _operator_consistency_testdata(): """ Predefined and random data used to test operator consistency. 
""" # test case 1 data1 = OrderedSet([5, 3, 1, 4]) data2 = OrderedSet([1, 4]) yield data1, data2 # first set is empty data1 = OrderedSet([]) data2 = OrderedSet([3, 1, 2]) yield data1, data2 # second set is empty data1 = OrderedSet([3, 1, 2]) data2 = OrderedSet([]) yield data1, data2 # both sets are empty data1 = OrderedSet([]) data2 = OrderedSet([]) yield data1, data2 # random test cases rng = random.Random(0) a, b = 20, 20 for _ in range(10): data1 = OrderedSet(rng.randint(0, a) for _ in range(b)) data2 = OrderedSet(rng.randint(0, a) for _ in range(b)) yield data1, data2 yield data2, data1 def test_operator_consistency_isect(): for data1, data2 in _operator_consistency_testdata(): result1 = data1.copy() result1.intersection_update(data2) result2 = data1 & data2 result3 = data1.intersection(data2) check_results_([result1, result2, result3], datas=(data1, data2), name='isect') def test_operator_consistency_difference(): for data1, data2 in _operator_consistency_testdata(): result1 = data1.copy() result1.difference_update(data2) result2 = data1 - data2 result3 = data1.difference(data2) check_results_( [result1, result2, result3], datas=(data1, data2), name='difference' ) def test_operator_consistency_xor(): for data1, data2 in _operator_consistency_testdata(): result1 = data1.copy() result1.symmetric_difference_update(data2) result2 = data1 ^ data2 result3 = data1.symmetric_difference(data2) check_results_([result1, result2, result3], datas=(data1, data2), name='xor') def test_operator_consistency_union(): for data1, data2 in _operator_consistency_testdata(): result1 = data1.copy() result1.update(data2) result2 = data1 | data2 result3 = data1.union(data2) check_results_([result1, result2, result3], datas=(data1, data2), name='union') def test_operator_consistency_subset(): for data1, data2 in _operator_consistency_testdata(): result1 = data1 <= data2 result2 = data1.issubset(data2) result3 = set(data1).issubset(set(data2)) check_results_([result1, result2, result3], datas=(data1, data2), name='subset') def test_operator_consistency_superset(): for data1, data2 in _operator_consistency_testdata(): result1 = data1 >= data2 result2 = data1.issuperset(data2) result3 = set(data1).issuperset(set(data2)) check_results_( [result1, result2, result3], datas=(data1, data2), name='superset' ) def test_operator_consistency_disjoint(): for data1, data2 in _operator_consistency_testdata(): result1 = data1.isdisjoint(data2) result2 = len(data1.intersection(data2)) == 0 check_results_([result1, result2], datas=(data1, data2), name='disjoint') def test_bitwise_and_consistency(): # Specific case that was failing without explicit __and__ definition data1 = OrderedSet([12, 13, 1, 8, 16, 15, 9, 11, 18, 6, 4, 3, 19, 17]) data2 = OrderedSet([19, 4, 9, 3, 2, 10, 15, 17, 11, 13, 20, 6, 14, 16, 8]) result1 = data1.copy() result1.intersection_update(data2) # This requires a custom & operation apparently result2 = data1 & data2 result3 = data1.intersection(data2) check_results_([result1, result2, result3], datas=(data1, data2), name='isect') ubelt-1.3.7/tests/test_oset.py000066400000000000000000000103131472470106000163610ustar00rootroot00000000000000import random import ubelt as ub import itertools as it def test_operators(): """ CommandLine: python ~/code/ubelt/ubelt/tests/test_oset.py test_operators """ rng = random.Random(0) def random_oset(rng, a=20, b=20): return ub.OrderedSet(rng.randint(0, a) for _ in range(b)) def check_results(*results, **kw): name = kw.get('name', 'set test') datas = kw.get('datas', []) if not 
ub.allsame(results): raise AssertionError('Not all same {} for {} with datas={}'.format( results, name, datas)) for a, b in it.combinations(results, 2): if not isinstance(a, (bool, int)): assert a is not b, name + ' should all be different items' def operator_tests(data1, data2): result1 = data1.copy() print('====') print('data1 = {!r}'.format(data1)) print('data2 = {!r}'.format(data2)) result1.intersection_update(data2) result2 = (data1 & data2) result3 = (data1.intersection(data2)) print('result1 = {!r} result1.intersection_update(data2)'.format(result1)) print('result2 = {!r} (data1 & data2) '.format(result2)) print('result3 = {!r} (data1.intersection(data2))'.format(result3)) check_results(result1, result2, result3, datas=(data1, data2), name='isect') result1 = data1.copy() result1.difference_update(data2) result2 = (data1 - data2) result3 = (data1.difference(data2)) check_results(result1, result2, result3, datas=(data1, data2), name='-') result1 = data1.copy() result1.symmetric_difference_update(data2) result2 = (data1 ^ data2) result3 = (data1.symmetric_difference(data2)) check_results(result1, result2, result3, datas=(data1, data2), name='xor') result1 = data1.copy() result1.update(data2) result2 = (data1 | data2) result3 = (data1.union(data2)) check_results(result1, result2, result3, datas=(data1, data2), name='union') result1 = data1 <= data2 result2 = data1.issubset(data2) result3 = set(data1).issubset(set(data2)) check_results(result1, result2, result3, datas=(data1, data2), name='subset') result1 = data1 >= data2 result2 = data1.issuperset(data2) result3 = set(data1).issuperset(set(data2)) check_results(result1, result2, result3, datas=(data1, data2), name='superset') result1 = data1.isdisjoint(data2) result2 = len(data1.intersection(data2)) == 0 check_results(result1, result2, datas=(data1, data2), name='disjoint') # run tests on standard test cases data1 = ub.OrderedSet([5, 3, 1, 4]) data2 = ub.OrderedSet([1, 4]) operator_tests(data1, data2) operator_tests(data2, data1) data1 = ub.OrderedSet([]) data2 = ub.OrderedSet([]) operator_tests(data1, data2) data1 = ub.OrderedSet([3, 1, 2]) data2 = ub.OrderedSet([]) operator_tests(data1, data2) operator_tests(data2, data1) # run tests on random test cases for _ in range(10): data1 = random_oset(rng) data2 = random_oset(rng) operator_tests(data1, data2) operator_tests(data2, data1) def test_equality(): def check(a, b): # Self checks assert a == a assert a >= a assert a <= a assert not a < a assert not a > a assert not a != a # Lesser checks assert a < b assert a <= b assert a != b assert not a == b # Greater checks assert b > a assert b >= a assert b != a assert not b == a a = ub.oset([]) b = ub.oset([1]) c = ub.oset([1, 2]) d = ub.oset([1, 2, 3]) check(a, b) check(b, c) check(c, d) check(a, d) check(a, d) if __name__ == '__main__': r""" CommandLine: python ~/code/ubelt/ubelt/tests/test_oset.py test_equality pytest ~/code/ubelt/ubelt/tests/test_oset.py """ import xdoctest xdoctest.doctest_module(__file__) ubelt-1.3.7/tests/test_path.py000066400000000000000000000252741472470106000163570ustar00rootroot00000000000000from os.path import exists, join import ubelt as ub # DEBUG_PATH = ub.Path.home().name == 'joncrall' def test_pathlib_compatability(): import pathlib base = pathlib.Path(ub.Path.appdir('ubelt').ensuredir()) dpath = base.joinpath('test_pathlib_mkdir') # ensuredir ub.delete(dpath) assert not dpath.exists() got = ub.ensuredir(dpath) assert got.exists() # shrinkuser assert ub.shrinkuser(base).startswith('~') assert 
ub.augpath(base, prefix='foo').endswith('fooubelt') assert not ub.expandpath(base).startswith('~') def test_tempdir(): import pytest with pytest.warns(DeprecationWarning): temp = ub.TempDir() assert temp.dpath is None temp.ensure() assert exists(temp.dpath) # Double ensure for coverage temp.ensure() assert exists(temp.dpath) dpath = temp.dpath temp.cleanup() assert not exists(dpath) assert temp.dpath is None def test_augpath_identity(): assert ub.augpath('foo') == 'foo' assert ub.augpath('foo/bar') == join('foo', 'bar') assert ub.augpath('') == '' def test_augpath_dpath(): assert ub.augpath('foo', dpath='bar') == join('bar', 'foo') assert ub.augpath('foo/bar', dpath='baz') == join('baz', 'bar') assert ub.augpath('', dpath='bar').startswith('bar') def test_ensuredir_recreate(): import pytest base = ub.Path.appdir('ubelt/tests').ensuredir() folder = join(base, 'foo') member = join(folder, 'bar') with pytest.warns(DeprecationWarning): ub.ensuredir(folder, recreate=True) ub.ensuredir(member) assert exists(member) with pytest.warns(DeprecationWarning): ub.ensuredir(folder, recreate=True) assert not exists(member) def test_ensuredir_verbosity(): base = ub.Path.appdir('ubelt/tests').ensuredir() with ub.CaptureStdout() as cap: ub.ensuredir(join(base, 'foo'), verbose=0) assert cap.text == '' # None defaults to verbose=0 with ub.CaptureStdout() as cap: ub.ensuredir((base, 'foo'), verbose=None) assert cap.text == '' ub.delete(join(base, 'foo')) with ub.CaptureStdout() as cap: ub.ensuredir(join(base, 'foo'), verbose=1) assert 'creating' in cap.text with ub.CaptureStdout() as cap: ub.ensuredir(join(base, 'foo'), verbose=1) assert 'existing' in cap.text def demo_nested_paths(dpath, nfiles=2, ndirs=1, depth=0): for idx in range(nfiles): (dpath / f'file_{idx}.txt').write_text(f'hello world idx={idx} depth={depth}') subdirs = [] for idx in range(ndirs): subdir = (dpath / f'subdir_{idx}').ensuredir() subdirs.append(subdir) if depth > 0: for subdir in subdirs: demo_nested_paths(subdir, nfiles=nfiles, ndirs=ndirs, depth=depth - 1) def relative_contents(dpath): return [p.relative_to(dpath) for p in sorted(dpath.glob('**'), key=str)] def test_copy_directory_cases(): """ Ignore: cases = [ {'dst': '{}'}, ] """ import pytest import ubelt as ub base = ub.Path.appdir('ubelt/tests/path/copy_move').delete().ensuredir() root1 = (base / 'root1').ensuredir() root2 = (base / 'root2').ensuredir() paths = { 'empty': root1 / 'empty', 'shallow': root1 / 'shallow', 'deep': root1 / 'deep', } for d in paths.values(): d.ensuredir() demo_nested_paths(paths['shallow']) demo_nested_paths(paths['deep'], depth=3) # Instead you can always expect <dst> to be the same as # the path that gets created (never <dst>/<src.name>). 
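# For example (an illustrative sketch of the assertions below, not a
# statement of the documented API):
#
#     new_dpath = src.copy(dst)   # new_dpath == dst, never dst / src.name
#
# i.e. Path.copy treats ``dst`` as the literal destination path.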
for key, src in paths.items(): for meta in ['stats', 'mode', None]: kwargs = { 'meta': meta } root2.delete().ensuredir() # Because root2 exists we error if overwrite is False with pytest.raises(FileExistsError): src.copy(root2, **kwargs) # When overwrite is True, the copy is allowed src.copy(root2, overwrite=True, **kwargs) relative_contents(root2) contents1 = relative_contents(src) contents2 = relative_contents(root2) assert contents1 == contents2 # We can copy to a directory that doesn't exist root2.delete().ensuredir() new_dpath = src.copy(root2 / src.name, **kwargs) assert new_dpath.name == src.name contents2 = relative_contents(new_dpath) # But we can't do it again with pytest.raises(FileExistsError): src.copy(root2 / src.name, **kwargs) assert contents2 == relative_contents(new_dpath) # Unless overwrite is True new_dpath = src.copy(root2 / src.name, overwrite=True, **kwargs) # And in all cases the contents should be unchanged assert contents2 == relative_contents(new_dpath) # Test copy src into root2/sub1/sub2 when root2/sub1 does not exist root2.delete().ensuredir() dst = root2 / 'sub1/sub2' new_dpath = src.copy(dst, **kwargs) assert new_dpath.name == 'sub2' # Unlike cp, Path.copy will create the intermediate directories assert contents1 == relative_contents(new_dpath) if 0 and ub.LINUX: """ In all cases we have a folder <src> = <parent>/<name> with members <src>/<member> and a destination <dst> = <parent2>/<name> """ verbose = 2 """ Case: copy ``<parent>/<name>`` into ``<dst>`` and ``<dst>/<name>`` does not exist, then cp will result in ``<dst>/<name>/<member>`` THEN copy ``<parent>/<name>`` into ``<dst>`` and ``<dst>/<name>`` exists, then cp will result in ``<dst>/<name>/<member>`` CP recognizes that ``<dst>/<name>`` does not exist and makes a new directory <dst>/<name> to correspond to the <src> THEN CP recognizes that ``<dst>/<name>`` does exist and assumes that it should correspond to <src> """ root2.delete().ensuredir() dst = root2 ub.cmd(f"cp -rv {src} {dst}", verbose=verbose) contents1 = relative_contents(src) contents2 = relative_contents(root2) assert len(contents1) == (len(contents2) - 1) ub.cmd(f"cp -rv {src} {dst}", verbose=verbose) contents1 = relative_contents(src) contents2 = relative_contents(root2) assert len(contents1) == (len(contents2) - 1) """ Case: copy ``<parent>/<name>`` into ``<dst>/<name2>`` and ``<dst>/<name2>`` does not exist, then cp will result in ``<dst>/<name2>/<member>`` THEN copy ``<parent>/<name>`` into ``<dst>/<name2>`` and ``<dst>/<name2>`` does exist, then cp will result in ``<dst>/<name2>/<name>/<member>`` Because <name2> in <dst> does not exist, it assumes you want to effectively *change the name* of your folder. THEN This is the weird case that Path.copy avoids by not pretending that you can use the directory name from the source implicitly in the destination. 
""" root2.delete().ensuredir() name2 = f'{src.name}2' dst = root2 / name2 ub.cmd(f"cp -rv {src} {dst}", verbose=verbose) contents1 = relative_contents(src) contents2 = relative_contents(root2) assert len(contents1) == (len(contents2) - 1) dst = root2 / name2 ub.cmd(f"cp -rv {src} {dst}", verbose=verbose) contents1 = relative_contents(src) contents2 = relative_contents(root2) assert len(contents1) * 2 == (len(contents2) - 1) """ Case: copy ``/`` into ``//`` and ``/`` does not exist, then cp will error because it wont create intermediate directories """ root2.delete().ensuredir() dst = root2 / 'sub1/sub2' info = ub.cmd(f"cp -rv {src} {dst}", verbose=verbose) assert info['ret'] == 1 contents2 = relative_contents(root2) assert len(contents2) == 1 def test_move_directory_cases(): """ Ignore: cases = [ {'dst': '{}'}, ] """ import pytest import ubelt as ub base = ub.Path.appdir('ubelt/tests/path/move').delete().ensuredir() root1 = (base / 'root1').ensuredir() root2 = (base / 'root2').ensuredir() paths = { 'empty': root1 / 'empty', 'shallow': root1 / 'shallow', 'deep': root1 / 'deep', } for d in paths.values(): d.ensuredir() # Instead you can always expect / to be the same as # /. for key, src in paths.items(): for meta in ['stats', 'mode', None]: # Reset original dires for d in paths.values(): d.ensuredir() demo_nested_paths(paths['shallow']) demo_nested_paths(paths['deep'], depth=3) kwargs = { 'meta': meta } root2.delete().ensuredir() # We cannot move to a file that exists with pytest.raises(FileExistsError): src.move(root2, **kwargs) contents1 = relative_contents(src) # We can move to a directory that doesn't exist root2.delete().ensuredir() new_dpath = src.move(root2 / src.name, **kwargs) assert new_dpath.name == src.name contents2 = relative_contents(new_dpath) assert not src.exists() assert contents1 == contents2 with pytest.raises(FileExistsError): src.move(root2 / src.name, **kwargs) # Test move src into root2/sub1/sub2 when root/sub1 does not exist # Reset original dires for d in paths.values(): d.ensuredir() demo_nested_paths(paths['shallow']) demo_nested_paths(paths['deep'], depth=3) root2.delete().ensuredir() dst = root2 / 'sub1/sub2' new_dpath = src.move(dst, **kwargs) assert new_dpath.name == 'sub2' # Unlike cp, Path.move will create the intermediate directories assert contents1 == relative_contents(new_dpath) ubelt-1.3.7/tests/test_pathlib.py000066400000000000000000000413761472470106000170470ustar00rootroot00000000000000import ubelt as ub DEBUG_PATH = 0 # ub.Path.home().name == 'joncrall' def _demo_directory_structure(): import ubelt as ub import uuid level = 0 suffix = ub.hash_data(uuid.uuid4())[0:8] dpath = ub.Path.appdir('ubelt', 'tests', 'test_path') base = (dpath / suffix).delete().ensuredir() (base / 'root' / 'dir_L0_X0_A').ensuredir() if level > 2: (base / 'root' / 'dir_L0_X0_A' / 'dir_L1_X0_B').ensuredir() if level > 1: (base / 'root' / 'dir_L0_X1_C').ensuredir() (base / 'root' / 'inside_dir').ensuredir() (base / 'root' / 'links').ensuredir() (base / 'outside_dir').ensuredir() (base / 'root' / 'file_L0_X0_a.txt').touch() if level > 1: (base / 'root' / 'dir_L0_X0_A' / 'file_L1_X0_b.txt').touch() if level > 1: (base / 'root' / 'dir_L0_X1_C' / 'file_L1_X0_c.txt').touch() (base / 'root' / 'inside_dir' / 'inside_file.txt').touch() (base / 'outside_dir' / 'outside_file.txt').touch() # Create links inside and outside the root to_abs_symlink = [] to_abs_symlink.append((base / 'root/inside_dir/inside_file.txt' , base / 'root/links/inside_flink.txt')) to_abs_symlink.append((base / 
'outside_dir/outside_file.txt' , base / 'root/links/outside_flink.txt')) to_abs_symlink.append((base / 'outside_dir' , base / 'root/links/outside_dlink')) to_abs_symlink.append((base / 'root/inside_dir' , base / 'root/links/inside_dlink')) to_abs_symlink.append((base / 'root/links/cyclic' , (base / 'root/links/cyclic/n1/n2').ensuredir() / 'loop')) to_rel_symlink = [] to_rel_symlink.append((base / 'root/inside_dir/inside_file.txt' , base / 'root/links/rel_inside_flink.txt')) to_rel_symlink.append((base / 'outside_dir/outside_file.txt' , base / 'root/links/rel_outside_flink.txt')) to_rel_symlink.append((base / 'outside_dir' , base / 'root/links/rel_outside_dlink')) to_rel_symlink.append((base / 'root/inside_dir' , base / 'root/links/rel_inside_dlink')) to_rel_symlink.append((base / 'root/links/rel_cyclic' , (base / 'root/links/rel_cyclic/n1/n2/').ensuredir() / 'rel_loop')) try: # TODO: the implementation of ubelt.symlink might be wrong when the # link target is relative. import os for real, link in to_abs_symlink: link.symlink_to(real) # ub.symlink(real, link, verbose=1) for real, link in to_rel_symlink: rel_real = os.path.relpath(real, link.parent) link.symlink_to(rel_real) # ub.symlink(rel_real, link, verbose=1) except Exception: import pytest pytest.skip('unable to symlink') if 0: import xdev xdev.tree_repr(base) return base ### MOVE TESTS def test_move_dir_to_non_existing(): base = _demo_directory_structure() root = base / 'root' if ub.LINUX: root2 = root.copy(root.augment(tail='2')) root3 = root.copy(root.augment(tail='3')) if DEBUG_PATH: import xdev xdev.tree_repr(base) root.move(base / 'our_move') if ub.LINUX: ub.cmd(f'mv {root2} {base}/linux_move', verbose=2, check=1) ub.cmd(f'mv -T {root3} {base}/linux_moveT', verbose=2, check=1) if DEBUG_PATH: import xdev xdev.tree_repr(base) if ub.LINUX: # We behave like Linux mv here in both cases here case1 = _comparable_walk(base / 'linux_move') case2 = _comparable_walk(base / 'linux_moveT') case3 = _comparable_walk(base / 'our_move') assert case1 == case2 == case3 base.delete() def test_move_to_nested_non_existing(): base = _demo_directory_structure() root = base / 'root' import platform if ub.WIN32 and platform.python_implementation() == 'PyPy': ub.util_path._patch_win32_stats_on_pypy() if ub.LINUX: root2 = root.copy(root.augment(tail='2')) root3 = root.copy(root.augment(tail='3')) if DEBUG_PATH: import xdev xdev.tree_repr(base) # shutil move will make the parent directory if it doesn't exist. 
root.move(base / 'our/move') if ub.LINUX: # Posix fails unless the parent exists (base / 'linux').ensuredir() ub.cmd(f'mv -v {root2} {base}/linux/move', verbose=2, check=1) ub.cmd(f'mv -Tv {root3} {base}/linux/moveT', verbose=2, check=1) if DEBUG_PATH: import xdev xdev.tree_repr(base) if ub.LINUX: # We behave like Linux mv here in both cases here # up to the fact that we will always create the dir, whereas mv wont case1 = _comparable_walk(base / 'linux/move') case2 = _comparable_walk(base / 'linux/moveT') case3 = _comparable_walk(base / 'our/move') assert case1 == case2 == case3 base.delete() def test_move_dir_to_existing_dir_noconflict(): base = _demo_directory_structure() root = base / 'root' (base / 'our_move').ensuredir() if ub.LINUX: root2 = root.copy(root.augment(tail='2')) root3 = root.copy(root.augment(tail='3')) (base / 'linux_move').ensuredir() (base / 'linux_moveT').ensuredir() if DEBUG_PATH: import xdev xdev.tree_repr(base) import pytest with pytest.raises(IOError): # shutil.move behaves similar to linux with -T # We are just going to disallow this case root.move(base / 'our_move') if ub.LINUX: ub.cmd(f'mv {root2} {base}/linux_move', verbose=2, check=1) ub.cmd(f'mv -T {root3} {base}/linux_moveT', verbose=2, check=1) if DEBUG_PATH: import xdev xdev.tree_repr(base) base.delete() def test_move_dir_to_existing_dir_withconflict(): base = _demo_directory_structure() root = base / 'root' bluntobject = (root / 'will_they_wont_they.txt') bluntobject.write_text('smash!') if ub.LINUX: root2 = root.copy(root.augment(tail='2')) root3 = root.copy(root.augment(tail='3')) # NOQA dst1 = (base / 'our_move').ensuredir() dst2 = (base / 'linux_move').ensuredir() dst3 = (base / 'linux_move_T').ensuredir() toclobber1 = (dst1 / 'will_they_wont_they.txt') toclobber1.write_text('I hope nobody clobbers me!') disjoint1 = (dst1 / 'disjoint.txt') disjoint1.write_text('I should be disjoint!') toclobber2 = (dst2 / 'will_they_wont_they.txt') toclobber2.write_text('I hope nobody clobbers me!') disjoint2 = (dst2 / 'disjoint.txt') disjoint2.write_text('I should be disjoint!') toclobber3 = (dst3 / 'will_they_wont_they.txt') toclobber3.write_text('I hope nobody clobbers me!') disjoint3 = (dst3 / 'disjoint.txt') disjoint3.write_text('I should be disjoint!') if DEBUG_PATH: import xdev print('BEFORE MOVE') xdev.tree_repr(base) # This case is weird, dont let the user do it. # they can use shutil if they want import pytest with pytest.raises(IOError): root.move(dst1) if 0: if ub.LINUX: ub.cmd(f'mv -v {root2} {dst2}', verbose=2, check=1) # The mv command wont move a non-empty directory! # Maybe we shouldn't either. # ub.cmd(f'mv -T -u -f -v {root3} {dst3}', verbose=3, check=1) if DEBUG_PATH: import xdev print('AFTER MOVE') xdev.tree_repr(base) got = toclobber1.read_text() # THIS IS VERY SURPRISING, the file being moved is clobbered, but the file # in the dst is safe! assert got != 'smash!' assert not bluntobject.exists() if ub.LINUX: got2 = toclobber3.read_text() assert got2 == 'smash!' assert toclobber1.exists() assert bluntobject.exists() assert disjoint1.exists() assert disjoint1.read_text() == 'I should be disjoint!' if ub.LINUX: assert disjoint2.exists() assert disjoint2.read_text() == 'I should be disjoint!' assert disjoint3.exists() assert disjoint3.read_text() == 'I should be disjoint!' 
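    # A standalone sketch of the stdlib behavior these cases build on (the
    # ``clobber_sketch`` names are illustrative, not part of the fixture):
    # shutil.move refuses to move a directory into a destination that
    # already contains an entry with the same name.
    import shutil
    _demo = (base / 'clobber_sketch').ensuredir()
    (_demo / 'dst' / 'src').ensuredir()
    (_demo / 'src').ensuredir()
    try:
        shutil.move(str(_demo / 'src'), str(_demo / 'dst'))
    except shutil.Error:
        pass  # refused: _demo/dst/src already exists
    _demo.delete()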
base.delete() ### Simple Copy Tests def test_copy_basic(): dpath = ub.Path.appdir('ubelt', 'tests', 'test_path', 'test_copy_basic') dpath.delete().ensuredir() fpath = (dpath / 'file.txt') fpath.write_text('foobar') empty_dpath = (dpath / 'empty_dir').ensuredir() full_dpath = (dpath / 'full_dir').ensuredir() (full_dpath / 'nested_file.txt').touch() if DEBUG_PATH: print('AFTER COPY') import xdev xdev.tree_repr(dpath) fpath.copy(fpath.augment(prefix='copied_')) empty_dpath.copy(empty_dpath.augment(prefix='copied_')) full_dpath.copy(full_dpath.augment(prefix='copied_')) # Doing it again will fail import pytest with pytest.raises(IOError): fpath.copy(fpath.augment(prefix='copied_')) with pytest.raises(IOError): empty_dpath.copy(empty_dpath.augment(prefix='copied_')) with pytest.raises(IOError): full_dpath.copy(full_dpath.augment(prefix='copied_')) # But with overwrite=True it is ok fpath.copy(fpath.augment(prefix='copied_'), overwrite=True) empty_dpath.copy(empty_dpath.augment(prefix='copied_'), overwrite=True) full_dpath.copy(full_dpath.augment(prefix='copied_'), overwrite=True) if DEBUG_PATH: print('AFTER COPY') import xdev xdev.tree_repr(dpath) def test_copy_meta(): dpath = ub.Path.appdir('ubelt', 'tests', 'test_path', 'test_copy_basic') dpath.delete().ensuredir() fpath = (dpath / 'file.txt') fpath.write_text('foobar') empty_dpath = (dpath / 'empty_dir').ensuredir() full_dpath = (dpath / 'full_dir').ensuredir() (full_dpath / 'nested_file.txt').touch() if DEBUG_PATH: print('AFTER COPY') import xdev xdev.tree_repr(dpath) for meta in ['stats', 'mode', None]: prefix = 'copied_' + str(meta) + '_' fpath.copy(fpath.augment(prefix=prefix), meta=meta) empty_dpath.copy(empty_dpath.augment(prefix=prefix)) full_dpath.copy(full_dpath.augment(prefix=prefix)) # TODO: verify that the metadata really did copy as intended if DEBUG_PATH: print('AFTER COPY') import xdev xdev.tree_repr(dpath) ### Simple Move Tests def test_move_basic(): dpath = ub.Path.appdir('ubelt', 'tests', 'test_path', 'test_move_basic') dpath.delete().ensuredir() fpath = (dpath / 'file.txt') fpath.write_text('foobar') empty_dpath = (dpath / 'empty_dir').ensuredir() full_dpath = (dpath / 'full_dir').ensuredir() (full_dpath / 'nested_file.txt').touch() if DEBUG_PATH: print('AFTER COPY') import xdev xdev.tree_repr(dpath) fpath.move(fpath.augment(prefix='moved_')) empty_dpath.move(empty_dpath.augment(prefix='moved_')) full_dpath.move(full_dpath.augment(prefix='moved_')) if DEBUG_PATH: print('AFTER COPY') import xdev xdev.tree_repr(dpath) def test_move_meta(): base_dpath = ub.Path.appdir('ubelt', 'tests', 'test_path', 'test_move_basic') base_dpath.delete().ensuredir() for meta in ['stats', 'mode', None]: prefix = 'copied_' + str(meta) + '_' dpath = (base_dpath / prefix).ensuredir() fpath = (dpath / 'file.txt') fpath.write_text('foobar') empty_dpath = (dpath / 'empty_dir').ensuredir() full_dpath = (dpath / 'full_dir').ensuredir() (full_dpath / 'nested_file.txt').touch() fpath.move(fpath.augment(prefix=prefix), meta=meta) empty_dpath.move(empty_dpath.augment(prefix=prefix)) full_dpath.move(full_dpath.augment(prefix=prefix)) # TODO: test that the metadata really did move as intended if DEBUG_PATH: print('AFTER MOVE') import xdev xdev.tree_repr(base_dpath) ### COPY TESTS def test_copy_dir_to_non_existing(): base = _demo_directory_structure() root = base / 'root' if DEBUG_PATH: print('BEFORE COPY') import xdev xdev.tree_repr(base) dst = root.copy(base / 'our_copy') if ub.LINUX: ub.cmd(f'cp -r {root} {base}/linux_copy', verbose=2) print(f'dst={dst}') 
if DEBUG_PATH: print('AFTER COPY') import xdev xdev.tree_repr(base) if ub.LINUX: # Our copy should behave like the linux copy case1 = _comparable_walk(base / 'our_copy') case2 = _comparable_walk(base / 'linux_copy') print('case1 = {}'.format(ub.urepr(case1, nl=1))) print('case2 = {}'.format(ub.urepr(case2, nl=1))) assert case1 == case2 base.delete() def test_copy_to_nested_non_existing_with_different_symlink_flags(): base = _demo_directory_structure() root = base / 'root' if DEBUG_PATH: import xdev xdev.tree_repr(base) root.copy(base / 'new_subdir' / 'new_root_FD0_FF1', follow_dir_symlinks=False, follow_file_symlinks=True) root.copy(base / 'new_subdir' / 'new_root_FD0_FF0', follow_dir_symlinks=False, follow_file_symlinks=False) (root / 'links' / 'cyclic').delete() (root / 'links' / 'rel_cyclic').delete() root.copy(base / 'new_subdir' / 'new_root_FD1_FF1', follow_dir_symlinks=True, follow_file_symlinks=True) root.copy(base / 'new_subdir' / 'new_root_FD1_FF0', follow_dir_symlinks=True, follow_file_symlinks=False) if DEBUG_PATH: import xdev xdev.tree_repr(base) base.delete() def test_copy_dir_to_existing_dir_noconflict(): base = _demo_directory_structure() root = base / 'root' (root / 'links' / 'cyclic').delete() (root / 'links' / 'rel_cyclic').delete() dst1 = (base / 'our_copy').ensuredir() dst2 = (base / 'linux_copy').ensuredir() dst3 = (base / 'linux_copyT').ensuredir() if DEBUG_PATH: import xdev print('BEFORE MOVE') xdev.tree_repr(base) root.copy(dst1, overwrite=True) if ub.LINUX: # We behave like linux copy with T here. ub.cmd(f'cp -r {root} {dst2}', verbose=2) ub.cmd(f'cp -r -T {root} {dst3}', verbose=2) if DEBUG_PATH: import xdev print('AFTER MOVE') xdev.tree_repr(base) if ub.LINUX: # Our copy should behave like the linux copy case1 = _comparable_walk(base / 'our_copy') case2 = _comparable_walk(base / 'linux_copy') case3 = _comparable_walk(base / 'linux_copyT') assert case1 == case3 assert case1 != case2 base.delete() def test_copy_dir_to_existing_dir_withconflict(): base = _demo_directory_structure() root = base / 'root' bluntobject = (root / 'will_they_wont_they.txt') bluntobject.write_text('smash!') dst1 = (base / 'our_copy').ensuredir() dst2 = (base / 'linux_copy').ensuredir() dst3 = (base / 'linux_copyT').ensuredir() toclobber1 = (dst1 / 'will_they_wont_they.txt') toclobber1.write_text('I hope nobody clobbers me!') disjoint1 = (dst1 / 'disjoint.txt') disjoint1.write_text('I should be disjoint!') toclobber2 = (dst2 / 'will_they_wont_they.txt') toclobber2.write_text('I hope nobody clobbers me!') disjoint2 = (dst2 / 'disjoint.txt') disjoint2.write_text('I should be disjoint!') toclobber3 = (dst3 / 'will_they_wont_they.txt') toclobber3.write_text('I hope nobody clobbers me!') disjoint3 = (dst3 / 'disjoint.txt') disjoint3.write_text('I should be disjoint!') if DEBUG_PATH: import xdev print('BEFORE MOVE') xdev.tree_repr(base) root.copy(dst1, overwrite=True) if ub.LINUX: ub.cmd(f'cp -r {root} {dst2}', verbose=2, check=1) ub.cmd(f'cp -r -T {root} {dst3}', verbose=2, check=1) if DEBUG_PATH: import xdev print('AFTER MOVE') xdev.tree_repr(base) # This behavior makes more sense to me got = toclobber1.read_text() assert got == 'smash!' if ub.LINUX: got2 = toclobber3.read_text() assert got2 == 'smash!' assert toclobber1.exists() assert bluntobject.exists() assert disjoint1.exists() assert disjoint1.read_text() == 'I should be disjoint!' if ub.LINUX: assert disjoint2.exists() assert disjoint2.read_text() == 'I should be disjoint!' 
        assert disjoint3.exists()
        assert disjoint3.read_text() == 'I should be disjoint!'

    if ub.LINUX:
        # Our copy should behave like the linux copy
        case1 = _comparable_walk(base / 'our_copy')
        case2 = _comparable_walk(base / 'linux_copy')
        case3 = _comparable_walk(base / 'linux_copyT')
        print('case1 = {}'.format(ub.urepr(case1, nl=1)))
        print('case3 = {}'.format(ub.urepr(case3, nl=1)))
        print('case2 = {}'.format(ub.urepr(case2, nl=1)))
        assert case1 == case3
        assert case1 != case2
    base.delete()


def _comparable_walk(p):
    return sorted([(tuple(sorted(f)), tuple(sorted(d))) for (r, f, d) in (p).walk()])
ubelt-1.3.7/tests/test_platform.py000066400000000000000000000026451472470106000172440ustar00rootroot00000000000000
import ubelt as ub
from os.path import expanduser, basename


def test_compressuser_without_home():
    username = basename(expanduser('~'))
    not_the_user = 'foobar_' + username
    assert ub.shrinkuser(not_the_user) == not_the_user


def test_find_path_no_path():
    candidates = list(ub.find_path('does-not-exist', path=[]))
    assert len(candidates) == 0


def _available_prog():
    # Try and find a program that exists on the machine
    import pytest
    common_progs = ['ls', 'ping', 'which']
    prog_name = None
    for cand_prog_name in common_progs:
        if ub.find_exe(cand_prog_name):
            prog_name = cand_prog_name
            break
    else:
        pytest.skip((
            'Common progs {} are not installed. '
            'Are we on a weird machine?').format(common_progs))
    return prog_name


def test_find_exe_idempotence():
    prog_name = _available_prog()
    prog_fpath = ub.find_exe(prog_name)
    assert prog_fpath == ub.find_exe(prog_fpath), (
        'find_exe with an existing path should work')


def test_find_exe_no_exist():
    assert ub.find_exe('!noexist', multi=False) is None, (
        'multi=False not found should return None')
    assert ub.find_exe('!noexist', multi=True) == [], (
        'multi=True not found should return an empty list')


if __name__ == '__main__':
    """
    pytest ubelt/tests/test_platform.py
    """
    import xdoctest
    xdoctest.doctest_module(__file__)
ubelt-1.3.7/tests/test_progiter.py000066400000000000000000000534061472470106000172540ustar00rootroot00000000000000
"""
pytest tests/test_progiter.py
"""
import sys
from io import StringIO
from xdoctest.utils import CaptureStdout
from xdoctest.utils import strip_ansi
import itertools as it

from ubelt import ProgIter


class FakeStream:
    """
    Helper to hook into and introspect when progiter writes to the display
    """
    def __init__(self, verbose=0, callback=None):
        self.verbose = verbose
        self.callback = callback
        self._callcount = 0
        self.messages = []

    def write(self, msg):
        self._callcount += 1
        self.messages.append(msg)
        if self.verbose:
            sys.stdout.write(msg)
        if self.callback is not None:
            self.callback()

    def flush(self, *args, **kw):
        ...


class FakeTimer:
    """
    Helper to hook into and introspect when progiter measures times.
    You must tic this timer yourself.
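
    Example:
        >>> timer = FakeTimer(times=[1, 2])
        >>> timer.tic()   # advance the clock by the next step (1)
        >>> timer.tic()   # advance again (2, because the steps cycle)
        >>> timer()       # reading the clock returns the accumulated time
        3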
""" def __init__(self, times=[1]): self._time = 0 self._callcount = 0 self._iter = it.cycle(times) def tic(self, step=None): if step is None: step = next(self._iter) self._time += step def __call__(self): self._callcount += 1 return self._time def test_rate_format_string(): # Less of a test than a demo rates = [1 * 10 ** i for i in range(-10, 10)] texts = [] for rate in rates: rate_format = '4.2f' if rate > .001 else 'g' # Really cool: you can embed format strings inside format strings msg = '{rate:{rate_format}}'.format(rate=rate, rate_format=rate_format) texts.append(msg) assert texts == ['1e-10', '1e-09', '1e-08', '1e-07', '1e-06', '1e-05', '0.0001', '0.001', '0.01', '0.10', '1.00', '10.00', '100.00', '1000.00', '10000.00', '100000.00', '1000000.00', '10000000.00', '100000000.00', '1000000000.00'] def test_rate_format(): # Define a function that takes some time file = StringIO() prog = ProgIter(file=file) prog.begin() prog._iters_per_second = .000001 msg = prog.format_message() rate_part = msg.split('rate=')[1].split(' Hz')[0] assert rate_part == '1e-06' prog._iters_per_second = .1 msg = prog.format_message() rate_part = msg.split('rate=')[1].split(' Hz')[0] assert rate_part == '0.10' prog._iters_per_second = 10000 msg = prog.format_message() rate_part = msg.split('rate=')[1].split(' Hz')[0] assert rate_part == '10000.00' def test_progiter(): # Define a function that takes some time def is_prime(n): return n >= 2 and not any(n % i == 0 for i in range(2, n)) N = 500 if False: file = StringIO() prog = ProgIter(range(N), clearline=False, file=file, freq=N // 10, adjust=False) file.seek(0) print(file.read()) prog = ProgIter(range(N), clearline=False) for n in prog: was_prime = is_prime(n) prog.set_extra('n=%r, was_prime=%r' % (n, was_prime,)) if (n + 1) % 128 == 0 and was_prime: prog.set_extra('n=%r, was_prime=%r EXTRA' % (n, was_prime,)) file.seek(0) print(file.read()) total = 200 N = 5000 N0 = N - total print('N = %r' % (N,)) print('N0 = %r' % (N0,)) print('\n-----') print('Demo #0: progress can be disabled and incur essentially 0 overhead') print('However, the overhead of enabled progress is minimal and typically ' 'insignificant') print('this is verbosity mode verbose=0') sequence = (is_prime(n) for n in range(N0, N)) if True: psequence = ProgIter(sequence, total=total, desc='demo0', enabled=False) list(psequence) print('\n-----') print('Demo #1: progress is shown by default in the same line') print('this is verbosity mode verbose=1') sequence = (is_prime(n) for n in range(N0, N)) if True: psequence = ProgIter(sequence, total=total, desc='demo1') list(psequence) # Default behavior adjusts frequency of progress reporting so # the performance of the loop is minimally impacted print('\n-----') print('Demo #2: clearline=False prints multiple lines.') print('Progress is only printed as needed') print('Notice the adjustment behavior of the print frequency') print('this is verbosity mode verbose=2') if True: sequence = (is_prime(n) for n in range(N0, N)) psequence = ProgIter(sequence, total=total, clearline=False, desc='demo2') list(psequence) # import utool as ut # print(ut.repr4(psequence.__dict__)) print('\n-----') print('Demo #3: Adjustments can be turned off to give constant feedback') print('this is verbosity mode verbose=3') sequence = (is_prime(n) for n in range(N0, N)) if True: psequence = ProgIter(sequence, total=total, adjust=False, clearline=False, freq=100, desc='demo3') list(psequence) def test_progiter_offset_10(): """ pytest -s tests/test_progiter.py::test_progiter_offset_10 
""" # Define a function that takes some time file = StringIO() list(ProgIter(range(10), total=20, verbose=3, start=10, file=file, freq=5, show_rate=False, show_eta=False, show_total=False, time_thresh=0)) file.seek(0) want = ['50.00% 10/20...', '75.00% 15/20...', '100.00% 20/20...'] got = [line.strip() for line in file.readlines()] if sys.platform.startswith('win32'): # nocover # on windows \r seems to be mixed up with ansi sequences from xdoctest.utils import strip_ansi got = [strip_ansi(line).strip() for line in got] assert got == want def test_progiter_offset_0(): """ pytest -s tests/test_progiter.py::test_progiter_offset_0 """ # Define a function that takes some time file = StringIO() for _ in ProgIter(range(10), total=20, verbose=3, start=0, file=file, freq=5, show_rate=False, show_eta=False, show_total=False, time_thresh=0): pass file.seek(0) want = ['0.00% 0/20...', '25.00% 5/20...', '50.00% 10/20...'] got = [line.strip() for line in file.readlines()] if sys.platform.startswith('win32'): # nocover # on windows \r seems to be mixed up with ansi sequences from xdoctest.utils import strip_ansi got = [strip_ansi(line).strip() for line in got] assert got == want def test_unknown_total(): """ Make sure a question mark is printed if the total is unknown """ iterable = (_ for _ in range(0, 10)) file = StringIO() prog = ProgIter(iterable, desc='unknown seq', file=file, show_times=False, verbose=1) for n in prog: pass file.seek(0) got = [line.strip() for line in file.readlines()] # prints an eroteme if total is unknown assert len(got) > 0, 'should have gotten something' assert all('?' in line for line in got), 'all lines should have an eroteme' def test_initial(): """ Make sure a question mark is printed if the total is unknown """ file = StringIO() prog = ProgIter(initial=9001, file=file, show_times=False, clearline=False) message = prog.format_message_parts()[1] assert strip_ansi(message) == ' 9001/?... ' def test_clearline(): """ Make sure a question mark is printed if the total is unknown pytest tests/test_progiter.py::test_clearline """ file = StringIO() # Clearline=False version should simply have a newline at the end. prog = ProgIter(file=file, show_times=False, clearline=False) before, message, after = prog.format_message_parts() assert before == '' assert strip_ansi(message).strip(' ') == '0/?...' # Clearline=True version should carriage return at the beginning and have no # newline at the end. prog = ProgIter(file=file, show_times=False, clearline=True) before, message, after = prog.format_message_parts() assert before == '\r' assert strip_ansi(message).strip(' ') == '0/?...' 
def test_disabled():
    prog = ProgIter(range(20), enabled=True)
    prog.begin()
    assert prog.started

    prog = ProgIter(range(20), enabled=False)
    prog.begin()
    prog.step()
    assert not prog.started


def test_eta_window_None():
    # nothing to check (that I can think of); run the test for coverage
    prog = ProgIter(range(20), enabled=True, eta_window=None)
    for _ in prog:
        pass


def test_adjust_freq():
    # nothing to check (that I can think of); run the test for coverage
    prog = ProgIter(range(20), enabled=True, eta_window=None,
                    rel_adjust_limit=4.0)

    # Adjust frequency up to have each update happen every 1sec or so
    prog.freq = 1
    prog.time_thresh = 1.0
    prog._max_between_count = -1.0
    prog._max_between_time = -1.0
    prog._measure_timedelta = 1
    prog._measure_countdelta = 1000
    prog._adjust_frequency()
    assert prog.freq == 4

    # Adjust frequency down to have each update happen every 1sec or so
    prog.freq = 1000
    prog.time_thresh = 1.0
    prog._max_between_count = -1.0
    prog._max_between_time = -1.0
    prog._measure_timedelta = 1
    prog._measure_countdelta = 1
    prog._adjust_frequency()
    assert prog.freq == 250

    # No need to adjust frequency to have each update happen every 1sec or so
    prog.freq = 1
    prog.time_thresh = 1.0
    prog._max_between_count = -1.0
    prog._max_between_time = -1.0
    prog._measure_timedelta = 1
    prog._measure_countdelta = 1
    prog._adjust_frequency()
    assert prog.freq == 1


def test_tqdm_compatibility():
    prog = ProgIter(range(20), total=20, miniters=17, show_times=False)
    assert prog.pos == 0
    assert prog.freq == 17
    for _ in prog:
        pass

    with CaptureStdout() as cap:
        ProgIter.write('foo')
    assert cap.text.strip() == 'foo'

    with CaptureStdout() as cap:
        prog = ProgIter(show_times=False)
        prog.set_description('new desc', refresh=False)
        prog.begin()
        prog.refresh()
        prog.close()
    assert prog.label == 'new desc'
    assert 'new desc' in cap.text.strip()

    with CaptureStdout() as cap:
        prog = ProgIter(show_times=False)
        prog.set_description('new desc', refresh=True)
        prog.close()
    assert prog.label == 'new desc'
    assert 'new desc' in cap.text.strip()

    with CaptureStdout() as cap:
        prog = ProgIter(show_times=False)
        prog.set_description_str('new desc')
        prog.begin()
        prog.refresh()
        prog.close()
    assert prog.label == 'new desc'
    assert 'new desc' in cap.text.strip()

    with CaptureStdout() as cap:
        prog = ProgIter(show_times=False)
        prog.set_postfix({'foo': 'bar'}, baz='biz', x=object(), y=2)
        prog.begin()
    assert prog.length is None
    assert 'foo=bar' in cap.text.strip()
    assert 'baz=biz' in cap.text.strip()
    assert 'y=2' in cap.text.strip()
    assert 'x=<object' in cap.text.strip()


class IntObject:
    def __init__(self):
        self.n = 0

    def inc(self):
        self.n += 1


def test_adjust_slow_early_fast_late_doesnt_spam():
    cnt = IntObject()
    fake_stream = FakeStream(verbose=0, callback=cnt.inc)
    fake_timer = FakeTimer()
    prog = ProgIter(range(1000), enabled=True, adjust=True, time_thresh=1.0,
                    rel_adjust_limit=1000000.0, homogeneous=False,
                    timer=fake_timer, stream=fake_stream)
    it = iter(prog)
    # Few slow updates at the beginning
    for i in range(10):
        next(it)
        fake_timer.tic(100)
    # Followed by a ton of extremely fast updates
    for i in range(990):
        next(it)
        fake_timer.tic(0.00001)
    # Outputs should not spam the screen with messages
    assert cnt.n < 20


def test_homogeneous_heuristic_with_iter_lengths():
    for size in range(0, 10):
        list(ProgIter(range(size), homogeneous='auto'))


def test_mixed_iteration_and_step():
    # Check to ensure nothing breaks
    for adjust in [0, 1]:
        for homogeneous in [0, 1] if adjust else [0]:
            for size in range(0, 10):
                for n_inner_steps in range(size):
                    prog = ProgIter(range(size), adjust=adjust,
                                    homogeneous=homogeneous)
                    iprog = iter(prog)
                    try:
                        while True:
                            next(iprog)
                            for k in range(n_inner_steps):
prog.step() except StopIteration: ... def check_issue_32_non_homogeneous_time_threshold_prints(): """ xdoctest ~/code/progiter/tests/test_progiter.py check_issue_32_non_homogeneous_time_threshold_prints """ from ubelt import ProgIter fake_stream = FakeStream(verbose=1) fake_timer = FakeTimer([10, 1, 30, 40, 3, 4, 10, 10, 10, 10, 10, 10]) time_thresh = 50 # fake_timer = FakeTimer([.5 * factor]) # time_thresh = 2.9 * factor N = 20 prog = ProgIter(range(N), timer=fake_timer, time_thresh=time_thresh, homogeneous='auto', stream=fake_stream, clearline=False) static_state = { 'time_thresh': prog.time_thresh, 'adjust': prog.adjust, 'homogeneous': prog.homogeneous, } states = [] def record_state(): real_display_timedelta = fake_timer._time - prog._display_measurement.time state = { 'iter_idx': prog._iter_idx, 'next_idx': prog._next_measure_idx, 'time': fake_timer._time, 'freq': prog.freq, 'curr': prog._curr_measurement, 'disp': prog._display_measurement, 'meas_td': prog._measure_timedelta, 'disp_td': prog._display_timedelta, 'real_disp_td': real_display_timedelta, 'n_disp': fake_stream._callcount, 'n_times': fake_timer._callcount, } states.append(state) return state _iter = iter(prog) prog.begin() record_state() for _ in range(N): next(_iter) record_state() fake_timer.tic() assert fake_stream._callcount == len(fake_stream.messages) prog.end() record_state() try: import ubelt as ub import pandas as pd import rich print('fake_stream.messages = {}'.format(ub.urepr(fake_stream.messages, nl=1))) print(f'prog._likely_homogeneous={prog._likely_homogeneous}') rich.print(pd.Series(static_state)) df = pd.DataFrame(states) df['displayed'] = df['n_disp'].diff().astype(bool) df['timed'] = df['n_times'].diff().astype(bool) rich.print(df.to_string()) except ImportError: ... # TODO: write actual asserts that check that displays, measurements, and # adjustments happen at the write times def test_end_message_is_displayed(): """ Older versions of progiter had a bug where the end step would not trigger if calculations were updated without a display """ import io stream = io.StringIO() prog = ProgIter(range(1000), stream=stream) for i in prog: ... stream.seek(0) text = stream.read() assert '1000/1000' in text, 'end message should have printed' def test_standalone_display(): from ubelt import ProgIter fake_stream = FakeStream(verbose=1) fake_timer = FakeTimer() time_thresh = 50 N = 20 prog = ProgIter(range(N), timer=fake_timer, time_thresh=time_thresh, homogeneous=True, stream=fake_stream, clearline=True) prog.begin() _iter = iter(prog) prog.display_message() prog.display_message() prog.display_message() fake_timer.tic(1) prog.display_message() next(_iter) prog.display_message() prog.display_message() fake_timer.tic(1) next(_iter) fake_timer.tic(1) next(_iter) fake_timer.tic(1) next(_iter) prog.display_message() assert fake_stream.messages == [ '\r 0.00% 0/20... rate=0 Hz, eta=?, total=0:00:00', '\r 0.00% 0/20... rate=0 Hz, eta=?, total=0:00:00', '\r 0.00% 0/20... rate=0 Hz, eta=?, total=0:00:00', '\r 0.00% 0/20... rate=0 Hz, eta=?, total=0:00:00', '\r 0.00% 0/20... rate=0 Hz, eta=?, total=0:00:00', '\r 5.00% 1/20... rate=1.00 Hz, eta=0:00:19, total=0:00:01', '\r 5.00% 1/20... rate=1.00 Hz, eta=0:00:19, total=0:00:01', '\r 20.00% 4/20... 
rate=1.00 Hz, eta=0:00:16, total=0:00:04'] def test_no_percent(): from ubelt import ProgIter fake_stream = FakeStream(verbose=1) fake_timer = FakeTimer() time_thresh = 50 N = 20 prog = ProgIter(range(N), timer=fake_timer, time_thresh=time_thresh, show_percent=False, homogeneous=True, stream=fake_stream, clearline=True) prog.begin() _iter = iter(prog) prog.display_message() prog.display_message() prog.display_message() fake_timer.tic(1) prog.display_message() next(_iter) prog.display_message() prog.display_message() fake_timer.tic(1) next(_iter) fake_timer.tic(1) next(_iter) fake_timer.tic(1) next(_iter) prog.display_message() assert fake_stream.messages == [ '\r 0/20... rate=0 Hz, eta=?, total=0:00:00', '\r 0/20... rate=0 Hz, eta=?, total=0:00:00', '\r 0/20... rate=0 Hz, eta=?, total=0:00:00', '\r 0/20... rate=0 Hz, eta=?, total=0:00:00', '\r 0/20... rate=0 Hz, eta=?, total=0:00:00', '\r 1/20... rate=1.00 Hz, eta=0:00:19, total=0:00:01', '\r 1/20... rate=1.00 Hz, eta=0:00:19, total=0:00:01', '\r 4/20... rate=1.00 Hz, eta=0:00:16, total=0:00:04'] def test_clearline_padding(): """ Ensure we overwrite the entire previous message """ from ubelt import ProgIter fake_stream = FakeStream(verbose=1) prog = ProgIter(range(20), time_thresh=99999999, show_percent=False, homogeneous=True, stream=fake_stream, clearline=True) prog.start() prog.display_message() msg1_len = prog._prev_msg_len assert prog._prev_msg_len > 30 assert prog._prev_msg_len < 50 assert prog.clearline, 'test requirement' prog.set_extra('a very long message') prog.step() assert prog._prev_msg_len == msg1_len, ( 'We are under the time threshold. ' 'We should not have updated the display message yet') prog.display_message() msg2_len = prog._prev_msg_len assert msg2_len > msg1_len, 'should have a longer message' # Now make a shorter line length prog.set_extra('shorter') prog.step() prog.display_message() msg3_len = prog._prev_msg_len assert msg3_len < msg2_len, 'should have a shorter message' msg1 = fake_stream.messages[-3] msg2 = fake_stream.messages[-2] msg3 = fake_stream.messages[-1] assert len(msg1) == msg1_len + 1 assert len(msg2) == msg2_len + 1 assert len(msg3) >= msg3_len + 1, 'the real third message should include padding' assert len(msg3) == msg2_len + 1, 'the real third message should include padding to clear msg2' def test_extra_callback(): from ubelt import ProgIter fake_stream = FakeStream(verbose=1) fake_timer = FakeTimer() time_thresh = 50 def build_extra(): return chr(prog._iter_idx % 26 + 97) * 3 N = 20 prog = ProgIter(range(N), timer=fake_timer, time_thresh=time_thresh, homogeneous=True, stream=fake_stream, clearline=True) prog.set_extra(build_extra) prog.begin() _iter = iter(prog) prog.display_message() prog.display_message() prog.display_message() fake_timer.tic(1) prog.display_message() next(_iter) prog.display_message() prog.display_message() fake_timer.tic(1) next(_iter) fake_timer.tic(1) next(_iter) fake_timer.tic(1) next(_iter) prog.display_message() assert fake_stream.messages == [ '\r 0.00% 0/20...aaa rate=0 Hz, eta=?, total=0:00:00', '\r 0.00% 0/20...aaa rate=0 Hz, eta=?, total=0:00:00', '\r 0.00% 0/20...aaa rate=0 Hz, eta=?, total=0:00:00', '\r 0.00% 0/20...aaa rate=0 Hz, eta=?, total=0:00:00', '\r 0.00% 0/20...aaa rate=0 Hz, eta=?, total=0:00:00', '\r 5.00% 1/20...bbb rate=1.00 Hz, eta=0:00:19, total=0:00:01', '\r 5.00% 1/20...bbb rate=1.00 Hz, eta=0:00:19, total=0:00:01', '\r 20.00% 4/20...eee rate=1.00 Hz, eta=0:00:16, total=0:00:04', ] if __name__ == '__main__': import pytest pytest.main([__file__]) 
ubelt-1.3.7/tests/test_repr.py000066400000000000000000000333101472470106000163610ustar00rootroot00000000000000import ubelt as ub def test_newlines(): import ubelt as ub dict_ = { 'k1': [[1, 2, 3], [4, 5, 6]], 'k2': [[1, 2, 3], [4, 5, 6]], } assert ub.urepr(dict_, nl=1) != ub.urepr(dict_, nl=2) assert ub.urepr(dict_, nl=2) != ub.urepr(dict_, nl=3) assert ub.urepr(dict_, nl=3) == ub.urepr(dict_, nl=4) assert ub.urepr(dict_, nl=1) == ub.codeblock( ''' { 'k1': [[1, 2, 3], [4, 5, 6]], 'k2': [[1, 2, 3], [4, 5, 6]], } ''') assert ub.urepr(dict_, nl=2) == ub.codeblock( ''' { 'k1': [ [1, 2, 3], [4, 5, 6], ], 'k2': [ [1, 2, 3], [4, 5, 6], ], } ''') def test_negative_newlines(): import ubelt as ub dict_ = { 'k1': [[1, 2, 3], [4, 5, 6]], 'k2': [[[1, 2, [1, 2, 3]], [1, 2, 3], 3], [4, 5, 6]], 'k3': [1, 2, 3], 'k4': [[[1, 2, 3], 2, 3], [4, 5, 6]], } text = ub.urepr(dict_, nl=-1) print(text) assert text == ub.codeblock( ''' { 'k1': [ [1, 2, 3], [4, 5, 6] ], 'k2': [ [ [ 1, 2, [1, 2, 3] ], [1, 2, 3], 3 ], [4, 5, 6] ], 'k3': [1, 2, 3], 'k4': [ [ [1, 2, 3], 2, 3 ], [4, 5, 6] ] } ''') def test_compact_brace(): import ubelt as ub def _nest(d, w): if d == 0: return {} else: return {'n{}'.format(d): _nest(d - 1, w + 1), 'mm{}'.format(d): _nest(d - 1, w + 1)} dict_ = _nest(d=3, w=1) result = ub.urepr(dict_, nl=4, precision=2, compact_brace=0, sort=1) print(result) assert result == ub.codeblock( ''' { 'mm3': { 'mm2': { 'mm1': {}, 'n1': {}, }, 'n2': { 'mm1': {}, 'n1': {}, }, }, 'n3': { 'mm2': { 'mm1': {}, 'n1': {}, }, 'n2': { 'mm1': {}, 'n1': {}, }, }, } ''') result = ub.urepr(dict_, nl=4, precision=2, compact_brace=1, sort=1) print(result) assert result == ub.codeblock( ''' {'mm3': {'mm2': {'mm1': {}, 'n1': {},}, 'n2': {'mm1': {}, 'n1': {},},}, 'n3': {'mm2': {'mm1': {}, 'n1': {},}, 'n2': {'mm1': {}, 'n1': {},},},} ''') def test_empty(): import ubelt as ub assert ub.urepr(list()) == '[]' assert ub.urepr(dict()) == '{}' assert ub.urepr(set()) == '{}' assert ub.urepr(tuple()) == '()' assert ub.urepr(dict(), explicit=1) == 'dict()' # Even when no braces are no, still include them when input is empty assert ub.urepr(list(), nobr=1) == '[]' assert ub.urepr(dict(), nobr=1) == '{}' assert ub.urepr(set(), nobr=1) == '{}' assert ub.urepr(tuple(), nobr=1) == '()' assert ub.urepr(dict(), nobr=1, explicit=1) == 'dict()' def test_list_of_numpy(): try: import numpy as np except ImportError: import pytest pytest.skip('numpy is optional') import ubelt as ub data = [ np.zeros((3, 3), dtype=np.int32), np.zeros((3, 10), dtype=np.int32), np.zeros((3, 20), dtype=np.int32), np.zeros((3, 30), dtype=np.int32), ] text = ub.urepr(data, nl=2) print(text) assert repr(data) == repr(eval(text)), 'should produce eval-able code' assert text == ub.codeblock( ''' [ np.array([[0, 0, 0], [0, 0, 0], [0, 0, 0]], dtype=np.int32), np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.int32), np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.int32), np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.int32), ] ''') text = ub.urepr(data, max_line_width=10000, nl=2) print(text) assert text == 
ub.codeblock( ''' [ np.array([[0, 0, 0], [0, 0, 0], [0, 0, 0]], dtype=np.int32), np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.int32), np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.int32), np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.int32), ] ''') text = ub.urepr(data, nl=1) print(text) assert text == ub.codeblock( ''' [ np.array([[0, 0, 0],[0, 0, 0],[0, 0, 0]], dtype=np.int32), np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.int32), np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.int32), np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0, 0],[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0, 0],[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.int32), ] ''' ) text = ub.urepr(data, nl=0) print(text) assert text == ub.codeblock( ''' [np.array([[0, 0, 0],[0, 0, 0],[0, 0, 0]], dtype=np.int32), np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.int32), np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.int32), np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0, 0],[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0, 0],[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.int32)] ''' ) def test_dict_of_numpy(): try: import numpy as np except ImportError: import pytest pytest.skip('numpy is optional') data = ub.odict(zip( ['one', 'two', 'three', 'four'], [ np.zeros((3, 3), dtype=np.int32), np.zeros((3, 10), dtype=np.int32), np.zeros((3, 20), dtype=np.int32), np.zeros((3, 30), dtype=np.int32), ])) text = ub.urepr(data, nl=2) print(text) assert text == ub.codeblock( ''' { 'one': np.array([[0, 0, 0], [0, 0, 0], [0, 0, 0]], dtype=np.int32), 'two': np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.int32), 'three': np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.int32), 'four': np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.int32), } ''') def test_numpy_scalar_precision(): try: import numpy as np except ImportError: import pytest pytest.skip('numpy is optional') text = 
ub.urepr(np.float32(3.333333), precision=2) assert text == '3.33' def test_urepr_tuple_keys(): data = { ('one', 'two'): 100, ('three', 'four'): 200, } text = ub.urepr(data) print(text) assert text == ub.codeblock( ''' { ('one', 'two'): 100, ('three', 'four'): 200, } ''') data = { ('one', 'two'): 100, ('three', 'four'): 200, } text = ub.urepr(data, sk=1) print(text) assert text == ub.codeblock( ''' { ('one', 'two'): 100, ('three', 'four'): 200, } ''') def test_newline_keys(): import ubelt as ub class NLRepr(object): def __repr__(self): return ub.codeblock( ''' ''') key = NLRepr() dict_ = {key: {key: [1, 2, 3, key]}} text = ub.urepr(dict_) print(text) want = ub.codeblock( ''' { : { : [ 1, 2, 3, , ], }, } ''') assert text == want text = ub.urepr(dict_, cbr=True) want = ub.codeblock( ''' {: {: [1, 2, 3, ,],},} ''') print(text) assert text == want def test_format_inf(): import ubelt as ub ub.urepr(float('inf')) ub.urepr({'a': float('inf')}) try: import numpy as np except ImportError: pass else: ub.urepr(float(np.inf), sv=1) text1 = ub.urepr(np.array([np.inf, 1, 2, np.nan, -np.inf]), sv=0) assert 'np.inf' in text1 and 'np.nan' in text1 text2 = ub.urepr(np.array([np.inf, 1, 2, np.nan, -np.inf]), sv=1) assert 'np.inf' not in text2 and 'inf' in text2 assert 'np.nan' not in text2 and 'nan' in text2 def test_autosort(): import ubelt as ub import sys dict_ = { 'k2': [[9, 2, 3], [4, 5, 2]], 'k1': [[1, 7, 3], [8, 5, 6]], } if sys.version_info[0:2] >= (3, 7): import pytest # with pytest.warns(DeprecationWarning): with pytest.deprecated_call(): assert ub.repr2(dict_, sort='auto', nl=1) == ub.codeblock( ''' { 'k1': [[1, 7, 3], [8, 5, 6]], 'k2': [[9, 2, 3], [4, 5, 2]], } ''') if sys.version_info[0:2] >= (3, 7): from collections import OrderedDict dict_ = OrderedDict(sorted(dict_.items())[::-1]) assert ub.urepr(dict_, sort='auto', nl=1) == ub.codeblock( ''' { 'k2': [[9, 2, 3], [4, 5, 2]], 'k1': [[1, 7, 3], [8, 5, 6]], } ''') def test_align_with_nobrace(): data = {'123': 123, '45': 45, '6': 6} text = ub.urepr(data, align=':') print(text) assert text == ub.codeblock( ''' { '123': 123, '45' : 45, '6' : 6, } ''') text = ub.urepr(data, align=':', nobr=1) print(text) assert text == ub.codeblock( ''' '123': 123, '45' : 45, '6' : 6, ''') if __name__ == '__main__': """ CommandLine: pytest ~/code/ubelt/ubelt/tests/test_format.py --verbose -s """ import xdoctest xdoctest.doctest_module(__file__) ubelt-1.3.7/tests/test_str.py000066400000000000000000000011071472470106000162200ustar00rootroot00000000000000import ubelt as ub def test_capture_stdout_enabled(): with ub.CaptureStdout(enabled=False) as cap: print('foobar') assert cap.text is None with ub.CaptureStdout(enabled=True) as cap: print('foobar') assert cap.text.strip() == 'foobar' def test_capture_stdout_exception(): """ CommandLine: pytest ubelt/tests/test_str.py::test_capture_stdout_exception -s """ try: with ub.CaptureStdout(enabled=True) as cap: raise Exception('foobar') except Exception: pass assert cap.text.strip() == '' ubelt-1.3.7/tests/test_stream.py000066400000000000000000000004631472470106000167070ustar00rootroot00000000000000 def test_capture_stream_error(): import ubelt as ub class DummyException(Exception): ... try: with ub.CaptureStdout() as cap: print('hello there') raise DummyException except DummyException: ... 
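    # The context manager's __exit__ still runs while the exception
    # propagates, so everything printed before the raise was captured.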
assert cap.text.startswith('hello there') ubelt-1.3.7/tests/test_time.py000066400000000000000000000113351472470106000163520ustar00rootroot00000000000000import pytest import ubelt as ub import re from xdoctest.utils import CaptureStdout def test_timer_nonewline(): with CaptureStdout() as cap: timer = ub.Timer(newline=False, verbose=1) timer.tic() timer.toc() assert cap.text.replace('u', '').startswith("\ntic('')...toc('')") def test_timestamp(): stamp = ub.timestamp() assert re.match(r'\d+-\d+-\d+T\d+[\+\-]\d+', stamp) def test_timer_default_verbosity(): with CaptureStdout() as cap: ub.Timer('').tic().toc() assert cap.text == '', 'should be quiet by default when label is not given' with CaptureStdout() as cap: ub.Timer('a label').tic().toc() assert cap.text != '', 'should be verbose by default when label is given' def test_timer_error(): try: with ub.Timer() as timer: raise Exception() except Exception: pass assert timer.elapsed > 0 def test_timestamp_corner_cases(): from datetime import datetime as datetime_cls import datetime as datetime_mod datetime = datetime_cls(1, 1, 1, 1, 1, 1, tzinfo=datetime_mod.timezone.utc) stamp = ub.timestamp(datetime) assert stamp == '0001-01-01T010101+0' def test_timeparse_minimal(): # We should always be able to parse these good_stamps = [ '2000-11-22T111111.44444Z', '2000-11-22T111111.44444+5', '2000-11-22T111111.44444-05', '2000-11-22T111111.44444-0500', '2000-11-22T111111.44444+0530', '2000-11-22T111111Z', '2000-11-22T111111+5', '2000-11-22T111111+0530', '2000-11-22T111111', ] for stamp in good_stamps: result = ub.timeparse(stamp, allow_dateutil=0) recon_stamp = ub.timestamp(result, precision=9) recon_result = ub.timeparse(recon_stamp, allow_dateutil=0) print('----') print(f'stamp = {stamp}') print(f'recon_stamp = {recon_stamp}') print(f'result = {result!r}') print(f'recon_result = {recon_result!r}') assert recon_result == result def test_timeparse_with_dateutil(): import ubelt as ub # See Also: https://github.com/dateutil/dateutil/blob/master/tests/test_isoparser.py conditional_stamps = [ 'Thu Sep 25 10:36:28 2003', 'Thu Sep 25 2003', '2003-09-25T10:49:41', '2003-09-25T10:49', '2003-09-25T10', # '2003-09-25', '20030925T104941', '20030925T1049', '20030925T10', '20030925', '2003-09-25 10:49:41,502', '199709020908', '19970902090807', '09-25-2003', '25-09-2003', '10-09-2003', '10-09-03', '2003.09.25', '09.25.2003', '25.09.2003', '10.09.2003', '10.09.03', '2003/09/25', '09/25/2003', '25/09/2003', '10/09/2003', '10/09/03', '2003 09 25', '09 25 2003', '25 09 2003', '10 09 2003', '10 09 03', '25 09 03', '03 25 Sep', '25 03 Sep', ' July 4 , 1976 12:01:02 am ', "Wed, July 10, '96", '1996.July.10 AD 12:08 PM', 'July 4, 1976', '7 4 1976', '4 jul 1976', '4 Jul 1976', '7-4-76', '19760704', '0:01:02 on July 4, 1976', 'July 4, 1976 12:01:02 am', 'Mon Jan 2 04:24:27 1995', '04.04.95 00:22', 'Jan 1 1999 11:23:34.578', '950404 122212', '3rd of May 2001', '5th of March 2001', '1st of May 2003', '0099-01-01T00:00:00', '0031-01-01T00:00:00', '20080227T21:26:01.123456789', ] for stamp in conditional_stamps: with pytest.raises(ValueError): result = ub.timeparse(stamp, allow_dateutil=False) have_dateutil = bool(ub.modname_to_modpath('dateutil')) if have_dateutil: for stamp in conditional_stamps: result = ub.timeparse(stamp) recon_stamp = ub.timestamp(result, precision=6) recon_result = ub.timeparse(recon_stamp) print('----') print(f'stamp = {stamp}') print(f'recon_stamp = {recon_stamp}') print(f'result = {result!r}') print(f'recon_result = {recon_result!r}') assert result == 
recon_result def test_timeparse_bad_stamps(): # We can never parse these types of stamps bad_stamps = [ '', 'foobar', '0000-00-00T00:00:00.0000+05' ] for stamp in bad_stamps: with pytest.raises(ValueError): ub.timeparse(stamp) if __name__ == '__main__': r""" CommandLine: python ubelt/tests/test_time.py test_timer_nonewline """ import xdoctest xdoctest.doctest_module(__file__) ubelt-1.3.7/ubelt/000077500000000000000000000000001472470106000137515ustar00rootroot00000000000000ubelt-1.3.7/ubelt/__init__.py000066400000000000000000000211021472470106000160560ustar00rootroot00000000000000""" UBelt is a "utility belt" of commonly needed utility and helper functions. It is a currated collection of top-level utilities with functionality that falls into a mixture of categories. The source code is available at `https://github.com/Erotemic/ubelt `_. We also have `Jupyter notebook demos `_. The ubelt API is organized by submodules containing related functionality. Each submodule contains top level overview documentation, and each function contains a docstring with at least one example. NOTE: The `README `_ on github contains information and examples complementary to these docs. """ __dev__ = """ AutogenInit: mkinit ubelt --diff mkinit ubelt -w # todo: get sphinx to ignore this # TODO: Lazy imports with mkinit (requires python 3.7) Testing: xdoctest ubelt """ __version__ = '1.3.7' # Deprecated functions from ubelt.util_platform import ( ensure_app_cache_dir, ensure_app_config_dir, ensure_app_data_dir, get_app_cache_dir, get_app_config_dir, get_app_data_dir, ) from ubelt.util_io import (readfrom, writeto,) from ubelt.util_str import (ensure_unicode,) from ubelt import util_format from ubelt.util_format import FormatterExtensions, repr2 __ignore__ = [ 'ensure_app_cache_dir', 'ensure_app_config_dir', 'ensure_app_data_dir', 'get_app_cache_dir', 'get_app_config_dir', 'get_app_data_dir', 'readfrom', 'writeto', 'ensure_unicode', ] __explicit__ = [ 'ensure_app_cache_dir', 'ensure_app_config_dir', 'ensure_app_data_dir', 'get_app_cache_dir', 'get_app_config_dir', 'get_app_data_dir', 'readfrom', 'writeto', 'ensure_unicode', 'util_format', 'repr2', 'FormatterExtensions', ] __submodules__ = { 'util_arg': None, 'util_cache': None, 'util_colors': None, 'util_const': None, 'util_cmd': None, 'util_dict': None, 'util_deprecate': None, 'util_download': None, 'util_download_manager': None, 'util_func': None, 'util_repr': None, 'util_futures': None, 'util_io': None, 'util_links': None, 'util_list': None, 'util_hash': None, 'util_import': None, 'util_indexable': None, 'util_memoize': None, 'util_mixins': None, 'util_path': None, 'util_platform': None, 'util_str': None, 'util_stream': None, 'util_time': None, 'util_zip': None, 'orderedset': None, 'progiter': None, } from ubelt import orderedset from ubelt import progiter from ubelt import util_arg from ubelt import util_cache from ubelt import util_cmd from ubelt import util_colors from ubelt import util_const from ubelt import util_deprecate from ubelt import util_dict from ubelt import util_download from ubelt import util_download_manager from ubelt import util_func from ubelt import util_futures from ubelt import util_hash from ubelt import util_import from ubelt import util_indexable from ubelt import util_io from ubelt import util_links from ubelt import util_list from ubelt import util_memoize from ubelt import util_mixins from ubelt import util_path from ubelt import util_platform from ubelt import util_repr from ubelt import util_str from ubelt import util_stream from 
ubelt import util_time from ubelt import util_zip # Deprecated parts of the top-level API # These functions are mostly moved into internal classes __deprecated__ = [ 'AutoOrderedDict', 'dict_diff', 'dict_isect', 'dict_subset', 'invert_dict', 'map_keys', 'map_vals', 'map_values', 'sorted_keys', 'sorted_vals', 'delete', 'touch', 'augpath', 'ensuredir', 'expandpath', 'shrinkuser', 'userhome', ] from ubelt.util_arg import (argflag, argval,) from ubelt.util_cache import (CacheStamp, Cacher,) from ubelt.util_colors import (NO_COLOR, color_text, highlight_code,) from ubelt.util_const import (NoParam,) from ubelt.util_cmd import (cmd,) from ubelt.util_dict import (AutoDict, AutoOrderedDict, SetDict, UDict, ddict, dict_diff, dict_hist, dict_isect, dict_subset, dict_union, dzip, find_duplicates, group_items, invert_dict, map_keys, map_vals, map_values, named_product, odict, sdict, sorted_keys, sorted_vals, sorted_values, udict, varied_values,) from ubelt.util_deprecate import (schedule_deprecation,) from ubelt.util_download import (download, grabdata,) from ubelt.util_download_manager import (DownloadManager,) from ubelt.util_func import (compatible, identity, inject_method,) from ubelt.util_repr import (ReprExtensions, urepr,) from ubelt.util_futures import (Executor, JobPool,) from ubelt.util_io import (delete, touch,) from ubelt.util_links import (symlink,) from ubelt.util_list import (allsame, argmax, argmin, argsort, argunique, boolmask, chunks, compress, flatten, iter_window, iterable, peek, take, unique, unique_flags,) from ubelt.util_hash import (hash_data, hash_file,) from ubelt.util_import import (import_module_from_name, import_module_from_path, modname_to_modpath, modpath_to_modname, split_modpath,) from ubelt.util_indexable import (IndexableWalker, indexable_allclose,) from ubelt.util_memoize import (memoize, memoize_method, memoize_property,) from ubelt.util_mixins import (NiceRepr,) from ubelt.util_path import (ChDir, Path, TempDir, augpath, ensuredir, expandpath, shrinkuser, userhome,) from ubelt.util_platform import (DARWIN, LINUX, POSIX, WIN32, find_exe, find_path, platform_cache_dir, platform_config_dir, platform_data_dir,) from ubelt.util_str import (codeblock, hzcat, indent, paragraph,) from ubelt.util_stream import (CaptureStdout, CaptureStream, TeeStringIO,) from ubelt.util_time import (Timer, timeparse, timestamp,) from ubelt.util_zip import (split_archive, zopen,) from ubelt.orderedset import (OrderedSet, oset,) from ubelt.progiter import (ProgIter,) __all__ = ['AutoDict', 'AutoOrderedDict', 'CacheStamp', 'Cacher', 'CaptureStdout', 'CaptureStream', 'ChDir', 'DARWIN', 'DownloadManager', 'Executor', 'FormatterExtensions', 'IndexableWalker', 'JobPool', 'LINUX', 'NO_COLOR', 'NiceRepr', 'NoParam', 'OrderedSet', 'POSIX', 'Path', 'ProgIter', 'ReprExtensions', 'SetDict', 'TeeStringIO', 'TempDir', 'Timer', 'UDict', 'WIN32', 'allsame', 'argflag', 'argmax', 'argmin', 'argsort', 'argunique', 'argval', 'augpath', 'boolmask', 'chunks', 'cmd', 'codeblock', 'color_text', 'compatible', 'compress', 'ddict', 'delete', 'dict_diff', 'dict_hist', 'dict_isect', 'dict_subset', 'dict_union', 'download', 'dzip', 'ensure_app_cache_dir', 'ensure_app_config_dir', 'ensure_app_data_dir', 'ensure_unicode', 'ensuredir', 'expandpath', 'find_duplicates', 'find_exe', 'find_path', 'flatten', 'get_app_cache_dir', 'get_app_config_dir', 'get_app_data_dir', 'grabdata', 'group_items', 'hash_data', 'hash_file', 'highlight_code', 'hzcat', 'identity', 'import_module_from_name', 'import_module_from_path', 'indent', 
'indexable_allclose', 'inject_method', 'invert_dict', 'iter_window', 'iterable', 'map_keys', 'map_vals', 'map_values', 'memoize', 'memoize_method', 'memoize_property', 'modname_to_modpath', 'modpath_to_modname', 'named_product', 'odict', 'orderedset', 'oset', 'paragraph', 'peek', 'platform_cache_dir', 'platform_config_dir', 'platform_data_dir', 'progiter', 'readfrom', 'repr2', 'schedule_deprecation', 'sdict', 'shrinkuser', 'sorted_keys', 'sorted_vals', 'sorted_values', 'split_archive', 'split_modpath', 'symlink', 'take', 'timeparse', 'timestamp', 'touch', 'udict', 'unique', 'unique_flags', 'urepr', 'userhome', 'util_arg', 'util_cache', 'util_cmd', 'util_colors', 'util_const', 'util_deprecate', 'util_dict', 'util_download', 'util_download_manager', 'util_format', 'util_func', 'util_futures', 'util_hash', 'util_import', 'util_indexable', 'util_io', 'util_links', 'util_list', 'util_memoize', 'util_mixins', 'util_path', 'util_platform', 'util_repr', 'util_str', 'util_stream', 'util_time', 'util_zip', 'varied_values', 'writeto', 'zopen'] ubelt-1.3.7/ubelt/__main__.py000066400000000000000000000004051472470106000160420ustar00rootroot00000000000000#!/usr/bin/env python """ Runs the xdoctest CLI interface for ubelt CommandLine: python -m ubelt list python -m ubelt all python -m ubelt zero """ if __name__ == '__main__': import xdoctest # type: ignore xdoctest.doctest_module('ubelt') ubelt-1.3.7/ubelt/__main__.pyi000066400000000000000000000000011472470106000162030ustar00rootroot00000000000000 ubelt-1.3.7/ubelt/_win32_jaraco.py000066400000000000000000000207001472470106000167420ustar00rootroot00000000000000""" Liberated portions of :mod:`jaraco.windows.filesystem`. Ignore: cat ~/code/ubelt/ubelt/_win32_links.py | grep -o jwfs.api cat ~/code/ubelt/ubelt/_win32_links.py | grep -o "jwfs\\.[^ ]*" | sort git clone git@github.com:jaraco/jaraco.windows.git $HOME/code cd ~/code/jaraco.windows touch jaraco/__init__.py --- Notes: liberator does not handle the ctypes attributes nicely where the definition is then modified with argtypes and restypes But it does help get a good start on the file. 
--- import liberator import ubelt as ub repo_dpath = ub.Path('~/code/jaraco.windows').expand() jwfs_modpath = repo_dpath / 'jaraco/windows/filesystem/__init__.py' jw_api_filesystem_modpath = repo_dpath / 'jaraco/windows/api/filesystem.py' jw_reparse_modpath = repo_dpath / 'jaraco/windows/reparse.py' lib = liberator.Liberator() lib.add_static('link', modpath=jwfs_modpath) lib.add_static('handle_nonzero_success', modpath=jwfs_modpath) lib.add_static('is_reparse_point', modpath=jwfs_modpath) # FIXME: argtypes / restypes lib.add_static('CreateFile', modpath=jw_api_filesystem_modpath) lib.add_static('CloseHandle', modpath=jw_api_filesystem_modpath) lib.add_static('REPARSE_DATA_BUFFER', modpath=jw_api_filesystem_modpath) lib.add_static('OPEN_EXISTING', modpath=jw_api_filesystem_modpath) lib.add_static('FILE_FLAG_OPEN_REPARSE_POINT', modpath=jw_api_filesystem_modpath) lib.add_static('FILE_FLAG_BACKUP_SEMANTICS', modpath=jw_api_filesystem_modpath) lib.add_static('FSCTL_GET_REPARSE_POINT', modpath=jw_api_filesystem_modpath) lib.add_static('INVALID_HANDLE_VALUE', modpath=jw_api_filesystem_modpath) lib.add_static('IO_REPARSE_TAG_SYMLINK', modpath=jw_api_filesystem_modpath) lib.add_static('BY_HANDLE_FILE_INFORMATION', modpath=jw_api_filesystem_modpath) lib.add_static('GetFileInformationByHandle', modpath=jw_api_filesystem_modpath) #lib.add_static('DeviceIoControl', modpath=jw_reparse_modpath) lib.expand(['jaraco']) print(lib.current_sourcecode()) """ import ctypes.wintypes import ctypes # Makes mypy happy import sys assert sys.platform == "win32" def handle_nonzero_success(result): if (result == 0): raise ctypes.WinError() class BY_HANDLE_FILE_INFORMATION(ctypes.Structure): _fields_ = [ ('file_attributes', ctypes.wintypes.DWORD), ('creation_time', ctypes.wintypes.FILETIME), ('last_access_time', ctypes.wintypes.FILETIME), ('last_write_time', ctypes.wintypes.FILETIME), ('volume_serial_number', ctypes.wintypes.DWORD), ('file_size_high', ctypes.wintypes.DWORD), ('file_size_low', ctypes.wintypes.DWORD), ('number_of_links', ctypes.wintypes.DWORD), ('file_index_high', ctypes.wintypes.DWORD), ('file_index_low', ctypes.wintypes.DWORD) ] @property def file_size(self): return ((self.file_size_high << 32) + self.file_size_low) @property def file_index(self): return ((self.file_index_high << 32) + self.file_index_low) class REPARSE_DATA_BUFFER(ctypes.Structure): _fields_ = [ ('tag', ctypes.c_ulong), ('data_length', ctypes.c_ushort), ('reserved', ctypes.c_ushort), ('substitute_name_offset', ctypes.c_ushort), ('substitute_name_length', ctypes.c_ushort), ('print_name_offset', ctypes.c_ushort), ('print_name_length', ctypes.c_ushort), ('flags', ctypes.c_ulong), ('path_buffer', (ctypes.c_byte * 1)) ] def get_print_name(self): wchar_size = ctypes.sizeof(ctypes.wintypes.WCHAR) arr_typ = (ctypes.wintypes.WCHAR * (self.print_name_length // wchar_size)) data = ctypes.byref(self.path_buffer, self.print_name_offset) return ctypes.cast(data, ctypes.POINTER(arr_typ)).contents.value def get_substitute_name(self): wchar_size = ctypes.sizeof(ctypes.wintypes.WCHAR) arr_typ = (ctypes.wintypes.WCHAR * (self.substitute_name_length // wchar_size)) data = ctypes.byref(self.path_buffer, self.substitute_name_offset) return ctypes.cast(data, ctypes.POINTER(arr_typ)).contents.value class SECURITY_ATTRIBUTES(ctypes.Structure): _fields_ = ( ('length', ctypes.wintypes.DWORD), ('p_security_descriptor', ctypes.wintypes.LPVOID), ('inherit_handle', ctypes.wintypes.BOOLEAN), ) LPSECURITY_ATTRIBUTES = ctypes.POINTER(SECURITY_ATTRIBUTES) 
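# The constants below are liberated (or hand-copied) from
# jaraco.windows.api.filesystem and mirror the corresponding values in the
# Windows SDK headers (winnt.h, winbase.h, winioctl.h). A minimal sketch of
# how they combine to open a handle to a reparse point, mirroring their use
# in ubelt._win32_links (``some_path`` is a hypothetical placeholder):
#
#   handle = CreateFile(
#       some_path, 0, 0, None, OPEN_EXISTING,
#       FILE_FLAG_OPEN_REPARSE_POINT | FILE_FLAG_BACKUP_SEMANTICS, None)
#   if handle != INVALID_HANDLE_VALUE:
#       CloseHandle(handle)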
IO_REPARSE_TAG_SYMLINK = 0xA000000C INVALID_HANDLE_VALUE = ctypes.wintypes.HANDLE((- 1)).value FSCTL_GET_REPARSE_POINT = 0x900A8 FILE_FLAG_BACKUP_SEMANTICS = 0x2000000 FILE_FLAG_OPEN_REPARSE_POINT = 0x00200000 FILE_SHARE_READ = 1 OPEN_EXISTING = 3 FILE_ATTRIBUTE_REPARSE_POINT = 0x400 GENERIC_READ = 0x80000000 INVALID_FILE_ATTRIBUTES = 0xFFFFFFFF GetFileAttributes = ctypes.windll.kernel32.GetFileAttributesW GetFileAttributes.argtypes = (ctypes.wintypes.LPWSTR,) GetFileAttributes.restype = ctypes.wintypes.DWORD CreateHardLink = ctypes.windll.kernel32.CreateHardLinkW CreateHardLink.argtypes = ( ctypes.wintypes.LPWSTR, ctypes.wintypes.LPWSTR, ctypes.wintypes.LPVOID, # reserved for LPSECURITY_ATTRIBUTES ) CreateHardLink.restype = ctypes.wintypes.BOOLEAN GetFileInformationByHandle = ctypes.windll.kernel32.GetFileInformationByHandle GetFileInformationByHandle.restype = ctypes.wintypes.BOOL GetFileInformationByHandle.argtypes = ( ctypes.wintypes.HANDLE, ctypes.POINTER(BY_HANDLE_FILE_INFORMATION), ) CloseHandle = ctypes.windll.kernel32.CloseHandle CloseHandle.argtypes = (ctypes.wintypes.HANDLE,) CloseHandle.restype = ctypes.wintypes.BOOLEAN CreateFile = ctypes.windll.kernel32.CreateFileW CreateFile.argtypes = ( ctypes.wintypes.LPWSTR, ctypes.wintypes.DWORD, ctypes.wintypes.DWORD, LPSECURITY_ATTRIBUTES, ctypes.wintypes.DWORD, ctypes.wintypes.DWORD, ctypes.wintypes.HANDLE, ) CreateFile.restype = ctypes.wintypes.HANDLE LPDWORD = ctypes.POINTER(ctypes.wintypes.DWORD) LPOVERLAPPED = ctypes.wintypes.LPVOID DeviceIoControl = ctypes.windll.kernel32.DeviceIoControl DeviceIoControl.argtypes = [ ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD, ctypes.wintypes.LPVOID, ctypes.wintypes.DWORD, ctypes.wintypes.LPVOID, ctypes.wintypes.DWORD, LPDWORD, LPOVERLAPPED, ] DeviceIoControl.restype = ctypes.wintypes.BOOL def is_reparse_point(path): """ Determine if the given path is a reparse point. Return False if the file does not exist or the file attributes cannot be determined. """ res = GetFileAttributes(path) return ((res != INVALID_FILE_ATTRIBUTES) and bool((res & FILE_ATTRIBUTE_REPARSE_POINT))) def link(target, link): """ Establishes a hard link between an existing file and a new file. """ handle_nonzero_success(CreateHardLink(link, target, None)) def _reparse_DeviceIoControl(device, io_control_code, in_buffer, out_buffer, overlapped=None): # ubelt note: name is overloaded, so we mangle it here. 
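    # Thin wrapper around kernel32 DeviceIoControl: send ``io_control_code``
    # to ``device`` and return the bytes the driver wrote into ``out_buffer``
    # (an integer ``out_buffer`` is treated as a requested buffer size).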
if overlapped is not None: raise NotImplementedError("overlapped handles not yet supported") if isinstance(out_buffer, int): out_buffer = ctypes.create_string_buffer(out_buffer) in_buffer_size = len(in_buffer) if in_buffer is not None else 0 out_buffer_size = len(out_buffer) assert isinstance(out_buffer, ctypes.Array) returned_bytes = ctypes.wintypes.DWORD() res = DeviceIoControl( device, io_control_code, in_buffer, in_buffer_size, out_buffer, out_buffer_size, returned_bytes, overlapped, ) handle_nonzero_success(res) handle_nonzero_success(returned_bytes) return out_buffer[: returned_bytes.value] # Fake the jaraco api class api: CreateFile = CreateFile CloseHandle = CloseHandle GetFileInformationByHandle = GetFileInformationByHandle BY_HANDLE_FILE_INFORMATION = BY_HANDLE_FILE_INFORMATION FILE_FLAG_BACKUP_SEMANTICS = FILE_FLAG_BACKUP_SEMANTICS FILE_FLAG_OPEN_REPARSE_POINT = FILE_FLAG_OPEN_REPARSE_POINT FILE_SHARE_READ = FILE_SHARE_READ FSCTL_GET_REPARSE_POINT = FSCTL_GET_REPARSE_POINT GENERIC_READ = GENERIC_READ INVALID_HANDLE_VALUE = INVALID_HANDLE_VALUE IO_REPARSE_TAG_SYMLINK = IO_REPARSE_TAG_SYMLINK OPEN_EXISTING = OPEN_EXISTING REPARSE_DATA_BUFFER = REPARSE_DATA_BUFFER class reparse: DeviceIoControl = _reparse_DeviceIoControl ubelt-1.3.7/ubelt/_win32_links.py000066400000000000000000000550201472470106000166260ustar00rootroot00000000000000""" For dealing with symlinks, junctions, and hard-links on windows. Note: The terminology used here was written before I really understood the difference between symlinks, hardlinks, and junctions. As such it may be inconsistent or incorrect in some places. This might be fixed in the future. References: .. [SO18883892] https://stackoverflow.com/questions/18883892/batch-file-windows-cmd-exe-test-if-a-directory-is-a-link-symlink .. [SO21561850] https://stackoverflow.com/questions/21561850/python-test-for-junction-point-target .. [WinTwoFilesSame] http://timgolden.me.uk/python/win32_how_do_i/see_if_two_files_are_the_same_file.html .. [SO6260149] https://stackoverflow.com/questions/6260149/os-symlink-support-in-windows .. [WinDesktopAA365006] https://msdn.microsoft.com/en-us/library/windows/desktop/aa365006(v=vs.85).aspx .. [SU902082] https://superuser.com/a/902082/215232 Weird Behavior: - [ ] In many cases using the win32 API seems to result in privilege errors but using shell commands does not have this problem. """ import os import warnings import platform from os.path import exists from os.path import join from ubelt import util_io from ubelt import util_path import sys if sys.platform.startswith('win32'): try: import jaraco.windows.filesystem as jwfs except ImportError: # Use vendored subset of jaraco.windows from ubelt import _win32_jaraco as jwfs __win32_can_symlink__ = None # type: bool | None def _win32_can_symlink(verbose=0, force=False, testing=False): """ Args: verbose (int): verbosity level force (bool): flag testing (bool): flag Example: >>> # xdoctest: +REQUIRES(WIN32) >>> import ubelt as ub >>> _win32_can_symlink(verbose=3, force=True, testing=True) """ global __win32_can_symlink__ if verbose: print('__win32_can_symlink__ = {!r}'.format(__win32_can_symlink__)) if __win32_can_symlink__ is not None and not force: return __win32_can_symlink__ # We have to use a unique directory otherwise we encounter multiprocess # race conditions. 
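    # mkdtemp gives each process its own scratch directory, so concurrent
    # test runs cannot clobber each other's link targets.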
import tempfile tempdir = tempfile.mkdtemp(suffix='_win32_can_symlink') # from ubelt import util_platform # tempdir = util_platform.Path.appdir('ubelt', '_win32_can_symlink').ensuredir() try: util_io.delete(tempdir) except Exception: print('ERROR IN DELETE: sys.platform={}'.format(sys.platform)) from ubelt import util_links util_links._dirstats(tempdir) raise util_path.ensuredir(tempdir) dpath = join(tempdir, 'dpath') fpath = join(tempdir, 'fpath.txt') dlink = join(tempdir, 'dlink') flink = join(tempdir, 'flink.txt') util_path.ensuredir(dpath) util_io.touch(fpath) # Add broken variants of the links for testing purposes # Its ugly, but so is all this windows code. if testing: broken_dpath = join(tempdir, 'broken_dpath') broken_fpath = join(tempdir, 'broken_fpath.txt') # Create files that we will delete after we link to them util_path.ensuredir(broken_dpath) util_io.touch(broken_fpath) try: _win32_symlink(dpath, dlink, verbose=verbose) if testing: _win32_symlink(broken_dpath, join(tempdir, 'broken_dlink'), verbose=verbose) can_symlink_directories = os.path.islink(dlink) except OSError: can_symlink_directories = False if verbose: print('can_symlink_directories = {!r}'.format(can_symlink_directories)) try: _win32_symlink(fpath, flink, verbose=verbose) if testing: _win32_symlink(broken_fpath, join(tempdir, 'broken_flink'), verbose=verbose) can_symlink_files = os.path.islink(flink) # os.path.islink(flink) except OSError: can_symlink_files = False if verbose: print('can_symlink_files = {!r}'.format(can_symlink_files)) if int(can_symlink_directories) + int(can_symlink_files) == 1: raise AssertionError( 'can do one but not both. Unexpected {} {}'.format( can_symlink_directories, can_symlink_files)) try: # test that we can create junctions, even if symlinks are disabled if verbose: print('Testing that we can create junctions, ' 'even if symlinks are disabled') # from ubelt import util_links # util_links._dirstats(tempdir) # print('^^ before ^^') djunc = _win32_junction(dpath, join(tempdir, 'djunc'), verbose=verbose) fjunc = _win32_junction(fpath, join(tempdir, 'fjunc.txt'), verbose=verbose) if testing: _win32_junction(broken_dpath, join(tempdir, 'broken_djunc'), verbose=verbose) _win32_junction(broken_fpath, join(tempdir, 'broken_fjunc.txt'), verbose=verbose) if not _win32_is_junction(djunc): print(f'Error: djunc={djunc} claims to not be a junction') from ubelt import util_links util_links._dirstats(tempdir) raise AssertionError(f'expected djunc={djunc} to be a junction') if not _win32_is_hardlinked(fpath, fjunc): print(f'Error: fjunc={fjunc} claims to not be a hardlink') from ubelt import util_links util_links._dirstats(tempdir) raise AssertionError(f'expected fjunc={fjunc} to be a hardlink') except Exception: warnings.warn('We cannot create junctions either!') raise if testing: # break the links util_io.delete(broken_dpath) util_io.delete(broken_fpath) if verbose: from ubelt import util_links util_links._dirstats(tempdir) try: # Cleanup the test directory util_io.delete(tempdir) except Exception: print('ERROR IN DELETE') from ubelt import util_links util_links._dirstats(tempdir) raise can_symlink = can_symlink_directories and can_symlink_files __win32_can_symlink__ = can_symlink if not can_symlink: warnings.warn('Cannot make real symlink. 
Falling back to junction') if verbose: print('can_symlink = {!r}'.format(can_symlink)) print('__win32_can_symlink__ = {!r}'.format(__win32_can_symlink__)) return can_symlink def _symlink(path, link, overwrite=0, verbose=0): """ Windows helper for ub.symlink """ if exists(link) and not os.path.islink(link): # On windows a broken link might still exist as a hard link or a # junction. Overwrite it if it is a file and we cannot symlink. # However, if it is a non-junction directory then do not overwrite if verbose: print('link location already exists') is_junc = _win32_is_junction(link) # NOTE: # in python2 broken junctions are directories and exist # in python3 broken junctions are directories and do not exist if os.path.isdir(link): if is_junc: pointed = _win32_read_junction(link) if path == pointed: if verbose: print('...and is a junction that points to the same place') return link else: if verbose: if not exists(pointed): print('...and is a broken junction that points somewhere else') else: print('...and is a junction that points somewhere else') else: if verbose: print('...and is an existing real directory!') raise IOError('Cannot overwrite a real directory') elif os.path.isfile(link): if _win32_is_hardlinked(link, path): if verbose: print('...and is a hard link that points to the same place') return link else: if verbose: print('...and is a hard link that points somewhere else') if _win32_can_symlink(): raise IOError('Cannot overwrite potentially real file if we can symlink') if overwrite: if verbose: print('...overwriting') util_io.delete(link, verbose > 1) else: if exists(link): raise IOError('Link already exists') _win32_symlink2(path, link, verbose=verbose) def _win32_symlink2(path, link, allow_fallback=True, verbose=0): """ Perform a real symbolic link if possible. However, on most versions of windows you need special privileges to create a real symlink. Therefore, we try to create a symlink, but if that fails we fallback to using a junction. AFAIK, the main difference between symlinks and junctions are that symlinks can reference relative or absolute paths, where as junctions always reference absolute paths. Not 100% on this though. Windows is weird. Note that junctions will not register as links via `islink`, but I believe real symlinks will. """ if _win32_can_symlink(): return _win32_symlink(path, link, verbose) else: return _win32_junction(path, link, verbose) def _win32_symlink(path, link, verbose=0): """ Creates real symlink. This will only work in versions greater than Windows Vista. Creating real symlinks requires admin permissions or at least specially enabled symlink permissions. On Windows 10 enabling developer mode should give you these permissions. """ if verbose >= 3: print(f'_win32_symlink {link} -> {path}') from ubelt import util_cmd if os.path.isdir(path): # directory symbolic link if verbose: print('... as directory symlink') command = 'mklink /D "{}" "{}"'.format(link, path) # Using the win32 API seems to result in privilege errors # but using shell commands does not have this problem. Weird. # jwfs.symlink(path, link, target_is_directory=True) # TODO: what do we need to do to use the windows api instead of shell? else: # file symbolic link if verbose: print('... 
as file symlink') command = 'mklink "{}" "{}"'.format(link, path) if command is not None: cmd_verbose = 3 * verbose >= 3 info = util_cmd.cmd(command, shell=True, verbose=cmd_verbose) if info['ret'] != 0: from ubelt import util_repr permission_msg = 'You do not have sufficient privledge' if permission_msg not in info['err']: print('Failed command:') print(info['command']) print(util_repr.urepr(info, nl=1)) raise OSError(str(info)) return link def _win32_junction(path, link, verbose=0): """ On older (pre 10) versions of windows we need admin privileges to make symlinks, however junctions seem to work. For paths we do a junction (softlink) and for files we use a hard link Example: >>> # xdoc: +REQUIRES(WIN32) >>> import ubelt as ub >>> root = ub.Path.appdir('ubelt', 'win32_junction').ensuredir() >>> ub.delete(root) >>> ub.ensuredir(root) >>> fpath = join(root, 'fpath.txt') >>> dpath = join(root, 'dpath') >>> fjunc = join(root, 'fjunc.txt') >>> djunc = join(root, 'djunc') >>> ub.touch(fpath) >>> ub.ensuredir(dpath) >>> ub.ensuredir(join(root, 'djunc_fake')) >>> ub.ensuredir(join(root, 'djunc_fake with space')) >>> ub.touch(join(root, 'djunc_fake with space file')) >>> _win32_junction(fpath, fjunc) >>> _win32_junction(dpath, djunc) >>> # thank god colons are not allowed >>> djunc2 = join(root, 'djunc2 [with pathological attrs]') >>> _win32_junction(dpath, djunc2) >>> _win32_is_junction(djunc) >>> ub.writeto(join(djunc, 'afile.txt'), 'foo') >>> assert ub.readfrom(join(dpath, 'afile.txt')) == 'foo' >>> ub.writeto(fjunc, 'foo') """ # junctions store absolute paths path = os.path.abspath(path) link = os.path.abspath(link) if verbose >= 3: print(f'_win32_junction {link} -> {path}') from ubelt import util_cmd if os.path.isdir(path): # try using a junction (soft link) if verbose: print('... as soft link (junction)') # TODO: what is the windows api for this? command = 'mklink /J "{}" "{}"'.format(link, path) else: # try using a hard link if verbose: print('... as hard link') # command = 'mklink /H "{}" "{}"'.format(link, path) try: jwfs.link(path, link) # this seems to be allowed except Exception: print('Failed to hardlink link={} to path={}'.format(link, path)) raise command = None if command is not None: cmd_verbose = 3 * verbose >= 3 info = util_cmd.cmd(command, shell=True, verbose=cmd_verbose) if info['ret'] != 0: from ubelt import util_repr print('Failed command:') print(info['command']) print(util_repr.urepr(info, nl=1)) raise OSError(str(info)) return link def _win32_is_junction(path): """ Determines if a path is a win32 junction Note: on PyPy this is bugged and will currently return True for a symlinked directory. Returns: bool: Example: >>> # xdoctest: +REQUIRES(WIN32) >>> from ubelt._win32_links import _win32_junction, _win32_is_junction >>> import ubelt as ub >>> root = ub.Path.appdir('ubelt', 'win32_junction').ensuredir() >>> ub.delete(root) >>> ub.ensuredir(root) >>> dpath = root / 'dpath' >>> djunc = root / 'djunc' >>> dpath.ensuredir() >>> _win32_junction(dpath, djunc) >>> assert _win32_is_junction(djunc) is True >>> assert _win32_is_junction(dpath) is False >>> assert _win32_is_junction('notafile') is False """ path = os.fspath(path) if not exists(path): if os.path.isdir(path): if not os.path.islink(path): return True return False if platform.python_implementation() == 'PyPy': # Workaround for pypy where os.path.islink will return True # for a junction. Can we just rely on it being a reparse point? 
# https://github.com/pypy/pypy/issues/4976 return _is_reparse_point(path) else: return _is_reparse_point(path) and not os.path.islink(path) def _is_reparse_point(path): """ Check if a directory is a reparse point in windows. Note: a reparse point seems like it could be a junction or symlink. .. [SO54678399] https://stackoverflow.com/a/54678399/887074 """ if jwfs is None: raise ImportError('jaraco.windows.filesystem is required to run _is_reparse_point') # if jwfs is not None: return jwfs.is_reparse_point(os.fspath(path)) # else: # # Fallback without jaraco: TODO: test this is 1-to-1 # # Seems to break on pypy? # import subprocess # child = subprocess.Popen(f'fsutil reparsepoint query "{path}"', # stdout=subprocess.PIPE) # child.communicate()[0] # return child.returncode == 0 def _win32_read_junction(path): """ Returns the location that the junction points, raises ValueError if path is not a junction. Example: >>> # xdoc: +REQUIRES(WIN32) >>> import ubelt as ub >>> root = ub.Path.appdir('ubelt', 'win32_junction').ensuredir() >>> ub.delete(root) >>> ub.ensuredir(root) >>> dpath = join(root, 'dpath') >>> djunc = join(root, 'djunc') >>> ub.ensuredir(dpath) >>> _win32_junction(dpath, djunc) >>> path = djunc >>> pointed = _win32_read_junction(path) >>> print('pointed = {!r}'.format(pointed)) """ import ctypes path = os.fspath(path) if jwfs is None: raise ImportError('jaraco.windows.filesystem is required to run _win32_read_junction') if not jwfs.is_reparse_point(path): raise ValueError('not a junction') # new version using the windows api handle = jwfs.api.CreateFile( path, 0, 0, None, jwfs.api.OPEN_EXISTING, jwfs.api.FILE_FLAG_OPEN_REPARSE_POINT | jwfs.api.FILE_FLAG_BACKUP_SEMANTICS, None) if handle == jwfs.api.INVALID_HANDLE_VALUE: raise WindowsError() res = jwfs.reparse.DeviceIoControl( handle, jwfs.api.FSCTL_GET_REPARSE_POINT, None, 10240) bytes = ctypes.create_string_buffer(res) p_rdb = ctypes.cast(bytes, ctypes.POINTER(jwfs.api.REPARSE_DATA_BUFFER)) rdb = p_rdb.contents if rdb.tag not in [2684354563, jwfs.api.IO_REPARSE_TAG_SYMLINK]: raise RuntimeError( "Expected <2684354563 or 2684354572>, but got %d" % rdb.tag) jwfs.handle_nonzero_success(jwfs.api.CloseHandle(handle)) subname = rdb.get_substitute_name() # probably has something to do with long paths, not sure if subname.startswith('?\\'): subname = subname[2:] return subname def _win32_rmtree(path, verbose=0): """ rmtree for win32 that treats junctions like directory symlinks. The junction removal portion may not be safe on race conditions. There is a known issue [CPythonBug31226]_ that prevents :func:`shutil.rmtree` from deleting directories with junctions. References: .. 
[CPythonBug31226] https://bugs.python.org/issue31226 """ path = os.fspath(path) def _rmjunctions(root): from os.path import join, isdir, islink for r, ds, fs in os.walk(root): subdirs = [] for d in ds: path = join(r, d) if isdir(path): if _win32_is_junction(path): # remove any junctions as we encounter them os.rmdir(path) elif not islink(path): subdirs.append(d) if 1: # Not sure if necessary, double check, junctions are odd for name in os.listdir(r): current = join(r, name) if os.path.isdir(current): if _win32_is_junction(current): # remove any junctions as we encounter them os.rmdir(current) # only recurse into real directories ds[:] = subdirs if _win32_is_junction(path): if verbose: print('Deleting directory="{}"'.format(path)) os.rmdir(path) else: if verbose: print('Deleting directory="{}"'.format(path)) # first remove all junctions _rmjunctions(path) # now we can rmtree as normal import shutil def onerror(func, path, exc_info): print('Error') print('func = {!r}'.format(func)) print('path = {!r}'.format(path)) print('exc_info = {!r}'.format(exc_info)) shutil.rmtree(path, onerror=onerror) def _win32_is_hardlinked(fpath1, fpath2): """ Test if two hard links point to the same location Example: >>> # xdoc: +REQUIRES(WIN32) >>> import ubelt as ub >>> root = ub.Path.appdir('ubelt', 'win32_hardlink').ensuredir() >>> ub.delete(root) >>> ub.ensuredir(root) >>> fpath1 = join(root, 'fpath1') >>> fpath2 = join(root, 'fpath2') >>> ub.touch(fpath1) >>> ub.touch(fpath2) >>> fjunc1 = _win32_junction(fpath1, join(root, 'fjunc1')) >>> fjunc2 = _win32_junction(fpath2, join(root, 'fjunc2')) >>> assert _win32_is_hardlinked(fjunc1, fpath1) >>> assert _win32_is_hardlinked(fjunc2, fpath2) >>> assert not _win32_is_hardlinked(fjunc2, fpath1) >>> assert not _win32_is_hardlinked(fjunc1, fpath2) """ if jwfs is None: raise ImportError('jaraco.windows.filesystem is required to run _win32_is_hardlinked') # NOTE: jwf.samefile(fpath1, fpath2) seems to behave differently def get_read_handle(fpath): if os.path.isdir(fpath): dwFlagsAndAttributes = jwfs.api.FILE_FLAG_BACKUP_SEMANTICS else: dwFlagsAndAttributes = 0 hFile = jwfs.api.CreateFile(fpath, jwfs.api.GENERIC_READ, jwfs.api.FILE_SHARE_READ, None, jwfs.api.OPEN_EXISTING, dwFlagsAndAttributes, None) return hFile def get_unique_id(hFile): info = jwfs.api.BY_HANDLE_FILE_INFORMATION() res = jwfs.api.GetFileInformationByHandle(hFile, info) jwfs.handle_nonzero_success(res) unique_id = (info.volume_serial_number, info.file_index_high, info.file_index_low) return unique_id hFile1 = get_read_handle(fpath1) hFile2 = get_read_handle(fpath2) try: are_equal = (get_unique_id(hFile1) == get_unique_id(hFile2)) except Exception: raise finally: jwfs.api.CloseHandle(hFile1) jwfs.api.CloseHandle(hFile2) return are_equal def _win32_dir(path, star=''): """ Using the windows cmd shell to get information about a directory """ from ubelt import util_cmd import re wrapper = 'cmd /S /C "{}"' # the /S will preserve all inner quotes command = 'dir /-C "{}"{}'.format(path, star) wrapped = wrapper.format(command) info = util_cmd.cmd(wrapped, shell=True) if info['ret'] != 0: from ubelt import util_repr print('Failed command:') print(info['command']) print(util_repr.urepr(info, nl=1)) raise OSError(str(info)) # parse the output of dir to get some info # Remove header and footer lines = info['out'].split('\n')[5:-3] splitter = re.compile('( +)') for line in lines: parts = splitter.split(line) date, sep, time, sep, ampm, sep, type_or_size, sep = parts[:8] name = ''.join(parts[8:]) # if type is a 
junction then name will also contain the linked loc if name == '.' or name == '..': continue if type_or_size in ['', '', '']: # colons cannot be in path names, so use that to find where # the name ends pos = name.find(':') bpos = name[:pos].rfind('[') name = name[:bpos - 1] pointed = name[bpos + 1:-1] yield type_or_size, name, pointed else: yield type_or_size, name, None ubelt-1.3.7/ubelt/_win32_links.pyi000066400000000000000000000000431472470106000167720ustar00rootroot00000000000000__win32_can_symlink__: bool | None ubelt-1.3.7/ubelt/orderedset.py000066400000000000000000000442341472470106000164720ustar00rootroot00000000000000""" This module exposes the :class:`OrderedSet` class, which is a collection of unique items that maintains the order in which the items were added. An :class:`OrderedSet` (or its alias :class:`oset`) behaves very similarly to Python's builtin :class:`set` object, the main difference being that an :class:`OrderedSet` can efficiently lookup its items by index. Example: >>> import ubelt as ub >>> ub.oset([1, 2, 3]) OrderedSet([1, 2, 3]) >>> (ub.oset([1, 2, 3]) - {2}) | {2} OrderedSet([1, 3, 2]) >>> [ub.oset([1, 2, 3])[i] for i in [1, 0, 2]] [2, 1, 3] As of version (0.8.5), `ubelt` contains its own internal copy of :class:`OrderedSet` in order to reduce external dependencies. The original standalone implementation lives in https://github.com/LuminosoInsight/ordered-set. The original documentation is as follows: An OrderedSet is a custom MutableSet that remembers its order, so that every entry has an index that can be looked up. Based on a recipe originally posted to ActiveState Recipes by Raymond Hettiger, and released under the MIT license. """ import itertools as it from collections import deque from collections.abc import MutableSet, Sequence __all__ = ['OrderedSet', 'oset'] SLICE_ALL = slice(None) # type: slice __version__ = "3.2" def is_iterable(obj): """ Are we being asked to look up a list of things, instead of a single thing? We check for the `__iter__` attribute so that this can cover types that don't have to be known by this module, such as NumPy arrays. Strings, however, should be considered as atomic values to look up, not iterables. The same goes for tuples, since they are immutable and therefore valid entries. We don't need to check for the Python 2 `unicode` type, because it doesn't have an `__iter__` attribute anyway. Returns: bool """ return ( hasattr(obj, "__iter__") and not isinstance(obj, str) and not isinstance(obj, tuple) ) class OrderedSet(MutableSet, Sequence): """ An OrderedSet is a custom MutableSet that remembers its order, so that every entry has an index that can be looked up. Attributes: items (List[Any]): internal ordered representation. map (Dict[Any, int]): internal mapping from items to indices. Example: >>> OrderedSet([1, 1, 2, 3, 2]) OrderedSet([1, 2, 3]) """ def __init__(self, iterable=None): """ Args: iterable (None | Iterable): input data """ self.items = [] self.map = {} if iterable is not None: self |= iterable def __len__(self): """ Returns the number of unique elements in the ordered set Example: >>> len(OrderedSet([])) 0 >>> len(OrderedSet([1, 2])) 2 Returns: int """ return len(self.items) def __getitem__(self, index): """ Get the item at a given index. If ``index`` is a slice, you will get back that slice of items, as a new OrderedSet. If ``index`` is a list or a similar iterable, you'll get a list of items corresponding to those indices. This is similar to NumPy's "fancy indexing". 
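        For example, ``oset[[0, 2]]`` returns the list ``[oset[0], oset[2]]``.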
The result is not an OrderedSet because you may ask for duplicate indices, and the number of elements returned should be the number of elements asked for. Args: index (int | slice | Any): a simple or fancy index Returns: List | OrderedSet | Any : item or items Example: >>> oset = OrderedSet([1, 2, 3]) >>> oset[1] 2 """ if isinstance(index, slice) and index == SLICE_ALL: return self.copy() elif is_iterable(index): return [self.items[i] for i in index] elif hasattr(index, "__index__") or isinstance(index, slice): result = self.items[index] if isinstance(result, list): return self.__class__(result) else: return result else: raise TypeError("Don't know how to index an OrderedSet by %r" % index) def copy(self): """ Return a shallow copy of this object. Returns: OrderedSet Example: >>> this = OrderedSet([1, 2, 3]) >>> other = this.copy() >>> this == other True >>> this is other False """ return self.__class__(self) def __getstate__(self): if len(self) == 0: # The state can't be an empty list. # We need to return a truthy value, or else __setstate__ won't be run. # # This could have been done more gracefully by always putting the state # in a tuple, but this way is backwards- and forwards- compatible with # previous versions of OrderedSet. return (None,) else: return list(self) def __setstate__(self, state): if state == (None,): self.__init__([]) else: self.__init__(state) def __contains__(self, key): """ Test if the item is in this ordered set Args: key (Any): check if this item exists in the set Returns: bool Example: >>> 1 in OrderedSet([1, 3, 2]) True >>> 5 in OrderedSet([1, 3, 2]) False """ return key in self.map def add(self, key): # type: ignore """ Add ``key`` as an item to this OrderedSet, then return its index. If ``key`` is already in the OrderedSet, return the index it already had. Args: key (Any): the item to add Returns: int: the index of the items. Note, violates the Liskov Substitution Principle and might be changed. Example: >>> oset = OrderedSet() >>> oset.append(3) 0 >>> print(oset) OrderedSet([3]) """ if key not in self.map: self.map[key] = len(self.items) self.items.append(key) return self.map[key] append = add def update(self, sequence): """ Update the set with the given iterable sequence, then return the index of the last element inserted. Args: sequence (Iterable): items to add to this set Example: >>> oset = OrderedSet([1, 2, 3]) >>> oset.update([3, 1, 5, 1, 4]) 4 >>> print(oset) OrderedSet([1, 2, 3, 5, 4]) """ item_index = None try: for item in sequence: item_index = self.add(item) except TypeError: raise ValueError( "Argument needs to be an iterable, got %s" % type(sequence) ) return item_index def index(self, key, start=0, stop=None): # type: ignore """ Get the index of a given entry, raising an IndexError if it's not present. `key` can be a non-string iterable of entries, in which case this returns a list of indices. Args: key (Any): item to find the position of start (int): not supported yet stop (int | None): not supported yet Returns: int Example: >>> oset = OrderedSet([1, 2, 3]) >>> oset.index(2) 1 """ # Note: adding in this typing information breaks mypy # Args: # key (Any | List[Any]): item(s) in the set to find the index of # Returns: # int | List[int]: if is_iterable(key): return [self.index(subkey) for subkey in key] return self.map[key] # Provide some compatibility with pd.Index get_loc = index get_indexer = index def pop(self): """ Remove and return the last element from the set. Raises KeyError if the set is empty. 
Returns: Any Example: >>> oset = OrderedSet([1, 2, 3]) >>> oset.pop() 3 """ if not self.items: raise KeyError("Set is empty") elem = self.items[-1] del self.items[-1] del self.map[elem] return elem def discard(self, key): """ Remove an element. Do not raise an exception if absent. The MutableSet mixin uses this to implement the .remove() method, which *does* raise an error when asked to remove a non-existent item. Args: key (Any): item to remove. Example: >>> oset = OrderedSet([1, 2, 3]) >>> oset.discard(2) >>> print(oset) OrderedSet([1, 3]) >>> oset.discard(2) >>> print(oset) OrderedSet([1, 3]) """ if key in self: i = self.map[key] del self.items[i] del self.map[key] for k, v in self.map.items(): if v >= i: self.map[k] = v - 1 def clear(self): """ Remove all items from this OrderedSet. """ del self.items[:] self.map.clear() def __iter__(self): """ Returns: Iterator Example: >>> list(iter(OrderedSet([1, 2, 3]))) [1, 2, 3] """ return iter(self.items) def __reversed__(self): """ Returns: Iterator Example: >>> list(reversed(OrderedSet([1, 2, 3]))) [3, 2, 1] """ return reversed(self.items) def __repr__(self): """ Returns: str """ if not self: return "%s()" % (self.__class__.__name__,) return "%s(%r)" % (self.__class__.__name__, list(self)) def __eq__(self, other): """ Returns true if the containers have the same items. If `other` is a Sequence, then order is checked, otherwise it is ignored. Args: other (Any): item to compare against Returns: bool Example: >>> oset = OrderedSet([1, 3, 2]) >>> oset == [1, 3, 2] True >>> oset == [1, 2, 3] False >>> oset == [2, 3] False >>> oset == OrderedSet([3, 2, 1]) False """ # In Python 2 deque is not a Sequence, so treat it as one for # consistent behavior with Python 3. if isinstance(other, (Sequence, deque)): # Check that this OrderedSet contains the same elements, in the # same order, as the other object. return list(self) == list(other) try: other_as_set = set(other) except TypeError: # If `other` can't be converted into a set, it's not equal. return False else: return set(self) == other_as_set def union(self, *sets): """ Combines all unique items. Each items order is defined by its first appearance. Args: *sets : zero or more other iterables to operate on Returns: OrderedSet Example: >>> oset = OrderedSet.union(OrderedSet([3, 1, 4, 1, 5]), [1, 3], [2, 0]) >>> print(oset) OrderedSet([3, 1, 4, 5, 2, 0]) >>> oset.union([8, 9]) OrderedSet([3, 1, 4, 5, 2, 0, 8, 9]) >>> oset | {10} OrderedSet([3, 1, 4, 5, 2, 0, 10]) """ cls = self.__class__ if isinstance(self, OrderedSet) else OrderedSet containers = map(list, it.chain([self], sets)) items = it.chain.from_iterable(containers) return cls(items) def __and__(self, other): # the parent implementation of this is backwards return self.intersection(other) def intersection(self, *sets): """ Returns elements in common between all sets. Order is defined only by the first set. Args: *sets : zero or more other iterables to operate on Returns: OrderedSet Example: >>> from ubelt.orderedset import * # NOQA >>> oset = OrderedSet.intersection(OrderedSet([0, 1, 2, 3]), [1, 2, 3]) >>> print(oset) OrderedSet([1, 2, 3]) >>> oset.intersection([2, 4, 5], [1, 2, 3, 4]) OrderedSet([2]) >>> oset.intersection() OrderedSet([1, 2, 3]) """ cls = self.__class__ if isinstance(self, OrderedSet) else OrderedSet if sets: common = set.intersection(*map(set, sets)) items = (item for item in self if item in common) else: items = self return cls(items) def difference(self, *sets): """ Returns all elements that are in this set but not the others. 
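        The order of the remaining elements is preserved from this set.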
Args: *sets : zero or more other iterables to operate on Returns: OrderedSet Example: >>> OrderedSet([1, 2, 3]).difference(OrderedSet([2])) OrderedSet([1, 3]) >>> OrderedSet([1, 2, 3]).difference(OrderedSet([2]), OrderedSet([3])) OrderedSet([1]) >>> OrderedSet([1, 2, 3]) - OrderedSet([2]) OrderedSet([1, 3]) >>> OrderedSet([1, 2, 3]).difference() OrderedSet([1, 2, 3]) """ cls = self.__class__ if sets: other = set.union(*map(set, sets)) items = (item for item in self if item not in other) else: items = self return cls(items) def issubset(self, other): """ Report whether another set contains this set. Args: other (Iterable): check if items in other are all contained in self. Returns: bool Example: >>> OrderedSet([1, 2, 3]).issubset({1, 2}) False >>> OrderedSet([1, 2, 3]).issubset({1, 2, 3, 4}) True >>> OrderedSet([1, 2, 3]).issubset({1, 4, 3, 5}) False """ if len(self) > len(other): # Fast check for obvious cases return False return all(item in other for item in self) # todo: contiguous subset / subsequence_index? def issuperset(self, other): """ Report whether this set contains another set. Args: other (Iterable): check all items in self are contained in other. Returns: bool Example: >>> OrderedSet([1, 2]).issuperset([1, 2, 3]) False >>> OrderedSet([1, 2, 3, 4]).issuperset({1, 2, 3}) True >>> OrderedSet([1, 4, 3, 5]).issuperset({1, 2, 3}) False """ if len(self) < len(other): # Fast check for obvious cases return False return all(item in self for item in other) def symmetric_difference(self, other): """ Return the symmetric difference of two OrderedSets as a new set. That is, the new set will contain all elements that are in exactly one of the sets. Their order will be preserved, with elements from `self` preceding elements from `other`. Args: other (Iterable): items to operate on Returns: OrderedSet Example: >>> this = OrderedSet([1, 4, 3, 5, 7]) >>> other = OrderedSet([9, 7, 1, 3, 2]) >>> this.symmetric_difference(other) OrderedSet([4, 5, 9, 2]) """ cls = self.__class__ if isinstance(self, OrderedSet) else OrderedSet diff1 = cls(self).difference(other) diff2 = cls(other).difference(self) return diff1.union(diff2) def _update_items(self, items): """ Replace the 'items' list of this OrderedSet with a new one, updating self.map accordingly. """ self.items = items self.map = {item: idx for (idx, item) in enumerate(items)} def difference_update(self, *sets): """ Update this OrderedSet to remove items from one or more other sets. Example: >>> this = OrderedSet([1, 2, 3]) >>> this.difference_update(OrderedSet([2, 4])) >>> print(this) OrderedSet([1, 3]) >>> this = OrderedSet([1, 2, 3, 4, 5]) >>> this.difference_update(OrderedSet([2, 4]), OrderedSet([1, 4, 6])) >>> print(this) OrderedSet([3, 5]) """ items_to_remove = set() for other in sets: items_to_remove |= set(other) self._update_items([item for item in self.items if item not in items_to_remove]) def intersection_update(self, other): """ Update this OrderedSet to keep only items in another set, preserving their order in this set. Args: other (Iterable): items to operate on Example: >>> this = OrderedSet([1, 4, 3, 5, 7]) >>> other = OrderedSet([9, 7, 1, 3, 2]) >>> this.intersection_update(other) >>> print(this) OrderedSet([1, 3, 7]) """ other = set(other) self._update_items([item for item in self.items if item in other]) def symmetric_difference_update(self, other): """ Update this OrderedSet to remove items from another set, then add items from the other set that were not present in this set. 
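        Elements kept from this set retain their order and precede the newly
        added elements.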
Args: other (Iterable): items to operate on Example: >>> this = OrderedSet([1, 4, 3, 5, 7]) >>> other = OrderedSet([9, 7, 1, 3, 2]) >>> this.symmetric_difference_update(other) >>> print(this) OrderedSet([4, 5, 9, 2]) """ items_to_add = [item for item in other if item not in self] items_to_remove = set(other) self._update_items( [item for item in self.items if item not in items_to_remove] + items_to_add ) # OrderedSet = ordered_set.OrderedSet oset = OrderedSet ubelt-1.3.7/ubelt/orderedset.pyi000066400000000000000000000034671472470106000166460ustar00rootroot00000000000000from typing import List from typing import Any from typing import Dict from typing import Iterable from typing import Iterator from collections.abc import MutableSet, Sequence SLICE_ALL: slice def is_iterable(obj) -> bool: ... class OrderedSet(MutableSet, Sequence): items: List[Any] map: Dict[Any, int] def __init__(self, iterable: None | Iterable = None) -> None: ... def __len__(self) -> int: ... def __getitem__(self, index: int | slice | Any) -> List | OrderedSet | Any: ... def copy(self) -> OrderedSet: ... def __contains__(self, key: Any) -> bool: ... def add(self, key: Any): ... append = add def update(self, sequence: Iterable): ... def index(self, key: Any, start: int = 0, stop: int | None = None) -> int: ... get_loc = index get_indexer = index def pop(self) -> Any: ... def discard(self, key: Any) -> None: ... def clear(self) -> None: ... def __iter__(self) -> Iterator: ... def __reversed__(self) -> Iterator: ... def __eq__(self, other: Any) -> bool: ... def union(self, *sets) -> OrderedSet: ... def __and__(self, other): ... def intersection(self, *sets) -> OrderedSet: ... def difference(self, *sets) -> OrderedSet: ... def issubset(self, other: Iterable) -> bool: ... def issuperset(self, other: Iterable) -> bool: ... def symmetric_difference(self, other: Iterable) -> OrderedSet: ... def difference_update(self, *sets) -> None: ... def intersection_update(self, other: Iterable) -> None: ... def symmetric_difference_update(self, other: Iterable) -> None: ... oset = OrderedSet ubelt-1.3.7/ubelt/progiter.py000066400000000000000000001114721472470106000161640ustar00rootroot00000000000000""" A Progress Iterator ProgIter lets you measure and print the progress of an iterative process. This can be done either via an iterable interface or using the manual API. Using the iterable interface is most common. The basic usage of ProgIter is simple and intuitive. Just wrap a python iterable. The following example wraps a ``range`` iterable and prints reported progress to stdout as the iterable is consumed. Example: >>> for n in ProgIter(range(1000)): >>> # do some work >>> pass Note that by default ProgIter reports information about iteration-rate, fraction-complete, estimated time remaining, time taken so far, and the current wall time. Example: >>> # xdoctest: +IGNORE_WANT >>> def is_prime(n): ... return n >= 2 and not any(n % i == 0 for i in range(2, n)) >>> for n in ProgIter(range(1000), verbose=1): >>> # do some work >>> is_prime(n) 1000/1000... rate=114326.51 Hz, eta=0:00:00, total=0:00:00 For more complex applications is may sometimes be desirable to manually use the ProgIter API. This is done as follows: Example: >>> # xdoctest: +IGNORE_WANT >>> n = 3 >>> prog = ProgIter(desc='manual', total=n, verbose=3) >>> prog.begin() # Manually begin progress iteration >>> for _ in range(n): ... prog.step(inc=1) # specify the number of steps to increment >>> prog.end() # Manually end progress iteration manual 0/3... 
rate=0 Hz, eta=?, total=0:00:00 manual 1/3... rate=14454.63 Hz, eta=0:00:00, total=0:00:00 manual 2/3... rate=17485.42 Hz, eta=0:00:00, total=0:00:00 manual 3/3... rate=21689.78 Hz, eta=0:00:00, total=0:00:00 When working with ProgIter in either iterable or manual mode you can use the ``prog.ensure_newline`` method to guarantee that the next call you make to stdout will start on a new line. You can also use the ``prog.set_extra`` method to update a dynamci "extra" message that is shown in the formatted output. The following example demonstrates this. Example: >>> # xdoctest: +IGNORE_WANT >>> def is_prime(n): ... return n >= 2 and not any(n % i == 0 for i in range(2, n)) >>> _iter = range(1000) >>> prog = ProgIter(_iter, desc='check primes', verbose=2, show_wall=True) >>> for n in prog: >>> if n == 97: >>> print('!!! Special print at n=97 !!!') >>> if is_prime(n): >>> prog.set_extra('Biggest prime so far: {}'.format(n)) >>> prog.ensure_newline() check primes 0/1000... rate=0 Hz, eta=?, total=0:00:00, wall=2020-10-23 17:27 EST check primes 1/1000... rate=95547.49 Hz, eta=0:00:00, total=0:00:00, wall=2020-10-23 17:27 EST check primes 4/1000...Biggest prime so far: 3 rate=41062.28 Hz, eta=0:00:00, total=0:00:00, wall=2020-10-23 17:27 EST check primes 16/1000...Biggest prime so far: 13 rate=85340.61 Hz, eta=0:00:00, total=0:00:00, wall=2020-10-23 17:27 EST check primes 64/1000...Biggest prime so far: 61 rate=164739.98 Hz, eta=0:00:00, total=0:00:00, wall=2020-10-23 17:27 EST !!! Special print at n=97 !!! check primes 256/1000...Biggest prime so far: 251 rate=206287.91 Hz, eta=0:00:00, total=0:00:00, wall=2020-10-23 17:27 EST check primes 512/1000...Biggest prime so far: 509 rate=165271.92 Hz, eta=0:00:00, total=0:00:00, wall=2020-10-23 17:27 EST check primes 768/1000...Biggest prime so far: 761 rate=136480.12 Hz, eta=0:00:00, total=0:00:00, wall=2020-10-23 17:27 EST check primes 1000/1000...Biggest prime so far: 997 rate=115214.95 Hz, eta=0:00:00, total=0:00:00, wall=2020-10-23 17:27 EST """ import sys import time import collections from itertools import islice __all__ = [ 'ProgIter', ] default_timer = time.perf_counter # type: Callable # A measurement takes place at a given iteration and posixtime. Measurement = collections.namedtuple('Measurement', ['idx', 'time']) CLEAR_BEFORE = '\r' AT_END = '\n' def _infer_length(iterable): """ Try and infer the length using the PEP 424 length hint if available. adapted from click implementation Args: iterable (Iterable): Returns: int | None """ try: return len(iterable) except (AttributeError, TypeError): # nocover try: get_hint = type(iterable).__length_hint__ except AttributeError: return None try: hint = get_hint(iterable) except TypeError: return None if (hint is NotImplemented or not isinstance(hint, int) or hint < 0): return None return hint class _TQDMCompat(object): """ Base class for ProgIter that implements a restricted TQDM Compatibility API """ @classmethod def write(cls, s, file=None, end='\n', nolock=False): """ simply writes to stdout Args: s (str): string file (None | SupportsWrite): end (str): end of line nolock (bool): """ fp = file if file is not None else sys.stdout fp.write(s) fp.write(end) def set_description(self, desc=None, refresh=True): """ tqdm api compatibility. Changes the description of progress Args: desc (str | None): description """ self.desc = desc if refresh: self.refresh() def set_description_str(self, desc=None, refresh=True): """ tqdm api compatibility. 
Changes the description of progress Args: desc (str | None): description string """ self.set_description(desc, refresh) def update(self, n=1): """ alias of `step` for tqdm compatibility """ self.step(n) def close(self): """ alias of `end` for tqdm compatibility """ self.end() def unpause(self): """ tqdm api compatibility. does nothing """ pass def moveto(self, n): """ tqdm api compatibility. does nothing """ pass def clear(self, nolock=False): """ tqdm api compatibility. does nothing """ pass def refresh(self, nolock=False): """ tqdm api compatibility. redisplays message (can cause a message to print twice) """ if not self.started: self.begin() self.display_message() @property def pos(self): """ Returns: int """ return 0 @classmethod def set_lock(cls, lock): """ tqdm api compatibility. does nothing """ pass @classmethod def get_lock(cls): """ tqdm api compatibility. does nothing """ pass def set_postfix_dict(self, ordered_dict=None, refresh=True, **kwargs): """ tqdm api compatibility. calls set_extra Args: ordered_dict (None | dict): refresh (bool): **kwargs: """ # Sort in alphabetical order to be more deterministic postfix = collections.OrderedDict( [] if ordered_dict is None else ordered_dict) for key in sorted(kwargs.keys()): postfix[key] = kwargs[key] # Preprocess stats according to datatype for key in postfix.keys(): import numbers # Number: limit the length of the string if isinstance(postfix[key], numbers.Number): postfix[key] = '{0:2.3g}'.format(postfix[key]) # Else for any other type, try to get the string conversion elif not isinstance(postfix[key], str): postfix[key] = str(postfix[key]) # Else if it's a string, don't need to preprocess anything # Stitch together to get the final postfix postfix = ', '.join(key + '=' + postfix[key].strip() for key in postfix.keys()) self.set_postfix_str(postfix, refresh=refresh) def set_postfix(self, postfix, **kwargs): if isinstance(postfix, str): self.set_postfix_str(postfix, **kwargs) else: self.set_postfix_dict(ordered_dict=postfix, **kwargs) def set_postfix_str(self, s='', refresh=True): """ tqdm api compatibility. calls set_extra """ self.set_extra(str(s)) if refresh: self.refresh() class _BackwardsCompat(object): """ Base class for ProgIter that maintains backwards compatibility with older versions of the ProgIter API. """ # Backwards Compatibility API @property def length(self): """ alias of total """ return self.total @property def label(self): """ alias of desc """ return self.desc def start(self): # nocover """ Alias of :func:`ProgIter.begin` """ return self.begin() def stop(self): # nocover """ Alias of :func:`ProgIter.end` """ return self.end() class ProgIter(_TQDMCompat, _BackwardsCompat): """ Prints progress as an iterator progresses ProgIter is an alternative to `tqdm`. ProgIter implements much of the tqdm-API. The main difference between `ProgIter` and `tqdm` is that ProgIter does not use threading whereas `tqdm` does. Attributes: Note: Either use ProgIter in a with statement or call prog.end() at the end of the computation if there is a possibility that the entire iterable may not be exhausted. Note: ProgIter is an alternative to `tqdm`. The main difference between `ProgIter` and `tqdm` is that ProgIter does not use threading whereas `tqdm` does. `ProgIter` is simpler than `tqdm` and thus more stable in certain circumstances. SeeAlso: tqdm - https://pypi.python.org/pypi/tqdm References: .. [DatagenProgBars] http://datagenetics.com/blog/february12017/index.html Example: >>> # doctest: +SKIP >>> def is_prime(n): ... 
return n >= 2 and not any(n % i == 0 for i in range(2, n)) >>> for n in ProgIter(range(100), verbose=1, show_wall=True): >>> # do some work >>> is_prime(n) 100/100... rate=... Hz, total=..., wall=... """ def __init__(self, iterable=None, desc=None, total=None, freq=1, initial=0, eta_window=64, clearline=True, adjust=True, time_thresh=2.0, show_percent=True, show_times=True, show_rate=True, show_eta=True, show_total=True, show_wall=False, enabled=True, verbose=None, stream=None, chunksize=None, rel_adjust_limit=4.0, homogeneous='auto', timer=None, **kwargs): """ See attributes more arg information Args: iterable (List | Iterable): A list or iterable to loop over desc (str | None): description label to show with progress total (int | None): Maximum length of the process. If not specified, we estimate it from the iterable, if possible. freq (int): How many iterations to wait between messages. Defaults to 1. initial (int): starting index offset, default=0 eta_window (int): number of previous measurements to use in eta calculation, default=64 clearline (bool): if True messages are printed on the same line otherwise each new progress message is printed on new line. default=True adjust (bool): if True `freq` is adjusted based on time_thresh. This may be overwritten depending on the setting of verbose. default=True time_thresh (float): desired amount of time to wait between messages if adjust is True otherwise does nothing, default=2.0 show_percent (bool): if True show percent progress. Default=True show_times (bool): if False do not show rate, eta, or wall time. default=True Deprecated. Use show_rate / show_eta / show_wall instead. show_rate (bool): show / hide rate, default=True show_eta (bool): show / hide estimated time of arrival (i.e. time to completion), default=True show_wall (bool): show / hide wall time, default=False stream (typing.IO): stream where progress information is written to, default=sys.stdout timer (callable): the timer object to use. Defaults to :func:`time.perf_counter`. enabled (bool): if False nothing happens. default=True chunksize (int | None): indicates that each iteration processes a batch of this size. Iteration rate is displayed in terms of single-items. rel_adjust_limit (float): Maximum factor update frequency can be adjusted by in a single step. default=4.0 verbose (int): verbosity mode, which controls clearline, adjust, and enabled. The following maps the value of `verbose` to its effect. 0: enabled=False, 1: enabled=True with clearline=True and adjust=True, 2: enabled=True with clearline=False and adjust=True, 3: enabled=True with clearline=False and adjust=False homogeneous (bool | str): Indicate if the iterable is likely to take a uniform or homogeneous amount of time per iteration. When True we can enable a speed optimization. When False, the time estimates are more accurate. Default to "auto", which attempts to determine if it is safe to use True. Has no effect if ``adjust`` is False. show_total (bool): if True show total time. 
**kwargs: accepts most of the tqdm api """ if desc is None: desc = '' if verbose is not None: if verbose <= 0: # nocover enabled = False elif verbose == 1: # nocover enabled, clearline, adjust = 1, 1, 1 elif verbose == 2: # nocover enabled, clearline, adjust = 1, 0, 1 elif verbose >= 3: # nocover enabled, clearline, adjust = 1, 0, 0 # Potential new additions to the API self._microseconds = kwargs.pop('microseconds', False) # --- Accept the tqdm api --- if kwargs: stream = kwargs.pop('file', stream) enabled = not kwargs.pop('disable', not enabled) if kwargs.get('miniters', None) is not None: adjust = False freq = kwargs.pop('miniters', freq) kwargs.pop('position', None) # API compatibility does nothing kwargs.pop('dynamic_ncols', None) # API compatibility does nothing kwargs.pop('leave', True) # we always leave # Accept the old api keywords desc = kwargs.pop('label', desc) total = kwargs.pop('length', total) enabled = kwargs.pop('enabled', enabled) initial = kwargs.pop('start', initial) time_thresh = kwargs.pop('mininterval', time_thresh) if kwargs: raise ValueError('ProgIter given unknown kwargs {}'.format(kwargs)) # ---------------------------- if stream is None: stream = sys.stdout self.stream = stream self.iterable = iterable self.desc = desc self.total = total self.freq = freq self.initial = initial self.enabled = enabled self.adjust = adjust self.show_percent = show_percent self.show_times = show_times self.show_rate = show_rate self.show_eta = show_eta self.show_total = show_total self.show_wall = show_wall self.eta_window = eta_window self.time_thresh = time_thresh self.clearline = clearline self.chunksize = chunksize self.rel_adjust_limit = rel_adjust_limit self.extra = '' self._extra_fn = None self.started = False self.finished = False if timer is None: timer = default_timer self._timer = timer self.homogeneous = homogeneous self._likely_homogeneous = None # indicates if the cursor is currently at the start of a line (True) or # if characters have been written with no newline yet. self._cursor_at_newline = True self._prev_msg_len = 0 # used to ensure lines are fully cleared self._reset_internals() def __call__(self, iterable): """ Overwrites the current iterator with iterable and starts iterating on it. Warning: Using this function is not recommended. Args: iterable (Iterable): Returns: Iterable """ self.iterable = iterable return iter(self) def __enter__(self): """ Returns: ProgIter Example: >>> # can be used as a context manager in iter mode >>> n = 3 >>> with ProgIter(desc='manual', total=n, verbose=3) as prog: ... list(prog(range(n))) """ self.begin() return self def __exit__(self, ex_type, ex_value, ex_traceback): """ Args: ex_type (Type[BaseException] | None): ex_value (BaseException | None): ex_traceback (TracebackType | None): Returns: bool | None """ if ex_traceback is not None: # nocover return False else: self.end() def __iter__(self): """ Returns: Iterable """ if not self.enabled: return iter(self.iterable) else: return self._iterate() def set_extra(self, extra): """ specify a custom info appended to the end of the next message Args: extra (str | Callable): a constant or dynamically constructed extra message. TODO: - [ ] extra is a bad name; come up with something better and rename Example: >>> prog = ProgIter(range(100, 300, 100), show_times=False, verbose=3) >>> for n in prog: >>> prog.set_extra('processesing num {}'.format(n)) 0.00% 0/2... 
50.00% 1/2...processesing num 100 100.00% 2/2...processesing num 200 """ if callable(extra): self._extra_fn = extra else: self._extra_fn = None self.extra = extra def _reset_internals(self): """ Initialize all variables used in the internal state """ # Prepare for iteration if self.total is None: self.total = _infer_length(self.iterable) # Track the total time up to the most recent measurement. self._total_seconds = 0 # Track the current iteration we are on self._iter_idx = self.initial # Track the last time we displayed a message self._display_measurement = Measurement(-1, -1) # Track the most recent iteration/time a measurement was made self._curr_measurement = Measurement(self.initial, 0) # Track the number of iterations and time between the last two measurements self._measure_countdelta = -1 self._measure_timedelta = self.time_thresh self._display_timedelta = self.time_thresh self._next_measure_idx = self._iter_idx + self.freq # Primary estimates self._est_seconds_left = None self._iters_per_second = 0.0 # hack flag that should be refactored and removed used to ensure the # first message after begin is displayed. self._force_next_display = False self._update_message_template() def begin(self): """ Initializes information used to measure progress This only needs to be used if this ProgIter is not wrapping an iterable. Does nothing if this ProgIter is disabled. Returns: ProgIter: a chainable self-reference """ if not self.enabled: return self._reset_internals() # Time progress was initialized self._start_time = self._timer() # Last time measures were updated self._curr_measurement = Measurement(self._iter_idx, self._start_time) # use last few times to compute a more stable average rate if self.eta_window is not None: self._measurements = collections.deque([ self._curr_measurement ], maxlen=self.eta_window) else: self._measurements = collections.deque([ self._curr_measurement ], maxlen=2) # self._cursor_at_newline = True self._cursor_at_newline = not self.clearline self.started = True self.finished = False self._tryflush() self.display_message() # The start message isn't very helpful. # If we enable this we could force the first iteration. self._force_next_display = self.freq == 1 return self def end(self): """ Signals that iteration has ended and displays the final message. This only needs to be used if this ProgIter is not wrapping an iterable. Does nothing if this ProgIter object is disabled or has already finished. """ if not self.enabled or self.finished: return # Write the final progress line if it was not written in the loop if self._iter_idx != self._display_measurement.idx: self._measure_time() self._est_seconds_left = 0 self.display_message() self.ensure_newline() self._cursor_at_newline = True self.finished = True def _iterate(self): """ iterates with progress """ if not self.started: self.begin() # Wrap input sequence in a generator gen = enumerate(self.iterable, start=self.initial + 1) # Iterating is performance sensitive, so separate both cases - where # 'freq' is used and checks can be fast, and where 'adjust' is used and # checks need more calculation. This is worth duplicating code for. if self.adjust: homogeneous = self.homogeneous if homogeneous == 'auto': yield from self._homogeneous_check(gen) homogeneous = self._likely_homogeneous if homogeneous: use_fast_path = True else: use_fast_path = False # Slow path where we do checks every iteration. 
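                # Each pass calls the timer via _slow_path_step_body, which
                # keeps the estimates accurate for heterogeneous workloads at
                # the cost of per-item overhead.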
                for self._iter_idx, item in gen:
                    yield item
                    self._slow_path_step_body()
        else:
            use_fast_path = True

        if use_fast_path:
            # In the fast path we only check the time every `freq` iterations.
            for self._iter_idx, item in gen:
                yield item
                if self._force_next_display or self._iter_idx >= self._next_measure_idx:
                    self._measure_time()
                    if self._force_next_display or (self._display_timedelta >= self.time_thresh):
                        self.display_message()
        self.end()

    def _homogeneous_check(self, gen):
        # NOTE: We could have a more complex heuristic with negligible
        # overhead and more robustness that checks every n iterations
        # such that the time call overhead would be negligible.
        # To do this we would need a semi-fast mode that does the fast
        # mode for a fixed number of iterations and then rechecks the
        # slow mode. Or something like that.
        # NOTE: We could also try to find a pseudo property to check to
        # see if things are changing. Is this faster than a call to
        # time.time?

        # Take a few steps in the slow path and then check to see
        # if we should continue or go down the fast path.
        num_initial_steps = 5

        # A call to time is 50ns, we can accept the overhead if it
        # is only .01% of the total loop time
        overhead_threshold = 50e-9 * 10_000

        slowest = 0
        for self._iter_idx, item in islice(gen, num_initial_steps):
            yield item
            self._slow_path_step_body()
            slowest = max(slowest, self._measure_timedelta)

        # We are moving fast, take the faster path
        self._likely_homogeneous = (slowest < overhead_threshold)

    def _slow_path_step_body(self, force=False):
        # In the slow path, we don't make any assumption about how long
        # iterations take. So on every iteration we must measure the time
        self._measure_time()
        if force or (self._display_timedelta >= self.time_thresh):
            self.display_message()

    def step(self, inc=1, force=False):
        """
        Manually step the progress update, either directly or by an increment.

        Args:
            inc (int): number of steps to increment. Defaults to 1.
            force (bool): if True forces progress display. Defaults to False.

        Example:
            >>> n = 3
            >>> prog = ProgIter(desc='manual', total=n, verbose=3)
            >>> # Need to manually begin and end in this mode
            >>> prog.begin()
            >>> for _ in range(n):
            ...     prog.step()
            >>> prog.end()

        Example:
            >>> n = 3
            >>> # can be used as a context manager in manual mode
            >>> with ProgIter(desc='manual', total=n, verbose=3) as prog:
            ...     for _ in range(n):
            ...         prog.step()
        """
        if not self.enabled:
            return
        self._iter_idx += inc
        self._slow_path_step_body(force=force)

    def _adjust_frequency(self):
        # Adjust frequency so the next print will not happen until
        # approximately `time_thresh` seconds have passed as estimated by
        # iter_idx.

        # If progress was uniform and all time estimates were
        # perfect this would be the new freq to achieve self.time_thresh
        eps = 1E-9
        new_freq = int(self.time_thresh * self._measure_countdelta /
                       max(eps, self._measure_timedelta))

        # But things are not perfect. So, don't make drastic changes
        rel_limit = self.rel_adjust_limit
        max_freq = int(self.freq * rel_limit)
        min_freq = int(self.freq // rel_limit)
        self.freq = max(min(new_freq, max_freq), min_freq, 1)

    def _measure_time(self):
        """
        Measures the current time and updates info about how long we've been
        waiting since the last iteration was displayed.
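
        This appends a new ``Measurement`` to the internal window, recomputes
        the (optionally smoothed) rate and ETA estimates, and, when ``adjust``
        is enabled, retunes ``freq`` so the next display lands roughly
        ``time_thresh`` seconds in the future.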
""" _prev_measurement = self._measurements[-1] if _prev_measurement.idx == self._iter_idx: # We already recorded this time measurement # raise AssertionError("PROBABLY SHOULD NOT BE HERE") return _curr_measurement = Measurement(self._iter_idx, self._timer()) self._curr_measurement = _curr_measurement self._measurements.append(_curr_measurement) self._measure_timedelta = _curr_measurement.time - _prev_measurement.time self._measure_countdelta = _curr_measurement.idx - _prev_measurement.idx self._total_seconds = _curr_measurement.time - self._start_time self._display_timedelta = (self._curr_measurement.time - self._display_measurement.time) # Estimate rate of progress if self.eta_window is None: self._iters_per_second = self._curr_measurement.idx / self._total_seconds else: # Smooth out rate with a window oldest_idx, oldest_time = self._measurements[0] latest_idx, latest_time = self._measurements[-1] self._iters_per_second = ((latest_idx - oldest_idx) / (latest_time - oldest_time)) if self.total is not None: # Estimate time remaining if total is given iters_left = self.total - self._curr_measurement.idx est_eta = iters_left / self._iters_per_second self._est_seconds_left = est_eta # Adjust frequency to stay within time_thresh if self.adjust and (self._measure_timedelta < self.time_thresh or self._measure_timedelta > self.time_thresh * 2.0): self._adjust_frequency() # Mark when our next measurement should be in "fast mode" self._next_measure_idx = self._iter_idx + self.freq def _update_message_template(self): self._msg_fmtstr = self._build_message_template() def _build_message_template(self): """ Defines the template for the progress line Returns: Tuple[str, str, str] Example: >>> self = ProgIter() >>> print(self._build_message_template()[1].strip()) {desc} {iter_idx:4d}/?...{extra} rate={rate:{rate_format}} Hz, total={total}... >>> self = ProgIter(show_total=False, show_eta=False, show_rate=False) >>> print(self._build_message_template()[1].strip()) {desc} {iter_idx:4d}/?...{extra} >>> self = ProgIter(total=0, show_times=True) >>> print(self._build_message_template()[1].strip()) {desc} {percent:03.2f}% {iter_idx:1d}/0...{extra} rate={rate:{rate_format}} Hz, total={total} """ from math import log10, floor length_unknown = self.total is None or self.total < 0 if length_unknown: n_chrs = 4 else: if self.total == 0: n_chrs = 1 else: n_chrs = int(floor(log10(float(self.total))) + 1) if self.chunksize and not length_unknown: msg_body = [ ('{desc}'), (' {percent:03.2f}% of ' + str(self.chunksize) + 'x'), ('?' if length_unknown else str(self.total)), ('...'), ] else: if self.show_percent and not length_unknown: msg_body = [ ('{desc}'), (' {percent:03.2f}% {iter_idx:' + str(n_chrs) + 'd}/'), ('?' if length_unknown else str(self.total)), ('...'), ] else: msg_body = [ ('{desc}'), (' {iter_idx:' + str(n_chrs) + 'd}/'), ('?' if length_unknown else str(self.total)), ('...'), ] msg_body.append('{extra} ') if self.show_times: if self.show_rate: msg_body.append('rate={rate:{rate_format}} Hz,') if self.show_eta: msg_body.append(' eta={eta},' if self.total else '') if self.show_total: msg_body.append(' total={total}') # this is total time if self.show_wall: msg_body.append(', wall={wall}') if self.clearline: parts = (CLEAR_BEFORE, ''.join(msg_body), '') else: parts = ('', ''.join(msg_body), AT_END) return parts def format_message(self): """ Exists only for backwards compatibility. See `format_message_parts` for more recent API. 
Returns: str """ return ''.join(self.format_message_parts()) def format_message_parts(self): r""" builds a formatted progress message with the current values. This contains the special characters needed to clear lines. Returns: Tuple[str, str, str] Example: >>> self = ProgIter(clearline=False, show_times=False) >>> print(repr(self.format_message_parts()[1])) ' 0/?... ' >>> self.begin() >>> self.step() >>> print(repr(self.format_message_parts()[1])) ' 1/?... ' Example: >>> self = ProgIter(chunksize=10, total=100, clearline=False, >>> show_times=False, microseconds=True) >>> # hack, microseconds=True for coverage, needs real test >>> print(repr(self.format_message_parts()[1])) ' 0.00% of 10x100... ' >>> self.begin() >>> self.update() # tqdm alternative to step >>> print(repr(self.format_message_parts()[1])) ' 1.00% of 10x100... ' """ from datetime import timedelta if self._est_seconds_left is None: eta = '?' else: if self._microseconds: eta = str(timedelta(seconds=self._est_seconds_left)) else: eta = str(timedelta(seconds=int(self._est_seconds_left))) if self._microseconds: total = str(timedelta(seconds=self._total_seconds)) else: total = str(timedelta(seconds=int(self._total_seconds))) before, fmtstr, after = self._msg_fmtstr if self._extra_fn is not None: # User requested a dynamic extra callback. extra = self._extra_fn() else: extra = self.extra fmtkw = { 'desc': self.desc, 'iter_idx': self._curr_measurement.idx, 'eta': eta, 'total': total, 'wall': time.strftime('%Y-%m-%d %H:%M ') + time.tzname[0] if self.show_wall else None, 'extra': extra, 'percent': '', } # similar to tqdm.format_meter if self.chunksize and self.total: fmtkw.update({ 'percent': self._curr_measurement.idx / self.total * 100, 'rate': self._iters_per_second * self.chunksize, 'rate_format': '4.2f' if self._iters_per_second * self.chunksize > .001 else 'g', }) else: fmtkw.update({ 'percent': self._curr_measurement.idx / self.total * 100 if self.total is not None and self.total > 0 else 0, 'rate': self._iters_per_second, 'rate_format': '4.2f' if self._iters_per_second > .001 else 'g', }) msg = fmtstr.format(**fmtkw) return before, msg, after def ensure_newline(self): """ use before any custom printing when using the progress iter to ensure your print statement starts on a new line instead of at the end of a progress line Example: >>> # Unsafe version may write your message on the wrong line >>> prog = ProgIter(range(3), show_times=False, freq=2, adjust=False, ... time_thresh=0) >>> for n in prog: ... print('unsafe message') 0.00% 0/3... unsafe message unsafe message 66.67% 2/3... unsafe message 100.00% 3/3... >>> # apparently the safe version does this too. >>> print('---') --- >>> prog = ProgIter(range(3), show_times=False, freq=2, adjust=False, ... time_thresh=0) >>> for n in prog: ... prog.ensure_newline() ... print('safe message') 0.00% 0/3... safe message safe message 66.67% 2/3... safe message 100.00% 3/3... 
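
        Note that both variants produce the same captured text in this
        doctest; the difference primarily matters on a real terminal, where an
        unfinished progress line (one with no trailing newline) could
        otherwise have the custom message appended to it or be overwritten.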
""" if not self._cursor_at_newline: self._write(AT_END) self._prev_msg_len = 0 self._cursor_at_newline = True def display_message(self): """ Writes current progress to the output stream """ # When we make a display, ensure need to have a recent time measurement if self._curr_measurement.idx != self._iter_idx: self._measure_time() before, msg, after = self.format_message_parts() msg_len = len(msg) # TODO account for unicode if self.clearline: padding = self._prev_msg_len - msg_len if padding > 0: msg = msg + ' ' * padding self._write(''.join([before, msg, after])) self._prev_msg_len = msg_len self._tryflush() self._cursor_at_newline = not self.clearline self._display_measurement = self._curr_measurement self._display_timedelta = 0 self._force_next_display = False def _tryflush(self): """ flush to the internal stream """ try: # flush sometimes causes issues in IPython notebooks self.stream.flush() except IOError: # nocover pass def _write(self, msg): """ write to the internal stream Args: msg (str): message to write """ self.stream.write(msg) ubelt-1.3.7/ubelt/progiter.pyi000066400000000000000000000103771472470106000163370ustar00rootroot00000000000000from typing import Iterable from _typeshed import SupportsWrite from typing import List import typing from typing import Callable from typing import Type from types import TracebackType from typing import Tuple from _typeshed import Incomplete from typing import NamedTuple default_timer: Callable class Measurement(NamedTuple): idx: Incomplete time: Incomplete CLEAR_BEFORE: str AT_END: str class _TQDMCompat: @classmethod def write(cls, s: str, file: None | SupportsWrite = None, end: str = '\n', nolock: bool = False) -> None: ... desc: str | None def set_description(self, desc: str | None = None, refresh: bool = ...) -> None: ... def set_description_str(self, desc: str | None = None, refresh: bool = ...) -> None: ... def update(self, n: int = ...) -> None: ... def close(self) -> None: ... def unpause(self) -> None: ... def moveto(self, n) -> None: ... def clear(self, nolock: bool = ...) -> None: ... def refresh(self, nolock: bool = ...) -> None: ... @property def pos(self) -> int: ... @classmethod def set_lock(cls, lock) -> None: ... @classmethod def get_lock(cls) -> None: ... def set_postfix_dict(self, ordered_dict: None | dict = None, refresh: bool = True, **kwargs) -> None: ... def set_postfix(self, postfix, **kwargs) -> None: ... def set_postfix_str(self, s: str = ..., refresh: bool = ...) -> None: ... class _BackwardsCompat: @property def length(self): ... @property def label(self): ... def start(self): ... def stop(self): ... 
class ProgIter(_TQDMCompat, _BackwardsCompat):
    stream: typing.IO
    iterable: List | Iterable
    desc: str | None
    total: int | None
    freq: int
    initial: int
    enabled: bool
    adjust: bool
    show_percent: bool
    show_times: bool
    show_rate: bool
    show_eta: bool
    show_total: bool
    show_wall: bool
    eta_window: int
    time_thresh: float
    clearline: bool
    chunksize: int | None
    rel_adjust_limit: float
    extra: str
    started: bool
    finished: bool
    homogeneous: bool | str

    def __init__(self,
                 iterable: List | Iterable | None = None,
                 desc: str | None = None,
                 total: int | None = None,
                 freq: int = 1,
                 initial: int = 0,
                 eta_window: int = 64,
                 clearline: bool = True,
                 adjust: bool = True,
                 time_thresh: float = 2.0,
                 show_percent: bool = True,
                 show_times: bool = True,
                 show_rate: bool = True,
                 show_eta: bool = True,
                 show_total: bool = True,
                 show_wall: bool = False,
                 enabled: bool = True,
                 verbose: int | None = None,
                 stream: typing.IO | None = None,
                 chunksize: int | None = None,
                 rel_adjust_limit: float = 4.0,
                 homogeneous: bool | str = 'auto',
                 timer: Callable | None = None,
                 **kwargs) -> None: ...

    def __call__(self, iterable: Iterable) -> Iterable: ...

    def __enter__(self) -> ProgIter: ...

    def __exit__(self, ex_type: Type[BaseException] | None,
                 ex_value: BaseException | None,
                 ex_traceback: TracebackType | None) -> bool | None: ...

    def __iter__(self) -> Iterable: ...

    def set_extra(self, extra: str | Callable) -> None: ...

    def begin(self) -> ProgIter: ...

    def end(self) -> None: ...

    def step(self, inc: int = 1, force: bool = False) -> None: ...

    def format_message(self) -> str: ...

    def format_message_parts(self) -> Tuple[str, str, str]: ...

    def ensure_newline(self) -> None: ...

    def display_message(self) -> None: ...
ubelt-1.3.7/ubelt/py.typed000066400000000000000000000000001472470106000154360ustar00rootroot00000000000000ubelt-1.3.7/ubelt/util_arg.py000066400000000000000000000130641472470106000161350ustar00rootroot00000000000000"""
Simple ways to interact with the commandline without defining a full blown
CLI. These are usually used for developer hacks. Any real interface should
probably be defined using :py:mod:`argparse`, :py:mod:`click`, or
:py:mod:`scriptconfig`. Be sure to ignore unknown arguments if you use them in
conjunction with these functions.

The :func:`argflag` function checks if a boolean ``--flag`` style CLI argument
exists on the command line.

The :func:`argval` function returns the value of a ``--key=value`` style CLI
argument.
"""
import sys
from ubelt import util_const

__all__ = ['argval', 'argflag']


def argval(key, default=util_const.NoParam, argv=None):
    """
    Get the value of a keyword argument specified on the command line.

    Values can be specified as ``<key> <value>`` or ``<key>=<value>``

    The use-case for this function is to add a hidden command line feature
    where a developer can pass in a special value. This can be used to
    prototype a command line interface, provide an easter egg, or add some
    other command line parsing that won't be exposed in CLI help docs.

    Args:
        key (str | Tuple[str, ...]): string or tuple of strings. Each key
            should be prefixed with two hyphens (i.e. ``--``)

        default (Any | NoParamType): a value to return if not specified.

        argv (List[str] | None): The command line arguments to parse. If
            unspecified, uses ``sys.argv`` directly.

    Returns:
        str | Any: value - the value specified after the key. If the key is
            specified multiple times, then the first value is returned.

    TODO:
        - [x] Can we handle the case where the value is a list of long paths? - No
        - [ ] Should we default the first or last specified instance of the flag.
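
    Note:
        Both ``--key value`` and ``--key=value`` forms are supported. For the
        ``=`` form, everything after the first ``=`` is returned, so values
        may themselves contain ``=`` characters (see the GH Issue #41 example
        below).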
    CommandLine:
        xdoctest -m ubelt.util_arg argval:0
        xdoctest -m ubelt.util_arg argval:0 --devval
        xdoctest -m ubelt.util_arg argval:0 --devval=1
        xdoctest -m ubelt.util_arg argval:0 --devval=2
        xdoctest -m ubelt.util_arg argval:0 --devval 3
        xdoctest -m ubelt.util_arg argval:0 --devval "4 5 6"

    Example:
        >>> # Everyday usage of this function might look like this, where
        >>> import ubelt as ub
        >>> # we grab a key/value pair if it is given on the command line
        >>> value = ub.argval('--devval', default='1')
        >>> print('Checking if the hidden CLI key/value pair is given')
        >>> if value != '1':
        >>>     print(ub.color_text(
        >>>         'A hidden developer secret: {!r}'.format(value), 'yellow'))
        >>> print('Pass the hidden CLI key/value pair to see a secret message')

    Example:
        >>> import ubelt as ub
        >>> argv = ['--ans', '42', '--quest=the grail', '--ans=6', '--bad']
        >>> assert ub.argval('--spam', argv=argv) == ub.NoParam
        >>> assert ub.argval('--quest', argv=argv) == 'the grail'
        >>> assert ub.argval('--ans', argv=argv) == '42'
        >>> assert ub.argval('--bad', argv=argv) == ub.NoParam
        >>> assert ub.argval(('--bad', '--bar'), argv=argv) == ub.NoParam

    Example:
        >>> # Test fix for GH Issue #41
        >>> import ubelt as ub
        >>> argv = ['--path=/path/with/k=3']
        >>> ub.argval('--path', argv=argv) == '/path/with/k=3'
    """
    if argv is None:  # nocover
        argv = sys.argv

    keys = [key] if isinstance(key, str) else key
    n_max = len(argv) - 1
    for argx, item in enumerate(argv):
        for key_ in keys:
            if item == key_:
                if argx < n_max:
                    value = argv[argx + 1]
                    return value
            elif item.startswith(key_ + '='):
                value = '='.join(item.split('=')[1:])
                return value
    value = default
    return value


def argflag(key, argv=None):
    """
    Determines if a key is specified on the command line.

    This is a functional alternative to ``key in sys.argv``, but it also
    allows for multiple aliases of the same flag to be specified.

    Args:
        key (str | Tuple[str, ...]): string or tuple of strings. Each key
            should be prefixed with two hyphens (i.e. ``--``).

        argv (List[str] | None): The command line arguments to parse. If
            unspecified, uses ``sys.argv`` directly.

    Returns:
        bool: flag - True if the key (or any of the keys) was specified

    CommandLine:
        xdoctest -m ubelt.util_arg argflag:0
        xdoctest -m ubelt.util_arg argflag:0 --devflag
        xdoctest -m ubelt.util_arg argflag:0 -df
        xdoctest -m ubelt.util_arg argflag:0 --devflag2
        xdoctest -m ubelt.util_arg argflag:0 -df2

    Example:
        >>> # Everyday usage of this function might look like this
        >>> import ubelt as ub
        >>> # Check if either of these strings are in sys.argv
        >>> flag = ub.argflag(('-df', '--devflag'))
        >>> if flag:
        >>>     print(ub.color_text(
        >>>         'A hidden developer flag was given!', 'blue'))
        >>> print('Pass the hidden CLI flag to see a secret message')

    Example:
        >>> import ubelt as ub
        >>> argv = ['--spam', '--eggs', 'foo']
        >>> assert ub.argflag('--eggs', argv=argv) is True
        >>> assert ub.argflag('--ans', argv=argv) is False
        >>> assert ub.argflag('foo', argv=argv) is True
        >>> assert ub.argflag(('bar', '--spam'), argv=argv) is True
    """
    if argv is None:  # nocover
        argv = sys.argv
    keys = [key] if isinstance(key, str) else key
    flag = any(k in argv for k in keys)
    return flag
ubelt-1.3.7/ubelt/util_arg.pyi000066400000000000000000000005631472470106000163060ustar00rootroot00000000000000from typing import Tuple
from ubelt.util_const import NoParamType
from typing import List
from typing import TypeVar

T = TypeVar("T")


def argval(key: str | Tuple[str, ...],
           default: T | NoParamType = ...,
           argv: List[str] | None = None) -> str | T: ...
def argflag(key: str | Tuple[str, ...],
            argv: List[str] | None = None) -> bool: ...
ubelt-1.3.7/ubelt/util_cache.py000066400000000000000000001475031472470106000164330ustar00rootroot00000000000000"""
This module exposes :class:`Cacher` and :class:`CacheStamp` classes, which
provide a simple API for on-disk caching.

The :class:`Cacher` class is the simplest and most direct method of caching.
In fact, it only requires four lines of boilerplate, which is the smallest
general and robust way that I (Jon Crall) have achieved, and I don't think
it's possible to do better. These four lines implement the following necessary
and sufficient steps for general robust on-disk caching.

1. Defining the cache dependencies
2. Checking if the cache missed
3. Loading the cache on a hit
4. Executing the process and saving the result on a miss.

The following example illustrates these four points.

Example:
    >>> import ubelt as ub
    >>> # Define a cache name and dependencies (which is fed to `ub.hash_data`)
    >>> cacher = ub.Cacher('name', depends='set-of-deps')  # boilerplate:1
    >>> # Calling tryload will return your data on a hit and None on a miss
    >>> data = cacher.tryload(on_error='clear')  # boilerplate:2
    >>> # Check if you need to recompute your data
    >>> if data is None:  # boilerplate:3
    >>>     # Your code to recompute data goes here (this is not boilerplate).
    >>>     data = 'mydata'
    >>>     # Cache the computation result (via pickle)
    >>>     cacher.save(data)  # boilerplate:4

Surprisingly this uses just as many boilerplate lines as a decorator style
cacher, but it is much more extensible. It is possible to use :class:`Cacher`
in more sophisticated ways (e.g. metadata), but the simple in-line use is
often easier and cleaner. The following example illustrates this:

Example:
    >>> import ubelt as ub
    >>> @ub.Cacher('name', depends='set-of-deps')  # boilerplate:1
    >>> def func():  # boilerplate:2
    >>>     data = 'mydata'
    >>>     return data  # boilerplate:3
    >>> data = func()  # boilerplate:4
    >>> cacher = ub.Cacher('name', depends='set-of-deps')  # boilerplate:1
    >>> data = cacher.tryload(on_error='clear')  # boilerplate:2
    >>> if data is None:  # boilerplate:3
    >>>     data = 'mydata'
    >>>     cacher.save(data)  # boilerplate:4

While the above two are equivalent, the second version provides a simpler
traceback, explicit procedures, and makes it easier to use breakpoint
debugging (because there is no closure scope).

While :class:`Cacher` is used to store direct results of in-line code in a
pickle format, the :class:`CacheStamp` object is used to cache processes that
produce on-disk side effects other than the main return value. For instance,
consider the following example:

Example:
    >>> import ubelt as ub
    >>> def compute_many_files(dpath):
    ...     for i in range(10):
    ...         fpath = '{}/file{}.txt'.format(dpath, i)
    ...         with open(fpath, 'w') as file:
    ...             file.write('foo' + str(i))
    >>> dpath = ub.Path.appdir('ubelt/demo/cache').delete().ensuredir()
    >>> # You must specify a directory, unlike in Cacher where it is optional
    >>> self = ub.CacheStamp('name', dpath=dpath, depends={'a': 1, 'b': 2})
    >>> if self.expired():
    >>>     compute_many_files(dpath)
    >>>     # Instead of caching the whole process, we just write a file
    >>>     # that signals the process has been done.
    >>>     self.renew()
    >>> assert not self.expired()

The CacheStamp is lightweight in that it simply marks that a process has been
completed, but the job of saving / loading the actual data is left to the
developer. The ``expired`` method checks if the stamp exists, and ``renew``
writes the stamp to disk.
In ubelt version 1.1.0, several additional features were added to CacheStamp.
In addition to specifying parameters via ``depends``, it is also possible for
CacheStamp to determine if an associated file has been modified. To do this,
the paths of the files must be known a-priori and passed to CacheStamp via the
``product`` argument. This will allow the CacheStamp to detect if the files
have been modified since the ``renew`` method was called. It does this by
remembering the size, modified time, and checksum of each file. If the
expected hash of the product is known in advance, it is also possible to
specify the expected ``hash_prefix`` of each product. In this case, ``renew``
will raise an Exception if this specified hash prefix does not match the
files on disk.

Lastly, it is possible to specify an expiration time via ``expires``, after
which the CacheStamp will always be marked as invalid. This is now the
mechanism via which the cache in :func:`ubelt.util_download.grabdata` works.

Example:
    >>> import ubelt as ub
    >>> dpath = ub.Path.appdir('ubelt/demo/cache').delete().ensuredir()
    >>> params = {'a': 1, 'b': 2}
    >>> expected_fpaths = [dpath / 'file{}.txt'.format(i) for i in range(2)]
    >>> hash_prefix = ['a7a8a91659601590e17191301dc1',
    ...                '55ae75d991c770d8f3ef07cbfde1']
    >>> self = ub.CacheStamp('name', dpath=dpath, depends=params,
    >>>                      hash_prefix=hash_prefix, hasher='sha256',
    >>>                      product=expected_fpaths, expires='2101-01-01T000000Z')
    >>> if self.expired():
    >>>     for fpath in expected_fpaths:
    ...         fpath.write_text(fpath.name)
    >>>     self.renew()
    >>> # modifying or removing the file will cause the stamp to expire
    >>> expected_fpaths[0].write_text('corrupted')
    >>> assert self.expired()

RelatedWork:
    https://github.com/shaypal5/cachier
"""
import os
from os.path import join, normpath, basename, exists


class Cacher:
    """
    Saves data to disk and reloads it based on specified dependencies.

    Cacher uses pickle to save/load data to/from disk. Dependencies of the
    cached process can be specified, which ensures the cached data is
    recomputed if the dependencies change. If the location of the cache is not
    specified, it will default to the system user's cache directory.
    Related:
        .. [JobLibMemory] https://joblib.readthedocs.io/en/stable/memory.html

    Example:
        >>> import ubelt as ub
        >>> depends = 'repr-of-params-that-uniquely-determine-the-process'
        >>> # Create a cacher and try loading the data
        >>> cacher = ub.Cacher('demo_process', depends, verbose=4)
        >>> cacher.clear()
        >>> print(f'cacher.fpath={cacher.fpath}')
        >>> data = cacher.tryload()
        >>> if data is None:
        >>>     # Put expensive functions in if block when cacher misses
        >>>     myvar1 = 'result of expensive process'
        >>>     myvar2 = 'another result'
        >>>     # Tell the cacher to write at the end of the if block
        >>>     # It is idiomatic to put results in an object named data
        >>>     data = myvar1, myvar2
        >>>     cacher.save(data)
        >>> # Last part of the Cacher pattern is to unpack the data object
        >>> myvar1, myvar2 = data
        >>> #
        >>> # If we know the data exists, we can also simply call load
        >>> data = cacher.tryload()

    Example:
        >>> # The previous example can be shortened if only a single value
        >>> from ubelt.util_cache import Cacher
        >>> depends = 'repr-of-params-that-uniquely-determine-the-process'
        >>> # Create a cacher and try loading the data
        >>> cacher = Cacher('demo_process', depends)
        >>> myvar = cacher.tryload()
        >>> if myvar is None:
        >>>     myvar = ('result of expensive process', 'another result')
        >>>     cacher.save(myvar)
        >>> assert cacher.exists(), 'should now exist'
    """
    VERBOSE = 1  # default verbosity
    FORCE_DISABLE = False  # global scope override

    def __init__(self, fname, depends=None, dpath=None, appname='ubelt',
                 ext='.pkl', meta=None, verbose=None, enabled=True, log=None,
                 hasher='sha1', protocol=-1, cfgstr=None, backend='auto'):
        """
        Args:
            fname (str):
                A file name. This is the prefix that will be used by the
                cache. It will always be used as-is.

            depends (str | List[str] | None):
                Indicate dependencies of this cache. If the dependencies
                change, then the cache is recomputed. New in version 0.8.9,
                replaces ``cfgstr``.

            dpath (str | PathLike | None):
                Specifies where to save the cache. If unspecified, Cacher
                defaults to an application cache dir as given by appname. See
                :func:`ub.get_app_cache_dir` for more details.

            appname (str):
                Application name. Specifies a folder in the application cache
                directory where to cache the data if ``dpath`` is not
                specified. Defaults to 'ubelt'.

            ext (str):
                File extension for the cache format. Can be ``'.pkl'`` or
                ``'.json'``. Defaults to ``'.pkl'``.

            meta (object | None):
                Metadata that is also saved with the ``cfgstr``. This can be
                useful to indicate how the ``cfgstr`` was constructed.
                Note: this is a candidate for deprecation.

            verbose (int):
                Level of verbosity. Can be 1, 2 or 3. Defaults to 1.

            enabled (bool):
                If set to False, then the load and save methods will do
                nothing. Defaults to True.

            log (Callable[[str], Any]):
                Overloads the print function. Useful for sending output to
                loggers (e.g. logging.info, tqdm.tqdm.write, ...)

            hasher (str):
                Type of hashing algorithm to use if ``cfgstr`` needs to be
                condensed to less than 49 characters. Defaults to sha1.

            protocol (int):
                Protocol version used by pickle. Defaults to the -1 which is
                the latest protocol.

            backend (str):
                Set to either ``'pickle'`` or ``'json'`` to force backend.
                Defaults to auto which chooses one based on the extension.

            cfgstr (str | None):
                Deprecated in favor of ``depends``.
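
        The following doctest sketches how ``ext`` drives backend selection
        when ``backend='auto'`` (the default); the cache name
        ``'demo_backend'`` is just an illustrative placeholder:

        Example:
            >>> from ubelt.util_cache import Cacher
            >>> # '.pkl' (the default ext) maps to the pickle backend
            >>> assert Cacher('demo_backend', depends='v1').backend == 'pickle'
            >>> # '.json' maps to the json backend
            >>> assert Cacher('demo_backend', depends='v1', ext='.json').backend == 'json'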
""" if depends is None: depends = cfgstr if cfgstr is not None: # nocover from ubelt import schedule_deprecation schedule_deprecation( modname='ubelt', migration='Use depends instead', name='cfgstr', type='Cacher class arg', deprecate='1.1.0', error='1.3.0', remove='1.4.0', ) depends = cfgstr if verbose is None: verbose = self.VERBOSE if dpath is None: # pragma: no branch from ubelt.util_platform import platform_cache_dir import pathlib cache_dpath = pathlib.Path(platform_cache_dir()) dpath = cache_dpath / (appname or 'ubelt') dpath.mkdir(parents=True, exist_ok=True) # from ubelt.util_path import Path # dpath = os.fspath(Path.appdir(appname, type='cache')) if backend == 'auto': if ext == '.pkl': backend = 'pickle' elif ext == '.json': backend = 'json' else: backend = 'pickle' else: if backend not in {'json', 'pickel'}: raise ValueError(backend) self.dpath = dpath self.fname = fname self.depends = depends self.cfgstr = cfgstr self.verbose = verbose self.ext = ext self.meta = meta self.enabled = enabled and not self.FORCE_DISABLE self.protocol = protocol self.hasher = hasher self.log = print if log is None else log self.backend = backend if len(self.ext) > 0 and self.ext[0] != '.': raise ValueError('Please be explicit and use a dot in ext') def _rectify_cfgstr(self, cfgstr=None): if cfgstr is not None: # nocover from ubelt import schedule_deprecation schedule_deprecation( modname='ubelt', migration=( 'In general, you should not need to specify a custom ' 'cfgstr after the Cacher has been created. ' 'If you must, then you can modify the ``depends`` class ' 'attribute instead, but in general it is recommend to ' 'avoid this.' ), name='cfgstr', type='Cacher method arg', deprecate='1.1.0', error='1.3.0', remove='1.4.0', ) cfgstr = self.cfgstr if cfgstr is None else cfgstr if cfgstr is None and self.depends is not None: # lazy hashing of depends data into cfgstr if isinstance(self.depends, str): self.cfgstr = self.depends else: from ubelt.util_hash import hash_data self.cfgstr = hash_data(self.depends) cfgstr = self.cfgstr if cfgstr is None and self.enabled: cfgstr = '' if self.fname is None: raise AssertionError('no fname specified in Cacher') if self.dpath is None: raise AssertionError('no dpath specified in Cacher') return cfgstr def _condense_cfgstr(self, cfgstr=None): cfgstr = self._rectify_cfgstr(cfgstr) # The 49 char maxlen is just long enough for an 8 char name, an 1 char # underscore, and a 40 char sha1 hash. max_len = 49 if len(cfgstr) > max_len: from ubelt.util_hash import hash_data condensed = hash_data(cfgstr, hasher=self.hasher, base='hex') condensed = condensed[0:max_len] else: condensed = cfgstr return condensed @property def fpath(self) -> os.PathLike: from ubelt.util_path import Path return Path(self.get_fpath()) def get_fpath(self, cfgstr=None): """ Reports the filepath that the cacher will use. It will attempt to use '{fname}_{cfgstr}{ext}' unless that is too long. Then cfgstr will be hashed. 
        Args:
            cfgstr (str | None): overrides the instance-level cfgstr

        Returns:
            str | PathLike

        Example:
            >>> # xdoctest: +REQUIRES(module:pytest)
            >>> from ubelt.util_cache import Cacher
            >>> import pytest
            >>> #with pytest.warns(UserWarning):
            >>> if 1:  # we no longer warn here
            >>>     cacher = Cacher('test_cacher1')
            >>>     cacher.get_fpath()
            >>>     self = Cacher('test_cacher2', depends='cfg1')
            >>>     self.get_fpath()
            >>>     self = Cacher('test_cacher3', depends='cfg1' * 32)
            >>>     self.get_fpath()
        """
        condensed = self._condense_cfgstr(cfgstr)
        fname_cfgstr = '{}_{}{}'.format(self.fname, condensed, self.ext)
        fpath = join(self.dpath, fname_cfgstr)
        fpath = normpath(fpath)
        return fpath

    def exists(self, cfgstr=None):
        """
        Check to see if the cache exists

        Args:
            cfgstr (str | None): overrides the instance-level cfgstr

        Returns:
            bool
        """
        return exists(self.get_fpath(cfgstr=cfgstr))

    def existing_versions(self):
        """
        Returns data with different cfgstr values that were previously computed
        with this cacher.

        Yields:
            str: paths to cached files corresponding to this cacher

        Example:
            >>> # Ensure that some data exists
            >>> import ubelt as ub
            >>> dpath = ub.Path.appdir(
            >>>     'ubelt/tests/util_cache',
            >>>     'test-existing-versions').delete().ensuredir()
            >>> cacher = ub.Cacher('versioned_data_v2', depends='1', dpath=dpath)
            >>> cacher.ensure(lambda: 'data1')
            >>> known_fpaths = set()
            >>> known_fpaths.add(cacher.get_fpath())
            >>> cacher = ub.Cacher('versioned_data_v2', depends='2', dpath=dpath)
            >>> cacher.ensure(lambda: 'data2')
            >>> known_fpaths.add(cacher.get_fpath())
            >>> # List previously computed configs for this type
            >>> from os.path import basename
            >>> cacher = ub.Cacher('versioned_data_v2', depends='2', dpath=dpath)
            >>> exist_fpaths = set(cacher.existing_versions())
            >>> exist_fnames = list(map(basename, exist_fpaths))
            >>> print('exist_fnames = {!r}'.format(exist_fnames))
            >>> print('exist_fpaths = {!r}'.format(exist_fpaths))
            >>> print('known_fpaths={!r}'.format(known_fpaths))
            >>> assert exist_fpaths.issubset(known_fpaths)
        """
        import glob
        pattern = join(self.dpath, self.fname + '_*' + self.ext)
        for fname in glob.iglob(pattern):
            data_fpath = join(self.dpath, fname)
            yield data_fpath

    def clear(self, cfgstr=None):
        """
        Removes the saved cache and metadata from disk

        Args:
            cfgstr (str | None): overrides the instance-level cfgstr
        """
        data_fpath = self.get_fpath(cfgstr)
        if self.verbose > 0:
            self.log('[cacher] clear cache')
        if exists(data_fpath):
            if self.verbose > 0:
                self.log('[cacher] removing {}'.format(data_fpath))
            os.remove(data_fpath)

            # Remove the metadata if it exists
            meta_fpath = data_fpath + '.meta'
            if exists(meta_fpath):
                os.remove(meta_fpath)
        else:
            if self.verbose > 0:
                self.log('[cacher] ... nothing to clear')

    def tryload(self, cfgstr=None, on_error='raise'):
        """
        Like load, but returns None if the load fails due to a cache miss.

        Args:
            cfgstr (str | None): overrides the instance-level cfgstr

            on_error (str): How to handle non-io errors. Either 'raise', which
                re-raises the exception, or 'clear' which deletes the cache
                and returns None. Defaults to 'raise'.

        Returns:
            None | object: the cached data if it exists, otherwise returns
                None
        """
        if self.enabled:
            try:
                if self.verbose > 1:
                    self.log('[cacher] tryload fname={}'.format(self.fname))
                return self.load(cfgstr)
            except IOError:
                if self.verbose > 0:
                    self.log('[cacher] ... {} cache miss'.format(self.fname))
            except Exception:
                if self.verbose > 0:
                    self.log('[cacher] ... 
failed to load') if on_error == 'raise': raise elif on_error == 'clear': self.clear(cfgstr) return None else: raise KeyError('Unknown method on_error={}'.format( on_error)) else: if self.verbose > 1: self.log('[cacher] ... cache disabled: fname={}'.format( self.fname)) return None def load(self, cfgstr=None): """ Load the data cached and raise an error if something goes wrong. Args: cfgstr (str | None): overrides the instance-level cfgstr Returns: object: the cached data Raises: IOError - if the data is unable to be loaded. This could be due to a cache miss or because the cache is disabled. Example: >>> from ubelt.util_cache import * # NOQA >>> # Setting the cacher as enabled=False turns it off >>> cacher = Cacher('test_disabled_load', '', enabled=True, >>> appname='ubelt/tests/util_cache') >>> cacher.save('data') >>> assert cacher.load() == 'data' >>> cacher.enabled = False >>> assert cacher.tryload() is None """ cfgstr_ = self._rectify_cfgstr(cfgstr) dpath = self.dpath fname = self.fname verbose = self.verbose if not self.enabled: if verbose > 1: self.log('[cacher] ... cache disabled: fname={}'.format( self.fname)) raise IOError(3, 'Cache Loading Is Disabled') data_fpath = self.get_fpath(cfgstr=cfgstr) if not exists(data_fpath): if verbose > 2: self.log('[cacher] ... cache does not exist: ' 'dpath={} fname={} cfgstr={}'.format( basename(dpath), fname, cfgstr_)) raise IOError(2, 'No such file or directory: {!r}'.format(data_fpath)) else: if verbose > 3: sizestr = _byte_str(os.stat(data_fpath).st_size) self.log('[cacher] ... cache exists: ' 'dpath={} fname={} cfgstr={}, size={}'.format( basename(dpath), fname, cfgstr_, sizestr)) try: data = self._backend_load(data_fpath) except Exception as ex: if verbose > 0: self.log('CORRUPTED? fpath = {!r}'.format(data_fpath)) if verbose > 1: self.log('[cacher] ... CORRUPTED? dpath={} cfgstr={}'.format( basename(dpath), cfgstr_)) if isinstance(ex, (EOFError, IOError, ImportError)): raise IOError(str(ex)) else: if verbose > 1: self.log('[cacher] ... unknown reason for exception') raise else: if self.verbose > 2: self.log('[cacher] ... {} cache hit'.format(self.fname)) elif verbose > 1: self.log('[cacher] ... cache hit') return data def save(self, data, cfgstr=None): """ Writes data to path specified by ``self.fpath``. Metadata containing information about the cache will also be appended to an adjacent file with the `.meta` suffix. Args: data (object): arbitrary pickleable object to be cached cfgstr (str | None): overrides the instance-level cfgstr Example: >>> from ubelt.util_cache import * # NOQA >>> # Normal functioning >>> depends = 'long-cfg' * 32 >>> cacher = Cacher('test_enabled_save', depends=depends, >>> appname='ubelt/tests/util_cache') >>> cacher.save('data') >>> assert exists(cacher.get_fpath()), 'should be enabled' >>> assert exists(cacher.get_fpath() + '.meta'), 'missing metadata' >>> # Setting the cacher as enabled=False turns it off >>> cacher2 = Cacher('test_disabled_save', 'params', enabled=False, >>> appname='ubelt/tests/util_cache') >>> cacher2.save('data') >>> assert not exists(cacher2.get_fpath()), 'should be disabled' """ from ubelt.util_path import ensuredir from ubelt.util_time import timestamp if not self.enabled: return if self.verbose > 0: self.log('[cacher] ... 
{} cache save'.format(self.fname)) cfgstr_ = self._rectify_cfgstr(cfgstr) condensed = self._condense_cfgstr(cfgstr) # Make sure the cache directory exists ensuredir(self.dpath) data_fpath = self.get_fpath(cfgstr=cfgstr) meta_fpath = data_fpath + '.meta' # Also save metadata file to reconstruct hashing # This may be deprecated in the future. with open(meta_fpath, 'a') as file_: # TODO: maybe append this in json or YML format? file_.write('\n\nsaving {}\n'.format(timestamp())) file_.write(self.fname + '\n') file_.write(condensed + '\n') file_.write(cfgstr_ + '\n') file_.write(str(self.meta) + '\n') self._backend_dump(data_fpath, data) if self.verbose > 3: sizestr = _byte_str(os.stat(data_fpath).st_size) self.log('[cacher] ... finish save, size={}'.format(sizestr)) def _backend_load(self, data_fpath): """ Example: >>> import ubelt as ub >>> cacher = ub.Cacher('test_other_backend', depends=['a'], ext='.json') >>> cacher.save(['data']) >>> cacher.tryload() >>> import ubelt as ub >>> cacher = ub.Cacher('test_other_backend2', depends=['a'], ext='.yaml', backend='json') >>> cacher.save({'data': [1, 2, 3]}) >>> cacher.tryload() >>> import pytest >>> with pytest.raises(ValueError): >>> ub.Cacher('test_other_backend2', depends=['a'], ext='.yaml', backend='does-not-exist') >>> cacher = ub.Cacher('test_other_backend2', depends=['a'], ext='.really-a-pickle', backend='auto') >>> assert cacher.backend == 'pickle', 'should be default' """ if self.backend == 'pickle': import pickle with open(data_fpath, 'rb') as file_: data = pickle.load(file_) elif self.backend == 'json': import json with open(data_fpath, 'r') as file_: data = json.load(file_) else: raise NotImplementedError('self.backend = {}'.format(self.backend)) return data def _backend_dump(self, data_fpath, data): if self.backend == 'pickle': import pickle with open(data_fpath, 'wb') as file_: pickle.dump(data, file_, protocol=self.protocol) elif self.backend == 'json': import json with open(data_fpath, 'w') as file_: json.dump(data, file_) else: raise NotImplementedError('self.backend = {}'.format(self.backend)) return data def ensure(self, func, *args, **kwargs): """ Wraps around a function. A cfgstr must be stored in the base cacher. Args: func (Callable): function that will compute data on cache miss *args: passed to func **kwargs: passed to func Example: >>> from ubelt.util_cache import * # NOQA >>> def func(): >>> return 'expensive result' >>> fname = 'test_cacher_ensure' >>> depends = 'func params' >>> cacher = Cacher(fname, depends=depends) >>> cacher.clear() >>> data1 = cacher.ensure(func) >>> data2 = cacher.ensure(func) >>> assert data1 == 'expensive result' >>> assert data1 == data2 >>> cacher.clear() """ data = self.tryload() if data is None: data = func(*args, **kwargs) self.save(data) return data def __call__(self, func): """ Allows Cacher to be used as a decorator for functions with no arguments. This mode of usage has much less control than others, so it is only recommended for the simplest of cases. Args: func (Callable): function to decorate. Must have no arguments. 
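
        Note:
            The wrapped function is returned with a ``cacher`` attribute that
            points back to this instance, which is how the example below is
            able to call ``func.cacher.clear()``.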
        Example:
            >>> from ubelt.util_cache import *  # NOQA
            >>> @Cacher('demo_cacher_call', depends='foobar')
            >>> def func():
            >>>     return 'expensive result'
            >>> func.cacher.clear()
            >>> assert not func.cacher.exists()
            >>> data = func()
            >>> assert func.cacher.exists()
            >>> func.cacher.clear()
        """
        # Can't return arguments because cfgstr won't take them into account
        def _wrapper():
            data = self.ensure(func)
            return data
        _wrapper.cacher = self
        return _wrapper


class CacheStamp(object):
    """
    Quickly determine if a file-producing computation has been done.

    Check if the computation needs to be redone by calling ``expired``. If the
    stamp is not expired, the user can expect that the results exist and could
    be loaded. If the stamp is expired, the computation should be redone.
    After the result is updated, the user calls ``renew``, which writes a
    "stamp" file to disk that marks that the procedure has been done.

    There are several ways to control how a stamp expires. At a bare minimum,
    removing the stamp file will force expiration. However, in this
    circumstance CacheStamp only knows that something has been done, but it
    doesn't have any information about what was done, so in general this is
    not sufficient.

    To achieve more robust expiration behavior, the user should specify the
    ``product`` argument, which is a list of file paths that are expected to
    exist whenever the stamp is renewed. When this is specified the CacheStamp
    will expire if any of these products are deleted, their size changes,
    their modified timestamp changes, or their hash (i.e. checksum) changes.
    Note that by setting ``hasher=None``, running and verifying checksums can
    be disabled.

    If the user knows what the hash of the file should be this can be
    specified to prevent renewal of the stamp unless these match the files on
    disk. This can be useful for security purposes.

    The stamp can also be set to expire at a specified time or after a
    specified duration using the ``expires`` argument.

    Notes:
        The size, mtime, and hash mechanism is similar to how Makefile and
        redo caches work.

    Attributes:
        cacher (Cacher): underlying cacher object

    Example:
        >>> import ubelt as ub
        >>> # Stamp the computation of expensive-to-compute.txt
        >>> dpath = ub.Path.appdir('ubelt/tests/cache-stamp')
        >>> dpath.delete().ensuredir()
        >>> product = dpath / 'expensive-to-compute.txt'
        >>> self = ub.CacheStamp('somedata', depends='someconfig', dpath=dpath,
        >>>                      product=product, hasher='sha256')
        >>> self.clear()
        >>> print(f'self.fpath={self.fpath}')
        >>> if self.expired():
        >>>     product.write_text('very expensive')
        >>>     self.renew()
        >>> assert not self.expired()
        >>> # corrupting the output will cause the stamp to expire
        >>> product.write_text('very corrupted')
        >>> assert self.expired()
    """
    def __init__(self, fname, dpath, cfgstr=None, product=None, hasher='sha1',
                 verbose=None, enabled=True, depends=None, meta=None,
                 hash_prefix=None, expires=None, ext='.pkl'):
        """
        Args:
            fname (str):
                Name of the stamp file

            dpath (str | PathLike | None):
                Where to store the cached stamp file

            product (str | PathLike | Sequence[str | PathLike] | None):
                Path or paths that we expect the computation to produce. If
                specified the hash of the paths are stored.

            hasher (str):
                The type of hasher used to compute the file hash of product.
                If None, then we assume the file has not been corrupted or
                changed if the mtime and size are the same. Defaults to sha1.

            verbose (bool | None):
                Passed to internal :class:`ubelt.Cacher` object.
                Defaults to None.

            enabled (bool):
                if False, expired always returns True. Defaults to True.
depends (str | List[str] | None): Indicate dependencies of this cache. If the dependencies change, then the cache is recomputed. New to CacheStamp in version 0.9.2. meta (object | None): Metadata that is also saved as a sidecar file. New to CacheStamp in version 0.9.2. Note: this is a candidate for deprecation. expires (str | int | datetime.datetime | datetime.timedelta | None): If specified, sets an expiration date for the certificate. This can be an absolute datetime or a timedelta offset. If specified as an int, this is interpreted as a time delta in seconds. If specified as a str, this is interpreted as an absolute timestamp. Time delta offsets are coerced to absolute times at "renew" time. hash_prefix (None | str | List[str]): If specified, we verify that these match the hash(s) of the product(s) in the stamp certificate. ext (str): File extension for the cache format. Can be ``'.pkl'`` or ``'.json'``. Defaults to ``'.pkl'``. cfgstr (str | None): DEPRECATED. """ self.cacher = Cacher(fname, cfgstr=cfgstr, dpath=dpath, verbose=verbose, enabled=enabled, depends=depends, meta=meta, ext=ext) self.product = product self.hasher = hasher self.expires = expires self.hash_prefix = hash_prefix # The user can modify these if they want to disable size or mtime # checks for expiration. Not sure if I want to expose it at the # top level API yet or not. self._expire_checks = { 'size': True, 'mtime': True, 'hash': True, } @property def fpath(self): return self.cacher.fpath def clear(self): """ Delete the stamp (the products are untouched) """ return self.cacher.clear() def _get_certificate(self, cfgstr=None): """ Returns the stamp certificate if it exists """ certificate = self.cacher.tryload(cfgstr=cfgstr, on_error='clear') return certificate def _rectify_products(self, product=None): """ puts products in a normalized format Returns: List[Path] """ from ubelt.util_path import Path products = self.product if product is None else product if products is None: return None if not isinstance(products, (list, tuple)): products = [products] products = list(map(Path, products)) return products def _rectify_hash_prefixes(self): """ puts products in a normalized format """ hash_prefixes = self.hash_prefix if hash_prefixes is None: return None if not isinstance(hash_prefixes, (list, tuple)): hash_prefixes = [hash_prefixes] return hash_prefixes def _product_info(self, product=None): """ Compute summary info about each product on disk. 
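
        The resulting dictionary contains ``mtime`` and ``size`` lists (one
        entry per product) along with the ``hasher`` name and a per-product
        ``hash`` list; the hasher and hash entries are None when
        ``self.hasher`` is None.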
""" products = self._rectify_products(product) product_info = {} product_info.update(self._product_file_stats()) if self.hasher is None: hasher_name = None else: if not isinstance(self.hasher, str): # nocover from ubelt import schedule_deprecation schedule_deprecation( modname='ubelt', migration='Pass hasher as a string', name='hasher', type='CacheStamp arg', deprecate='1.1.0', error='1.3.0', remove='1.4.0') hasher_name = self.hasher.name else: hasher_name = self.hasher product_info['hasher'] = hasher_name product_info['hash'] = self._product_file_hash(products) return product_info def _product_file_stats(self, product=None): products = self._rectify_products(product) product_stats = [p.stat() for p in products] product_file_stats = { 'mtime': [stat.st_mtime for stat in product_stats], 'size': [stat.st_size for stat in product_stats] } return product_file_stats def _product_file_hash(self, product=None): if self.hasher is None: product_file_hash = None else: from ubelt.util_hash import hash_file products = self._rectify_products(product) product_file_hash = [ hash_file(p, hasher=self.hasher, base='hex') for p in products ] return product_file_hash def expired(self, cfgstr=None, product=None): """ Check to see if a previously existing stamp is still valid, if the expected result of that computation still exists, and if all other expiration criteria are met. Args: cfgstr (Any): DEPRECATED product (Any): DEPRECATED Returns: bool | str: True(-thy) if the stamp is invalid, expired, or does not exist. When the stamp is expired, the reason for expiration is returned as a string. If the stamp is still valid, False is returned. Example: >>> import ubelt as ub >>> import time >>> import os >>> # Stamp the computation of expensive-to-compute.txt >>> dpath = ub.Path.appdir('ubelt/tests/cache-stamp-expired') >>> dpath.delete().ensuredir() >>> products = [ >>> dpath / 'product1.txt', >>> dpath / 'product2.txt', >>> ] >>> self = ub.CacheStamp('myname', depends='myconfig', dpath=dpath, >>> product=products, hasher='sha256', >>> expires=0) >>> if self.expired(): >>> for fpath in products: >>> fpath.write_text(fpath.name) >>> self.renew() >>> fpath = products[0] >>> # Because we set the expiration delta to 0, we should already be expired >>> assert self.expired() == 'expired_cert' >>> # Disable the expiration date, renew and we should be ok >>> self.expires = None >>> self.renew() >>> assert not self.expired() >>> # Modify the mtime to cause expiration >>> orig_atime = fpath.stat().st_atime >>> orig_mtime = fpath.stat().st_mtime >>> os.utime(fpath, (orig_atime, orig_mtime + 200)) >>> assert self.expired() == 'mtime_diff' >>> self.renew() >>> assert not self.expired() >>> # rewriting the file will cause the size constraint to fail >>> # even if we hack the mtime to be the same >>> orig_atime = fpath.stat().st_atime >>> orig_mtime = fpath.stat().st_mtime >>> fpath.write_text('corrupted') >>> os.utime(fpath, (orig_atime, orig_mtime)) >>> assert self.expired() == 'size_diff' >>> self.renew() >>> assert not self.expired() >>> # Force a situation where the hash is the only thing >>> # that saves us, write a different file with the same >>> # size and mtime. 
            >>> orig_atime = fpath.stat().st_atime
            >>> orig_mtime = fpath.stat().st_mtime
            >>> fpath.write_text('corrApted')
            >>> os.utime(fpath, (orig_atime, orig_mtime))
            >>> assert self.expired() == 'hash_diff'
            >>> # Test that a wrong hash prefix causes expiration
            >>> certificate = self.renew()
            >>> self.hash_prefix = certificate['hash']
            >>> self.expired()
            >>> self.hash_prefix = ['bad', 'hashes']
            >>> self.expired()
            >>> # A bad hash will not allow us to renew
            >>> import pytest
            >>> with pytest.raises(RuntimeError):
            ...     self.renew()
        """
        if cfgstr is not None:  # nocover
            from ubelt import schedule_deprecation
            schedule_deprecation(
                modname='ubelt',
                migration='Do not pass cfgstr to expired. Use the class depends arg',
                name='cfgstr', type='CacheStamp.expired arg',
                deprecate='1.1.0', error='1.3.0', remove='1.4.0',
            )
        if product is not None:  # nocover
            from ubelt import schedule_deprecation
            schedule_deprecation(
                modname='ubelt',
                migration='Do not pass product to expired. Use the class product arg',
                name='product', type='CacheStamp.expired arg',
                deprecate='1.1.0', error='1.3.0', remove='1.4.0',
            )
        if not self.cacher.enabled:
            return 'disabled'
        certificate = self._get_certificate(cfgstr=cfgstr)
        if certificate is None:
            # We don't have a certificate, so we are expired
            err = 'no_cert'
            if self.cacher.verbose > 0:  # pragma: nobranch
                print('[cacher] stamp expired {}'.format(err))
            return err

        expires = certificate.get('expires', None)
        if expires is not None:
            from ubelt.util_time import timeparse
            # Need to add in the local timezone to compare against the cert.
            now = _localnow()
            expires_abs = timeparse(expires)
            if now >= expires_abs:
                # We are expired
                err = 'expired_cert'
                if self.cacher.verbose > 0:  # pragma: nobranch
                    print('[cacher] stamp expired {}'.format(err))
                return err

        products = self._rectify_products(product)
        if products is None:
            # We don't have a product to check, so assume not expired
            return False
        elif not all(map(exists, products)):
            # We are expired if the expected product does not exist
            err = 'missing_products'
            if self.cacher.verbose > 0:  # pragma: nobranch
                print('[cacher] stamp expired {}'.format(err))
            return err
        else:
            # First test to see if the size or mtime of the files has changed
            # as a potentially quicker check. If sizes or mtimes do not exist
            # in the certificate (old ubelt version), then ignore them.
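            # The overall cascade is ordered from cheapest to most expensive:
            # stat-based size and mtime comparisons first, then the
            # user-supplied hash_prefix against the stored certificate hash,
            # and finally a full re-hash of each product file.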
            product_file_stats = self._product_file_stats()
            sizes = certificate.get('size', None)
            if sizes is not None and self._expire_checks['size']:
                if sizes != product_file_stats['size']:
                    # The sizes are different, we are expired
                    err = 'size_diff'
                    if self.cacher.verbose > 0:  # pragma: nobranch
                        print('[cacher] stamp expired {}'.format(err))
                    return err

            mtimes = certificate.get('mtime', None)
            if mtimes is not None and self._expire_checks['mtime']:
                if mtimes != product_file_stats['mtime']:
                    # The mtimes are different, we are expired
                    err = 'mtime_diff'
                    if self.cacher.verbose > 0:  # pragma: nobranch
                        print('[cacher] stamp expired {}'.format(err))
                    return err

        err = self._check_certificate_hashes(certificate)
        if err:
            return err

        # We are expired if the hash of the existing product data
        # does not match the expected hash in the certificate
        if self._expire_checks['hash']:
            certificate_hash = certificate.get('hash', None)
            product_file_hash = self._product_file_hash(products)
            if product_file_hash != certificate_hash:
                if self.cacher.verbose > 0:
                    print('invalid hash value (expected "{}", got "{}")'.format(
                        product_file_hash, certificate_hash))
                # The hash is different, we are expired
                err = 'hash_diff'
                if self.cacher.verbose > 0:
                    print('[cacher] stamp expired {}'.format(err))
                return err

        # All tests passed, we are not expired
        return False

    def _check_certificate_hashes(self, certificate):
        certificate_hash = certificate.get('hash', None)
        hash_prefixes = self._rectify_hash_prefixes()
        if hash_prefixes is not None:
            for pref_hash, cert_hash in zip(hash_prefixes, certificate_hash):
                if not cert_hash.startswith(pref_hash):
                    if self.cacher.verbose > 0:
                        print('invalid hash prefix value (expected "{}", got "{}")'.format(
                            pref_hash, cert_hash))
                    err = 'hash_prefix_mismatch'
                    return err

    def _expires(self, now=None):
        """
        Returns:
            datetime.datetime: the absolute local time when the stamp expires

        Example:
            >>> import ubelt as ub
            >>> dpath = ub.Path.appdir('ubelt/tests/cache-stamp-expires')
            >>> self = ub.CacheStamp('myname', depends='myconfig', dpath=dpath)
            >>> # Test str input
            >>> self.expires = '2020-01-01T000000Z'
            >>> assert self._expires().replace(tzinfo=None).isoformat() == '2020-01-01T00:00:00'
            >>> # Test datetime input
            >>> dt = ub.timeparse(ub.timestamp())
            >>> self.expires = dt
            >>> assert self._expires() == dt
            >>> # Test None input
            >>> self.expires = None
            >>> assert self._expires() is None
            >>> # Test int input
            >>> self.expires = 0
            >>> assert self._expires(dt) == dt
            >>> self.expires = 10
            >>> assert self._expires(dt) > dt
            >>> self.expires = -10
            >>> assert self._expires(dt) < dt
            >>> # Test timedelta input
            >>> import datetime as datetime_mod
            >>> self.expires = datetime_mod.timedelta(seconds=-10)
            >>> assert self._expires(dt) == dt + self.expires
        """
        # Rectify into a datetime
        from ubelt.util_time import timeparse
        import datetime as datetime_mod
        import numbers
        if now is None:
            now = datetime_mod.datetime.now()
        expires = self.expires
        if expires is None:
            expires_abs = None
        elif isinstance(expires, numbers.Number):
            expires_abs = now + datetime_mod.timedelta(seconds=expires)
        elif isinstance(expires, datetime_mod.timedelta):
            expires_abs = now + expires
        elif isinstance(expires, str):
            expires_abs = timeparse(expires)
        elif isinstance(expires, datetime_mod.datetime):
            expires_abs = expires
        else:
            raise TypeError(
                'expires must be coercible to datetime or timedelta')
        return expires_abs

    def _new_certificate(self, cfgstr=None, product=None):
        """
        Returns:
            dict: certificate information

        Example:
            >>> import ubelt as ub
            >>> # Stamp the computation of 
expensive-to-compute.txt >>> dpath = ub.Path.appdir('ubelt/tests/cache-stamp-cert').ensuredir() >>> product = dpath / 'product1.txt' >>> product.write_text('hi') >>> self = ub.CacheStamp('myname', depends='myconfig', dpath=dpath, >>> product=product) >>> cert = self._new_certificate() >>> assert cert['expires'] is None >>> self.expires = '2020-01-01T000000' >>> self.renew() >>> cert = self._new_certificate() >>> assert cert['expires'] is not None """ from ubelt.util_time import timestamp products = self._rectify_products(product) now = _localnow() expires = self._expires(now) certificate = { 'timestamp': timestamp(now, precision=4), 'expires': None if expires is None else timestamp(expires, precision=4), 'product': None if products is None else [os.fspath(p) for p in products], } if products is not None: if not all(map(exists, products)): raise IOError( 'The stamped product must exist: {}'.format(products)) product_info = self._product_info(products) certificate.update(product_info) return certificate def renew(self, cfgstr=None, product=None): """ Recertify that the product has been recomputed by writing a new certificate to disk. Args: cfgstr (None | str): deprecated, do not use. product (None | str | List): deprecated, do not use. Returns: None | dict: certificate information if enabled otherwise None. Example: >>> # Test that renew does nothing when the cacher is disabled >>> import ubelt as ub >>> dpath = ub.Path.appdir('ubelt/tests/cache-stamp-renew').ensuredir() >>> self = ub.CacheStamp('foo', dpath=dpath, enabled=False) >>> assert self.renew() is None """ if not self.cacher.enabled: return None if cfgstr is not None: # nocover from ubelt import schedule_deprecation schedule_deprecation( modname='ubelt', migration='Do not pass cfgstr to renew. Use the class depends arg', name='cfgstr', type='CacheStamp.renew arg', deprecate='1.1.0', error='1.3.0', remove='1.4.0', ) if product is not None: # nocover from ubelt import schedule_deprecation schedule_deprecation( modname='ubelt', migration='Do not pass product to renew. Use the class product arg', name='product', type='CacheStamp.renew arg', deprecate='1.1.0', error='1.3.0', remove='1.4.0', ) certificate = self._new_certificate(cfgstr, product) err = self._check_certificate_hashes(certificate) if err: raise RuntimeError(err) self.cacher.save(certificate, cfgstr=cfgstr) return certificate def _localnow(): # Might be nice to have a util_time function add in tzinfo import datetime as datetime_mod import time local_tzinfo = datetime_mod.timezone(datetime_mod.timedelta(seconds=-time.timezone)) now = datetime_mod.datetime.now().replace(tzinfo=local_tzinfo) return now def _byte_str(num, unit='auto', precision=2): """ Automatically chooses relevant unit (KB, MB, or GB) for displaying some number of bytes. Args: num (int): number of bytes unit (str): which unit to use, can be auto, B, KB, MB, GB, or TB References: .. 
[WikiOrdersOfMag] https://en.wikipedia.org/wiki/Orders_of_magnitude_(data) Returns: str: string representing the number of bytes with appropriate units Example: >>> from ubelt.util_cache import _byte_str >>> import ubelt as ub >>> num_list = [1, 100, 1024, 1048576, 1073741824, 1099511627776] >>> result = ub.urepr(list(map(_byte_str, num_list)), nl=0) >>> print(result) ['0.00KB', '0.10KB', '1.00KB', '1.00MB', '1.00GB', '1.00TB'] >>> _byte_str(10, unit='B') 10.00B """ abs_num = abs(num) if unit == 'auto': if abs_num < 2.0 ** 10: unit = 'KB' elif abs_num < 2.0 ** 20: unit = 'KB' elif abs_num < 2.0 ** 30: unit = 'MB' elif abs_num < 2.0 ** 40: unit = 'GB' else: unit = 'TB' if unit.lower().startswith('b'): num_unit = num elif unit.lower().startswith('k'): num_unit = num / (2.0 ** 10) elif unit.lower().startswith('m'): num_unit = num / (2.0 ** 20) elif unit.lower().startswith('g'): num_unit = num / (2.0 ** 30) elif unit.lower().startswith('t'): num_unit = num / (2.0 ** 40) else: raise ValueError('unknown num={!r} unit={!r}'.format(num, unit)) fmtstr = ('{:.' + str(precision) + 'f}{}') res = fmtstr.format(num_unit, unit) return res ubelt-1.3.7/ubelt/util_cache.pyi000066400000000000000000000061441472470106000166010ustar00rootroot00000000000000from typing import List from os import PathLike from typing import Callable from typing import Any from typing import Sequence import datetime import os from collections.abc import Generator class Cacher: VERBOSE: int FORCE_DISABLE: bool dpath: str | PathLike | None fname: str depends: str | List[str] | None cfgstr: str | None verbose: int ext: str meta: object | None enabled: bool protocol: int hasher: str log: Callable[[str], Any] backend: str def __init__(self, fname: str, depends: str | List[str] | None = None, dpath: str | PathLike | None = None, appname: str = 'ubelt', ext: str = '.pkl', meta: object | None = None, verbose: int | None = None, enabled: bool = True, log: Callable[[str], Any] | None = None, hasher: str = 'sha1', protocol: int = ..., cfgstr: str | None = None, backend: str = 'auto') -> None: ... @property def fpath(self) -> os.PathLike: ... def get_fpath(self, cfgstr: str | None = None) -> str | PathLike: ... def exists(self, cfgstr: str | None = None) -> bool: ... def existing_versions(self) -> Generator[str, None, None]: ... def clear(self, cfgstr: str | None = None) -> None: ... def tryload(self, cfgstr: str | None = None, on_error: str = 'raise') -> None | object: ... def load(self, cfgstr: str | None = None) -> object: ... def save(self, data: object, cfgstr: str | None = None) -> None: ... def ensure(self, func: Callable, *args, **kwargs): ... def __call__(self, func: Callable): ... class CacheStamp: cacher: Cacher product: str | PathLike | Sequence[str | PathLike] | None hasher: str expires: str | int | datetime.datetime | datetime.timedelta | None hash_prefix: None | str | List[str] def __init__(self, fname: str, dpath: str | PathLike | None, cfgstr: str | None = None, product: str | PathLike | Sequence[str | PathLike] | None = None, hasher: str = 'sha1', verbose: bool | None = None, enabled: bool = True, depends: str | List[str] | None = None, meta: object | None = None, hash_prefix: None | str | List[str] = None, expires: str | int | datetime.datetime | datetime.timedelta | None = None, ext: str = '.pkl') -> None: ... @property def fpath(self): ... def clear(self): ... def expired(self, cfgstr: Any | None = None, product: Any | None = None) -> bool | str: ... 
def renew(self, cfgstr: None | str = None, product: None | str | List = None) -> None | dict: ... ubelt-1.3.7/ubelt/util_cmd.py000066400000000000000000001002411472470106000161210ustar00rootroot00000000000000r""" This module exposes the :func:`ubelt.cmd` command, which provides a simple means for interacting with the command line. This uses :class:`subprocess.Popen` under the hood, but improves upon existing :mod:`subprocess` functionality by: (1) Adding the option to "tee" the output, i.e. simultaneously capture and write to stdout and stderr. (2) Letting you specify the command as either a string or a list in all cases. The :mod:`subprocess` module expects the command as a ``List[str]`` if ``shell=False`` or a ``str`` if ``shell=True``. If necessary, :func:`ubelt.util_cmd.cmd` will automatically convert from one format to the other, so passing in either case will work. (3) Letting you specify if the process blocks or not by setting ``detach``. Note: when ``detach is True`` it is not possible to tee the output. Example: >>> import ubelt as ub >>> # Running with verbose=1 will write to stdout in real time >>> info = ub.cmd('echo "write your command naturally"', verbose=1) write your command naturally >>> # The return type is a dictionary of information depending >>> # on how `ub.cmd` was invoked. >>> print('info = ' + ub.repr2(info)) info = { 'command': 'echo "write your command naturally"', 'cwd': None, 'err': '', 'out': 'write your command naturally\n', 'proc': <...Popen...>, 'ret': 0, } The cmd function is able to handle common use cases of the subprocess module with a simpler interface. .. code:: python import subprocess import ubelt as ub Run without capturing output and without printing to the screen .. code:: python # stdlib subprocess.run(['ls', '-l'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, universal_newlines=True) # ubelt equivalent ub.cmd(['ls', '-l'], capture=False) Print output to the screen, but no programmatic access to the data .. code:: python # stdlib subprocess.check_call(['ls', '-l']) # ubelt equivalent ub.cmd(['ls', '-l'], verbose=1, capture=False) Get programmatic access to the data but don't show it on screen .. code:: python # stdlib subprocess.check_output(['ls', '-l'], universal_newlines=True) # ubelt equivalent ub.cmd(['ls', '-l'])['out'] Get programmatic access AND show it on screen .. code:: python # stdlib has no easy way to do this # ubelt has "tee" functionality ub.cmd(['ls', '-l'], verbose=1) """ import sys import os __pitch__ = """ The ubelt.cmd command is probably the easiest way to execute a command line program from Python. Unlike os.system, subprocess.check_output, and subprocess.call, the syntax for what you want to call is exactly the same no matter what type of configuration you are using. Either pass the text you would execute on the command line directly or break it up into a list where each item should be considered its own argument. This works regardless of whether shell=True or shell=False, so if your command doesn't work with the safer shell=False, you can turn on shell=True without modifying anything else. You can capture output, print it to the screen, or --- something few other packages support --- both at once (via tee=True or verbose>1). You can also invoke the call via os.system instead of Popen by setting system=True (although this does come with all of the os.system benefits and restrictions). I'm biased because I wrote it, but subprocess-tee is the only other package I know of that comes close to getting this right. Maybe invoke?
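As a quick sketch of the point (echo stands in for any program here, and the asserts assume a POSIX shell):

    import ubelt as ub
    # The same call shape works in every configuration
    info1 = ub.cmd('echo hi')               # capture quietly
    info2 = ub.cmd(['echo', 'hi'])          # list form, same result
    info3 = ub.cmd('echo hi', shell=True)   # shell mode, same syntax
    info4 = ub.cmd('echo hi', verbose=1)    # tee: capture AND display
    assert info1['out'] == info2['out'] == info3['out'] == info4['out']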
""" # import logging # logging.basicConfig( # format='[%(asctime)s %(threadName)s %(levelname)s] %(message)s', # level=logging.DEBUG, # force=True # ) # logger = logging.getLogger(__name__) __all__ = ['cmd'] POSIX: bool = 'posix' in sys.builtin_module_names WIN32: bool = sys.platform == 'win32' class CmdOutput(dict): """ An container that holds the output of :func:`ubelt.cmd`. This inherits from dictionary to be backwards compatible with older versions of ubelt, but also includes methods that ducktype the stdlib :class:`subprocess.CompletedProcess`, which makes it easier for existing code that uses :func:`subprocess.run` to switch to :func:`ubelt.cmd`. """ @property def stdout(self): """ Returns: str | bytes """ return self['out'] @property def stderr(self): """ Returns: str | bytes """ return self['err'] @property def returncode(self): """ Returns: int """ return self['ret'] def check_returncode(self): """Raise CalledProcessError if the exit code is non-zero.""" import subprocess if self.returncode: raise subprocess.CalledProcessError( self.returncode, self.args, self.stdout, self.stderr) def cmd(command, shell=False, detach=False, verbose=0, tee=None, cwd=None, env=None, tee_backend='auto', check=False, system=False, timeout=None, capture=True, # Do we support these? # universal_newlines=True, # stdout='unused', stderr='unused' ): """ Executes a command in a subprocess. The advantage of this wrapper around subprocess is that (1) you control if the subprocess prints to stdout, (2) the text written to stdout and stderr is returned for parsing, (3) cross platform behavior that lets you specify the command as a string or tuple regardless of whether or not shell=True. (4) ability to detach, return the process object and allow the process to run in the background (eventually we may return a Future object instead). Args: command (str | List[str]): command string, tuple of executable and args, or shell command. shell (bool): if True, process is run in shell. Defaults to False. detach (bool): if True, process is detached and run in background. Defaults to False. verbose (int): verbosity mode. Can be 0, 1, 2, or 3. Defaults to 0. tee (bool | None): if True, simultaneously writes to stdout while capturing output from the command. If not specified, defaults to True if verbose > 0. If detach is True, then this argument is ignored. cwd (str | PathLike | None): Path to run command. Defaults to current working directory if unspecified. env (Dict[str, str] | None): environment passed to Popen tee_backend (str): backend for tee output. Valid choices are: "auto", "select" (POSIX only), and "thread". Defaults to "auto". check (bool): if True, check that the return code was zero before returning, otherwise raise a :class:`subprocess.CalledProcessError`. Does nothing if detach is True. Defaults to False. system (bool): if True, most other considerations are dropped, and :func:`os.system` is used to execute the command in a platform dependent way. Other arguments such as env, tee, timeout, and shell are all ignored. Defaults to False. (New in version 1.1.0) timeout (float | None): If the process does not complete in ``timeout`` seconds, raise a :class:`subprocess.TimeoutExpired`. (New in version 1.1.0). capture (bool): if True, the stdout/stderr are captured and returned in the information dictionary. Ignored if detach or system is True. Returns: dict | CmdOutput: info - information about command status. 
if detach is False ``info`` contains captured standard out, standard error, and the return code if detach is True ``info`` contains a reference to the process. Raises: ValueError - on an invalid configuration subprocess.TimeoutExpired - if the timeout limit is exceeded subprocess.CalledProcessError - if check and the return value is non zero Note: When using the tee output, the stdout and stderr may be shuffled from what they would be on the command line. Note: While this function is generally compatible with subprocess.run and other variants of Popen, we force defaults of universal_newlines=True, and choose the values of stdout and stderr based on other arguments. We are considering the pros and cons of a completely drop-in-replacement API. Related Work: Similar to other libraries: [SubprocTee]_, [ShellJob]_, [CmdRunner]_, [PyInvoke]_. References: .. [SO_11495783] https://stackoverflow.com/questions/11495783/redirect-subprocess-stderr-to-stdout .. [SO_7729336] https://stackoverflow.com/questions/7729336/how-can-i-print-and-display-subprocess-stdout-and-stderr-output-without-distorti .. [SO_33560364] https://stackoverflow.com/questions/33560364/python-windows-parsing-command-lines-with-shlex .. [SubprocTee] https://github.com/pycontribs/subprocess-tee .. [ShellJob] https://github.com/mortoray/shelljob .. [CmdRunner] https://github.com/netinvent/command_runner .. [PyInvoke] https://www.pyinvoke.org/prior-art.html CommandLine: xdoctest -m ubelt.util_cmd cmd:6 python -c "import ubelt as ub; ub.cmd('ping localhost -c 2', verbose=2)" pytest "$(python -c 'import ubelt; print(ubelt.util_cmd.__file__)')" -sv --xdoctest-verbose 2 Example: >>> import ubelt as ub >>> info = ub.cmd(('echo', 'simple cmdline interface'), verbose=1) simple cmdline interface >>> assert info['ret'] == 0 >>> assert info['out'].strip() == 'simple cmdline interface' >>> assert info['err'].strip() == '' Example: >>> import ubelt as ub >>> info = ub.cmd('echo str noshell', verbose=0) >>> assert info['out'].strip() == 'str noshell' Example: >>> # windows echo will output extra single quotes >>> import ubelt as ub >>> info = ub.cmd(('echo', 'tuple noshell'), verbose=0) >>> assert info['out'].strip().strip("'") == 'tuple noshell' Example: >>> # Note this command is formatted to work on win32 and unix >>> import ubelt as ub >>> info = ub.cmd('echo str&&echo shell', verbose=0, shell=True) >>> assert info['out'].strip() == 'str' + chr(10) + 'shell' Example: >>> import ubelt as ub >>> info = ub.cmd(('echo', 'tuple shell'), verbose=0, shell=True) >>> assert info['out'].strip().strip("'") == 'tuple shell' Example: >>> import pytest >>> import ubelt as ub >>> info = ub.cmd('echo hi', check=True) >>> import subprocess >>> with pytest.raises(subprocess.CalledProcessError): >>> ub.cmd('exit 1', check=True, shell=True) Example: >>> import ubelt as ub >>> from os.path import join, exists >>> dpath = ub.Path.appdir('ubelt', 'test').ensuredir() >>> fpath1 = (dpath / 'cmdout1.txt').delete() >>> fpath2 = (dpath / 'cmdout2.txt').delete() >>> # Start up two processes that run simultaneously in the background >>> info1 = ub.cmd(('touch', str(fpath1)), detach=True) >>> info2 = ub.cmd('echo writing2 > ' + str(fpath2), shell=True, detach=True) >>> # Detached processes are running in the background >>> # We can run other code while we wait for them. >>> while not exists(fpath1): ... pass >>> while not exists(fpath2): ... 
pass >>> # communicate with the process before you finish >>> # (otherwise you may leak a text wrapper) >>> info1['proc'].communicate() >>> info2['proc'].communicate() >>> # Check that the process actually did finish >>> assert (info1['proc'].wait()) == 0 >>> assert (info2['proc'].wait()) == 0 >>> # Check that the process did what we expect >>> assert fpath1.read_text() == '' >>> assert fpath2.read_text().strip() == 'writing2' Example: >>> # Can also use ub.cmd to call os.system >>> import pytest >>> import ubelt as ub >>> import subprocess >>> info = ub.cmd('echo hi', check=True, system=True) >>> with pytest.raises(subprocess.CalledProcessError): >>> ub.cmd('exit 1', check=True, shell=True) """ # In the future we might allow the user to pass a custom log function # But this has weird interactions with how the tee process works # because of the assumption stdout.write does not emit a newline # TODO: # log (Callable | None): # If specified, verbose output is written using this function, # otherwise the builtin print function is used. log = print import subprocess # TODO: stdout, stderr - experimental - custom file to pipe stdout/stderr to # Determine if command is specified as text or a tuple if isinstance(command, str): command_text = command command_tup = None elif isinstance(command, os.PathLike): command_text = os.fspath(command) command_tup = None else: import shlex command_parts = [] # Allow the user to specify paths as part of the command for part in command: if isinstance(part, os.PathLike): part = os.fspath(part) command_parts.append(part) command_tup = list(command_parts) command_text = ' '.join(list(map(shlex.quote, command_tup))) # Inputs can either be text or tuple based. On UNIX we ensure conversion # to text if shell is True, and to tuple if shell is False. On windows, # the input is text if shell is True, but can be either if shell is # False as noted in [SO_33560364]_. if shell or system: # When shell=True, args is sent to the shell (e.g. bin/sh) as text args = command_text else: # When shell=False, args is a list of executable and arguments if command_tup is None: if sys.platform.startswith('win32'): # nocover # On windows when shell=False, args can be a str | List[str] # as noted in [SO_33560364] args = command_text else: # On linux when shell=False, args must be a List[str] import shlex args = shlex.split(command_text) else: args = command_tup if tee is None: tee = verbose > 0 if tee and tee_backend not in {'auto', 'thread', 'select'}: raise ValueError('tee_backend must be select, thread, or auto') # note: we use ``tee`` as a proxy for "show" # we may upgrade show to an actual argument show = tee if show and not capture: # even though tee was probably true, semantically it should be # considered false unless we are doing both. 
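# For reference, the effective IO wiring chosen below (see make_proc) is:
#   capture=True,  show=True  -> tee: PIPE + echo to this process's stdout/stderr
#   capture=True,  show=False -> quiet capture via subprocess.PIPE
#   capture=False, show=True  -> inherit the parent's stdout/stderr
#   capture=False, show=False -> discard via subprocess.DEVNULL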
# when show becomes an argument we should do error handling for # inconsistency here tee = False if verbose > 1: import platform import getpass from ubelt import shrinkuser if verbose > 2: try: log('┌─── START CMD ───') except Exception: # nocover log('+=== START CMD ===') cwd_ = os.getcwd() if cwd is None else cwd compname = platform.node() username = getpass.getuser() cwd_ = shrinkuser(cwd_) ps1 = '[ubelt.cmd] {}@{}:{}$ '.format(username, compname, cwd_) log(ps1 + command_text) # Create a new process to execute the command def make_proc(): # delay the creation of the process until we validate all args popen_kwargs = {'cwd': cwd, 'env': env, 'shell': shell} popen_kwargs['universal_newlines'] = True if capture: popen_kwargs['stdout'] = subprocess.PIPE popen_kwargs['stderr'] = subprocess.PIPE elif not show: # The only way to suppress printing to the screen is by # piping to devnull popen_kwargs['stdout'] = subprocess.DEVNULL popen_kwargs['stderr'] = subprocess.DEVNULL proc = subprocess.Popen(args, **popen_kwargs) return proc if system: from ubelt.util_path import ChDir with ChDir(cwd): ret = os.system(command_text) info = CmdOutput(**{ 'out': None, 'err': None, 'ret': ret, 'cwd': cwd, 'command': command_text, }) elif detach: info = CmdOutput(**{ # Not including out/err/ret because the user could still compute # them via proc. I'm open to reconsidering this design decision. 'proc': make_proc(), 'cwd': cwd, 'command': command_text }) if verbose > 1: # nocover log('...detaching') else: if tee: # tee means both capture and show are true. # We log stdout and stderr, while simultaneously piping them to # another stream. stdout = sys.stdout stderr = sys.stderr proc = make_proc() with proc: out, err = _tee_output( proc=proc, stdout=stdout, stderr=stderr, backend=tee_backend, timeout=timeout, command_text=command_text) (out_, err_) = proc.communicate(timeout=timeout) elif capture: proc = make_proc() # Follow the error handling in the stdlib implementation of # subprocess.run with proc: try: (out, err) = proc.communicate(timeout=timeout) except subprocess.TimeoutExpired as exc: proc.kill() if WIN32: # nocover # Win32 needs a communicate after the kill to get the # output. See stdlib for details. exc.stdout, exc.stderr = proc.communicate() else: # Posix implementations already handle the populate. proc.wait() raise else: # Not capturing output, but it might print to the screen # i.e. capture is False, but show might be True or False proc = make_proc() out = None err = None # Follow the error handling in the stdlib implementation of # subprocess.call with proc: try: proc.wait(timeout=timeout) except: # NOQA # Including KeyboardInterrupt, wait handled that. proc.kill() # We don't call p.wait() again as p.__exit__ does that for us. raise # We used the popen context manager, which means that wait was called, # the process has exited, so it is safe to return a reference to the # process object. ret = proc.poll() info = CmdOutput(**{ 'out': out, 'err': err, 'ret': ret, 'proc': proc, 'cwd': cwd, 'command': command_text, }) # For subprocess compatibility info.args = args if not detach: if verbose > 2: # https://en.wikipedia.org/wiki/Box-drawing_character try: log('└─── END CMD ───') except Exception: # nocover log('L___ END CMD ___') if check: if info['ret'] != 0: raise subprocess.CalledProcessError( info['ret'], info['command'], info['out'], info['err']) return info def _textio_iterlines(stream): """ Iterates over lines in a TextIO stream until an EOF is encountered.
This is the iterator version of stream.readlines() Args: stream (io.TextIOWrapper): The stream to finish reading. Yields: str: a line read from the stream. """ try: # These if statements help mitigate race conditions but do not solve # them if the stream closes in the middle of a readline. if stream.closed: # nocover return line = stream.readline() while line != '': yield line if stream.closed: # nocover return line = stream.readline() except ValueError: # nocover # An I/O operation on a closed file means the process was likely # killed; re-raise and let the caller decide how to handle it. raise def _proc_async_iter_stream(proc, stream, buffersize=1, timeout=None): """ Reads output from a process in a separate thread Args: proc (subprocess.Popen): The process being run stream (io.TextIOWrapper): A stream belonging to the process e.g. ``proc.stdout`` or ``proc.stderr``. buffersize (int): Size of the returned queue. timeout (None | float): amount of time to allow before stopping Returns: Tuple[threading.Thread, queue.Queue, queue.Queue]: The reader thread, the queue that output lines will be asynchronously written to as they are read from the stream, and a control queue used to signal the thread to stop. """ import queue import threading # logger.debug(f"Create and start thread for {id(stream)}") out_queue = queue.Queue(maxsize=buffersize) control_queue = queue.Queue(maxsize=1) io_thread = threading.Thread( target=_enqueue_output_thread_worker, args=( proc, stream, out_queue, control_queue, timeout)) io_thread.daemon = True # thread dies with the program io_thread.start() return io_thread, out_queue, control_queue def _enqueue_output_thread_worker(proc, stream, out_queue, control_queue, timeout=None): """ Thread worker function This follows a similar strategy employed in http://eyalarubas.com/python-subproc-nonblock.html and https://stackoverflow.com/questions/375427/a-non-blocking-read-on-a-subprocess-pipe-in-python/4896288#4896288 Args: proc (subprocess.Popen): The process being run stream (io.TextIOWrapper): A stream belonging to the process e.g. ``proc.stdout`` or ``proc.stderr``. out_queue (queue.Queue): The queue to write to. control_queue (queue.Queue): For sending a signal to stop the thread timeout (None | float): amount of time to allow before stopping """ import queue # logger.debug(f"Start worker for {id(stream)=} with {timeout=}") def _check_if_stopped(): # nocover try: # Check if we were told to stop control_queue.get_nowait() except queue.Empty: ... else: # logger.debug(f"Thread acknowledges stop request for {id(stream)}") return True def enqueue(item): # nocover # Alternate between checking if we were stopped and putting the item in # the queue. This helps with the issue of an open process stream on # exit but it doesn't fully solve the issue because we still might # block on the stream.readline, therefore we can't guarantee this # thread will exit before the process does. if timeout is None: # If timeout is None, we can optimize this and just use the # blocking call. out_queue.put(item) return True # logger.debug('Waiting to put in item') while True: if _check_if_stopped(): return False try: out_queue.put(item, block=False) # logger.debug('Thread put in item') except queue.Full: pass else: return True while proc.poll() is None: # Note: if the underlying process has buffered output, we may get this # line well after it is initially emitted and thus be stuck waiting # here for some time. # logger.debug(f"ENQUEUE Waiting for line {id(stream)}") line = stream.readline() # logger.debug(f"ENQUEUE LIVE {id(stream)} {line!r}") if not enqueue(line): # nocover return if _check_if_stopped(): # nocover return # Coverage note: on Python 3.10 it seems like the tests don't always cover # these lines.
We don't have much control over whether this happens or not, so # we will exclude them from coverage checks. for line in _textio_iterlines(stream): # nocover # logger.debug(f"ENQUEUE FINAL {id(stream)} {line!r}") if not enqueue(line): # nocover return # logger.debug(f"STREAM IS DONE {id(stream)}") # signal that the stream is finished if not enqueue(None): # nocover return def _proc_iteroutput_thread(proc, timeout=None): """ Iterates over output from a process line by line. Follows the answers from [SO_375427]_. Note: WARNING. Current implementation might have bugs with other threads. This behavior was seen when using earlier versions of tqdm. I'm not sure if this was our bug or tqdm's. Newer versions of tqdm fix this, but I cannot guarantee that there isn't an issue on our end. Yields: Tuple[str, str]: oline, eline - stdout and stderr line References: .. [SO_375427] https://stackoverflow.com/questions/375427/non-blocking-read-subproc """ import queue # logger.debug("Create stdout/stderr streams") # Create threads that read stdout / stderr and queue up the output stdout_thread, stdout_queue, stdout_ctrl = _proc_async_iter_stream(proc, proc.stdout, timeout=timeout) stderr_thread, stderr_queue, stderr_ctrl = _proc_async_iter_stream(proc, proc.stderr, timeout=timeout) stdout_live = True stderr_live = True if timeout is not None: from time import monotonic as _time import subprocess start_time = _time() # read from the output asynchronously until both streams are exhausted while stdout_live or stderr_live: # Note: this loop iterates very quickly. # # logger.debug("Fast loop: check stdout / stderr threads") if timeout is not None: # Check for timeouts elapsed = _time() - start_time if elapsed >= timeout: stdout_ctrl.put('STOP') stderr_ctrl.put('STOP') # Unfortunately we can't guarantee that the threads will stop # because they might get stuck in a readline # stdout_thread.join() # stderr_thread.join() yield subprocess.TimeoutExpired, subprocess.TimeoutExpired if stdout_live: # pragma: nobranch try: oline = stdout_queue.get_nowait() stdout_live = oline is not None except queue.Empty: oline = None if stderr_live: try: eline = stderr_queue.get_nowait() stderr_live = eline is not None except queue.Empty: eline = None if oline is not None or eline is not None: yield oline, eline def _proc_iteroutput_select(proc, timeout=None): """ Iterates over output from a process line by line. UNIX only. Use :func:`_proc_iteroutput_thread` instead for a cross platform solution based on threads.
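A minimal usage sketch (assumes a POSIX system and a process opened with piped, text-mode stdout/stderr):

>>> # xdoctest: +REQUIRES(POSIX)
>>> import subprocess
>>> from ubelt.util_cmd import _proc_iteroutput_select
>>> proc = subprocess.Popen(['echo', 'hi'], stdout=subprocess.PIPE,
>>>                         stderr=subprocess.PIPE, universal_newlines=True)
>>> lines = [o for o, e in _proc_iteroutput_select(proc) if o]
>>> assert lines[-1].strip() == 'hi'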
Args: proc (subprocess.Popen): the process being run timeout (None | float): amount of time to allow before stopping Yields: Tuple[str, str]: oline, eline - stdout and stderr line """ from itertools import zip_longest import select if timeout is not None: from time import monotonic as _time import subprocess start_time = _time() # Read output while the external program is running while proc.poll() is None: if timeout is not None: elapsed = _time() - start_time if elapsed >= timeout: yield subprocess.TimeoutExpired, subprocess.TimeoutExpired return # nocover reads = [proc.stdout.fileno(), proc.stderr.fileno()] ret = select.select(reads, [], [], timeout) oline = eline = None for fd in ret[0]: if fd == proc.stdout.fileno(): oline = proc.stdout.readline() if fd == proc.stderr.fileno(): eline = proc.stderr.readline() yield oline, eline # Grab any remaining data in stdout and stderr after the process finishes oline_iter = _textio_iterlines(proc.stdout) eline_iter = _textio_iterlines(proc.stderr) for oline, eline in zip_longest(oline_iter, eline_iter): yield oline, eline def _tee_output(proc, stdout=None, stderr=None, backend='thread', timeout=None, command_text=None): """ Simultaneously reports and captures stdout and stderr from a process subprocess must be created using (stdout=subprocess.PIPE, stderr=subprocess.PIPE) Args: proc (subprocess.Popen): the process being run stdout (io.TextIOWrapper): typically sys.stdout stderr (io.TextIOWrapper): typically sys.stderr backend (str): thread, select or auto timeout (None | float): time before raising a timeout error command_text (str): used only to construct a TimeoutExpired error. Returns: Tuple[str, str]: recorded stdout and stderr """ import subprocess logged_out = [] logged_err = [] if backend == 'auto': # backend = 'select' if POSIX else 'thread' backend = 'thread' if backend == 'select': if not POSIX: # nocover raise NotImplementedError('select is only available on posix') # the select-based version is stable, but slow _proc_iteroutput = _proc_iteroutput_select elif backend == 'thread': # the thread version is fast, but might run into issues. _proc_iteroutput = _proc_iteroutput_thread else: # nocover # The value of "backend" should be checked before we create the # processes, otherwise we will have a dangling process raise AssertionError( 'Invalid backend, but the check should have already happened') output_gen = _proc_iteroutput(proc, timeout=timeout) # logger.debug("Start waiting for buffered output") for oline, eline in output_gen: if timeout is not None: if oline is subprocess.TimeoutExpired or eline is subprocess.TimeoutExpired: # logger.error("Timeout error triggered!") try: out = ''.join(logged_out) except UnicodeDecodeError: # nocover out = '\n'.join(_.decode('utf-8') for _ in logged_out) try: err = ''.join(logged_err) except UnicodeDecodeError: # nocover err = '\n'.join(_.decode('utf-8') for _ in logged_err) # Following the standard library implementation of # :func:`subprocess.run`, we kill (not terminate) the process # when the timeout expires. We shouldn't need the extra # communicate fix for windows because we report the tee-ed # output that already exists. But let's see what the CI says.
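# (Note: Popen.kill sends SIGKILL on POSIX; on Windows it is an
# alias of terminate, which calls TerminateProcess.)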
proc.kill() proc.wait() raise subprocess.TimeoutExpired(command_text, timeout, out, err) if oline: # logger.debug("Write oline to stdout.write and logged_out") if stdout: # pragma: nobranch stdout.write(oline) stdout.flush() logged_out.append(oline) if eline: # logger.debug("Write eline to stderr.write and logged_err") if stderr: # pragma: nobranch stderr.write(eline) stderr.flush() logged_err.append(eline) # logger.debug("Continue waiting for buffered output") # The motivation for this logic is unclear. # In what cases is the logged output returned as bytes or text? # Using a bytes join probably makes more sense in most cases. try: out = ''.join(logged_out) except UnicodeDecodeError: # nocover out = '\n'.join(_.decode('utf-8') for _ in logged_out) try: err = ''.join(logged_err) except UnicodeDecodeError: # nocover err = '\n'.join(_.decode('utf-8') for _ in logged_err) return out, err # Stub for possible object oriented interface # class Command: # """ # TODO # """ # ... ubelt-1.3.7/ubelt/util_cmd.pyi000066400000000000000000000014511472470106000162750ustar00rootroot00000000000000from typing import List from os import PathLike from typing import Dict __pitch__: str POSIX: bool WIN32: bool class CmdOutput(dict): @property def stdout(self) -> str | bytes: ... @property def stderr(self) -> str | bytes: ... @property def returncode(self) -> int: ... def check_returncode(self) -> None: ... def cmd(command: str | List[str], shell: bool = False, detach: bool = False, verbose: int = 0, tee: bool | None = None, cwd: str | PathLike | None = None, env: Dict[str, str] | None = None, tee_backend: str = 'auto', check: bool = False, system: bool = False, timeout: float | None = None, capture: bool = True) -> dict | CmdOutput: ... ubelt-1.3.7/ubelt/util_colors.py000066400000000000000000000167141472470106000166700ustar00rootroot00000000000000""" This module defines simple functions to color your text and highlight your code using `ANSI <https://en.wikipedia.org/wiki/ANSI_escape_code>`_ escape sequences. This works using the `Pygments <https://pygments.org/>`_ library, which is an optional requirement. Therefore, these functions only work properly if Pygments is installed, otherwise these functions will return the unmodified text and a warning will be printed. The :func:`highlight_code` function uses pygments to highlight syntax of a programming language. The :func:`color_text` function colors text with a solid color. Note the functions in this module require the optional :mod:`pygments` library to work correctly. These functions will warn if :mod:`pygments` is not installed. This module contains a global variable ``NO_COLOR``, which if set to True will force all ANSI text coloring functions to become no-ops. This defaults to the value of the ``bool(os.environ.get('NO_COLOR'))`` flag, which is compliant with [NoColor]_. New in 1.3.4: The :py:mod:`rich` backend was added as an alternative to pygments. Related work: https://github.com/Textualize/rich References: .. [NoColor] https://no-color.org/ Requirements: pip install pygments """ import sys import warnings import os # Global state that determines if ANSI-coloring text is allowed # (which is mainly to address non-ANSI compliant windows consoles) # compliant with https://no-color.org/ NO_COLOR = bool(os.environ.get('NO_COLOR')) # type: bool def highlight_code(text, lexer_name='python', backend='pygments', **kwargs): """ Highlights a block of text using ANSI tags based on language syntax. Args: text (str): Plain text to parse and highlight lexer_name (str): Name of language. e.g. python, docker, c++.
For an exhaustive list see :func:`pygments.lexers.get_all_lexers`. Defaults to "python". backend (str): Either "pygments" or "rich". Defaults to "pygments". **kwargs: If the backend is "pygments", passed to pygments.lexers.get_lexer_by_name. Returns: str: text - highlighted text if the requested backend is installed, otherwise the plain text is returned unmodified. Example: >>> import ubelt as ub >>> text = 'import ubelt as ub; print(ub)' >>> new_text = ub.highlight_code(text) >>> print(new_text) Example: >>> import ubelt as ub >>> text = 'import ubelt as ub; print(ub)' >>> new_text = ub.highlight_code(text, backend='pygments') >>> print(new_text) >>> new_text = ub.highlight_code(text, backend='rich') >>> print(new_text) """ if NO_COLOR: return text # Resolve extensions to languages lexer_name = { 'py': 'python', 'h': 'cpp', 'cpp': 'cpp', 'cxx': 'cpp', 'c': 'cpp', }.get(lexer_name.replace('.', ''), lexer_name) try: if backend == 'pygments': new_text = _pygments_highlight(text, lexer_name, **kwargs) elif backend == 'rich': new_text = _rich_highlight(text, lexer_name, **kwargs) else: raise KeyError(backend) except ImportError: # nocover warnings.warn(f'{backend} is not installed, code will not be highlighted') new_text = text return new_text def _pygments_highlight(text, lexer_name, **kwargs): """ Original pygments highlight logic """ if sys.platform.startswith('win32'): # nocover # Hack on win32 to support colored output try: import colorama if not colorama.initialise.atexit_done: # Only init if it hasn't been done colorama.init() except ImportError: warnings.warn( 'colorama is not installed, ansi colors may not work') import pygments # type: ignore import pygments.lexers # type: ignore import pygments.formatters # type: ignore import pygments.formatters.terminal # type: ignore formatter = pygments.formatters.terminal.TerminalFormatter(bg='dark') lexer = pygments.lexers.get_lexer_by_name(lexer_name, **kwargs) new_text = pygments.highlight(text, lexer, formatter) return new_text def _rich_highlight(text, lexer_name): # nocover """ Alternative rich-based highlighter References: .. [RichDiscuss3076] https://github.com/Textualize/rich/discussions/3076 """ from rich.syntax import Syntax from rich.console import Console import io syntax = Syntax(text, lexer_name, background_color='default') stream = io.StringIO() write_console = Console(file=stream, soft_wrap=True, color_system='standard') write_console.print(syntax) new_text = write_console.file.getvalue() return new_text def color_text(text, color): r""" Colorizes text a single color using ansi tags. Args: text (str): text to colorize color (str): color code. different systems may have different colors. commonly available colors are: 'red', 'brown', 'yellow', 'green', 'blue', 'black', and 'white'. Returns: str: text - colorized text. If pygments is not installed plain text is returned. 
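Note: If the global ``NO_COLOR`` flag is set or ``color`` is None, the input text is returned unchanged.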
SeeAlso: https://rich.readthedocs.io/en/stable/markup.html Example: >>> text = 'raw text' >>> import pytest >>> import ubelt as ub >>> if ub.modname_to_modpath('pygments'): >>> # Colors text only if pygments is installed >>> ansi_text = ub.color_text(text, 'red') >>> prefix = '\x1b[31' >>> print('prefix = {!r}'.format(prefix)) >>> print('ansi_text = {!r}'.format(ansi_text)) >>> assert ansi_text.startswith(prefix) >>> assert ub.color_text(text, None) == 'raw text' >>> else: >>> # Otherwise text passes through unchanged >>> assert ub.color_text(text, 'red') == 'raw text' >>> assert ub.color_text(text, None) == 'raw text' Example: >>> # xdoctest: +REQUIRES(module:pygments) >>> import pygments.console >>> import ubelt as ub >>> # List available colors codes >>> known_colors = pygments.console.codes.keys() >>> for color in known_colors: ... print(ub.color_text(color, color)) """ if NO_COLOR or color is None: return text try: if sys.platform.startswith('win32'): # nocover # Hack on win32 to support colored output try: import colorama if not colorama.initialise.atexit_done: # Only init if it hasn't been done colorama.init() except ImportError: warnings.warn( 'colorama is not installed, ansi colors may not work') import pygments # type: ignore import pygments.console # type: ignore try: ansi_text = pygments.console.colorize(color, text) except KeyError: warnings.warn('unable to find color: {!r}'.format(color)) return text except Exception as ex: # nocover warnings.warn('some other issue with text color: {!r}'.format(ex)) return text return ansi_text except ImportError: # nocover warnings.warn('pygments is not installed, text will not be colored') return text ubelt-1.3.7/ubelt/util_colors.pyi000066400000000000000000000003611472470106000170320ustar00rootroot00000000000000NO_COLOR: bool def highlight_code(text: str, lexer_name: str = 'python', backend: str = 'pygments', **kwargs) -> str: ... def color_text(text: str, color: str) -> str: ... ubelt-1.3.7/ubelt/util_const.py000066400000000000000000000143521472470106000165130ustar00rootroot00000000000000""" This module defines :data:`ub.NoParam`. This is a robust sentinel value that can act like ``None`` when None might be a valid value. The value of :data:`NoParam` is robust to reloading, pickling, and copying (i.e. ``var is ub.NoParam`` will return ``True`` after these operations). Use cases that demonstrate the value of :data:`NoParam` can be found in :mod:`ubelt.util_dict`, where it simplifies the implementation of methods that behave like :meth:`dict.get`. The value of :data:`NoParam` is robust to reloading, pickling, and copying. See [SO_41048643]_ for more details. References: .. [SO_41048643] http://stackoverflow.com/questions/41048643/a-second-none Example: >>> import ubelt as ub >>> def func(a=ub.NoParam): >>> if a is ub.NoParam: >>> print('no param specified') >>> else: >>> print('a = {}'.format(a)) >>> func() no param specified >>> func(a=None) a = None >>> func(a=1) a = 1 >>> # note: typically it is bad practice to use NoParam as an actual >>> # (non-default) parameter. It goes against the sprit of the idea. >>> func(a=ub.NoParam) no param specified """ __all__ = ['NoParam'] class NoParamType(object): r""" Class used to define :data:`NoParam`, a sentinel that acts like None when None might be a valid value. The value of :data:`NoParam` is robust to reloading, pickling, and copying. See [SO_41048643]_ for more details. However, try to never assign this value to a persistent variable. Use this class sparingly. References: .. 
[SO_41048643]: http://stackoverflow.com/questions/41048643/a-second-none Example: >>> # Use case >>> # Imagine you need a function with a default argument, but you need to >>> # distinguish between cases where the user called it without specifying >>> # a default, versus the user specifying None. For instance, imagine you >>> # are writing the code for a dictionary ``get(key, default)``. >>> # >>> # You want the user to distinguish between the user calling it with >>> # None and the user not calling it at all. >>> # >>> # So you can't write it like this because you can't distinguish between >>> # the user passing default as None, or not passing a default at all. >>> def get(self, key, default=None): >>> if default is None: >>> ... # What do?! >>> # >>> # You could write it like this, which is long and annoying >>> def get(self, key, *args, **kw): >>> try: >>> return self[key] >>> except KeyError as ke: >>> if len(args) > 0: >>> return args[0] >>> elif 'default' in kw: >>> return kw['default'] >>> else: >>> raise >>> # >>> # Instead write it like this, which is short and nice >>> from ubelt import NoParam >>> def get(self, key, default=NoParam): >>> try: >>> return self[key] >>> except KeyError: >>> if default is NoParam: >>> raise >>> return default >>> # >>> # setup some data >>> self = {} >>> key = 'spam' >>> # >>> # If the key is not in the dictionary, raise a KeyError >>> import pytest >>> with pytest.raises(KeyError): >>> get(self, key) >>> # >>> # If the key is not in the dictionary, return ``default`` >>> get(self, key, None) # with positional args >>> get(self, key, default=None) # with keyword args Example: >>> import ubelt as ub >>> from ubelt import util_const >>> from ubelt.util_const import NoParamType, NoParam >>> import pickle >>> import copy >>> id_ = id(NoParam) >>> versions = { ... 'ub.util_const.NoParam': ub.util_const.NoParam, ... 'NoParam': NoParam, ... 'NoParamType()': NoParamType(), ... 'ub.NoParam': ub.NoParam, ... 'copy': copy.copy(NoParam), ... 'deepcopy': copy.deepcopy(NoParam), ... 'pickle': pickle.loads(pickle.dumps(NoParam)) ... } >>> print(versions) >>> assert all(id(v) == id_ for v in versions.values()) >>> from importlib import reload >>> reload(util_const) >>> assert id(util_const.NoParam) == id_ >>> assert all(id(v) == id_ for v in versions.values()) >>> assert str(NoParam) == repr(NoParam) >>> assert not any(v for v in versions.values()) >>> assert all(not v for v in versions.values()) >>> assert all(not bool(v) for v in versions.values()) """ def __new__(cls): """ Returns: NoParamType """ return NoParam def __reduce__(self): """ Returns: Tuple[type, Tuple] """ return (NoParamType, ()) def __copy__(self): """ Returns: NoParamType """ return NoParam def __deepcopy__(self, memo): """ Returns: NoParamType """ return NoParam def __str__(cls): """ Returns: str """ return 'NoParam' def __repr__(cls): """ Returns: str """ return 'NoParam' def __bool__(self): """ Returns: bool """ # Ensure NoParam is Falsey return False # Backwards compat _NoParamType = NoParamType # Create the only instance of NoParamType that should ever exist try: # If the module is reloaded (via imp.reload), globals() will contain # NoParam. This skips the code that would instantiate a second object NoParam # pragma: no cover # Note: it is possible to hack around this via # >>> del util_const.NoParam # >>> imp.reload(util_const) except NameError: # pragma: no cover # When the module is first loaded, globals() will not contain NoParam. 
A # NameError will be thrown, causing the first instance of NoParam to be # instantiated. NoParam = object.__new__(NoParamType) # type: NoParamType ubelt-1.3.7/ubelt/util_const.pyi000066400000000000000000000005231472470106000166570ustar00rootroot00000000000000from typing import Tuple class NoParamType: def __new__(cls) -> NoParamType: ... def __reduce__(self) -> Tuple[type, Tuple]: ... def __copy__(self) -> NoParamType: ... def __deepcopy__(self, memo) -> NoParamType: ... def __bool__(self) -> bool: ... NoParam: NoParamType ubelt-1.3.7/ubelt/util_deprecate.py000066400000000000000000000226071472470106000173230ustar00rootroot00000000000000""" Currently this module provides one utility :func:`ubelt.util_deprecate.schedule_deprecation` which allows a developer to easily mark features in their libraries as deprecated. """ def schedule_deprecation(modname=None, name='?', type='?', migration='', deprecate=None, error=None, remove=None, # TODO: let the user have more control over the # message. # message=None, warncls=DeprecationWarning, stacklevel=1): """ Raise a deprecation warning or error based on the version of a package. This helps provide users with a smoother transition by specifying a version when the deprecation warning will start, when it transitions into an error, and when the maintainers should remove the feature all together. This function provides a concise way to mark a feature as deprecated by providing a description of the deprecated feature, documentation on how to migrate away from the deprecated feature, and the versions that the feature is scheduled for deprecation and eventual removal. Based on the version of the library and the specified schedule, this function will either do nothing, emit a warning, or raise an error with helpful messages for both users and developers. Args: modname (str | None): The name of the underlying module associated with the feature to be deprecated. The module must already be imported and have a passable ``__version__`` attribute. If unspecified, version info cannot be used. name (str): The name of the feature to deprecate. This is usually a function or argument name. type (str): A description of what the feature is. This is not a formal type, but rather a prose description: e.g. "argument to my_func". migration (str): A description that lets users know what they should do instead of using the deprecated feature. deprecate (str | None): The version when the feature is officially deprecated and this function should start to emit a deprecation warning. Can also be the strings: "soon" or "now" if the timeline isn't perfectly defined. error (str | None): The version when the feature is officially no longer supported, and will start to raise a RuntimeError. Can also be the strings: "soon" or "now". remove (str | None): The version when the feature is completely removed. An AssertionError will be raised if this function is still present, reminding the developer to remove the feature (or extend the remove version). Can also be the strings: "soon" or "now". warncls (type): This is the category of warning to use. Defaults to :class:`DeprecationWarning`. stacklevel (int): The stacklevel can be used by wrapper functions to indicate where the warning is occurring. Returns: str : the constructed message Note: If deprecate, remove, or error is specified as "now" or a truthy value it will force that check to trigger immediately. If the value is "soon", then the check will not trigger. Note: The :class:`DeprecationWarning` is not visible by default.
https://docs.python.org/3/library/warnings.html Example: >>> # xdoctest: +REQUIRES(module:packaging) >>> import ubelt as ub >>> import sys >>> import types >>> import pytest >>> dummy_module = sys.modules['dummy_module'] = types.ModuleType('dummy_module') >>> # When less than the deprecated version this does nothing >>> dummy_module.__version__ = '1.0.0' >>> ub.schedule_deprecation( ... modname='dummy_module', name='myfunc', type='function', ... migration='do something else', ... deprecate='1.1.0', error='1.2.0', remove='1.3.0') >>> # But when the module version increases above the threshold, >>> # the warning is raised. >>> dummy_module.__version__ = '1.1.0' >>> with pytest.warns(DeprecationWarning): ... msg = ub.schedule_deprecation( ... 'dummy_module', 'myfunc', 'function', 'do something else', ... deprecate='1.1.0', error='1.2.0', remove='1.3.0') >>> print(msg) The "myfunc" function was deprecated in dummy_module 1.1.0, will cause an error in dummy_module 1.2.0 and will be removed in dummy_module 1.3.0. The current dummy_module version is 1.1.0. do something else Example: >>> # xdoctest: +REQUIRES(module:packaging) >>> # Demo the various cases >>> import ubelt as ub >>> import sys >>> import types >>> import pytest >>> dummy_module = sys.modules['dummy_module'] = types.ModuleType('dummy_module') >>> # When less than the deprecated version this does nothing >>> dummy_module.__version__ = '1.1.0' >>> # Now this raises warning >>> with pytest.warns(DeprecationWarning): ... dummy_module.__version__ = '1.1.0' ... ub.schedule_deprecation( ... 'dummy_module', 'myfunc', 'function', 'do something else', ... deprecate='1.1.0', error='1.2.0', remove='1.3.0') >>> # Now this raises an error for the user >>> with pytest.raises(RuntimeError): ... dummy_module.__version__ = '1.2.0' ... ub.schedule_deprecation( ... 'dummy_module', 'myfunc', 'function', 'do something else', ... deprecate='1.1.0', error='1.2.0', remove='1.3.0') >>> # Now this raises an error for the developer >>> with pytest.raises(AssertionError): ... dummy_module.__version__ = '1.3.0' ... ub.schedule_deprecation( ... 'dummy_module', 'myfunc', 'function', 'do something else', ... deprecate='1.1.0', error='1.2.0', remove='1.3.0') >>> # When no versions are specified, it simply emits the warning >>> with pytest.warns(DeprecationWarning): ... dummy_module.__version__ = '1.1.0' ... ub.schedule_deprecation( ... 'dummy_module', 'myfunc', 'function', 'do something else') >>> # Test with soon / now >>> with pytest.warns(Warning): ... ub.schedule_deprecation( ... 'dummy_module', 'myfunc', 'function', 'do something else', ... deprecate='now', error='soon', remove='soon', warncls=Warning) >>> # Test with truthy values >>> with pytest.raises(RuntimeError): ... ub.schedule_deprecation( ... 'dummy_module', 'myfunc', 'function', 'do something else', ... deprecate=True, error=1, remove=False) >>> # Test with No module >>> with pytest.warns(Warning): ... ub.schedule_deprecation( ... None, 'myfunc', 'function', 'do something else', ... deprecate='now', error='soon', remove='soon', warncls=Warning) >>> # Test with No module >>> with pytest.warns(Warning): ... ub.schedule_deprecation( ... None, 'myfunc', 'function', 'do something else', ... 
deprecate='now', error='2.0.0', remove='soon', warncls=Warning) """ import sys import warnings from packaging.version import parse as Version if modname is not None: module = sys.modules[modname] current = Version(module.__version__) else: # TODO: use the inspect module to get the function / module this was # called from and fill in unspecified values. current = 'unknown' if modname is None: modname_str = '' else: modname_str = f'{modname} ' def _handle_when(when, default): if when is None: is_now = default when_str = '' elif isinstance(when, str): if when in {'soon', 'now'}: when_str = ' {}{}'.format(modname_str, when) is_now = (when == 'now') else: when = Version(when) when_str = ' in {}{}'.format(modname_str, when) if current == 'unknown': is_now = default else: is_now = current >= when else: is_now = bool(when) when_str = '' return is_now, when_str deprecate_now, deprecate_str = _handle_when(deprecate, default=True) remove_now, remove_str = _handle_when(remove, default=False) error_now, error_str = _handle_when(error, default=False) # TODO: make the message more customizable. msg = ( 'The "{name}" {type} was deprecated{deprecate_str}, will cause ' 'an error{error_str} and will be removed{remove_str}. The current ' '{modname_str}version is {current}. {migration}' ).format(**locals()).strip() if remove_now: raise AssertionError( 'Forgot to remove deprecated: ' + msg + ' ' + 'Remove the function, or extend the scheduled remove version.' ) if error_now: raise RuntimeError(msg) if deprecate_now: warnings.warn(msg, warncls, stacklevel=1 + stacklevel) return msg ubelt-1.3.7/ubelt/util_deprecate.pyi000066400000000000000000000007021472470106000174640ustar00rootroot00000000000000def schedule_deprecation(modname: str, name: str = '?', type: str = '?', migration: str = '', deprecate: str | None = None, error: str | None = None, remove: str | None = None, warncls: type = DeprecationWarning, stacklevel: int = 1) -> str: ... ubelt-1.3.7/ubelt/util_dict.py000066400000000000000000002306461472470106000163160ustar00rootroot00000000000000""" Functions for working with dictionaries. The :class:`UDict` is a subclass of :class:`dict` with quality of life improvements. It contains methods for n-ary key-wise set operations as well as support for the binary operators in addition to other methods for mapping, inversion, subdicts, and peeking. It can be accessed via the alias ``ubelt.udict``. The :class:`SetDict` only contains the key-wise set extensions to dict. It can be accessed via the alias ``ubelt.sdict``. The :func:`dict_hist` function counts the number of discrete occurrences of hashable items. Similarly :func:`find_duplicates` looks for indices of items that occur more than `k=1` times. The :func:`map_keys` and :func:`map_values` functions are useful for transforming the keys and values of a dictionary with less syntax than a dict comprehension. The :func:`dict_union`, :func:`dict_isect`, and :func:`dict_diff` functions are similar to the set equivalents. The :func:`dzip` function zips two iterables and packs them into a dictionary where the first iterable is used to generate keys and the second generates values. The :func:`group_items` function takes two lists and returns a dict mapping values in the second list to all items in corresponding locations in the first list. The :func:`invert_dict` function swaps keys and values. See the function docs for details on dealing with unique and non-unique values. 
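For instance, a small sketch of :func:`group_items` and :func:`invert_dict` (toy values, chosen only for illustration):

.. code:: python

    import ubelt as ub
    # group_items: map each group-id to the items that fall in it
    assert ub.group_items(['a', 'b', 'c'], [0, 1, 0]) == {0: ['a', 'c'], 1: ['b']}
    # invert_dict: swap keys and values (values must be hashable and unique)
    assert ub.invert_dict({'a': 1, 'b': 2}) == {1: 'a', 2: 'b'}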
The :func:`ddict` and :func:`odict` functions are aliases for the commonly used :func:`collections.defaultdict` and :func:`collections.OrderedDict` classes. Related Work: * Note that Python does support set operations on dictionary **views** [DictView]_ [Pep3106]_, but these methods can be inflexible and often leave you only with keys (and no dictionary subset operation), whereas the ubelt definition of these operations is more straightforward. * There are several recipes for dictionaries that support set operations [SetDictRecipe1]_ [SetDictRecipe2]_. * The :py:mod:`dictmap` package contains a function similar to :func:`map_values` [GHDictMap]_. * The :py:mod:`dictdiffer` package contains tools for nested difference operations [PypiDictDiffer]_. * There are lots of other python dictionary utility libraries [PyPIAddict]_. References: .. [PyPIAddict] https://github.com/mewwts/addict .. [SetDictRecipe1] https://gist.github.com/rossmacarthur/38fa948b175abb512e12c516cc3b936d .. [SetDictRecipe2] https://code.activestate.com/recipes/577471-setdict/ .. [PypiDictDiffer] https://pypi.org/project/dictdiffer/ .. [DictView] https://docs.python.org/3.0/library/stdtypes.html#dictionary-view-objects .. [Pep3106] https://peps.python.org/pep-3106/ .. [GHDictMap] https://github.com/ulisesojeda/dictionary_map """ import sys import operator as op import itertools as it from collections import OrderedDict from collections import defaultdict from ubelt.util_const import NoParam __all__ = [ 'AutoDict', 'AutoOrderedDict', 'dzip', 'ddict', 'dict_hist', 'dict_subset', 'dict_union', 'dict_isect', 'dict_diff', 'find_duplicates', 'group_items', 'invert_dict', 'map_keys', 'map_vals', 'map_values', 'sorted_keys', 'sorted_vals', 'sorted_values', 'odict', 'named_product', 'varied_values', 'SetDict', 'UDict', 'sdict', 'udict', ] # Expose for convenience odict = OrderedDict ddict = defaultdict # Use an ordered dictionary in < 3.7 as the base if sys.version_info[0:2] <= (3, 6): # nocover DictBase = OrderedDict else: # nocover DictBase = dict def dzip(items1, items2, cls=dict): """ Zips elementwise pairs between items1 and items2 into a dictionary. Values from items2 can be broadcast onto items1. Args: items1 (Iterable[KT]): full sequence items2 (Iterable[VT]): can either be a sequence of one item or a sequence of equal length to ``items1`` cls (Type[dict]): dictionary type to use. Defaults to ``dict``. Returns: Dict[KT, VT]: similar to ``dict(zip(items1, items2))``. Example: >>> import ubelt as ub >>> assert ub.dzip([1, 2, 3], [4]) == {1: 4, 2: 4, 3: 4} >>> assert ub.dzip([1, 2, 3], [4, 4, 4]) == {1: 4, 2: 4, 3: 4} >>> assert ub.dzip([], [4]) == {} """ try: len(items1) except TypeError: items1 = list(items1) try: len(items2) except TypeError: items2 = list(items2) if len(items1) == 0 and len(items2) == 1: # Corner case: # allow the first list to be empty and the second list to broadcast a # value. This means that the equality check won't work for the case # where items1 and items2 are supposed to correspond, but the length of # items2 is 1. items2 = [] if len(items2) == 1 and len(items1) > 1: items2 = items2 * len(items1) if len(items1) != len(items2): raise ValueError('out of alignment len(items1)=%r, len(items2)=%r' % ( len(items1), len(items2))) return cls(zip(items1, items2)) def group_items(items, key): """ Groups a list of items by group id.
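Unlike :func:`itertools.groupby`, the input does not need to be sorted (e.g. ``group_items(['a', 'b', 'c'], [0, 1, 0])`` still groups the items at indices 0 and 2 together).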
Args: items (Iterable[VT]): a list of items to group key (Iterable[KT] | Callable[[VT], KT]): either a corresponding list of group-ids for each item or a function used to map each item to a group-id. Returns: dict[KT, List[VT]]: a mapping from each group-id to the list of corresponding items Example: >>> import ubelt as ub >>> items = ['ham', 'jam', 'spam', 'eggs', 'cheese', 'banana'] >>> groupids = ['protein', 'fruit', 'protein', 'protein', 'dairy', 'fruit'] >>> id_to_items = ub.group_items(items, groupids) >>> print(ub.repr2(id_to_items, nl=0)) {'dairy': ['cheese'], 'fruit': ['jam', 'banana'], 'protein': ['ham', 'spam', 'eggs']} Example: >>> import ubelt as ub >>> rows = [ >>> {'index': 0, 'group': 'aa'}, >>> {'index': 1, 'group': 'aa'}, >>> {'index': 2, 'group': 'bb'}, >>> {'index': 3, 'group': 'cc'}, >>> {'index': 4, 'group': 'aa'}, >>> {'index': 5, 'group': 'cc'}, >>> {'index': 6, 'group': 'cc'}, >>> ] >>> id_to_items = ub.group_items(rows, key=lambda r: r['group']) >>> print(ub.repr2(id_to_items, nl=2)) { 'aa': [ {'group': 'aa', 'index': 0}, {'group': 'aa', 'index': 1}, {'group': 'aa', 'index': 4}, ], 'bb': [ {'group': 'bb', 'index': 2}, ], 'cc': [ {'group': 'cc', 'index': 3}, {'group': 'cc', 'index': 5}, {'group': 'cc', 'index': 6}, ], } """ if callable(key): keyfunc = key pair_list = ((keyfunc(item), item) for item in items) else: pair_list = zip(key, items) # Optimized alternatives are benchmarked in # ../dev/bench/bench_group_items.py # Initialize a dict of lists id_to_items = defaultdict(list) # Insert each item into the correct group for key, item in pair_list: id_to_items[key].append(item) return id_to_items def dict_hist(items, weights=None, ordered=False, labels=None): """ Builds a histogram of items, counting the number of time each item appears in the input. Args: items (Iterable[T]): hashable items (usually containing duplicates) weights (Iterable[float] | None): Corresponding weights for each item, defaults to 1 if unspecified. Defaults to None. ordered (bool): If True the result is ordered by frequency. Defaults to False. labels (Iterable[T] | None): Expected labels. Allows this function to pre-initialize the histogram. If specified the frequency of each label is initialized to zero and ``items`` can only contain items specified in labels. Defaults to None. Returns: dict[T, int] : dictionary where the keys are unique elements from ``items``, and the values are the number of times the item appears in ``items``. SeeAlso: :class:`collections.Counter` Example: >>> import ubelt as ub >>> items = [1, 2, 39, 900, 1232, 900, 1232, 2, 2, 2, 900] >>> hist = ub.dict_hist(items) >>> print(ub.repr2(hist, nl=0)) {1: 1, 2: 4, 39: 1, 900: 3, 1232: 2} Example: >>> import ubelt as ub >>> import pytest >>> items = [1, 2, 39, 900, 1232, 900, 1232, 2, 2, 2, 900] >>> hist1 = ub.dict_hist(items) >>> hist2 = ub.dict_hist(items, ordered=True) >>> with pytest.raises(KeyError): >>> hist3 = ub.dict_hist(items, labels=[]) >>> weights = [1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1] >>> hist4 = ub.dict_hist(items, weights=weights) >>> print(ub.repr2(hist1, nl=0)) {1: 1, 2: 4, 39: 1, 900: 3, 1232: 2} >>> print(ub.repr2(hist4, nl=0)) {1: 1, 2: 4, 39: 1, 900: 1, 1232: 0} """ if weights is None and labels is None: # Accumulate discrete frequency. 
# In this special case we use an optimized stdlib routine from collections import Counter hist_ = Counter() hist_.update(items) else: if labels is None: hist_ = defaultdict(lambda: 0) else: hist_ = {k: 0 for k in labels} if weights is None: weights = it.repeat(1) # 2x slower than Counter # Accumulate weighted frequency for item, weight in zip(items, weights): hist_[item] += weight if ordered: # Order by value getval = op.itemgetter(1) hist = OrderedDict([ (key, value) for (key, value) in sorted(hist_.items(), key=getval) ]) else: # Cast to a normal dictionary hist = dict(hist_) return hist def find_duplicates(items, k=2, key=None): """ Find all duplicate items in a list. Search for all items that appear at least ``k`` times and return a mapping from each (k)-duplicate item to the positions it appeared in. Args: items (Iterable[T]): Hashable items possibly containing duplicates k (int): Only return items that appear at least ``k`` times. Defaults to 2. key (Callable[[T], Any] | None): Returns indices where `key(items[i])` maps to a particular value at least k times. Defaults to None. Returns: dict[T, List[int]] : Maps each duplicate item to the indices at which it appears Notes: Similar to :func:`more_itertools.duplicates_everseen`, :func:`more_itertools.duplicates_justseen`. Example: >>> import ubelt as ub >>> items = [0, 0, 1, 2, 3, 3, 0, 12, 2, 9] >>> duplicates = ub.find_duplicates(items) >>> # Duplicates are a mapping from each item that occurs 2 or more >>> # times to the indices at which they occur. >>> assert duplicates == {0: [0, 1, 6], 2: [3, 8], 3: [4, 5]} >>> # You can set k=3 if you don't mind duplicates but you >>> # want to find triplicates or quadruplets etc. >>> assert ub.find_duplicates(items, k=3) == {0: [0, 1, 6]} Example: >>> import ubelt as ub >>> items = [0, 0, 1, 2, 3, 3, 0, 12, 2, 9] >>> # note: k can be less than 2 >>> duplicates = ub.find_duplicates(items, k=0) >>> print(ub.repr2(duplicates, nl=0)) {0: [0, 1, 6], 1: [2], 2: [3, 8], 3: [4, 5], 9: [9], 12: [7]} Example: >>> import ubelt as ub >>> items = [10, 11, 12, 13, 14, 15, 16] >>> duplicates = ub.find_duplicates(items, key=lambda x: x // 2) >>> print(ub.repr2(duplicates, nl=0)) {5: [0, 1], 6: [2, 3], 7: [4, 5]} """ # Build mapping from items to the indices at which they appear duplicates = defaultdict(list) if key is None: for count, item in enumerate(items): duplicates[item].append(count) else: for count, item in enumerate(items): duplicates[key(item)].append(count) # remove items seen fewer than k times. for key in list(duplicates.keys()): if len(duplicates[key]) < k: del duplicates[key] duplicates = dict(duplicates) return duplicates def dict_subset(dict_, keys, default=NoParam, cls=OrderedDict): """ Get a subset of a dictionary Args: dict_ (Dict[KT, VT]): superset dictionary keys (Iterable[KT]): keys to take from ``dict_`` default (Any | NoParamType): if specified uses default if keys are missing. cls (Type[Dict]): type of the returned dictionary. Defaults to ``OrderedDict``.
Returns: Dict[KT, VT]: subset dictionary SeeAlso: :func:`dict_isect` - similar functionality, but ignores missing keys :py:meth:`UDict.subdict` - object oriented version of this function Example: >>> import ubelt as ub >>> dict_ = {'K': 3, 'dcvs_clip_max': 0.2, 'p': 0.1} >>> keys = ['K', 'dcvs_clip_max'] >>> subdict_ = ub.dict_subset(dict_, keys) >>> print(ub.repr2(subdict_, nl=0)) {'K': 3, 'dcvs_clip_max': 0.2} """ from ubelt import util_list keys = list(keys) items = util_list.take(dict_, keys, default) subdict_ = cls(list(zip(keys, items))) return subdict_ def dict_union(*args): """ Dictionary set extension for ``set.union`` Combines items from multiple dictionaries. For items with intersecting keys, dictionaries towards the end of the sequence are given precedence. Args: *args (List[Dict]) : A sequence of dictionaries. Values are taken from the last Returns: Dict | OrderedDict : OrderedDict if the first argument is an OrderedDict, otherwise dict Notes: In Python 3.9+, the bitwise or operator ``|`` performs a similar operation, but as of 2022-06-01 there is still no public method for dictionary union (or any other dictionary set operator). References: .. [SO38987] https://stackoverflow.com/questions/38987/merge-two-dict SeeAlso: :func:`collections.ChainMap` - a standard python builtin data structure that provides a view that treats multiple dicts as a single dict. :py:meth:`UDict.union` - object oriented version of this function Example: >>> import ubelt as ub >>> result = ub.dict_union({'a': 1, 'b': 1}, {'b': 2, 'c': 2}) >>> assert result == {'a': 1, 'b': 2, 'c': 2} >>> output = ub.dict_union( >>> ub.odict([('a', 1), ('b', 2)]), >>> ub.odict([('c', 3), ('d', 4)])) >>> print(ub.urepr(output, nl=0)) {'a': 1, 'b': 2, 'c': 3, 'd': 4} >>> ub.dict_union() {} """ if not args: return {} else: dictclass = OrderedDict if isinstance(args[0], OrderedDict) else dict return dictclass(it.chain.from_iterable(d.items() for d in args)) def dict_diff(*args): """ Dictionary set extension for :func:`set.difference` Constructs a dictionary that contains the keys in the first arg that are not in any of the following args. Args: *args (List[Dict[KT, VT] | Iterable[KT]]) : A sequence of dictionaries (or sets of keys). The first argument should always be a dictionary, but the subsequent arguments can just be sets of keys. Returns: Dict[KT, VT] | OrderedDict[KT, VT] : OrderedDict if the first argument is an OrderedDict, otherwise dict SeeAlso: :py:meth:`UDict.difference` - object oriented version of this function Example: >>> import ubelt as ub >>> ub.dict_diff({'a': 1, 'b': 1}, {'a'}, {'c'}) {'b': 1} >>> result = ub.dict_diff(ub.odict([('a', 1), ('b', 2)]), ub.odict([('c', 3)])) >>> print(ub.urepr(result, nl=0)) {'a': 1, 'b': 2} >>> ub.dict_diff() {} >>> ub.dict_diff({'a': 1, 'b': 2}, {'c'}) {'a': 1, 'b': 2} """ if not args: return {} else: first_dict = args[0] dictclass = OrderedDict if isinstance(first_dict, OrderedDict) else dict # remove_keys = set.union(*map(set, args[1:])) # new = dictclass((k, v) for k, v in first_dict.items() if k not in remove_keys) remove_keys = set.union(*map(set, args[1:])) new = dictclass((k, first_dict[k]) for k in first_dict.keys() if k not in remove_keys) return new def dict_isect(*args): """ Dictionary set extension for :func:`set.intersection` Constructs a dictionary that contains keys common between all inputs. The returned values will only belong to the first dictionary.
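For example (an illustrative sketch), ``dict_isect({'a': 1, 'b': 2}, {'b': 99})`` keeps only the shared key and returns ``{'b': 2}``.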
Args: *args (List[Dict[KT, VT] | Iterable[KT]]) : A sequence of dictionaries (or sets of keys). The first argument should always be a dictionary, but the subsequent arguments can just be sets of keys. Returns: Dict[KT, VT] | OrderedDict[KT, VT] : OrderedDict if the first argument is an OrderedDict, otherwise dict SeeAlso: :py:meth:`UDict.intersection` - object oriented version of this function Note: This function can be used as an alternative to :func:`dict_subset` where any key not in the dictionary is ignored. See the following example: >>> import ubelt as ub >>> # xdoctest: +IGNORE_WANT >>> ub.dict_isect({'a': 1, 'b': 2, 'c': 3}, ['a', 'c', 'd']) {'a': 1, 'c': 3} Example: >>> import ubelt as ub >>> ub.dict_isect({'a': 1, 'b': 1}, {'b': 2, 'c': 2}) {'b': 1} >>> ub.dict_isect(odict([('a', 1), ('b', 2)]), odict([('c', 3)])) OrderedDict() >>> ub.dict_isect() {} """ if not args: return {} else: dictclass = OrderedDict if isinstance(args[0], OrderedDict) else dict common_keys = set.intersection(*map(set, args)) first_dict = args[0] return dictclass((k, first_dict[k]) for k in first_dict if k in common_keys) def map_values(func, dict_, cls=None): """ Apply a function to every value in a dictionary. Creates a new dictionary with the same keys and modified values. Args: func (Callable[[VT], T] | Mapping[VT, T]): a function or indexable object dict_ (Dict[KT, VT]): a dictionary cls (type | None): specifies the dict subclass of the result. If unspecified, it will be dict or OrderedDict. This behavior may change. SeeAlso: :py:meth:`UDict.map_values` - object oriented version of this function Returns: Dict[KT, T]: transformed dictionary Notes: Similar to :py:mod:`dictmap.dict_map` Example: >>> import ubelt as ub >>> dict_ = {'a': [1, 2, 3], 'b': []} >>> newdict = ub.map_values(len, dict_) >>> assert newdict == {'a': 3, 'b': 0} Example: >>> # Can also use an indexable as ``func`` >>> import ubelt as ub >>> dict_ = {'a': 0, 'b': 1} >>> func = [42, 21] >>> newdict = ub.map_values(func, dict_) >>> assert newdict == {'a': 42, 'b': 21} >>> print(newdict) """ if not hasattr(func, '__call__'): func = func.__getitem__ keyval_list = [(key, func(val)) for key, val in dict_.items()] if cls is None: cls = OrderedDict if isinstance(dict_, OrderedDict) else dict newdict = cls(keyval_list) return newdict map_vals = map_values # backwards compatibility def map_keys(func, dict_, cls=None): """ Apply a function to every key in a dictionary. Creates a new dictionary with the same values and modified keys. An error is raised if the new keys are not unique. Args: func (Callable[[KT], T] | Mapping[KT, T]): a function or indexable object dict_ (Dict[KT, VT]): a dictionary cls (type | None): specifies the dict subclass of the result. If unspecified, it will be dict or OrderedDict. This behavior may change.
SeeAlso: :py:meth:`UDict.map_keys` - object oriented version of this function Returns: Dict[T, VT]: transformed dictionary Raises: Exception : if multiple input keys map to the same output key Example: >>> import ubelt as ub >>> dict_ = {'a': [1, 2, 3], 'b': []} >>> func = ord >>> newdict = ub.map_keys(func, dict_) >>> print(newdict) >>> assert newdict == {97: [1, 2, 3], 98: []} >>> dict_ = {0: [1, 2, 3], 1: []} >>> func = ['a', 'b'] >>> newdict = ub.map_keys(func, dict_) >>> print(newdict) >>> assert newdict == {'a': [1, 2, 3], 'b': []} """ if not hasattr(func, '__call__'): func = func.__getitem__ keyval_list = [(func(key), val) for key, val in dict_.items()] if cls is None: cls = OrderedDict if isinstance(dict_, OrderedDict) else dict newdict = cls(keyval_list) if len(newdict) != len(dict_): raise Exception('multiple input keys mapped to the same output key') return newdict def sorted_values(dict_, key=None, reverse=False, cls=OrderedDict): """ Return an ordered dictionary sorted by its values Args: dict_ (Dict[KT, VT]): dictionary to sort. The values must be of comparable types. key (Callable[[VT], Any] | None): If given as a callable, customizes the sorting by ordering using transformed values. reverse (bool): If True returns in descending order. Defaults to False. cls (type): Specifies the dict return type. Defaults to OrderedDict. SeeAlso: :py:meth:`UDict.sorted_values` - object oriented version of this function Returns: OrderedDict[KT, VT]: new dictionary where the values are ordered Example: >>> import ubelt as ub >>> dict_ = {'spam': 2.62, 'eggs': 1.20, 'jam': 2.92} >>> newdict = sorted_values(dict_) >>> print(ub.repr2(newdict, nl=0)) {'eggs': 1.2, 'spam': 2.62, 'jam': 2.92} >>> newdict = sorted_values(dict_, reverse=True) >>> print(ub.repr2(newdict, nl=0)) {'jam': 2.92, 'spam': 2.62, 'eggs': 1.2} >>> newdict = sorted_values(dict_, key=lambda x: x % 1.6) >>> print(ub.repr2(newdict, nl=0)) {'spam': 2.62, 'eggs': 1.2, 'jam': 2.92} """ if key is None: newdict = OrderedDict(sorted(dict_.items(), key=lambda kv: kv[1], reverse=reverse)) else: newdict = OrderedDict(sorted(dict_.items(), key=lambda kv: key(kv[1]), reverse=reverse)) return newdict sorted_vals = sorted_values # backwards compatibility def sorted_keys(dict_, key=None, reverse=False, cls=OrderedDict): """ Return an ordered dictionary sorted by its keys Args: dict_ (Dict[KT, VT]): Dictionary to sort. The keys must be of comparable types. key (Callable[[KT], Any] | None): If given as a callable, customizes the sorting by ordering using transformed keys. reverse (bool): If True returns in descending order. Defaults to False.
cls (type): specifies the dict return type SeeAlso: :py:meth:`UDict.sorted_keys` - object oriented version of this function Returns: OrderedDict[KT, VT]: new dictionary where the keys are ordered Example: >>> import ubelt as ub >>> dict_ = {'spam': 2.62, 'eggs': 1.20, 'jam': 2.92} >>> newdict = sorted_keys(dict_) >>> print(ub.repr2(newdict, nl=0)) {'eggs': 1.2, 'jam': 2.92, 'spam': 2.62} >>> newdict = sorted_keys(dict_, reverse=True) >>> print(ub.repr2(newdict, nl=0)) {'spam': 2.62, 'jam': 2.92, 'eggs': 1.2} >>> newdict = sorted_keys(dict_, key=lambda x: sum(map(ord, x))) >>> print(ub.repr2(newdict, nl=0)) {'jam': 2.92, 'eggs': 1.2, 'spam': 2.62} """ if key is None: newdict = OrderedDict(sorted(dict_.items(), key=lambda kv: kv[0], reverse=reverse)) else: newdict = OrderedDict(sorted(dict_.items(), key=lambda kv: key(kv[0]), reverse=reverse)) return newdict def invert_dict(dict_, unique_vals=True, cls=None): """ Swaps the keys and values in a dictionary. Args: dict_ (Dict[KT, VT]): dictionary to invert unique_vals (bool): if False, the values of the new dictionary are sets of the original keys. Defaults to True. cls (type | None): specifies the dict subclass of the result. If unspecified, it will be dict or OrderedDict. This behavior may change. SeeAlso: :py:meth:`UDict.invert` - object oriented version of this function Returns: Dict[VT, KT] | Dict[VT, Set[KT]]: the inverted dictionary Note: The values must be hashable. If the original dictionary contains duplicate values, then only one of the corresponding keys will be returned and the others will be discarded. This can be prevented by setting ``unique_vals=False``, causing the inverted keys to be returned in a set. Example: >>> import ubelt as ub >>> dict_ = {'a': 1, 'b': 2} >>> inverted = ub.invert_dict(dict_) >>> assert inverted == {1: 'a', 2: 'b'} Example: >>> import ubelt as ub >>> dict_ = ub.odict([(2, 'a'), (1, 'b'), (0, 'c'), (None, 'd')]) >>> inverted = ub.invert_dict(dict_) >>> assert list(inverted.keys())[0] == 'a' Example: >>> import ubelt as ub >>> dict_ = {'a': 1, 'b': 0, 'c': 0, 'd': 0, 'f': 2} >>> inverted = ub.invert_dict(dict_, unique_vals=False) >>> assert inverted == {0: {'b', 'c', 'd'}, 1: {'a'}, 2: {'f'}} """ if cls is None: cls = OrderedDict if isinstance(dict_, OrderedDict) else dict if unique_vals: # Wonder what byte code is better here? if cls is dict: inverted = {val: key for key, val in dict_.items()} else: inverted = cls((val, key) for key, val in dict_.items()) else: # Handle non-unique keys using groups inverted = defaultdict(set) for key, value in dict_.items(): inverted[value].add(key) inverted = cls(inverted) return inverted def named_product(_=None, **basis): """ Generates the Cartesian product of the ``basis.values()``, where each generated item is labeled by ``basis.keys()``. In other words, given a dictionary that maps each "axes" (i.e. some variable) to its "basis" (i.e. the possible values that it can take), generate all possible points in that grid (i.e. unique assignments of variables to values). Args: _ (Dict[str, List[VT]] | None): Use of this positional argument is not recommended. Instead specify all arguments as keyword args. Defaults to None. If specified, this should be a dictionary that is unioned with the keyword args. This exists to support ordered dictionaries before Python 3.6, and may eventually be removed. basis (Dict[str, List[VT]]): A dictionary where the keys correspond to "columns" and the values are a list of possible values that "column" can take. I.e.
each key corresponds to an "axes", the values are the list of possible values for that "axes". Yields: Dict[str, VT] : a "row" in the "longform" data containing a point in the Cartesian product. Note: This function is similar to :func:`itertools.product`, the only difference is that the generated items are dictionaries that retain the input keys instead of tuples. This function used to be called "basis_product", but "named_product" might be more appropriate. This function exists in other places ([minstrel271_namedproduct]_, [pytb_namedproduct]_, and [Hettinger_namedproduct]_). References: .. [minstrel271_namedproduct] https://gist.github.com/minstrel271/d51654af3fa4e6411267 .. [pytb_namedproduct] https://py-toolbox.readthedocs.io/en/latest/modules/itertools.html# .. [Hettinger_namedproduct] https://twitter.com/raymondh/status/970380630822305792 Example: >>> # An example use case is looping over all possible settings in a >>> # configuration dictionary for a grid search over parameters. >>> import ubelt as ub >>> basis = { >>> 'arg1': [1, 2, 3], >>> 'arg2': ['A1', 'B1'], >>> 'arg3': [9999, 'Z2'], >>> 'arg4': ['always'], >>> } >>> import ubelt as ub >>> # sort input data for older python versions >>> basis = ub.odict(sorted(basis.items())) >>> got = list(ub.named_product(basis)) >>> print(ub.repr2(got, nl=-1)) [ {'arg1': 1, 'arg2': 'A1', 'arg3': 9999, 'arg4': 'always'}, {'arg1': 1, 'arg2': 'A1', 'arg3': 'Z2', 'arg4': 'always'}, {'arg1': 1, 'arg2': 'B1', 'arg3': 9999, 'arg4': 'always'}, {'arg1': 1, 'arg2': 'B1', 'arg3': 'Z2', 'arg4': 'always'}, {'arg1': 2, 'arg2': 'A1', 'arg3': 9999, 'arg4': 'always'}, {'arg1': 2, 'arg2': 'A1', 'arg3': 'Z2', 'arg4': 'always'}, {'arg1': 2, 'arg2': 'B1', 'arg3': 9999, 'arg4': 'always'}, {'arg1': 2, 'arg2': 'B1', 'arg3': 'Z2', 'arg4': 'always'}, {'arg1': 3, 'arg2': 'A1', 'arg3': 9999, 'arg4': 'always'}, {'arg1': 3, 'arg2': 'A1', 'arg3': 'Z2', 'arg4': 'always'}, {'arg1': 3, 'arg2': 'B1', 'arg3': 9999, 'arg4': 'always'}, {'arg1': 3, 'arg2': 'B1', 'arg3': 'Z2', 'arg4': 'always'} ] Example: >>> import ubelt as ub >>> list(ub.named_product(a=[1, 2, 3])) [{'a': 1}, {'a': 2}, {'a': 3}] >>> # xdoctest: +IGNORE_WANT >>> list(ub.named_product(a=[1, 2, 3], b=[4, 5])) [{'a': 1, 'b': 4}, {'a': 1, 'b': 5}, {'a': 2, 'b': 4}, {'a': 2, 'b': 5}, {'a': 3, 'b': 4}, {'a': 3, 'b': 5}] """ # Handle one positional argument. if _ is not None: _basis = _ _basis.update(basis) basis = _basis keys = list(basis.keys()) for vals in it.product(*basis.values()): kw = dict(zip(keys, vals)) yield kw def varied_values(longform, min_variations=0, default=NoParam): """ Given a list of dictionaries, find the values that differ between them. Args: longform (List[Dict[KT, VT]]): This is longform data, as described in [SeabornLongform]_. It is a list of dictionaries. Each item in the list - or row - is a dictionary and can be thought of as an observation. The keys in each dictionary are the columns. The values of the dictionary must be hashable. Lists will be converted into tuples. min_variations (int): "columns" with fewer than ``min_variations`` unique values are removed from the result. Defaults to 0. default (VT | NoParamType): if specified, unspecified columns are given this value. Defaults to NoParam. Returns: Dict[KT, List[VT]] : a mapping from each "column" to the set of unique values it took over each "row". If a column is not specified for each row, it is assumed to take a `default` value, if it is specified.
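For example (an illustrative sketch), ``varied_values([{'a': 1}, {'a': 2}])`` returns ``{'a': {1, 2}}``.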
Raises: KeyError: If ``default`` is unspecified and all the rows do not contain the same columns. References: .. [SeabornLongform] https://seaborn.pydata.org/tutorial/data_structure.html#long-form-data Example: >>> # An example use case is to determine what values of a >>> # configuration dictionary were tried in a random search >>> # over a parameter grid. >>> import ubelt as ub >>> longform = [ >>> {'col1': 1, 'col2': 'foo', 'col3': None}, >>> {'col1': 1, 'col2': 'foo', 'col3': None}, >>> {'col1': 2, 'col2': 'bar', 'col3': None}, >>> {'col1': 3, 'col2': 'bar', 'col3': None}, >>> {'col1': 9, 'col2': 'bar', 'col3': None}, >>> {'col1': 1, 'col2': 'bar', 'col3': None}, >>> ] >>> varied = ub.varied_values(longform) >>> print('varied = {}'.format(ub.repr2(varied, nl=1))) varied = { 'col1': {1, 2, 3, 9}, 'col2': {'bar', 'foo'}, 'col3': {None}, } Example: >>> import ubelt as ub >>> import random >>> longform = [ >>> {'col1': 1, 'col2': 'foo', 'col3': None}, >>> {'col1': 1, 'col2': [1, 2], 'col3': None}, >>> {'col1': 2, 'col2': 'bar', 'col3': None}, >>> {'col1': 3, 'col2': 'bar', 'col3': None}, >>> {'col1': 9, 'col2': 'bar', 'col3': None}, >>> {'col1': 1, 'col2': 'bar', 'col3': None, 'extra_col': 3}, >>> ] >>> # Operation fails without a default >>> import pytest >>> with pytest.raises(KeyError): >>> varied = ub.varied_values(longform) >>> # >>> # Operation works with a default >>> varied = ub.varied_values(longform, default='') >>> expected = { >>> 'col1': {1, 2, 3, 9}, >>> 'col2': {'bar', 'foo', (1, 2)}, >>> 'col3': set([None]), >>> 'extra_col': {'', 3}, >>> } >>> print('varied = {!r}'.format(varied)) >>> assert varied == expected Example: >>> # xdoctest: +REQUIRES(PY3) >>> # Random numbers are different in Python2, so skip in that case >>> import ubelt as ub >>> import random >>> num_cols = 11 >>> num_rows = 17 >>> rng = random.Random(0) >>> # Generate a set of columns >>> columns = sorted(ub.hash_data(i)[0:8] for i in range(num_cols)) >>> # Generate rows for each column >>> longform = [ >>> {key: ub.hash_data(key)[0:8] for key in columns} >>> for _ in range(num_rows) >>> ] >>> # Add in some varied values in random positions >>> for row in longform: >>> if rng.random() > 0.5: >>> for key in sorted(row.keys()): >>> if rng.random() > 0.95: >>> row[key] = 'special-' + str(rng.randint(1, 32)) >>> varied = ub.varied_values(longform, min_variations=1) >>> print('varied = {}'.format(ub.repr2(varied, nl=1, sort=True))) varied = { '095f3e44': {'8fb4d4c9', 'special-23'}, '365d11a1': {'daa409da', 'special-31', 'special-32'}, '5815087d': {'1b823610', 'special-3'}, '7b54b668': {'349a782c', 'special-10'}, 'b8244d02': {'d57bca90', 'special-8'}, 'f27b5bf8': {'fa0f90d1', 'special-19'}, } """ # Enumerate all defined columns columns = set() for row in longform: if default is NoParam and len(row) != len(columns) and len(columns): missing = set(columns).symmetric_difference(set(row)) raise KeyError(( 'No default specified and not every ' 'row contains columns {}').format(missing)) columns.update(row.keys()) # Build up the set of unique values for each column varied = ddict(set) for row in longform: for key in columns: value = row.get(key, default) if isinstance(value, list): value = tuple(value) varied[key].add(value) # Remove any column that does not have enough variation if min_variations > 0: for key, values in list(varied.items()): if len(values) <= min_variations: varied.pop(key) return varied class SetDict(dict): """ A dictionary subclass where all set operations are defined. 
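For instance (an illustrative sketch), ``SetDict({'a': 1, 'b': 2}) - {'a'}`` removes the key ``'a'`` and returns ``{'b': 2}``.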
All of the set operations are defined in a key-wise fashion, that is, it is like performing the operation on sets of keys. Value conflicts are handled with left-most priority (default for ``intersection`` and ``difference``), right-most priority (default for ``union`` and ``symmetric_difference``), or via a custom ``merge`` callable similar to [RubyMerge]_. The set operations are: * union (or the ``|`` operator) combines multiple dictionaries into one. This is nearly identical to the update operation. Rightmost values take priority. * intersection (or the ``&`` operator). Takes the items from the first dictionary that share keys with the following dictionaries (or lists or sets of keys). Leftmost values take priority. * difference (or the ``-`` operator). Takes only items from the first dictionary that do not share keys with following dictionaries. Leftmost values take priority. * symmetric_difference (or the ``^`` operator). Takes the items from all dictionaries where the key appears an odd number of times. Rightmost values take priority. The full set of set operations was originally determined to be beyond the scope of [Pep584]_, but there was discussion of these additional operations. Some choices were ambiguous, but we believe this design could be considered "natural". Note: By default the right-most values take priority in union / symmetric_difference and left-most values take priority in intersection / difference. In summary this is because we consider intersection / difference to be "subtractive" operations, and union / symmetric_difference to be "additive" operations. We expand on this in the following points: 1. intersection / difference is for removing keys --- i.e. is used to find values in the first (main) dictionary that are also in some other dictionary (or set or list of keys), whereas 2. union is for adding keys --- i.e. it is basically just an alias for dict.update, so the new (rightmost) keys clobber the old. 3. symmetric_difference is somewhat strange if you aren't familiar with it. At a pure-set level it's not really a difference, it's a parity operation (think of it more like xor or addition modulo 2). You only keep items where the key appears an odd number of times. Unlike intersection and difference, the results may not be a subset of either input. The union has the same property. This symmetry motivates having the newest (rightmost) keys clobber the old. Also, union / symmetric_difference does not make sense if arguments on the right are lists/sets, whereas difference / intersection does. Note: The SetDict class only defines key-wise set operations. Value-wise or item-wise operations are in general not hashable and therefore not supported. A heavier extension would be needed for that. TODO: - [ ] implement merge callables so the user can specify how to resolve value conflicts / combine values. References: .. [RubyMerge] https://ruby-doc.org/core-2.7.0/Hash.html#method-i-merge ..
[Pep584] https://peps.python.org/pep-0584/#what-about-the-full-set-api CommandLine: xdoctest -m ubelt.util_dict SetDict Example: >>> import ubelt as ub >>> a = ub.SetDict({'A': 'Aa', 'B': 'Ba', 'D': 'Da'}) >>> b = ub.SetDict({'A': 'Ab', 'B': 'Bb', 'C': 'Cb', }) >>> print(a.union(b)) >>> print(a.intersection(b)) >>> print(a.difference(b)) >>> print(a.symmetric_difference(b)) {'A': 'Ab', 'B': 'Bb', 'D': 'Da', 'C': 'Cb'} {'A': 'Aa', 'B': 'Ba'} {'D': 'Da'} {'D': 'Da', 'C': 'Cb'} >>> print(a | b) # union >>> print(a & b) # intersection >>> print(a - b) # difference >>> print(a ^ b) # symmetric_difference {'A': 'Ab', 'B': 'Bb', 'D': 'Da', 'C': 'Cb'} {'A': 'Aa', 'B': 'Ba'} {'D': 'Da'} {'D': 'Da', 'C': 'Cb'} Example: >>> import ubelt as ub >>> a = ub.SetDict({'A': 'Aa', 'B': 'Ba', 'D': 'Da'}) >>> b = ub.SetDict({'A': 'Ab', 'B': 'Bb', 'C': 'Cb', }) >>> c = ub.SetDict({'A': 'Ac', 'B': 'Bc', 'E': 'Ec'}) >>> d = ub.SetDict({'A': 'Ad', 'C': 'Cd', 'D': 'Dd'}) >>> # 3-ary operations >>> print(a.union(b, c)) >>> print(a.intersection(b, c)) >>> print(a.difference(b, c)) >>> print(a.symmetric_difference(b, c)) {'A': 'Ac', 'B': 'Bc', 'D': 'Da', 'C': 'Cb', 'E': 'Ec'} {'A': 'Aa', 'B': 'Ba'} {'D': 'Da'} {'D': 'Da', 'C': 'Cb', 'A': 'Ac', 'B': 'Bc', 'E': 'Ec'} >>> # 4-ary operations >>> print(ub.UDict.union(a, b, c, c)) >>> print(ub.UDict.intersection(a, b, c, c)) >>> print(ub.UDict.difference(a, b, c, d)) >>> print(ub.UDict.symmetric_difference(a, b, c, d)) {'A': 'Ac', 'B': 'Bc', 'D': 'Da', 'C': 'Cb', 'E': 'Ec'} {'A': 'Aa', 'B': 'Ba'} {} {'B': 'Bc', 'E': 'Ec'} Example: >>> import ubelt as ub >>> primes = ub.sdict({v: f'prime_{v}' for v in [2, 3, 5, 7, 11]}) >>> evens = ub.sdict({v: f'even_{v}' for v in [0, 2, 4, 6, 8, 10]}) >>> odds = ub.sdict({v: f'odd_{v}' for v in [1, 3, 5, 7, 9, 11]}) >>> squares = ub.sdict({v: f'square_{v}' for v in [0, 1, 4, 9]}) >>> div3 = ub.sdict({v: f'div3_{v}' for v in [0, 3, 6, 9]}) >>> # All of the set methods are defined >>> results1 = {} >>> results1['ints'] = ints = odds.union(evens) >>> results1['composites'] = ints.difference(primes) >>> results1['even_primes'] = evens.intersection(primes) >>> results1['odd_nonprimes_and_two'] = odds.symmetric_difference(primes) >>> print('results1 = {}'.format(ub.repr2(results1, nl=2, sort=True))) results1 = { 'composites': { 0: 'even_0', 1: 'odd_1', 4: 'even_4', 6: 'even_6', 8: 'even_8', 9: 'odd_9', 10: 'even_10', }, 'even_primes': { 2: 'even_2', }, 'ints': { 0: 'even_0', 1: 'odd_1', 2: 'even_2', 3: 'odd_3', 4: 'even_4', 5: 'odd_5', 6: 'even_6', 7: 'odd_7', 8: 'even_8', 9: 'odd_9', 10: 'even_10', 11: 'odd_11', }, 'odd_nonprimes_and_two': { 1: 'odd_1', 2: 'prime_2', 9: 'odd_9', }, } >>> # As well as their corresponding binary operators >>> assert results1['ints'] == odds | evens >>> assert results1['composites'] == ints - primes >>> assert results1['even_primes'] == evens & primes >>> assert results1['odd_nonprimes_and_two'] == odds ^ primes >>> # These can also be used as classmethods >>> assert results1['ints'] == ub.sdict.union(odds, evens) >>> assert results1['composites'] == ub.sdict.difference(ints, primes) >>> assert results1['even_primes'] == ub.sdict.intersection(evens, primes) >>> assert results1['odd_nonprimes_and_two'] == ub.sdict.symmetric_difference(odds, primes) >>> # The narry variants are also implemented >>> results2 = {} >>> results2['nary_union'] = ub.sdict.union(primes, div3, odds) >>> results2['nary_difference'] = ub.sdict.difference(primes, div3, odds) >>> results2['nary_intersection'] = ub.sdict.intersection(primes, 
div3, odds) >>> # Note that the definition of symmetric difference might not be what you think in the nary case. >>> results2['nary_symmetric_difference'] = ub.sdict.symmetric_difference(primes, div3, odds) >>> print('results2 = {}'.format(ub.repr2(results2, nl=2, sort=True))) results2 = { 'nary_difference': { 2: 'prime_2', }, 'nary_intersection': { 3: 'prime_3', }, 'nary_symmetric_difference': { 0: 'div3_0', 1: 'odd_1', 2: 'prime_2', 3: 'odd_3', 6: 'div3_6', }, 'nary_union': { 0: 'div3_0', 1: 'odd_1', 2: 'prime_2', 3: 'odd_3', 5: 'odd_5', 6: 'div3_6', 7: 'odd_7', 9: 'odd_9', 11: 'odd_11', }, } Example: >>> # A neat thing about our implementation is that often the right >>> # hand side is not required to be a dictionary, just something >>> # that can be cast to a set. >>> import ubelt as ub >>> primes = ub.sdict({2: 'a', 3: 'b', 5: 'c', 7: 'd', 11: 'e'}) >>> assert primes - {2, 3} == {5: 'c', 7: 'd', 11: 'e'} >>> assert primes & {2, 3} == {2: 'a', 3: 'b'} >>> # Union does need to have a second dictionary >>> import pytest >>> with pytest.raises(AttributeError): >>> primes | {2, 3} """ def copy(self): """ Example: >>> import ubelt as ub >>> a = ub.sdict({1: 1, 2: 2, 3: 3}) >>> b = ub.udict({1: 1, 2: 2, 3: 3}) >>> c = a.copy() >>> d = b.copy() >>> assert c is not a >>> assert d is not b >>> assert d == b >>> assert c == a >>> list(map(type, [a, b, c, d])) >>> assert isinstance(c, ub.sdict) >>> assert isinstance(d, ub.udict) """ return self.__class__(self) # We could just use the builtin variant for this specific operation def __or__(self, other): """ The ``|`` union operator Args: other (SupportsKeysAndGetItem[Any, Any] | Iterable[Tuple[Any, Any]]): Returns: SetDict """ return self.union(other) def __and__(self, other): """ The ``&`` intersection operator Args: other (Mapping | Iterable): Returns: SetDict """ return self.intersection(other) def __sub__(self, other): """ The ``-`` difference operator Args: other (Mapping | Iterable): Returns: SetDict """ return self.difference(other) def __xor__(self, other): """ The ``^`` symmetric_difference operator Args: other (Mapping): Returns: SetDict """ return self.symmetric_difference(other) # - reverse versions def __ror__(self, other): """ Args: other (Mapping): Returns: dict Example: >>> import ubelt as ub >>> self = ub.sdict({1: 1, 2: 2, 3: 3}) >>> other = {1: 10, 2:20, 4: 40} >>> d1 = self | other >>> d2 = other | self >>> assert isinstance(d1, ub.SetDict), 'should use own type' >>> assert isinstance(d2, ub.SetDict), 'should promote type' >>> print(f'd1={d1}') >>> print(f'd2={d2}') d1={1: 10, 2: 20, 3: 3, 4: 40} d2={1: 1, 2: 2, 4: 40, 3: 3} """ return SetDict.union(other, self, cls=self.__class__) def __rand__(self, other): """ Args: other (Mapping): Returns: dict Example: >>> import ubelt as ub >>> self = ub.sdict({1: 1, 2: 2, 3: 3}) >>> other = {1: 10, 2:20, 4: 40} >>> d1 = self & other >>> d2 = other & self >>> assert isinstance(d1, ub.SetDict), 'should use own type' >>> assert isinstance(d2, ub.SetDict), 'should promote type' >>> print(f'd1={d1}') >>> print(f'd2={d2}') d1={1: 1, 2: 2} d2={1: 10, 2: 20} """ return SetDict.intersection(other, self, cls=self.__class__) def __rsub__(self, other): """ Args: other (Mapping): Returns: dict Example: >>> import ubelt as ub >>> self = ub.sdict({1: 1, 2: 2, 3: 3}) >>> other = {1: 10, 2:20, 4: 40} >>> d1 = self - other >>> d2 = other - self >>> assert isinstance(d1, ub.SetDict), 'should use own type' >>> assert isinstance(d2, ub.SetDict), 'should promote type' >>> print(f'd1={d1}') >>> 
print(f'd2={d2}') d1={3: 3} d2={4: 40} """ return SetDict.difference(other, self, cls=self.__class__) def __rxor__(self, other): """ Args: other (Mapping): Returns: dict Example: >>> import ubelt as ub >>> self = ub.sdict({1: 1, 2: 2, 3: 3}) >>> other = {1: 10, 2:20, 4: 40} >>> d1 = self ^ other >>> d2 = other ^ self >>> assert isinstance(d1, ub.SetDict), 'should use own type' >>> assert isinstance(d2, ub.SetDict), 'should promote type' >>> print(f'd1={d1}') >>> print(f'd2={d2}') d1={3: 3, 4: 40} d2={4: 40, 3: 3} """ return SetDict.symmetric_difference(other, self, cls=self.__class__) # - inplace versions def __ior__(self, other): """ The inplace union operator ``|=``. Args: other (SupportsKeysAndGetItem[Any, Any] | Iterable[Tuple[Any, Any]]): Returns: SetDict Example: >>> import ubelt as ub >>> self = orig_ref = ub.sdict({1: 1, 2: 2, 3: 3}) >>> orig_val = orig_ref.copy() >>> other = {1: 10, 2:20, 4: 40} >>> self |= other >>> print(f'self={self}') >>> assert self is orig_ref >>> assert self == (orig_val | other) self={1: 10, 2: 20, 3: 3, 4: 40} """ self.update(other) return self def __iand__(self, other): """ The inplace intersection operator ``&=``. Args: other (Mapping | Iterable): Example: >>> import ubelt as ub >>> self = orig_ref = ub.sdict({1: 1, 2: 2, 3: 3}) >>> orig_val = orig_ref.copy() >>> other = {1: 10, 2:20, 4: 40} >>> self &= other >>> print(f'self={self}') >>> assert self is orig_ref >>> assert self == (orig_val & other) self={1: 1, 2: 2} """ remove_keys = self.keys() - set(other) for k in remove_keys: del self[k] return self def __isub__(self, other): """ The inplace difference operator ``-=``. Args: other (Mapping | Iterable): Example: >>> import ubelt as ub >>> self = orig_ref = ub.sdict({1: 1, 2: 2, 3: 3}) >>> orig_val = orig_ref.copy() >>> other = {1: 10, 2:20} >>> self -= other >>> print(f'self={self}') >>> assert self is orig_ref >>> assert self == (orig_val - other) self={3: 3} >>> import ubelt as ub >>> self = orig_ref = ub.sdict({1: 1, 2: 2, 3: 3}) >>> orig_val = orig_ref.copy() >>> other = [1] >>> self -= other >>> print(f'self={self}') >>> assert self is orig_ref >>> assert self == (orig_val - other) self={2: 2, 3: 3} >>> import ubelt as ub >>> self = orig_ref = ub.sdict({1: 1, 2: 2, 3: 3}) >>> orig_val = orig_ref.copy() >>> other = {1: 10, 2:20, 4: 40} >>> self -= other >>> print(f'self={self}') >>> assert self is orig_ref >>> assert self == (orig_val - other) """ remove_keys = self.keys() & set(other) for k in remove_keys: del self[k] return self def __ixor__(self, other): """ The inplace symmetric difference operator ``^=``. Args: other (Mapping): Example: >>> import ubelt as ub >>> self = orig_ref = ub.sdict({1: 1, 2: 2, 3: 3}) >>> orig_val = orig_ref.copy() >>> other = {1: 10, 2:20, 4: 40} >>> self ^= other >>> print(f'self={self}') >>> assert self is orig_ref >>> assert self == (orig_val ^ other) """ other_keys = set(other.keys()) remove_keys = self.keys() & other_keys add_keys = other_keys - remove_keys for k in remove_keys: del self[k] for k in add_keys: self[k] = other[k] return self ### Main set operations def union(self, *others, cls=None, merge=None): """ Return the key-wise union of two or more dictionaries. Values chosen with *right-most* priority. I.e. for items with intersecting keys, dictionaries towards the end of the sequence are given precedence. Args: self (SetDict | dict): if called as a static method this must be provided. *others : other dictionary like objects that have an ``items`` method. (i.e. 
it must return an iterable of 2-tuples where the first item is hashable.) cls (type | None): the desired return dictionary type. merge (None | Callable): if specified this function must accept an iterable of values and return a new value to use (which typically is derived from input values). NotImplemented, help wanted. Returns: dict : items from all input dictionaries. Conflicts are resolved with right-most priority unless ``merge`` is specified. Specific return type is specified by ``cls`` or defaults to the leftmost input. Example: >>> import ubelt as ub >>> a = ub.SetDict({k: 'A_' + chr(97 + k) for k in [2, 3, 5, 7]}) >>> b = ub.SetDict({k: 'B_' + chr(97 + k) for k in [2, 4, 0, 7]}) >>> c = ub.SetDict({k: 'C_' + chr(97 + k) for k in [2, 8, 3]}) >>> d = ub.SetDict({k: 'D_' + chr(97 + k) for k in [9, 10, 11]}) >>> e = ub.SetDict({k: 'E_' + chr(97 + k) for k in []}) >>> assert a | b == {2: 'B_c', 3: 'A_d', 5: 'A_f', 7: 'B_h', 4: 'B_e', 0: 'B_a'} >>> a.union(b) >>> a | b | c >>> res = ub.SetDict.union(a, b, c, d, e) >>> print(ub.repr2(res, sort=1, nl=0, si=1)) {0: B_a, 2: C_c, 3: C_d, 4: B_e, 5: A_f, 7: B_h, 8: C_i, 9: D_j, 10: D_k, 11: D_l} """ if cls is None: # Some subclass-constructors need special handling # Not sure if it is in-scope to do that here or not. # if isinstance(self.__class__, defaultdict): # ... cls = self.__class__ args = it.chain([self], others) if merge is None: new = cls(it.chain.from_iterable(d.items() for d in args)) else: raise NotImplementedError('merge function is not yet implemented') return new def intersection(self, *others, cls=None, merge=None): """ Return the key-wise intersection of two or more dictionaries. Values returned with *left-most* priority. I.e. all items returned will be from the first dictionary for keys that exist in all other dictionaries / sets provided. Args: self (SetDict | dict): if called as a static method this must be provided. *others : other dictionary or set like objects that can be coerced into a set of keys. cls (type | None): the desired return dictionary type. merge (None | Callable): if specified this function must accept an iterable of values and return a new value to use (which typically is derived from input values). NotImplemented, help wanted. Returns: dict : items with keys shared by all the inputs. Values take left-most priority unless ``merge`` is specified. Specific return type is specified by ``cls`` or defaults to the leftmost input. 
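Note: as with :func:`dict_isect`, the ``others`` may be plain sets or lists of keys; e.g. (an illustrative sketch) ``ub.sdict({'a': 1, 'b': 2}).intersection(['b', 'c'])`` returns ``{'b': 2}``.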
Example: >>> import ubelt as ub >>> a = ub.SetDict({'a': 1, 'b': 2, 'd': 4}) >>> b = ub.SetDict({'a': 10, 'b': 20, 'c': 30}) >>> a.intersection(b) {'a': 1, 'b': 2} >>> a & b {'a': 1, 'b': 2} Example: >>> import ubelt as ub >>> a = ub.SetDict({k: 'A_' + chr(97 + k) for k in [2, 3, 5, 7]}) >>> b = ub.SetDict({k: 'B_' + chr(97 + k) for k in [2, 4, 0, 7]}) >>> c = ub.SetDict({k: 'C_' + chr(97 + k) for k in [2, 8, 3]}) >>> d = ub.SetDict({k: 'D_' + chr(97 + k) for k in [9, 10, 11]}) >>> e = ub.SetDict({k: 'E_' + chr(97 + k) for k in []}) >>> assert a & b == {2: 'A_c', 7: 'A_h'} >>> a.intersection(b) >>> a & b & c >>> res = ub.SetDict.intersection(a, b, c, d, e) >>> print(ub.repr2(res, sort=1, nl=0, si=1)) {} """ cls = cls or self.__class__ isect_keys = set(self.keys()) for v in others: isect_keys.intersection_update(v) if merge is None: new = cls((k, self[k]) for k in self if k in isect_keys) else: raise NotImplementedError('merge function is not yet implemented') return new def difference(self, *others, cls=None, merge=None): """ Return the key-wise difference between this dictionary and one or more other dictionary / keys. Values returned with *left-most* priority. I.e. the returned items will be from the first dictionary, and will only contain keys that do not appear in any of the other dictionaries / sets. Args: self (SetDict | dict): if called as a static method this must be provided. *others : other dictionary or set like objects that can be coerced into a set of keys. cls (type | None): the desired return dictionary type. merge (None | Callable): if specified this function must accept an iterable of values and return a new value to use (which typically is derived from input values). NotImplemented, help wanted. Returns: dict : items from the first dictionary with keys not in any of the following inputs. Values take left-most priority unless ``merge`` is specified. Specific return type is specified by ``cls`` or defaults to the leftmost input. Example: >>> import ubelt as ub >>> a = ub.SetDict({k: 'A_' + chr(97 + k) for k in [2, 3, 5, 7]}) >>> b = ub.SetDict({k: 'B_' + chr(97 + k) for k in [2, 4, 0, 7]}) >>> c = ub.SetDict({k: 'C_' + chr(97 + k) for k in [2, 8, 3]}) >>> d = ub.SetDict({k: 'D_' + chr(97 + k) for k in [9, 10, 11]}) >>> e = ub.SetDict({k: 'E_' + chr(97 + k) for k in []}) >>> assert a - b == {3: 'A_d', 5: 'A_f'} >>> a.difference(b) >>> a - b - c >>> res = ub.SetDict.difference(a, b, c, d, e) >>> print(ub.repr2(res, sort=1, nl=0, si=1)) {5: A_f} """ cls = cls or self.__class__ other_keys = set() for v in others: other_keys.update(v) if merge is None: # Looping over original keys is important to maintain partial order. new = cls((k, self[k]) for k in self.keys() if k not in other_keys) else: raise NotImplementedError('merge function is not yet implemented') return new def symmetric_difference(self, *others, cls=None, merge=None): """ Return the key-wise symmetric difference between this dictionary and one or more other dictionaries. Values chosen with *right-most* priority. Returns items that are (key-wise) in an odd number of the given dictionaries. This is consistent with the standard n-ary definition of symmetric difference [WikiSymDiff]_ and corresponds with the xor operation. Args: self (SetDict | dict): if called as a static method this must be provided. *others : other dictionary or set like objects that can be coerced into a set of keys. cls (type | None): the desired return dictionary type. 
merge (None | Callable): if specified this function must accept an iterable of values and return a new value to use (which typically is derived from input values). NotImplemented, help wanted. Returns: dict : items from input dictionaries where the key appears an odd number of times. Values take right-most priority unless ``merge`` is specified. Specific return type is specified by ``cls`` or defaults to the leftmost input. References: .. [WikiSymDiff] https://en.wikipedia.org/wiki/Symmetric_difference Example: >>> import ubelt as ub >>> a = ub.SetDict({k: 'A_' + chr(97 + k) for k in [2, 3, 5, 7]}) >>> b = ub.SetDict({k: 'B_' + chr(97 + k) for k in [2, 4, 0, 7]}) >>> c = ub.SetDict({k: 'C_' + chr(97 + k) for k in [2, 8, 3]}) >>> d = ub.SetDict({k: 'D_' + chr(97 + k) for k in [9, 10, 11]}) >>> e = ub.SetDict({k: 'E_' + chr(97 + k) for k in []}) >>> a ^ b {3: 'A_d', 5: 'A_f', 4: 'B_e', 0: 'B_a'} >>> a.symmetric_difference(b) >>> a ^ b ^ c >>> res = ub.SetDict.symmetric_difference(a, b, c, d, e) >>> print(ub.repr2(res, sort=1, nl=0, si=1)) {0: B_a, 2: C_c, 4: B_e, 5: A_f, 8: C_i, 9: D_j, 10: D_k, 11: D_l} """ cls = cls or self.__class__ new = cls(self) # shallow copy if merge is None: for d in others: for k, v in d.items(): if k in new: new.pop(k) else: new[k] = v else: raise NotImplementedError('merge function is not yet implemented') return new sdict = SetDict # Might need to make these mixins for 3.6 class UDict(SetDict): """ A subclass of dict with ubelt enhancements This builds on top of :class:`SetDict` which itself is a simple extension that contains only that extra functionality. The extra invert, map, sorted, and peek functions are less fundamental and there are at least reasonable workarounds when they are not available. The UDict class is a simple subclass of dict that provides the following upgrades: * set operations - inherited from :class:`SetDict` + intersection - find items in common + union - merge dicts + difference - find items in one but not the other + symmetric_difference - find items that appear an odd number of times * subdict - take a subset with optional default values. (similar to intersection, but the latter ignores non-common values) * inversion - + invert - swaps a dictionary's keys and values (with options for dealing with duplicates). * mapping - + map_keys - applies a function over each key and keeps the values the same + map_values - applies a function over each value and keeps the keys the same * sorting - + sorted_keys - returns a dictionary ordered by the keys + sorted_values - returns a dictionary ordered by the values IMO key-wise set operations on dictionaries are fundamentally and sorely missing from the stdlib, mapping is super convenient, sorting and inversion are less common, but still useful to have. TODO: - [ ] UbeltDict, UltraDict, not sure what the name is.
We may just rename this to Dict. Example: >>> import ubelt as ub >>> a = ub.udict({1: 20, 2: 20, 3: 30, 4: 40}) >>> b = ub.udict({0: 0, 2: 20, 4: 42}) >>> c = ub.udict({3: -1, 5: -1}) >>> # Demo key-wise set operations >>> assert a & b == {2: 20, 4: 40} >>> assert a - b == {1: 20, 3: 30} >>> assert a ^ b == {1: 20, 3: 30, 0: 0} >>> assert a | b == {1: 20, 2: 20, 3: 30, 4: 42, 0: 0} >>> # Demo new n-ary set methods >>> a.union(b, c) == {1: 20, 2: 20, 3: -1, 4: 42, 0: 0, 5: -1} >>> a.intersection(b, c) == {} >>> a.difference(b, c) == {1: 20} >>> a.symmetric_difference(b, c) == {1: 20, 0: 0, 5: -1} >>> # Demo new quality of life methods >>> assert a.subdict({2, 4, 6, 8}, default=None) == {8: None, 2: 20, 4: 40, 6: None} >>> assert a.invert() == {20: 2, 30: 3, 40: 4} >>> assert a.invert(unique_vals=0) == {20: {1, 2}, 30: {3}, 40: {4}} >>> assert a.peek_key() == ub.peek(a.keys()) >>> assert a.peek_value() == ub.peek(a.values()) >>> assert a.map_keys(lambda x: x * 10) == {10: 20, 20: 20, 30: 30, 40: 40} >>> assert a.map_values(lambda x: x * 10) == {1: 200, 2: 200, 3: 300, 4: 400} """ def subdict(self, keys, default=NoParam): """ Get a subset of a dictionary Args: self (Dict[KT, VT]): dictionary or the implicit instance keys (Iterable[KT]): keys to take from ``self`` default (Any | NoParamType): if specified uses default if keys are missing. Raises: KeyError : if a key does not exist and default is not specified SeeAlso: :func:`ubelt.util_dict.dict_subset` :func:`ubelt.UDict.take` Example: >>> import ubelt as ub >>> a = ub.udict({k: 'A_' + chr(97 + k) for k in [2, 3, 5, 7]}) >>> s = a.subdict({2, 5}) >>> print('s = {}'.format(ub.repr2(s, nl=0, sort=1))) s = {2: 'A_c', 5: 'A_f'} >>> import pytest >>> with pytest.raises(KeyError): >>> s = a.subdict({2, 5, 100}) >>> s = a.subdict({2, 5, 100}, default='DEF') >>> print('s = {}'.format(ub.repr2(s, nl=0, sort=1))) s = {2: 'A_c', 5: 'A_f', 100: 'DEF'} """ # TODO: make this work with defaultdict? cls = self.__class__ if default is NoParam: new = cls([(k, self[k]) for k in keys]) else: new = cls([(k, self.get(k, default)) for k in keys]) return new def take(self, keys, default=NoParam): """ Get values of an iterable of keys. Args: self (Dict[KT, VT]): dictionary or the implicit instance keys (Iterable[KT]): keys to take from ``self`` default (Any | NoParamType): if specified uses default if keys are missing. Yields: VT: a selected value within the dictionary Raises: KeyError : if a key does not exist and default is not specified SeeAlso: :func:`ubelt.util_list.take` :func:`ubelt.UDict.subdict` Example: >>> import ubelt as ub >>> a = ub.udict({k: 'A_' + chr(97 + k) for k in [2, 3, 5, 7]}) >>> s = list(a.take({2, 5})) >>> print('s = {}'.format(ub.repr2(s, nl=0, sort=1))) s = ['A_c', 'A_f'] >>> import pytest >>> with pytest.raises(KeyError): >>> s = list(a.take({2, 5, 100})) >>> s = list(a.take({2, 5, 100}, default='DEF')) >>> print('s = {}'.format(ub.repr2(s, nl=0, sort=1))) s = ['A_c', 'A_f', 'DEF'] """ if default is NoParam: for k in keys: yield self[k] else: for k in keys: yield self.get(k, default) def invert(self, unique_vals=True): """ Swaps the keys and values in a dictionary. Args: self (Dict[KT, VT]): dictionary or the implicit instance to invert unique_vals (bool): if False, the values of the new dictionary are sets of the original keys. Defaults to True.
Returns: Dict[VT, KT] | Dict[VT, Set[KT]]: the inverted dictionary Note: The values must be hashable. If the original dictionary contains duplicate values, then only one of the corresponding keys will be returned and the others will be discarded. This can be prevented by setting ``unique_vals=False``, causing the inverted keys to be returned in a set. Example: >>> import ubelt as ub >>> inverted = ub.udict({'a': 1, 'b': 2}).invert() >>> assert inverted == {1: 'a', 2: 'b'} """ return invert_dict(self, unique_vals=unique_vals, cls=self.__class__) def map_keys(self, func): """ Apply a function to every key in a dictionary. Creates a new dictionary with the same values and modified keys. An error is raised if the new keys are not unique. Args: self (Dict[KT, VT]): a dictionary or the implicit instance. func (Callable[[KT], T] | Mapping[KT, T]): a function or indexable object Returns: Dict[T, VT]: transformed dictionary Example: >>> import ubelt as ub >>> new = ub.udict({'a': [1, 2, 3], 'b': []}).map_keys(ord) >>> assert new == {97: [1, 2, 3], 98: []} """ return map_keys(func, self, cls=self.__class__) def map_values(self, func): """ Apply a function to every value in a dictionary. Creates a new dictionary with the same keys and modified values. Args: self (Dict[KT, VT]): a dictionary or the implicit instance. func (Callable[[VT], T] | Mapping[VT, T]): a function or indexable object Returns: Dict[KT, T]: transformed dictionary Example: >>> import ubelt as ub >>> newdict = ub.udict({'a': [1, 2, 3], 'b': []}).map_values(len) >>> assert newdict == {'a': 3, 'b': 0} """ return map_values(func, self, cls=self.__class__) def sorted_keys(self, key=None, reverse=False): """ Return an ordered dictionary sorted by its keys Args: self (Dict[KT, VT]): dictionary to sort or the implicit instance. The keys must be of comparable types. key (Callable[[KT], Any] | None): If given as a callable, customizes the sorting by ordering using transformed keys. reverse (bool): if True returns in descending order Returns: OrderedDict[KT, VT]: new dictionary where the keys are ordered Example: >>> import ubelt as ub >>> new = ub.udict({'spam': 2.62, 'eggs': 1.20, 'jam': 2.92}).sorted_keys() >>> assert new == ub.odict([('eggs', 1.2), ('jam', 2.92), ('spam', 2.62)]) """ return sorted_keys(self, key=key, reverse=reverse, cls=self.__class__) def sorted_values(self, key=None, reverse=False): """ Return an ordered dictionary sorted by its values Args: self (Dict[KT, VT]): dictionary to sort or the implicit instance. The values must be of comparable types. key (Callable[[VT], Any] | None): If given as a callable, customizes the sorting by ordering using transformed values.
reverse (bool): if True returns in descending order Returns: OrderedDict[KT, VT]: new dictionary where the values are ordered Example: >>> import ubelt as ub >>> new = ub.udict({'spam': 2.62, 'eggs': 1.20, 'jam': 2.92}).sorted_values() >>> assert new == ub.odict([('eggs', 1.2), ('spam', 2.62), ('jam', 2.92)]) """ return sorted_values(self, key=key, reverse=reverse, cls=self.__class__) def peek_key(self, default=NoParam): """ Get the first key in the dictionary Args: self (Dict): a dictionary or the implicit instance default (KT | NoParamType): default item to return if the iterable is empty, otherwise a StopIteration error is raised Returns: KT: the first key or the default Example: >>> import ubelt as ub >>> assert ub.udict({1: 2}).peek_key() == 1 """ from ubelt.util_list import peek return peek(self.keys(), default=default) def peek_value(self, default=NoParam): """ Get the first value in the dictionary Args: self (Dict[KT, VT]): a dictionary or the implicit instance default (VT | NoParamType): default item to return if the iterable is empty, otherwise a StopIteration error is raised Returns: VT: the first value or the default Example: >>> import ubelt as ub >>> assert ub.udict({1: 2}).peek_value() == 2 """ from ubelt.util_list import peek return peek(self.values(), default=default) class AutoDict(UDict): """ An infinitely nested default dict of dicts. Implementation of Perl's autovivification feature that follows [SO_651794]_. References: .. [SO_651794] http://stackoverflow.com/questions/651794/init-dict-of-dicts Example: >>> import ubelt as ub >>> auto = ub.AutoDict() >>> auto[0][10][100] = None >>> assert str(auto) == '{0: {10: {100: None}}}' """ _base = UDict def __getitem__(self, key): """ Args: key (KT): key to lookup Returns: VT | AutoDict: an existing value or a new AutoDict """ try: # value = super(AutoDict, self).__getitem__(key) value = self._base.__getitem__(self, key) except KeyError: value = self[key] = self.__class__() return value def to_dict(self): """ Recursively casts an AutoDict into a regular dictionary. All directly nested AutoDict values are also converted. This effectively de-defaults the structure. Returns: dict: a copy of this dict without autovivification Example: >>> import ubelt as ub >>> auto = ub.AutoDict() >>> auto[1] = 1 >>> auto['n1'] = ub.AutoDict() >>> static = auto.to_dict() >>> assert not isinstance(static, ub.AutoDict) >>> assert not isinstance(static['n1'], ub.AutoDict) Example: >>> import ubelt as ub >>> auto = ub.AutoOrderedDict() >>> auto[0][3] = 3 >>> auto[0][2] = 2 >>> auto[0][1] = 1 >>> assert list(auto[0].values()) == [3, 2, 1] """ return self._base( (key, (value.to_dict() if isinstance(value, AutoDict) else value)) for key, value in self.items()) # DEPRECATED. This is no longer needed.
AutoDict is always ordered AutoOrderedDict = AutoDict udict = UDict ubelt-1.3.7/ubelt/util_dict.pyi000066400000000000000000000137571472470106000164710ustar00rootroot00000000000000import sys from typing import Iterable from typing import Type from typing import Dict from typing import Callable from typing import List from typing import Any from typing import Optional from ubelt.util_const import NoParam from ubelt.util_const import NoParamType from collections import OrderedDict from typing import Mapping from typing import Set from typing import Tuple from _typeshed import SupportsKeysAndGetItem from collections import OrderedDict, defaultdict from collections.abc import Generator from typing import Any, TypeVar VT = TypeVar("VT") T = TypeVar("T") KT = TypeVar("KT") odict = OrderedDict ddict = defaultdict DictBase = OrderedDict if sys.version_info[0:2] <= (3, 6) else dict def dzip(items1: Iterable[KT], items2: Iterable[VT], cls: Type[dict] = dict) -> Dict[KT, VT]: ... def group_items(items: Iterable[VT], key: Iterable[KT] | Callable[[VT], KT]) -> dict[KT, List[VT]]: ... def dict_hist(items: Iterable[T], weights: Iterable[float] | None = None, ordered: bool = False, labels: Iterable[T] | None = None) -> dict[T, int]: ... def find_duplicates( items: Iterable[T], k: int = 2, key: Callable[[T], Any] | None = None) -> dict[T, List[int]]: ... def dict_subset(dict_: Dict[KT, VT], keys: Iterable[KT], default: Optional[object] | NoParamType = NoParam, cls: Type[Dict] = OrderedDict) -> Dict[KT, VT]: ... def dict_union(*args: List[Dict]) -> Dict | OrderedDict: ... def dict_diff( *args: List[Dict[KT, VT] | Iterable[KT]] ) -> Dict[KT, VT] | OrderedDict[KT, VT]: ... def dict_isect( *args: List[Dict[KT, VT] | Iterable[KT]] ) -> Dict[KT, VT] | OrderedDict[KT, VT]: ... def map_values(func: Callable[[VT], T] | Mapping[VT, T], dict_: Dict[KT, VT], cls: type | None = None) -> Dict[KT, T]: ... map_vals = map_values def map_keys(func: Callable[[KT], T] | Mapping[KT, T], dict_: Dict[KT, VT], cls: type | None = None) -> Dict[T, VT]: ... def sorted_values(dict_: Dict[KT, VT], key: Callable[[VT], Any] | None = None, reverse: bool = False, cls: type = OrderedDict) -> OrderedDict[KT, VT]: ... sorted_vals = sorted_values def sorted_keys(dict_: Dict[KT, VT], key: Callable[[KT], Any] | None = None, reverse: bool = False, cls: type = OrderedDict) -> OrderedDict[KT, VT]: ... def invert_dict(dict_: Dict[KT, VT], unique_vals: bool = True, cls: type | None = None) -> Dict[VT, KT] | Dict[VT, Set[KT]]: ... def named_product( _: Dict[str, List[VT]] | None = None, **basis: Dict[str, List[VT]]) -> Generator[Dict[str, VT], None, None]: ... def varied_values(longform: List[Dict[KT, VT]], min_variations: int = 0, default: VT | NoParamType = NoParam) -> Dict[KT, List[VT]]: ... class SetDict(dict): def copy(self): ... def __or__( self, other: SupportsKeysAndGetItem[Any, Any] | Iterable[Tuple[Any, Any]] ) -> SetDict: ... def __and__(self, other: Mapping | Iterable) -> SetDict: ... def __sub__(self, other: Mapping | Iterable) -> SetDict: ... def __xor__(self, other: Mapping) -> SetDict: ... def __ror__(self, other: Mapping) -> dict: ... def __rand__(self, other: Mapping) -> dict: ... def __rsub__(self, other: Mapping) -> dict: ... def __rxor__(self, other: Mapping) -> dict: ... def __ior__( self, other: SupportsKeysAndGetItem[Any, Any] | Iterable[Tuple[Any, Any]] ) -> SetDict: ... def __iand__(self, other: Mapping | Iterable): ... def __isub__(self, other: Mapping | Iterable): ... def __ixor__(self, other: Mapping): ... 
def union(self, *others, cls: type | None = None, merge: None | Callable = None) -> dict: ... def intersection(self, *others, cls: type | None = None, merge: None | Callable = None) -> dict: ... def difference(self, *others, cls: type | None = None, merge: None | Callable = None) -> dict: ... def symmetric_difference(self, *others, cls: type | None = None, merge: None | Callable = None) -> dict: ... sdict = SetDict class UDict(SetDict): def subdict(self, keys: Iterable[KT], default: Optional[object] | NoParamType = NoParam): ... def take( self, keys: Iterable[KT], default: Optional[object] | NoParamType = NoParam ) -> Generator[VT, None, None]: ... def invert(self, unique_vals: bool = True) -> Dict[VT, KT] | Dict[VT, Set[KT]]: ... def map_keys(self, func: Callable[[VT], T] | Mapping[VT, T]) -> Dict[KT, T]: ... def map_values(self, func: Callable[[VT], T] | Mapping[VT, T]) -> Dict[KT, T]: ... def sorted_keys(self, key: Callable[[KT], Any] | None = None, reverse: bool = False) -> OrderedDict[KT, VT]: ... def sorted_values(self, key: Callable[[VT], Any] | None = None, reverse: bool = False) -> OrderedDict[KT, VT]: ... def peek_key(self, default: KT | NoParamType = NoParam) -> KT: ... def peek_value(self, default: VT | NoParamType = NoParam) -> VT: ... class AutoDict(UDict): def __getitem__(self, key: KT) -> VT | AutoDict: ... def to_dict(self) -> dict: ... AutoOrderedDict = AutoDict udict = UDict ubelt-1.3.7/ubelt/util_download.py000066400000000000000000000472071472470106000171770ustar00rootroot00000000000000""" Helpers for downloading data The :func:`download` function accesses the network and requests the content at a specific url using :mod:`urllib`. You can either specify where the data goes or download it to the default location in the ubelt cache. Either way this function returns the location of the downloaded data. You can also specify the expected hash in order to check the validity of the data. By default downloading is verbose. The :func:`grabdata` function is almost identical to :func:`download`, but it checks if the data already exists in the download location, and only downloads if it needs to. """ from ubelt.util_const import NoParam from os.path import basename, join, exists, dirname, split import os __all__ = ['download', 'grabdata'] def download(url, fpath=None, dpath=None, fname=None, appname=None, hash_prefix=None, hasher='sha512', chunksize=8192, filesize=None, verbose=1, timeout=NoParam, progkw=None, requestkw=None): """ Downloads a url to a file on disk and returns the path. If unspecified the location and name of the file is chosen automatically. A hash_prefix can be specified to verify the integrity of the downloaded data. This function will download the data every time it is called. For cached downloading see :func:`grabdata`. Args: url (str): The url to download. fpath (str | PathLike | io.BytesIO | None): The path to download to. Defaults to the basename of the url within ubelt's application cache. If this is a :class:`io.BytesIO` object then information is directly written to this object (note this prevents the use of temporary files). dpath (str | PathLike | None): where to download the file. If unspecified `appname` is used to determine this. Mutually exclusive with fpath. fname (str | None): What to name the downloaded file. Defaults to the url basename. Mutually exclusive with fpath. appname (str | None): set dpath to ``ub.Path.appdir(appname or 'ubelt', type='cache')`` if dpath and fpath are not given.
hash_prefix (None | str): If specified, download will retry / error if the file hash does not match this value. Defaults to None. hasher (str | Hasher): If hash_prefix is specified, this indicates the hashing algorithm to apply to the file. Defaults to sha512. chunksize (int): Download chunksize in bytes. Default to ``2 ** 13`` filesize (int | None): If known, the filesize in bytes. If unspecified, attempts to read that data from content headers. verbose (int | bool): Verbosity flag. Quiet is 0, higher is more verbose. Defaults to 1. timeout (float | NoParamType): Specify timeout in seconds for :func:`urllib.request.urlopen`. (if not specified, the global default timeout setting will be used) This only works for HTTP, HTTPS and FTP connections for blocking operations like the connection attempt. progkw (Dict | NoParamType | None): if specified provides extra arguments to the progress iterator. See :class:`ubelt.progiter.ProgIter` for available options. requestkw (Dict | NoParamType | None): if specified provides extra arguments to :class:`urllib.request.Request`, which can be used to customize headers and other low level information sent to the target server. The common use-case would be to specify ``headers: Dict[str, str]`` in order to "spoof" the user agent. E.g. ``headers={'User-Agent': 'Mozilla/5.0'}``. (new in ubelt 1.3.7). Returns: str | PathLike: fpath - path to the downloaded file. Raises: URLError - if there is problem downloading the url. RuntimeError - if the hash does not match the hash_prefix. Note: Based largely on code in pytorch [TorchDL]_ with modifications influenced by other resources [Shichao_2012]_ [SO_15644964]_ [SO_16694907]_. References: .. [Shichao_2012] https://blog.shichao.io/2012/10/04/progress_speed_indicator_for_urlretrieve_in_python.html .. [SO_15644964] http://stackoverflow.com/questions/15644964/python-progress-bar-and-downloads .. [SO_16694907] http://stackoverflow.com/questions/16694907/how-to-download-large-file-in-python-with-requests-py .. [TorchDL] https://github.com/pytorch/pytorch/blob/2787f1d8edbd4aadd4a8680d204341a1d7112e2d/torch/hub.py#L347 Example: >>> # xdoctest: +REQUIRES(--network) >>> # The default usage is to simply download an image to the default >>> # download folder and return the path to the file. >>> import ubelt as ub >>> url = 'http://i.imgur.com/rqwaDag.png' >>> fpath = download(url) >>> print(ub.Path(fpath).name) rqwaDag.png Example: >>> # xdoctest: +REQUIRES(--network) >>> # To ensure you get the file you are expecting, it is a good idea >>> # to specify a hash that will be checked. >>> import ubelt as ub >>> url = 'http://i.imgur.com/rqwaDag.png' >>> fpath = ub.download(url, hasher='sha1', hash_prefix='f79ea24571da6ddd2ba12e3d57b515249ecb8a35') >>> print(ub.Path(fpath).name) Downloading url='http://i.imgur.com/rqwaDag.png' to fpath=...rqwaDag.png ... ...1233/1233... rate=... Hz, eta=..., total=... rqwaDag.png Example: >>> # xdoctest: +REQUIRES(--network) >>> # You can save directly to bytes in memory using a BytesIO object. >>> import ubelt as ub >>> import io >>> url = 'http://i.imgur.com/rqwaDag.png' >>> file = io.BytesIO() >>> fpath = ub.download(url, file) >>> file.seek(0) >>> data = file.read() >>> assert ub.hash_data(data, hasher='sha1').startswith('f79ea24571') Example: >>> # xdoctest: +REQUIRES(--network) >>> # Bad hashes will raise a RuntimeError, which could indicate >>> # corrupted data or a security issue. 
>>> import pytest >>> import ubelt as ub >>> url = 'http://i.imgur.com/rqwaDag.png' >>> with pytest.raises(RuntimeError): >>> ub.download(url, hasher='sha512', hash_prefix='BAD_HASH') """ from ubelt import ProgIter as Progress from ubelt.util_platform import platform_cache_dir import pathlib import shutil import tempfile import hashlib if timeout is NoParam: import socket timeout = socket._GLOBAL_DEFAULT_TIMEOUT from urllib.request import urlopen, Request if fpath and (dpath or fname): raise ValueError('Cannot specify fpath with dpath or fname') if fpath is None: if dpath is None: cache_dpath = pathlib.Path(platform_cache_dir()) dpath = cache_dpath / (appname or 'ubelt') dpath.mkdir(parents=True, exist_ok=True) if fname is None: fname = basename(url) fpath = join(dpath, fname) # Check if fpath was given as a BytesIO object _dst_is_io_object = hasattr(fpath, 'write') if not _dst_is_io_object and not exists(dirname(fpath)): raise Exception('parent of {} does not exist'.format(fpath)) if verbose: if _dst_is_io_object: print('Downloading url={!r} to IO object'.format(url)) else: print('Downloading url={!r} to fpath={!r}'.format( url, fpath)) requestkw = requestkw or {} # Fall back to a browser-like User-Agent, but let caller-specified headers in requestkw take precedence (as documented above). requestkw.setdefault('headers', {'User-Agent': 'Mozilla/5.0'}) req = Request(url, **requestkw) urldata = urlopen(req, timeout=timeout) meta = urldata.info() if filesize is None: try: if hasattr(meta, 'getheaders'): # nocover filesize = int(meta.getheaders("Content-Length")[0]) else: filesize = int(meta.get_all("Content-Length")[0]) except Exception: # nocover # sometimes the url does not contain content length metadata # TODO: find a public URL that exemplifies this or figure out how to # mock it locally. filesize = None if hash_prefix: if isinstance(hasher, str): if hasher == 'sha1': hasher = hashlib.sha1() elif hasher == 'md5': hasher = hashlib.md5() elif hasher == 'sha256': hasher = hashlib.sha256() elif hasher == 'sha512': hasher = hashlib.sha512() else: raise KeyError(hasher) if _dst_is_io_object: _file_write = fpath.write else: tmp = tempfile.NamedTemporaryFile(delete=False) _file_write = tmp.write # possible optimization (have not tested or timed) _urldata_read = urldata.read try: # TODO: this outputs a lot of information that can bog down a CI # Might need to update defaults of ProgIter to reduce clutter _progkw = { 'total': filesize, # 'chunksize': chunksize, # 'freq': chunksize, 'freq': 1, 'time_thresh': 2, 'adjust': False, 'show_rate': False, } # import time # start_time = time.monotonic() def _build_extra(): pbar._curr_measurement.time bytes_down = pbar._iter_idx total_seconds = pbar._total_seconds + 1E-9 num_kb_down = int(bytes_down) / 1024 num_mb_down = int(num_kb_down / 1024) kb_per_second = int(num_kb_down / (total_seconds)) # fmt_msg = ' {:d} MB, {:d} KB/s' fmt_msg = ' {:d} KB/s' msg = fmt_msg.format(kb_per_second) return msg if progkw is not None: _progkw.update(progkw) _progkw['disable'] = not verbose pbar = Progress(**_progkw) pbar.set_extra(_build_extra) with pbar: _pbar_update = pbar.update def _critical_loop(): # Initialize the buffer to a non-empty object buffer = ' ' if hash_prefix: _hasher_update = hasher.update while buffer: buffer = _urldata_read(chunksize) _file_write(buffer) _hasher_update(buffer) _pbar_update(len(buffer)) else: # Same code as above, just without the hasher update.
# (tight loop optimization: remove in-loop conditional) while buffer: buffer = _urldata_read(chunksize) _file_write(buffer) _pbar_update(len(buffer)) _critical_loop() if not _dst_is_io_object: tmp.close() # We keep a potentially corrupted file if the hash doesn't match. # It could be the case that the user simply specified the wrong # hash_prefix. shutil.move(tmp.name, fpath) if hash_prefix: got = hasher.hexdigest() if got[:len(hash_prefix)] != hash_prefix: print('hash_prefix = {!r}'.format(hash_prefix)) print('got = {!r}'.format(got)) if _dst_is_io_object: raise RuntimeError( 'invalid hash value ' '(expected "{}", got "{}")'.format(hash_prefix, got)) else: raise RuntimeError( 'invalid hash value for fpath={!r} ' '(expected "{}", got "{}")'.format( fpath, hash_prefix, got)) finally: if not _dst_is_io_object: # nocover tmp.close() # If for some reason the move failed, delete the temporary file if exists(tmp.name): os.remove(tmp.name) return fpath def grabdata(url, fpath=None, dpath=None, fname=None, redo=False, verbose=1, appname=None, hash_prefix=None, hasher='sha512', expires=None, **download_kw): """ Downloads a file, caches it, and returns its local path. If unspecified the location and name of the file is chosen automatically. A hash_prefix can be specified to verify the integrity of the downloaded data. Args: url (str): url of the file to download fpath (Optional[str | PathLike]): The full path to download the file to. If unspecified, the arguments `dpath` and `fname` are used to determine this. dpath (Optional[str | PathLike]): where to download the file. If unspecified `appname` is used to determine this. Mutually exclusive with fpath. fname (Optional[str]): What to name the downloaded file. Defaults to the url basename. Mutually exclusive with fpath. redo (bool): if True forces redownload of the file. Defaults to False. verbose (int): Verbosity flag. Quiet is 0, higher is more verbose. Defaults to 1. appname (str | None): set dpath to ``ub.get_app_cache_dir(appname or 'ubelt')`` if dpath and fpath are not given. hash_prefix (None | str): If specified, grabdata verifies that this matches the hash of the file, and then saves the hash in a adjacent file to certify that the download was successful. Defaults to None. hasher (str | Hasher): If hash_prefix is specified, this indicates the hashing algorithm to apply to the file. Defaults to sha512. NOTE: Only pass hasher as a string. Passing as an instance is deprecated and can cause unexpected results. expires (str | int | datetime.datetime | None): when the cache should expire and redownload or the number of seconds to wait before the cache should expire. **download_kw: additional kwargs to pass to :func:`ubelt.util_download.download`. This includes ``chunksize``, ``filesize``, ``timeout``, ``progkw``, and ``requestkw``. Ignore: # helper logic to determine what needs to be documented for download_kw import ubelt as ub import inspect grabdata_sig = inspect.signature(ub.grabdata) download_sig = inspect.signature(ub.download) extra = ub.udict(download_sig.parameters) - ub.udict(grabdata_sig.parameters) print(', '.join([f'``{k}``' for k in extra.keys()])) Returns: str | PathLike: fpath - path to downloaded or cached file. 
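Note: A rough sketch of the caching logic (simplified pseudocode, not the literal implementation; see the function body and :class:`ubelt.util_cache.CacheStamp` for the real behavior, and note that ``stamp_is_valid`` / ``renew_stamp`` are hypothetical names used only for exposition): .. code:: python if not redo and stamp_is_valid(fpath, hash_prefix): return fpath # cache hit: no network access fpath = download(url, fpath, hash_prefix=hash_prefix) renew_stamp(fpath) # record the hash so the next call is a hit return fpath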
CommandLine: xdoctest -m ubelt.util_download grabdata --network Example: >>> # xdoctest: +REQUIRES(--network) >>> import ubelt as ub >>> url = 'http://i.imgur.com/rqwaDag.png' >>> fpath = ub.grabdata(url, fname='mario.png') >>> result = basename(fpath) >>> print(result) mario.png Example: >>> # xdoctest: +REQUIRES(--network) >>> import ubelt as ub >>> import json >>> fname = 'foo.bar' >>> url = 'http://i.imgur.com/rqwaDag.png' >>> prefix1 = '944389a39dfb8fa9' >>> fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1, verbose=3) >>> stamp_fpath = ub.Path(fpath + '.stamp_sha512.json') >>> assert json.loads(stamp_fpath.read_text())['hash'][0].startswith(prefix1) >>> # Check that the download doesn't happen again >>> fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1) >>> # todo: check file timestamps have not changed >>> # >>> # Check redo works with hash >>> fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1, redo=True) >>> # todo: check file timestamps have changed >>> # >>> # Check that a redownload occurs when the stamp is changed >>> with open(stamp_fpath, 'w') as file: >>> file.write('corrupt-stamp') >>> fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1) >>> assert json.loads(stamp_fpath.read_text())['hash'][0].startswith(prefix1) >>> # >>> # Check that a redownload occurs when the stamp is removed >>> ub.delete(stamp_fpath) >>> with open(fpath, 'w') as file: >>> file.write('corrupt-data') >>> assert not ub.hash_file(fpath, base='hex', hasher='sha512').startswith(prefix1) >>> fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1) >>> assert ub.hash_file(fpath, base='hex', hasher='sha512').startswith(prefix1) >>> # >>> # Check that requesting new data causes redownload >>> #url2 = 'https://data.kitware.com/api/v1/item/5b4039308d777f2e6225994c/download' >>> #prefix2 = 'c98a46cb31205cf' # hack SSL >>> url2 = 'http://i.imgur.com/rqwaDag.png' >>> prefix2 = '944389a39dfb8fa9' >>> fpath = ub.grabdata(url2, fname=fname, hash_prefix=prefix2) >>> assert json.loads(stamp_fpath.read_text())['hash'][0].startswith(prefix2) """ import pathlib from ubelt.util_platform import platform_cache_dir from ubelt.util_cache import CacheStamp if appname and dpath: raise ValueError('Cannot specify appname with dpath') if fpath and (dpath or fname or appname): raise ValueError('Cannot specify fpath with dpath or fname') if fpath is None: if dpath is None: cache_dpath = pathlib.Path(platform_cache_dir()) dpath = cache_dpath / (appname or 'ubelt') dpath.mkdir(parents=True, exist_ok=True) if fname is None: fname = basename(url) fpath = join(dpath, fname) if dpath is None or fname is None: dpath, fname = split(fpath) if hasher is not None: if not isinstance(hasher, str): # nocover from ubelt import schedule_deprecation schedule_deprecation( modname='ubelt', migration='Pass hasher as a string, otherwise unexpected behavior can occur', name='hasher', type='grabdata arg', deprecate='1.1.0', error='1.3.0', remove='1.4.0') hasher_name = hasher.name else: hasher_name = hasher else: hasher_name = None if hasher_name is not None and hash_prefix: depends = hasher_name else: depends = '' # If the hash isn't specified, should we force download a different url # to the same file? # depends = url # TODO: it would be nice to have better control over the name of the stamp. # Specifically we have no control over the separator between fname, # depends, and the extension. 
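# Added explanatory comment: CacheStamp writes a small sidecar JSON next to the product (the doctest above shows a name like ``<fname>.stamp_sha512.json``) recording the expected hash, so later calls can validate the cached file without re-downloading it.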
stamp = CacheStamp( fname + '.stamp', dpath, depends=depends, hasher=hasher, ext='.json', product=fpath, hash_prefix=hash_prefix, verbose=verbose, expires=expires, ) if redo or stamp.expired(): try: if not hash_prefix or redo: raise Exception # If an expected hash is specified and the file exists, but the # stamp is invalid, try to simply compute the hash of the existing # file instead of redownloading it. Redownload if this fails. stamp.renew() except Exception: needs_download = 1 else: needs_download = 0 if needs_download: fpath = download( url, fpath, verbose=verbose, hash_prefix=hash_prefix, hasher=hasher, **download_kw) stamp.renew() return fpath ubelt-1.3.7/ubelt/util_download.pyi000066400000000000000000000024111472470106000173360ustar00rootroot00000000000000from typing import Optional import io from os import PathLike from ubelt.util_const import NoParam from ubelt.util_const import NoParamType from typing import Dict import datetime from typing import TypeVar Hasher = TypeVar("Hasher") def download(url: str, fpath: Optional[str | PathLike | io.BytesIO] = None, dpath: Optional[PathLike] = None, fname: Optional[str] = None, appname: str | None = None, hash_prefix: None | str = None, hasher: str | Hasher = 'sha512', chunksize: int = 8192, filesize: int | None = None, verbose: int | bool = 1, timeout: float | NoParamType = NoParam, progkw: Dict | NoParamType | None = None) -> str | PathLike: ... def grabdata(url: str, fpath: Optional[str | PathLike] = None, dpath: Optional[str | PathLike] = None, fname: Optional[str] = None, redo: bool = False, verbose: int = 1, appname: str | None = None, hash_prefix: None | str = None, hasher: str | Hasher = 'sha512', expires: str | int | datetime.datetime | None = None, **download_kw) -> str | PathLike: ... ubelt-1.3.7/ubelt/util_download_manager.py000066400000000000000000000127631472470106000206720ustar00rootroot00000000000000""" A simple download manager """ __all__ = ['DownloadManager'] class DownloadManager: """ Simple implementation of the download manager Example: >>> # xdoctest: +REQUIRES(--network) >>> import ubelt as ub >>> # Download a file with a known hash >>> manager = ub.DownloadManager() >>> job = manager.submit( >>> 'http://i.imgur.com/rqwaDag.png', >>> hash_prefix='31a129618c87dd667103e7154182e3c39a605eefe90f84f2283f3c87efee8e40' >>> ) >>> fpath = job.result() >>> print('fpath = {!r}'.format(fpath)) Example: >>> # Does not require network >>> import ubelt as ub >>> manager = ub.DownloadManager() >>> for i in range(100): ... job = manager.submit('localhost/might-not-exist-i-{}'.format(i)) >>> file_paths = [] >>> for job in manager.as_completed(prog=True): ... try: ... fpath = job.result() ... file_paths += [fpath] ... except Exception: ... 
pass >>> print('file_paths = {!r}'.format(file_paths)) Example: >>> # xdoctest: +REQUIRES(--network) >>> import pytest >>> import ubelt as ub >>> manager = ub.DownloadManager() >>> item1 = { >>> 'url': 'https://data.kitware.com/api/v1/item/5b4039308d777f2e6225994c/download', >>> 'dst': 'forgot_what_the_name_really_is', >>> 'hash_prefix': 'c98a46cb31205cf', >>> 'hasher': 'sha512', >>> } >>> item2 = { >>> 'url': 'http://i.imgur.com/rqwaDag.png', >>> 'hash_prefix': 'f79ea24571da6ddd2ba12e3d57b515249ecb8a35', >>> 'hasher': 'sha1', >>> } >>> item1 = item2 # hack around SSL error >>> manager.submit(**item1) >>> manager.submit(**item2) >>> for job in manager.as_completed(prog=True, verbose=3): >>> fpath = job.result() >>> print('fpath = {!r}'.format(fpath)) """ def __init__(self, download_root=None, mode='thread', max_workers=None, cache=True): """ Args: download_root (str | PathLike): default download location mode (str): either thread, process, or serial cache (bool): defaults to True max_workers (int | None): maximum concurrent tasks TODO: - [ ] Will likely have to initialize and store some sort of "connection state" objects. """ import ubelt as ub if download_root is None: download_root = ub.ensure_app_config_dir('ubelt', 'dlman') self._pool = ub.JobPool(mode=mode, max_workers=max_workers) self.download_root = download_root self.cache = cache if self.cache: self._dl_func = ub.grabdata else: self._dl_func = ub.download def submit(self, url, dst=None, hash_prefix=None, hasher='sha256'): """ Add a job to the download Queue Args: url (str | PathLike): pointer to the data to download dst (str | None): The relative or absolute path to download to. If unspecified, the destination name is derived from the url. hash_prefix (str | None): If specified, verifies that the hash of the downloaded file starts with this. hasher (str): hashing algorithm to use if hash_prefix is specified. Defaults to ``'sha256'``. Returns: concurrent.futures.Future : a Future object that will point to the downloaded location. """ job = self._pool.submit( self._dl_func, url, fname=dst, dpath=self.download_root, hash_prefix=hash_prefix, hasher=hasher, verbose=0, ) return job def as_completed(self, prog=None, desc=None, verbose=1): """ Generate completed jobs as they become available Args: prog (None | bool | type): if True, uses a ub.ProgIter progress bar. Can also be a class with a compatible progiter API. desc (str | None): if specified, reports progress with a :class:`ubelt.progiter.ProgIter` object. verbose (int): verbosity Example: >>> import pytest >>> import ubelt as ub >>> download_root = ub.ensure_app_config_dir('ubelt', 'dlman') >>> manager = ub.DownloadManager(download_root=download_root, >>> cache=False) >>> for i in range(3): >>> manager.submit('localhost') >>> results = list(manager) >>> print('results = {!r}'.format(results)) >>> manager.shutdown() """ if prog is True: import ubelt as ub prog = ub.ProgIter if prog is not None: return prog(self._pool.as_completed(), total=len(self), desc=desc, verbose=verbose) else: return self._pool.as_completed() def shutdown(self): """ Cancel all jobs and close all connections. 
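Example: >>> # Minimal sketch (illustrative addition): shutting down a >>> # manager before submitting any jobs is safe. >>> import ubelt as ub >>> manager = ub.DownloadManager(cache=False) >>> manager.shutdown()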
""" self._pool.executor.shutdown() def __iter__(self): """ Returns: Iterable """ return self.as_completed() def __len__(self): """ Returns: int """ return len(self._pool) ubelt-1.3.7/ubelt/util_download_manager.pyi000066400000000000000000000016611472470106000210360ustar00rootroot00000000000000from os import PathLike import concurrent import concurrent.futures from typing import Iterable class DownloadManager: download_root: str | PathLike cache: bool def __init__(self, download_root: str | PathLike | None = None, mode: str = 'thread', max_workers: int | None = None, cache: bool = True) -> None: ... def submit(self, url: str | PathLike, dst: str | None = None, hash_prefix: str | None = None, hasher: str = 'sha256') -> concurrent.futures.Future: ... def as_completed(self, prog: None | bool | type = None, desc: str | None = None, verbose: int = 1): ... def shutdown(self) -> None: ... def __iter__(self) -> Iterable: ... def __len__(self) -> int: ... ubelt-1.3.7/ubelt/util_format.py000066400000000000000000000043201472470106000166470ustar00rootroot00000000000000""" Warning: This module is deprecated. Use :mod:`ubelt.util_repr` instead. """ from .util_repr import urepr, ReprExtensions, _REPR_EXTENSIONS def repr2(data, **kwargs): """ Alias of :func:`ubelt.util_repr.urepr`. Warning: Deprecated for urepr Example: >>> # Test that repr2 remains backwards compatible >>> import ubelt as ub >>> dict_ = { ... 'custom_types': [slice(0, 1, None), 1/3], ... 'nest_dict': {'k1': [1, 2, {3: {4, 5}}], ... 'key2': [1, 2, {3: {4, 5}}], ... 'key3': [1, 2, {3: {4, 5}}], ... }, ... 'nest_dict2': {'k': [1, 2, {3: {4, 5}}]}, ... 'nested_tuples': [tuple([1]), tuple([2, 3]), frozenset([4, 5, 6])], ... 'one_tup': tuple([1]), ... 'simple_dict': {'spam': 'eggs', 'ham': 'jam'}, ... 'simple_list': [1, 2, 'red', 'blue'], ... 'odict': ub.odict([(2, '1'), (1, '2')]), ... } >>> import pytest >>> with pytest.warns(DeprecationWarning): >>> result = ub.repr2(dict_, nl=1, precision=2) >>> print(result) { 'custom_types': [slice(0, 1, None), 0.33], 'nest_dict': {'k1': [1, 2, {3: {4, 5}}], 'key2': [1, 2, {3: {4, 5}}], 'key3': [1, 2, {3: {4, 5}}]}, 'nest_dict2': {'k': [1, 2, {3: {4, 5}}]}, 'nested_tuples': [(1,), (2, 3), {4, 5, 6}], 'odict': {2: '1', 1: '2'}, 'one_tup': (1,), 'simple_dict': {'ham': 'jam', 'spam': 'eggs'}, 'simple_list': [1, 2, 'red', 'blue'], } """ from ubelt.util_deprecate import schedule_deprecation schedule_deprecation( modname='ubelt', name='repr2', type='function', migration='use urepr instead', deprecate='1.2.5', error='2.0.0', remove='2.1.0', ) kwargs['_dict_sort_behavior'] = kwargs.get('_dict_sort_behavior', 'old') return urepr(data, **kwargs) repr2.extensions = urepr.extensions repr2.register = urepr.register # Deprecated aliases FormatterExtensions = ReprExtensions _FORMATTER_EXTENSIONS = _REPR_EXTENSIONS __all__ = ['repr2', 'urepr', 'FormatterExtensions'] ubelt-1.3.7/ubelt/util_format.pyi000066400000000000000000000001621472470106000170200ustar00rootroot00000000000000from .util_repr import ReprExtensions def repr2(data, **kwargs): ... FormatterExtensions = ReprExtensions ubelt-1.3.7/ubelt/util_func.py000066400000000000000000000205171472470106000163200ustar00rootroot00000000000000""" Helpers for functional programming. The :func:`identity` function simply returns its own inputs. This is useful for bypassing print statements and many other cases. I also think it looks a little nicer than ``lambda x: x``. The :func:`inject_method` function "injects" another function into a class instance as a method. 
This is useful for monkey patching. The :func:`compatible` function introspects a function's signature for accepted keyword arguments and returns the subset of a configuration dictionary that agrees with that signature. """ def identity(arg=None, *args, **kwargs): """ Return the value of the first argument unchanged. All other positional and keyword inputs are ignored. Defaults to None if called without any args. The name identity is used in the mathematical sense [WikiIdentity]_. This is slightly different from the pure identity function, which is defined strictly with a single argument. This implementation allows but ignores extra arguments, making it easier to use as a drop-in replacement for functions that accept extra configuration arguments that change their behavior and aren't true inputs. The value of this utility is a cleaner way to write ``lambda x: x`` or more precisely ``lambda x=None, *a, **k: x`` or writing the function inline. Unlike the lambda variant, this does not trigger common linter errors when assigning it to a value. Args: arg (Any | None): The value to return unchanged. *args: Ignored **kwargs: Ignored Returns: Any: arg - The same value of the first positional argument. References: .. [WikiIdentity] https://en.wikipedia.org/wiki/Identity_function Example: >>> import ubelt as ub >>> ub.identity(42) 42 >>> ub.identity(42, 43) 42 >>> print(ub.identity()) None """ return arg def inject_method(self, func, name=None): """ Injects a function into an object instance as a bound method The main use case of this function is for monkey patching. While monkey patching is sometimes necessary it should generally be avoided. Thus, we simply remind the developer that there might be a better way. Args: self (T): Instance to inject a function into. func (Callable[..., Any]): The function to inject (must contain an arg for self). name (str | None): Specify the name of the new method. If not specified the name of the function is used. Example: >>> import ubelt as ub >>> class Foo(object): >>> def bar(self): >>> return 'bar' >>> def baz(self): >>> return 'baz' >>> self = Foo() >>> assert self.bar() == 'bar' >>> assert not hasattr(self, 'baz') >>> ub.inject_method(self, baz) >>> assert not hasattr(Foo, 'baz'), 'should only change one instance' >>> assert self.baz() == 'baz' >>> ub.inject_method(self, baz, 'bar') >>> assert self.bar() == 'baz' """ # TODO: if func is a bound method we should probably unbind it new_method = func.__get__(self, self.__class__) if name is None: name = func.__name__ setattr(self, name, new_method) def compatible(config, func, start=0, keywords=True): """ Take the "compatible" subset of a dictionary that a function will accept as keyword arguments. A common pattern is to track the configuration of a program in a single dictionary. Often there will be functions that only require subsets of this dictionary, and they will be written such that those items are passed via keyword arguments. The :func:`ubelt.compatible` utility makes it easier to select only the relevant config variables. It does this by inspecting the signature of the function to determine what keyword arguments it accepts, and returns the dictionary intersection of the full config and the allowed keywords. The user can then call the function with the normal ``**`` mechanism. Args: config (Dict[str, Any]): A dictionary that contains keyword arguments that might be passed to a function. func (Callable): A function or method to check the arguments of start (int): Only take args after this position.
Set to 1 if calling with an unbound method to avoid the ``self`` argument. Defaults to 0. keywords (bool | Iterable[str]): If True (default), and ``**kwargs`` is in the signature, prevent any filtering of the ``config`` dictionary. If False, then ignore that ``**kwargs`` is in the signature and only return the subset of ``config`` that matches the explicit signature. Otherwise if specified as a non-string iterable of strings, assume these are the allowed keys that are compatible with the way ``kwargs`` is handled in the function. Returns: Dict[str, Any] : A subset of ``config`` that only contains items compatible with the signature of ``func``. Example: >>> # An example use case is to select a subset of of a config >>> # that can be passed to some function as kwargs >>> import ubelt as ub >>> # Define a function with args that match some keys in a config. >>> def func(a, e, f): >>> return a * e * f >>> # Define a config that has a superset of items needed by the func >>> config = { ... 'a': 2, 'b': 3, 'c': 7, ... 'd': 11, 'e': 13, 'f': 17, ... } >>> # Call the function only with keys that are compatible >>> func(**ub.compatible(config, func)) 442 Example: >>> # Test case with kwargs >>> import ubelt as ub >>> def func(a, e, f, *args, **kwargs): >>> return a * e * f >>> config = { ... 'a': 2, 'b': 3, 'c': 7, ... 'd': 11, 'e': 13, 'f': 17, ... } >>> func(**ub.compatible(config, func)) 442 >>> print(sorted(ub.compatible(config, func))) ['a', 'b', 'c', 'd', 'e', 'f'] >>> print(sorted(ub.compatible(config, func, keywords=False))) ['a', 'e', 'f'] >>> print(sorted(ub.compatible(config, func, keywords={'b'}))) ['a', 'b', 'e', 'f'] Ignore: # xdoctest: +REQUIRES(syntax:python>=3.6) todo: new xdoctest directive? # Test case with positional only 3.6 + import ubelt as ub def func(a, e, /, f): return a * e * f config = { 'a': 2, 'b': 3, 'c': 7, 'd': 11, 'e': 13, 'f': 17, } funckw = ub.compatible(config, func) func(1, 2, **funckw) ### While the stdlib inspect.signature is useful, it does not ### have a concise way of getting the subset of the dictionary ### that can be passed as keyword arguments. import inspect sig = inspect.signature(func) funckw2 = ub.udict(config) & sig.parameters ub.udict(report_config) & (sig.parameters) """ import inspect sig = inspect.signature(func) argnames = [] has_kwargs = False for arg in sig.parameters.values(): if arg.kind == inspect.Parameter.VAR_KEYWORD: has_kwargs = True elif arg.kind == inspect.Parameter.VAR_POSITIONAL: pass # Ignore variadic positional args elif arg.kind == inspect.Parameter.POSITIONAL_ONLY: pass # Ignore positional only arguments elif arg.kind in {inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.KEYWORD_ONLY}: argnames.append(arg.name) else: # nocover raise TypeError(arg.kind) # Test if keywords is a non-string iterable if not isinstance(keywords, (bool, str)): try: iter(keywords) except Exception: keywords = bool(keywords) else: argnames.extend(keywords) keywords = False if has_kwargs and keywords: # kwargs could be anything, so keep everything common = config else: common = {k: config[k] for k in argnames[start:] if k in config} # dict-intersection return common # class Function: # """ # TODO # """ # ... ubelt-1.3.7/ubelt/util_func.pyi000066400000000000000000000007511472470106000164670ustar00rootroot00000000000000from typing import Any from typing import Callable from typing import Dict from typing import Iterable def identity(arg: Any | None = None, *args, **kwargs) -> Any: ... 
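# Annotation note (added commentary, not generated stub content): the loose signature is intentional so `identity` can stand in for callbacks of arbitrary arity, e.g. both `identity(42)` and `identity(42, 'extra', flag=True)` type-check and return 42 at runtime.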
def inject_method(self, func: Callable[..., Any], name: str | None = None) -> None: ... def compatible(config: Dict[str, Any], func: Callable, start: int = 0, keywords: bool | Iterable[str] = True) -> Dict[str, Any]: ... ubelt-1.3.7/ubelt/util_futures.py000066400000000000000000000511331472470106000170600ustar00rootroot00000000000000""" Introduces the :class:`Executor` class that wraps the standard ThreadPoolExecutor, ProcessPoolExecutor, and the new SerialExecutor with a common interface and a configurable backend. This makes it easy to test if your code benefits from parallelism, how much it benefits, and gives you the ability to disable it if you need to. The :class:`Executor` class lets you choose the right level of concurrency (which might be no concurrency). An excellent blog post on when to use threads, processes, or asyncio is given in [ChooseTheRightConcurrency]_. Note that Executor does not currently support asyncio; this might be added in the future, but it is unclear how interoperable it would be. References: .. [ChooseTheRightConcurrency] https://superfastpython.com/python-concurrency-choose-api/ Example: >>> # xdoctest: +SKIP >>> # Note: while this works in IPython, this does not work when running >>> # in xdoctest. https://github.com/Erotemic/xdoctest/issues/101 >>> # xdoctest: +REQUIRES(module:timerit) >>> # Does my function benefit from parallelism? >>> def my_function(arg1, arg2): ... return (arg1 + arg2) * 3 >>> # >>> def run_process(inputs, mode='serial', max_workers=0): ... from concurrent.futures import as_completed ... import ubelt as ub ... # The executor interface is the same regardless of modes ... executor = ub.Executor(mode=mode, max_workers=max_workers) ... # submit returns a Future object ... jobs = [executor.submit(my_function, *args) for args in inputs] ... # future objects will contain results when they are done ... results = [job.result() for job in as_completed(jobs)] ... return results >>> # The same code tests our method in serial, thread, or process mode >>> import timerit >>> ti = timerit.Timerit(100, bestof=10, verbose=2) >>> # Setup test data >>> import random >>> rng = random.Random(0) >>> max_workers = 4 >>> inputs = [(rng.random(), rng.random()) for _ in range(100)] >>> for mode in ['serial', 'process', 'thread']: >>> for timer in ti.reset('mode={} max_workers={}'.format(mode, max_workers)): >>> with timer: >>> run_process(inputs, mode=mode, max_workers=max_workers) >>> print(ub.repr2(ti)) """ import concurrent.futures from concurrent.futures import as_completed __all__ = ['Executor', 'JobPool'] class SerialFuture(concurrent.futures.Future): """ Non-threading / multiprocessing version of future for drop-in compatibility with concurrent.futures.
TODO: warn if the user specifies timeout as we cannot handle it without threads Attributes: func (Callable): function to be called args (Tuple): positional arguments to call the function with kw (Dict): keyword arguments to call the function with """ def __init__(self, func, *args, **kw): super(SerialFuture, self).__init__() self.func = func self.args = args self.kw = kw # self._condition = FakeCondition() self._run_count = 0 # fake being finished to cause __get_result to be called self._state = concurrent.futures._base.FINISHED def _run(self): result = self.func(*self.args, **self.kw) self.set_result(result) self._run_count += 1 def set_result(self, result): """ Overrides the implementation to revert to pre-Python-3.8 behavior Example: >>> # Just for coverage >>> from ubelt.util_futures import SerialFuture # NOQA >>> self = SerialFuture(print, 'arg1', 'arg2') >>> self.add_done_callback(lambda x: print('done callback got x = {}'.format(x))) >>> print('result() before set_result()') >>> ret = self.result() >>> print('ret = {!r}'.format(ret)) >>> self.set_result(1) >>> ret = self.result() >>> print('ret = {!r}'.format(ret)) >>> # >>> print('set_result() before result()') >>> self = SerialFuture(print, 'arg1', 'arg2') >>> self.add_done_callback(lambda x: print('done callback got x = {}'.format(x))) >>> self.set_result(1) >>> ret = self.result() >>> print('ret = {!r}'.format(ret)) """ with self._condition: self._result = result self._state = concurrent.futures._base.FINISHED # I'm cheating a little by not covering this. # Let's call it cheating in good faith. *shifty eyes* # I don't know how to test it, and it is not a critical piece of the # library. Consider it a bug; help wanted. for waiter in self._waiters: # nocover waiter.add_result(self) self._condition.notify_all() self._invoke_callbacks() def _Future__get_result(self): # overrides private __getresult method if not self._run_count: self._run() return self._result # Rename to SerialPoolExecutor? class SerialExecutor(object): """ Implements the concurrent.futures API around a single-threaded backend Notes: When using the SerialExecutor, any timeout specified to the result will be ignored. Example: >>> from ubelt.util_futures import SerialExecutor # NOQA >>> import concurrent.futures >>> with SerialExecutor() as executor: >>> futures = [] >>> for i in range(100): >>> f = executor.submit(lambda x: x + 1, i) >>> futures.append(f) >>> for f in concurrent.futures.as_completed(futures): >>> assert f.result() > 0 >>> for i, f in enumerate(futures): >>> assert i + 1 == f.result() """ def __enter__(self): self.max_workers = 0 return self def __exit__(self, ex_type, ex_value, ex_traceback): """ Args: ex_type (Type[BaseException] | None): ex_value (BaseException | None): ex_traceback (TracebackType | None): Returns: bool | None """ return False def submit(self, func, *args, **kw): """ Submit a job to be executed later Returns: concurrent.futures.Future: a future representing the job """ return SerialFuture(func, *args, **kw) def shutdown(self): """ Ignored for the serial executor """ pass def map(self, fn, *iterables, **kwargs): """Returns an iterator equivalent to map(fn, iter). Args: fn (Callable[..., Any]): A callable that will take as many arguments as there are passed iterables. timeout: This argument is ignored for SerialExecutor chunksize: This argument is ignored for SerialExecutor Yields: Any: equivalent to: map(func, *iterables); for this serial implementation the calls are evaluated in order. Raises: Exception: If fn(*args) raises for any values.
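Example: >>> # Added sketch: results come back in argument order, matching the >>> # builtin map contract even though nothing runs concurrently. >>> from ubelt.util_futures import SerialExecutor >>> with SerialExecutor() as executor: ... assert list(executor.map(abs, [-1, -2, 3])) == [1, 2, 3]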
Example: >>> from ubelt.util_futures import SerialExecutor # NOQA >>> import concurrent.futures >>> import string >>> with SerialExecutor() as executor: ... result_iter = executor.map(int, string.digits) ... results = list(result_iter) >>> print('results = {!r}'.format(results)) results = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] """ kwargs.pop('chunksize', None) kwargs.pop('timeout', None) if len(kwargs) != 0: # nocover raise ValueError('Unknown arguments {}'.format(kwargs)) fs = [self.submit(fn, *args) for args in zip(*iterables)] for f in fs: yield f.result() # See ../dev/experimental/async_executor_poc.py for # work on a potential AsyncIOExecutor class class Executor(object): """ A concrete asynchronous executor with a configurable backend. The type of parallelism (or lack thereof) is configured via the ``mode`` parameter, which can be: "process", "thread", or "serial". This allows the user to easily enable / disable parallelism or switch between processes and threads without modifying the surrounding logic. SeeAlso: * :class:`concurrent.futures.ThreadPoolExecutor` * :class:`concurrent.futures.ProcessPoolExecutor` * :class:`SerialExecutor` * :class:`JobPool` In the case where you can't or don't want to use ubelt.Executor, you can get similar behavior with the following pure-python snippet: .. code:: python def Executor(max_workers): # Stdlib-only "ubelt.Executor"-like behavior if max_workers == 0: import contextlib def submit_partial(func, *args, **kwargs): def wrapper(): return func(*args, **kwargs) wrapper.result = wrapper return wrapper executor = contextlib.nullcontext() executor.submit = submit_partial else: from concurrent.futures import ThreadPoolExecutor executor = ThreadPoolExecutor(max_workers=max_workers) return executor executor = Executor(0) with executor: jobs = [] for arg in range(1000): job = executor.submit(chr, arg) jobs.append(job) results = [] for job in jobs: result = job.result() results.append(result) print('results = {}'.format(ub.urepr(results, nl=1))) Attributes: backend (SerialExecutor | ThreadPoolExecutor | ProcessPoolExecutor): Example: >>> import ubelt as ub >>> # Prototype code using simple serial processing >>> executor = ub.Executor(mode='serial', max_workers=0) >>> jobs = [executor.submit(sum, [i + 1, i]) for i in range(10)] >>> print([job.result() for job in jobs]) [1, 3, 5, 7, 9, 11, 13, 15, 17, 19] >>> # Enable parallelism by only changing one parameter >>> executor = ub.Executor(mode='process', max_workers=0) >>> jobs = [executor.submit(sum, [i + 1, i]) for i in range(10)] >>> print([job.result() for job in jobs]) [1, 3, 5, 7, 9, 11, 13, 15, 17, 19] """ def __init__(self, mode='thread', max_workers=0): """ Args: mode (str): The backend parallelism mechanism. Can be either thread, serial, or process. Defaults to 'thread'. max_workers (int): number of workers. If 0, serial is forced. Defaults to 0.
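Example: >>> # Added sketch: the same submit/result code runs under either >>> # backend; only the constructor arguments change. >>> import ubelt as ub >>> for mode in ['serial', 'thread']: >>> with ub.Executor(mode=mode, max_workers=2) as executor: >>> assert executor.submit(sum, [1, 2, 3]).result() == 6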
""" from concurrent import futures if mode == 'serial' or max_workers == 0: backend = SerialExecutor() elif mode == 'thread': backend = futures.ThreadPoolExecutor(max_workers=max_workers) elif mode == 'process': backend = futures.ProcessPoolExecutor(max_workers=max_workers) # elif mode == 'asyncio': # # Experimental # backend = AsyncIOExecutor() else: raise KeyError(mode) self.backend = backend def __enter__(self): self.backend.__enter__() return self def __exit__(self, ex_type, ex_value, ex_traceback): """ Args: ex_type (Type[BaseException] | None): ex_value (BaseException | None): ex_traceback (TracebackType | None): Returns: bool | None """ # Note: the following call will block return self.backend.__exit__(ex_type, ex_value, ex_traceback) def submit(self, func, *args, **kw): """ Calls the submit function of the underlying backend. Returns: concurrent.futures.Future: a future representing the job """ return self.backend.submit(func, *args, **kw) def shutdown(self): """ Calls the shutdown function of the underlying backend. """ return self.backend.shutdown() def map(self, fn, *iterables, **kwargs): """ Calls the map function of the underlying backend. CommandLine: xdoctest -m ubelt.util_futures Executor.map Example: >>> import ubelt as ub >>> import concurrent.futures >>> import string >>> with ub.Executor(mode='serial') as executor: ... result_iter = executor.map(int, string.digits) ... results = list(result_iter) >>> print('results = {!r}'.format(results)) results = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] >>> with ub.Executor(mode='thread', max_workers=2) as executor: ... result_iter = executor.map(int, string.digits) ... results = list(result_iter) >>> # xdoctest: +IGNORE_WANT >>> print('results = {!r}'.format(results)) results = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] """ # Hack for python2 chunksize = kwargs.pop('chunksize', 1) timeout = kwargs.pop('timeout', None) if len(kwargs) != 0: # nocover raise ValueError('Unknown arguments {}'.format(kwargs)) return self.backend.map(fn, *iterables, timeout=timeout, chunksize=chunksize) class JobPool(object): """ Abstracts away boilerplate of submitting and collecting jobs This is a basic wrapper around :class:`ubelt.util_futures.Executor` that simplifies the most basic case by 1. keeping track of references to submitted futures for you and 2. providing an as_completed method to consume those futures as they are ready. Attributes: executor (Executor): internal executor object jobs (List[Future]): internal job list. Note: do not rely on this attribute, it may change in the future. Example: >>> import ubelt as ub >>> def worker(data): >>> return data + 1 >>> pool = ub.JobPool('thread', max_workers=16) >>> for data in ub.ProgIter(range(10), desc='submit jobs'): >>> pool.submit(worker, data) >>> final = [] >>> for job in pool.as_completed(desc='collect jobs'): >>> info = job.result() >>> final.append(info) >>> print('final = {!r}'.format(final)) """ def __init__(self, mode='thread', max_workers=0, transient=False): """ Args: mode (str): The backend parallelism mechanism. Can be either thread, serial, or process. Defaults to 'thread'. max_workers (int): number of workers. If 0, serial is forced. Defaults to 0. transient (bool): if True, references to jobs will be discarded as they are returned by :func:`as_completed`. Otherwise the ``jobs`` attribute holds a reference to all jobs ever submitted. Default to False. 
""" self.executor = Executor(mode=mode, max_workers=max_workers) self.transient = transient self.jobs = [] def __len__(self): return len(self.jobs) def submit(self, func, *args, **kwargs): """ Submit a job managed by the pool Args: func (Callable[..., Any]): A callable that will take as many arguments as there are passed iterables. *args : positional arguments to pass to the function *kwargs : keyword arguments to pass to the function Returns: concurrent.futures.Future: a future representing the job """ job = self.executor.submit(func, *args, **kwargs) self.jobs.append(job) return job def shutdown(self): self.jobs = None return self.executor.shutdown() def __enter__(self): self.executor.__enter__() return self def __exit__(self, ex_type, ex_value, ex_traceback): """ Args: ex_type (Type[BaseException] | None): ex_value (BaseException | None): ex_traceback (TracebackType | None): Returns: bool | None """ return self.executor.__exit__(ex_type, ex_value, ex_traceback) def _clear_completed(self): active_jobs = [job for job in self.jobs if job.running()] self.jobs = active_jobs def as_completed(self, timeout=None, desc=None, progkw=None): """ Generates completed jobs in an arbitrary order Args: timeout (float | None): Specify the the maximum number of seconds to wait for a job. Note: this is ignored in serial mode. desc (str | None): if specified, reports progress with a :class:`ubelt.progiter.ProgIter` object. progkw (dict | None): extra keyword arguments to :class:`ubelt.progiter.ProgIter`. Yields: concurrent.futures.Future: The completed future object containing the results of a job. CommandLine: xdoctest -m ubelt.util_futures JobPool.as_completed Example: >>> import ubelt as ub >>> pool = ub.JobPool('thread', max_workers=8) >>> text = ub.paragraph( ... ''' ... UDP is a cool protocol, check out the wiki: ... ... UDP-based Data Transfer Protocol (UDT), is a high-performance ... data transfer protocol designed for transferring large ... volumetric datasets over high-speed wide area networks. Such ... settings are typically disadvantageous for the more common TCP ... protocol. ... ''') >>> for word in text.split(' '): ... pool.submit(print, word) >>> for _ in pool.as_completed(): ... pass >>> pool.shutdown() """ from ubelt.progiter import ProgIter job_iter = as_completed(self.jobs, timeout=timeout) if desc is not None: if progkw is None: progkw = {} job_iter = ProgIter( job_iter, desc=desc, total=len(self.jobs), **progkw) self._prog = job_iter for job in job_iter: if self.transient: # Maybe keep a reference to the job index and then null it out # in our job list? Should probably think about a good # implementation. See kwcoco.CocoDataset._load_multiple self.jobs.remove(job) yield job def join(self, **kwargs): """ Like :func:`JobPool.as_completed`, but executes the `result` method of each future and returns only after all processes are complete. This allows for lower-boilerplate prototyping. 
Args: **kwargs: passed to :func:`JobPool.as_completed` Returns: List[Any]: list of results Example: >>> import ubelt as ub >>> # We just want to try replacing our simple iterative algorithm >>> # with the embarrassingly parallel version >>> arglist = list(zip(range(1000), range(1000))) >>> func = ub.identity >>> # >>> # Original version >>> for args in arglist: >>> func(*args) >>> # >>> # Potentially parallel version >>> jobs = ub.JobPool(max_workers=0) >>> for args in arglist: >>> jobs.submit(func, *args) >>> _ = jobs.join(desc='running') """ results = [] for job in self.as_completed(**kwargs): result = job.result() results.append(result) return results def __iter__(self): """ An alternative to as completed. NOTE: The order of iteration may be changed in the future to be the submission order instead. Yields: concurrent.futures.Future: The completed future object containing the results of a job. Example: >>> import ubelt as ub >>> pool = ub.JobPool('serial') >>> assert len(list(iter(pool))) == 0 >>> pool.submit(print, 'hi') >>> assert len(list(iter(pool))) == 1 """ for job in self.as_completed(): yield job ubelt-1.3.7/ubelt/util_futures.pyi000066400000000000000000000052541472470106000172340ustar00rootroot00000000000000from typing import Callable from typing import Tuple from typing import Dict from typing import Type from types import TracebackType import concurrent import concurrent.futures from concurrent.futures import Future from typing import Any from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ProcessPoolExecutor from typing import List from collections.abc import Generator class SerialFuture(concurrent.futures.Future): func: Callable args: Tuple kw: Dict def __init__(self, func, *args, **kw) -> None: ... def set_result(self, result) -> None: ... class SerialExecutor: max_workers: int def __enter__(self): ... def __exit__(self, ex_type: Type[BaseException] | None, ex_value: BaseException | None, ex_traceback: TracebackType | None) -> bool | None: ... def submit(self, func, *args, **kw) -> concurrent.futures.Future: ... def shutdown(self) -> None: ... def map(self, fn: Callable[..., Any], *iterables, **kwargs) -> Generator[Any, None, None]: ... class Executor: backend: SerialExecutor | ThreadPoolExecutor | ProcessPoolExecutor def __init__(self, mode: str = 'thread', max_workers: int = 0) -> None: ... def __enter__(self): ... def __exit__(self, ex_type: Type[BaseException] | None, ex_value: BaseException | None, ex_traceback: TracebackType | None) -> bool | None: ... def submit(self, func, *args, **kw) -> concurrent.futures.Future: ... def shutdown(self): ... def map(self, fn, *iterables, **kwargs): ... class JobPool: executor: Executor jobs: List[Future] transient: bool def __init__(self, mode: str = 'thread', max_workers: int = 0, transient: bool = False) -> None: ... def __len__(self): ... def submit(self, func: Callable[..., Any], *args, **kwargs) -> concurrent.futures.Future: ... def shutdown(self): ... def __enter__(self): ... def __exit__(self, ex_type: Type[BaseException] | None, ex_value: BaseException | None, ex_traceback: TracebackType | None) -> bool | None: ... def as_completed( self, timeout: float | None = None, desc: str | None = None, progkw: dict | None = None ) -> Generator[concurrent.futures.Future, None, None]: ... def join(self, **kwargs) -> List[Any]: ... def __iter__(self) -> concurrent.futures.Future: ... 
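# Usage sketch (added comment, not part of the generated stubs): the classes in this module compose roughly as Executor (selects the serial / thread / process backend) and JobPool (wraps an Executor, tracks submitted Futures, and yields them as they complete), so a typical pattern is pool.submit(...) in a loop followed by pool.join().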
ubelt-1.3.7/ubelt/util_hash.py000066400000000000000000001506361472470106000163160ustar00rootroot00000000000000r""" Wrappers around hashlib functions to generate hash signatures for common data. The hashes are deterministic across Python versions and operating systems. This is verified by CI testing on 32 and 64 bit versions of Windows, Linux, and OSX with all supported Python. Use Case #1: You have data that you want to hash. If we assume the data is in standard python scalars or ordered sequences: e.g. tuple, list, OrderedDict, OrderedSet, int, str, etc..., then the solution is :func:`hash_data`. Use Case #2: You have a file you want to hash, but your system doesn't have a sha1sum executable (or you dont want to use Popen). The solution is :func:`hash_file` The :func:`ubelt.util_hash.hash_data` function recursively hashes most builtin python data structures. This is similar to the deephash functionality provided in [PypiDeepDiff]_. The :func:`ubelt.util_hash.hash_file` function hashes data on disk. Both of the aforementioned functions have options for different hashers and alphabets. References: .. [PypiDeepDiff] https://pypi.org/project/deepdiff/ Example: >>> import ubelt as ub >>> data = ub.odict(sorted({ >>> 'param1': True, >>> 'param2': 0, >>> 'param3': [None], >>> 'param4': ('str', 4.2), >>> }.items())) >>> # hash_data can hash any ordered builtin object >>> ub.hash_data(data, hasher='sha256') 0b101481e4b894ddf6de57... Example: >>> import ubelt as ub >>> from os.path import join >>> fpath = (ub.Path.appdir('ubelt/tests').ensuredir() / 'empty_file').touch() >>> ub.hash_file(fpath, hasher='sha1') da39a3ee5e6b4b0d3255bfef95601890afd80709 Note: The exact hashes generated for data object and files may change in the future. When this happens the ``HASH_VERSION`` attribute will be incremented. Note: [util_hash.Note.1] pre 0.10.2, the protected function _hashable_sequence defaulted to types=True setting to True here for backwards compat. This means that extensions using the ``_hashable_sequence`` helper will always include types in their hashable encoding regardless of the argument setting. We may change this in the future, to be more consistent. This is a minor detail unless you are getting into the weeds of how we coerce technically non-hashable sequences into a hashable encoding. """ import hashlib import math from collections import OrderedDict from ubelt.util_const import NoParam __all__ = ['hash_data', 'hash_file'] # incremented when we make a change that modifies hashes HASH_VERSION = 2 # type: int _ALPHABET_10 = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] # type: List[str] _ALPHABET_16 = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'] _ALPHABET_26 = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] _ALPHABET_36 = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] # RFC 4648 Base32 alphabet _ALPHABET_32 = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '2', '3', '4', '5', '6', '7'] DEFAULT_ALPHABET = _ALPHABET_16 # type: List[str] def b(s): """ Args: s (str): Returns: bytes """ return s.encode("latin-1") # Sensible choices for default hashers are sha1, sha512, and xxh64. 
# xxhash.xxh64 is very fast, but non-crypto-grade and not in the standard lib # Reference: http://cyan4973.github.io/xxHash/ # Reference: https://github.com/Cyan4973/xxHash # We dont default to sha1 because it has a known collision and other issues # Reference: https://stackoverflow.com/questions/28159071/more-modern-sha # Reference: https://security.googleblog.com/2017/02/announcing-first-sha1-collision.html # Default to 512 because it is often faster than 256 on 64bit systems: # Reference: https://crypto.stackexchange.com/questions/26336/faster # DEFAULT_HASHER = xxhash.xxh32 # DEFAULT_HASHER = xxhash.xxh64 # xxh64 is the fastest, but non-standard # DEFAULT_HASHER = hashlib.sha1 # fast algo, but has a known collision # DEFAULT_HASHER = hashlib.sha512 # most robust algo, but slower than others DEFAULT_HASHER = hashlib.sha512 # type: Callable # This controls if types are used when generating hashable sequences for more # complex objects. Currently there is no way for the user to control this, and # that might need to addressed, but it will require some thought. _COMPATIBLE_HASHABLE_SEQUENCE_TYPES_DEFAULT = True # Note: the Hasher refers to hashlib._hashlib.HASH # but this does not play well with type annotations # try: # HASH = hashlib._hashlib.HASH # except AttributeError: # nocover # # Python seems to have been compiled without OpenSSL # HASH = None def _int_to_bytes(int_): r""" Converts an integer into its byte representation assumes int32 by default, but dynamically handles larger ints Args: int_ (int): Returns: bytes Example: >>> from ubelt.util_hash import _int_to_bytes, _bytes_to_int >>> int_ = 1 >>> assert _bytes_to_int((_int_to_bytes(int_))) == int_ >>> assert _int_to_bytes(int_) == b'\x01' >>> assert _bytes_to_int((_int_to_bytes(0))) == 0 >>> assert _bytes_to_int((_int_to_bytes(-1))) == -1 >>> assert _bytes_to_int((_int_to_bytes(-1000000))) == -1000000 >>> assert _bytes_to_int((_int_to_bytes(1000000))) == 1000000 """ bit_length = int_.bit_length() + 1 length = math.ceil(bit_length / 8.0) # bytelength bytes_ = int_.to_bytes(length, byteorder='big', signed=True) return bytes_ def _bytes_to_int(bytes_): r""" Converts a string of bytes into its integer representation (big-endian) Args: bytes_ (bytes): Returns: int Example: >>> bytes_ = b'\x01' >>> assert _int_to_bytes((_bytes_to_int(bytes_))) == bytes_ >>> assert _bytes_to_int(bytes_) == 1 """ int_ = int.from_bytes(bytes_, 'big', signed=True) return int_ class _Hashers(object): """ We offer hashers beyond what is available in hashlib. This class is used to lazy load them. Attributes: algos (Dict[str, object]): aliases (Dict[str, str]): """ def __init__(self): self.algos = {} # type: Dict[str, object] self.aliases = {} # type: Dict[str, str] self._lazy_queue = [ self._register_xxhash, self._register_blake3, self._register_hashlib, ] def available(self): """ The names of available hash algorithms Returns: List[str] """ if self._lazy_queue: # nocover self._evaluate_registration_queue() return list(self.algos.keys()) def _evaluate_registration_queue(self): for func in self._lazy_queue: try: func() except ImportError: # nocover ... 
        self._lazy_queue.clear()

    def __contains__(self, key):
        """
        Args:
            key (str): name of hash algo to check

        Returns:
            bool: if the algo is available
        """
        if self._lazy_queue:  # nocover
            self._evaluate_registration_queue()
        return key in self.algos or key in self.aliases

    def _register_xxhash(self):  # nocover
        import xxhash  # type: ignore
        self.algos['xxh32'] = xxhash.xxh32
        self.algos['xxh64'] = xxhash.xxh64
        self.aliases.update({
            'xxhash': 'xxh32',
            'xx32': 'xxh32',
            'xx64': 'xxh64',
        })

    def _register_blake3(self):  # nocover
        import blake3  # type: ignore
        self.algos['blake3'] = blake3.blake3
        self.aliases['b3'] = 'blake3'

    def _register_hashlib(self):
        guaranteed = set(hashlib.algorithms_guaranteed)
        for key in guaranteed:  # nocover
            self.algos[key] = getattr(hashlib, key)
        if 0:  # nocover
            # Do we want to expose these hash algos?
            available = set(hashlib.algorithms_available)
            extra = available - guaranteed
            for key in extra:
                self.algos[key] = hashlib.new(key)

    def lookup(self, hasher):
        """
        Args:
            hasher (NoParamType | str | Any): something coercible to a hasher

        Returns:
            Callable: a function to construct the requested hasher
        """
        if hasher is NoParam or hasher == 'default':
            hasher = DEFAULT_HASHER
        elif hasattr(hasher, 'hexdigest'):
            # HASH is not None and isinstance(hasher, HASH):
            # by default the result of this function is a class we will make
            # an instance of, if we already have an instance, wrap it in a
            # callable so the external syntax does not need to change.
            return lambda: hasher
        else:
            # Ensure lazy registration functions have been executed
            if self._lazy_queue:
                self._evaluate_registration_queue()
            if isinstance(hasher, str):
                hasher_ = self.aliases.get(hasher, hasher)
                if hasher_ in self.algos:  # pragma: no cover
                    return self.algos[hasher_]
                else:
                    # TODO: provide pip install messages for known hashers.
                    raise KeyError('unknown hasher: {}'.format(hasher))
        return hasher


_HASHERS = _Hashers()


def _rectify_hasher(hasher):
    """
    Convert a string-based key into a hasher class

    Note:
        In terms of speed on 64bit systems, sha1 is the fastest followed by
        md5 and sha512. The slowest algorithm is sha256. If xxhash is
        installed the fastest algorithm is xxh64.

    Example:
        >>> from ubelt.util_hash import (_rectify_hasher, DEFAULT_HASHER,
        >>>                              hashlib, NoParam, _HASHERS)
        >>> assert _rectify_hasher(NoParam) is DEFAULT_HASHER
        >>> assert _rectify_hasher('sha1') is hashlib.sha1
        >>> assert _rectify_hasher('sha256') is hashlib.sha256
        >>> assert _rectify_hasher('sha512') is hashlib.sha512
        >>> assert _rectify_hasher('md5') is hashlib.md5
        >>> assert _rectify_hasher(hashlib.sha1) is hashlib.sha1
        >>> #if HASH is not None:
        >>> assert _rectify_hasher(hashlib.sha1())().name == 'sha1'
        >>> import pytest
        >>> assert pytest.raises(KeyError, _rectify_hasher, '42')
        >>> #assert pytest.raises(TypeError, _rectify_hasher, object)
        >>> if 'xxh32' in _HASHERS:
        >>>     import xxhash
        >>>     assert _rectify_hasher('xxh64') is xxhash.xxh64
        >>>     assert _rectify_hasher('xxh32') is xxhash.xxh32
        >>> if 'blake3' in _HASHERS:
        >>>     import blake3
        >>>     assert _rectify_hasher('blake3') is blake3.blake3
    """
    # Keeping this function for backwards compatibility (even though its not
    # part of the public API)
    return _HASHERS.lookup(hasher)


def _rectify_base(base):
    """
    transforms base shorthand into the full list representation

    Args:
        base (str | int | List[str]): Can be a list of characters in the base.
            Can be a number indicating the size of the base (only 36, 32, 26,
            16, and 10 are currently allowed).
            Can be a special string key identifying a supported list of
            characters:
                abc123 or alphanum
                abc or alpha
                hex
                dec

    Example:
        >>> assert _rectify_base(NoParam) is DEFAULT_ALPHABET
        >>> assert _rectify_base('hex') is _ALPHABET_16
        >>> assert _rectify_base('abc') is _ALPHABET_26
        >>> assert _rectify_base('alphanum') is _ALPHABET_36
        >>> assert _rectify_base(10) is _ALPHABET_10
        >>> assert _rectify_base(['1', '2']) == ['1', '2']
        >>> import pytest
        >>> assert pytest.raises(TypeError, _rectify_base, 'uselist')
    """
    if base is NoParam or base == 'default':
        return DEFAULT_ALPHABET
    elif base in [36, 'abc123', 'alphanum']:
        return _ALPHABET_36
    # elif base in [32, 'rfc4648']:
    elif base in [32]:
        return _ALPHABET_32  # can't call it RFC until it conforms to a standard
    elif base in [26, 'abc', 'alpha']:
        return _ALPHABET_26
    elif base in [16, 'hex']:
        return _ALPHABET_16
    elif base in [10, 'dec']:
        return _ALPHABET_10
    else:
        if not isinstance(base, (list, tuple)):
            raise TypeError(
                'Argument `base` must be a key, list, or tuple; not {}'.format(
                    type(base)))
        return base


class HashableExtensions(object):
    """
    Helper class for managing non-primitive (e.g. numpy) hash types

    Note:
        We are introducing experimental functionality where custom instances
        of this class can be created and passed as arguments to hash_data.

    Attributes:
        iterable_checks (List[Callable]):
    """
    def __init__(self):
        self.iterable_checks = []
        self._lazy_queue = []  # type: List[Callable]  # NOQA

        # New singledispatch registry implementation
        from functools import singledispatch
        def _hash_dispatch(data):
            raise NotImplementedError
        _hash_dispatch.__is_base__ = True
        self._hash_dispatch = singledispatch(_hash_dispatch)

    def _evaluate_lazy_queue(self):
        for func in self._lazy_queue:
            func()
        self._lazy_queue.clear()

    def register(self, hash_types):
        """
        Registers a function to generate a hash for data of the appropriate
        types. This can be used to register custom classes. Internally this is
        used to define how to hash non-builtin objects like ndarrays and
        uuids.

        The registered function should return a tuple of bytes. First a small
        prefix hinting at the data type, and second the raw bytes that can be
        hashed.

        Args:
            hash_types (type | Tuple[type]):

        Returns:
            Callable: closure to be used as the decorator

        Example:
            >>> import ubelt as ub
            >>> import pytest
            >>> class MyType(object):
            ...     def __init__(self, id):
            ...         self.id = id
            >>> data = MyType(1)
            >>> # Custom types won't work with ub.hash_data by default
            >>> with pytest.raises(TypeError):
            ...     ub.hash_data(data)
            >>> # To handle custom types, you can create custom extensions
            >>> # and pass them to hash_data explicitly.
            >>> extensions = ub.util_hash.HashableExtensions()
            >>> @extensions.register(MyType)
            >>> def hash_my_type(data):
            ...     return b'mytype', b(ub.hash_data(data.id))
            >>> my_instance = MyType(1)
            >>> ub.hash_data(my_instance, extensions=extensions)

        Example:
            >>> # xdoctest: +SKIP
            >>> # Simple example
            >>> import ubelt as ub
            >>> ub.hash_data.register(pathlib.Path)(
            >>>     lambda x: (b'PATH', str(x).encode('utf-8')))

        Example:
            >>> # Can specify more than one type when you register
            >>> import ubelt as ub
            >>> import pytest
            >>> extensions = ub.util_hash.HashableExtensions()
            >>> class Type1(object):
            >>>     ...
            >>> class Type2(object):
            >>>     ...
            >>> @extensions.register([Type1, Type2])
            >>> def hash_my_type(data):
            ...     return b'mytype', b'constant'
            >>> inst1 = Type1()
            >>> inst2 = Type2()
            >>> ub.hash_data(inst1, extensions=extensions)
            >>> ub.hash_data(inst2, extensions=extensions)

        Example:
            >>> # xdoctest: +SKIP
            >>> # Skip this doctest because we don't want tests to modify
            >>> # the global state.
            >>> import ubelt as ub
            >>> import pytest
            >>> class MyType(object):
            ...     def __init__(self, id):
            ...         self.id = id
            >>> data = MyType(1)
            >>> # Custom types won't work with ub.hash_data by default
            >>> with pytest.raises(TypeError):
            ...     ub.hash_data(data)
            >>> # You can register your functions with ubelt's internal
            >>> # hashable_extension registry.
            >>> @ub.util_hash._HASHABLE_EXTENSIONS.register(MyType)
            >>> def hash_my_type(data):
            ...     return b'mytype', b(ub.hash_data(data.id))
            >>> my_instance = MyType(1)
            >>> ub.hash_data(my_instance)
            >>> # New in ubelt 1.1.0: you can now do:
            >>> # You can register your functions with ubelt's internal
            >>> # hashable_extension registry.
            >>> @ub.hash_data.register(MyType)
            >>> def hash_my_type(data):
            ...     return b'mytype', b(ub.hash_data(data.id))
            >>> my_instance = MyType(1)
            >>> ub.hash_data(my_instance)
        """
        # ensure iterable
        if not isinstance(hash_types, (list, tuple)):
            hash_types = [hash_types]
        def _decor_closure(hash_func):
            for hash_type in hash_types:
                self._hash_dispatch.register(hash_type)(hash_func)
            return hash_func
        return _decor_closure

    def lookup(self, data):
        """
        Returns an appropriate function to hash ``data`` if one has been
        registered.

        Args:
            data (object): the object the user would like to hash

        Returns:
            Callable: a function that can hash the object

        Raises:
            TypeError : if data has no registered hash methods

        Example:
            >>> import ubelt as ub
            >>> import pytest
            >>> if not ub.modname_to_modpath('numpy'):
            ...     raise pytest.skip('numpy is optional')
            >>> self = ub.util_hash.HashableExtensions()
            >>> self._register_numpy_extensions()
            >>> self._register_builtin_class_extensions()
            >>> import numpy as np
            >>> data = np.array([1, 2, 3])
            >>> self.lookup(data[0])
            >>> class Foo(object):
            >>>     def __init__(f):
            >>>         f.attr = 1
            >>> data = Foo()
            >>> assert pytest.raises(TypeError, self.lookup, data)
            >>> # If ub.hash_data does not support your object,
            >>> # then you can register it.
            >>> @self.register(Foo)
            >>> def _hashfoo(data):
            >>>     return b'FOO', data.attr
            >>> func = self.lookup(data)
            >>> assert func(data)[1] == 1
            >>> import uuid
            >>> data = uuid.uuid4()
            >>> self.lookup(data)

        Example:
            >>> # xdoctest: +REQUIRES(module:numpy)
            >>> import numpy as np
            >>> import ubelt as ub
            >>> self = ub.util_hash.HashableExtensions()
            >>> self._register_numpy_extensions()
            >>> self._register_builtin_class_extensions()
            >>> #f1 = self.lookup(3)
            >>> data = np.array([1, 2, 3])
            >>> f2 = self.lookup(data)
            >>> print(f2(data))
            >>> data = np.uint8(3)
            >>> f3 = self.lookup(data)
            >>> print(f3(data))
        """
        # Evaluate the lazy queue if anything is in it
        if self._lazy_queue:  # nocover
            # Added nocover, because a bugfix prevents this from running and
            # it is unclear how to build a test for this.
            self._evaluate_lazy_queue()

        query_hash_type = data.__class__
        # TODO: recognize some special dunder method instead
        # of strictly using this registry.
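        # Note: functools.singledispatch resolves along the class MRO, so a
        # function registered for a base class also covers its subclasses
        # unless a more specific registration exists.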
        hash_func = self._hash_dispatch.dispatch(query_hash_type)
        if getattr(hash_func, '__is_base__', False):
            base_msg = f'No registered hash func for hashable type={query_hash_type!r}'
            try:
                msg = f'{base_msg} with mro: {query_hash_type.__mro__}'
            except AttributeError:
                msg = base_msg
            raise TypeError(msg)
        return hash_func

    def add_iterable_check(self, func):
        """
        Registers a function that detects when a type is iterable

        Args:
            func (Callable):

        Returns:
            Callable
        """
        self.iterable_checks.append(func)
        return func

    def _register_numpy_extensions(self):
        """
        Registers custom functions to hash numpy data structures.

        By default ubelt enables numpy extensions
        """
        # system checks
        import numpy as np

        @self.add_iterable_check
        def is_object_ndarray(data):
            # ndarrays of objects cannot be hashed directly.
            return isinstance(data, np.ndarray) and data.dtype.kind == 'O'

        @self.register(np.ndarray)
        def _convert_numpy_array(data):
            """
            Example:
                >>> import ubelt as ub
                >>> if not ub.modname_to_modpath('numpy'):
                ...     raise pytest.skip()
                >>> import numpy as np
                >>> data_f64 = np.zeros((3, 3, 3), dtype=np.float64)
                >>> data_i64 = np.zeros((3, 3, 3), dtype=np.int64)
                >>> data_i32 = np.zeros((3, 3, 3), dtype=np.int32)
                >>> hash_f64 = _hashable_sequence(data_f64, types=True)
                >>> hash_i64 = _hashable_sequence(data_i64, types=True)
                >>> hash_i32 = _hashable_sequence(data_i32, types=True)
                >>> assert hash_i64 != hash_f64
                >>> assert hash_i64 != hash_i32
            """
            if data.dtype.kind == 'O':
                msg = 'directly hashing ndarrays with dtype=object is unstable'
                raise TypeError(msg)
            else:
                # tobytes() views the array in 1D (via ravel())
                # encode the shape as well
                # See: [util_hash.Note.1]
                header = b''.join(_hashable_sequence(
                    (len(data.shape), data.shape), extensions=self,
                    types=_COMPATIBLE_HASHABLE_SEQUENCE_TYPES_DEFAULT))
                dtype = b''.join(_hashable_sequence(
                    data.dtype.descr, extensions=self,
                    types=_COMPATIBLE_HASHABLE_SEQUENCE_TYPES_DEFAULT))
                hashable = header + dtype + data.tobytes()
            prefix = b'NDARR'
            return prefix, hashable

        @self.register(np.random.RandomState)
        def _convert_numpy_random_state(data):
            """
            Example:
                >>> import ubelt as ub
                >>> if not ub.modname_to_modpath('numpy'):
                ...     raise pytest.skip()
                >>> import numpy as np
                >>> rng = np.random.RandomState(0)
                >>> _hashable_sequence(rng, types=True)
            """
            # See: [util_hash.Note.1]
            hashable = b''.join(_hashable_sequence(
                data.get_state(), extensions=self,
                types=_COMPATIBLE_HASHABLE_SEQUENCE_TYPES_DEFAULT))
            prefix = b'RNG'
            return prefix, hashable

    def _register_builtin_class_extensions(self):
        """
        Register hashing extensions for a selection of classes included in
        python stdlib.
        This registers extensions for the following types:
            * uuid.UUID
            * collections.OrderedDict
            * dict (caveat: will be sorted, so must be sortable)

        CommandLine:
            xdoctest -m ubelt.util_hash HashableExtensions._register_builtin_class_extensions:0
            xdoctest -m ubelt.util_hash HashableExtensions._register_builtin_class_extensions:1

        Example:
            >>> import uuid
            >>> data = uuid.UUID('7e9d206b-dc02-4240-8bdb-fffe858121d0')
            >>> print(hash_data(data, base='abc', hasher='sha512', types=True)[0:8])
            cryarepd
            >>> data = OrderedDict([('a', 1), ('b', 2), ('c', [1, 2, 3]),
            >>>                     (4, OrderedDict())])
            >>> print(hash_data(data, base='abc', hasher='sha512', types=True)[0:8])
            qjspicvv

        Example:
            >>> # Ordered dictionaries are hashed differently than builtin dicts
            >>> import ubelt as ub
            >>> from collections import OrderedDict
            >>> datas = {}
            >>> datas['odict_data1'] = OrderedDict([
            >>>     ('4', OrderedDict()),
            >>>     ('a', 1),
            >>>     ('b', 2),
            >>>     ('c', [1, 2, 3]),
            >>> ])
            >>> datas['udict_data1'] = {
            >>>     '4': {},
            >>>     'a': 1,
            >>>     'b': 2,
            >>>     'c': [1, 2, 3],
            >>> }
            >>> datas['odict_data2'] = ub.dict_subset(datas['odict_data1'], ['a', '4', 'c', 'b'])
            >>> datas['udict_data2'] = ub.dict_isect(datas['udict_data1'], ['a', '4', 'c', 'b'])
            >>> datas['odict_data3'] = ub.dict_subset(datas['odict_data1'], ['c', 'b', 'a', '4'])
            >>> datas['udict_data3'] = ub.dict_isect(datas['udict_data1'], ['c', 'b', 'a', '4'])
            >>> # print('datas = {}'.format(ub.urepr(datas, nl=-1)))
            >>> for key, val in sorted(datas.items()):
            >>>     hashstr = ub.hash_data(val, base='abc', hasher='sha512', types=True)[0:8]
            >>>     print('{} = {}'.format(key, hashstr))
            odict_data1 = omnqalbe
            odict_data2 = tjrlsoel
            odict_data3 = cycowefz
            udict_data1 = bvshfmzm
            udict_data2 = bvshfmzm
            udict_data3 = bvshfmzm

        Example:
            >>> # Sets are hashed in sorted order, so the hash is invariant
            >>> # to the iteration order of the set
            >>> import ubelt as ub
            >>> print(ub.hash_data({1, 2, 3})[0:8])
            >>> print(ub.hash_data({2, 3, 1})[0:8])
            36fb38a1
            36fb38a1
            >>> # xdoctest: +REQUIRES(PY3):
            >>> print(ub.hash_data({'2', 3, 1})[0:8])
            >>> print(ub.hash_data({3, 1, '2'})[0:8])
            742ae82d
            742ae82d

        Example:
            >>> import ubelt as ub
            >>> assert ub.hash_data(slice(None)).startswith('0178e55a247d09ad282dc2e44f5388f477')

        Example:
            >>> import ubelt as ub
            >>> print(ub.hash_data(ub.Path('foo'))[0:8])
            >>> print(ub.hash_data('foo')[0:8])
            >>> print(ub.hash_data(ub.Path('foo'), types=True)[0:8])
            >>> print(ub.hash_data('foo', types=True)[0:8])
            f7fbba6e
            f7fbba6e
            cc21b9fa
            bd1cabd0
        """
        # TODO: can we only register a stdlib class if we need it?
        # Some of the stdlib modules don't need to be imported and
        # cause extra import time overhead.
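        # One conceivable refinement (a sketch, not implemented): queue each
        # stdlib registration as its own closure keyed by module, so that
        # e.g. the ``decimal`` import only happens the first time ``lookup``
        # meets a Decimal instance, rather than on the first hash_data call.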
        import uuid
        import pathlib
        import numbers
        import decimal
        import datetime as datetime_mod

        @self.register(numbers.Integral)
        def _convert_numpy_int(data):
            return _convert_to_hashable(int(data), extensions=self)

        @self.register(numbers.Real)
        def _convert_numpy_float(data):
            return _convert_to_hashable(float(data), extensions=self)

        @self.register(decimal.Decimal)
        def _convert_decimal(data):
            seq = _hashable_sequence(
                data.as_tuple(), extensions=self,
                types=_COMPATIBLE_HASHABLE_SEQUENCE_TYPES_DEFAULT)
            hashable = b''.join(seq)
            prefix = b'DECIMAL'
            return prefix, hashable

        @self.register(datetime_mod.date)
        def _convert_date(data):
            seq = _hashable_sequence(
                data.timetuple(), extensions=self,
                types=_COMPATIBLE_HASHABLE_SEQUENCE_TYPES_DEFAULT)
            hashable = b''.join(seq)
            prefix = b'DATE'
            return prefix, hashable

        @self.register(datetime_mod.datetime)
        def _convert_datetime(data):
            seq = _hashable_sequence(
                data.timetuple(), extensions=self,
                types=_COMPATIBLE_HASHABLE_SEQUENCE_TYPES_DEFAULT)
            hashable = b''.join(seq)
            prefix = b'DATETIME'
            return prefix, hashable

        @self.register(uuid.UUID)
        def _convert_uuid(data):
            hashable = data.bytes
            prefix = b'UUID'
            return prefix, hashable

        @self.register(set)
        def _convert_set(data):
            try:
                # what raises a TypeError differs between Python 2 and 3
                ordered_ = sorted(data)
            except TypeError:
                from ubelt.util_list import argsort
                data_ = list(data)
                sortx = argsort(data_, key=str)
                ordered_ = [data_[k] for k in sortx]
            # See: [util_hash.Note.1]
            hashable = b''.join(_hashable_sequence(
                ordered_, extensions=self,
                types=_COMPATIBLE_HASHABLE_SEQUENCE_TYPES_DEFAULT))
            prefix = b'SET'
            return prefix, hashable

        @self.register(dict)
        def _convert_dict(data):
            try:
                ordered_ = sorted(data.items())
                # what raises a TypeError differs between Python 2 and 3
            except TypeError:
                from ubelt.util_list import argsort
                sortx = argsort(data, key=str)
                ordered_ = [(k, data[k]) for k in sortx]
            # See: [util_hash.Note.1]
            hashable = b''.join(_hashable_sequence(
                ordered_, extensions=self,
                types=_COMPATIBLE_HASHABLE_SEQUENCE_TYPES_DEFAULT))
            prefix = b'DICT'
            return prefix, hashable

        @self.register(OrderedDict)
        def _convert_ordered_dict(data):
            """
            Currently ordered dictionaries are considered separately from
            regular dictionaries. I'm not sure what the right thing to do is.
            """
            # See: [util_hash.Note.1]
            hashable = b''.join(_hashable_sequence(
                list(data.items()), extensions=self,
                types=_COMPATIBLE_HASHABLE_SEQUENCE_TYPES_DEFAULT))
            prefix = b'ODICT'
            return prefix, hashable

        @self.register(slice)
        def _convert_slice(data):
            """
            Slices are hashed via the sequence of their start, stop, and step
            attributes.
            """
            # See: [util_hash.Note.1]
            hashable = b''.join(_hashable_sequence(
                [data.start, data.stop, data.step], extensions=self,
                types=_COMPATIBLE_HASHABLE_SEQUENCE_TYPES_DEFAULT))
            prefix = b'SLICE'
            return prefix, hashable

        self.register(pathlib.Path)(
            lambda x: (b'PATH', str(x).encode('utf-8')))

        # other data structures

    def _register_agressive_extensions(self):  # nocover
        """
        Extensions that might be desired, but we do not enable them by default

        This registers extensions for the following types:
            * none right now *
        """
        pass

    def _register_torch_extensions(self):  # nocover
        """
        Experimental.

        Define a default hash function for torch tensors, but do not use it
        by default. Currently, the user must call this explicitly.
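        Example:
            >>> # xdoctest: +SKIP (illustrative sketch; requires torch)
            >>> import ubelt as ub
            >>> import torch
            >>> ub.util_hash._HASHABLE_EXTENSIONS._register_torch_extensions()
            >>> ub.hash_data(torch.zeros(3))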
""" import torch @self.register(torch.Tensor) def _convert_torch_tensor(data): data_ = data.data.cpu().numpy() prefix = b'TORCH_TENSOR' return prefix, _convert_to_hashable(data_, extensions=self)[1] _HASHABLE_EXTENSIONS = HashableExtensions() def _lazy_init(): """ Delay the registration of any external libraries until a hashable extension is needed. """ try: _HASHABLE_EXTENSIONS._register_builtin_class_extensions() _HASHABLE_EXTENSIONS._register_numpy_extensions() except ImportError: # nocover pass _HASHABLE_EXTENSIONS._lazy_queue.append(_lazy_init) class _HashTracer(object): """ Helper class to extract hashed sequences Attributes: sequence (List[bytes]): """ def __init__(self): self.sequence = [] # type: List[bytes] def update(self, item): """ Args: item (bytes): """ self.sequence.append(item) def hexdigest(self): """ Returns: bytes """ return b''.join(self.sequence) def _hashable_sequence(data, types=False, extensions=None): r""" Extracts the sequence of bytes that would be hashed by hash_data Example: >>> data = [2, (3, 4)] >>> result1 = (b''.join(_hashable_sequence(data, types=False))) >>> result2 = (b''.join(_hashable_sequence(data, types=True))) >>> assert result1 == b'_[_\x02_,__[_\x03_,_\x04_,__]__]_' >>> assert result2 == b'_[_INT\x02_,__[_INT\x03_,_INT\x04_,__]__]_' """ hasher = _HashTracer() _update_hasher(hasher, data, types=types, extensions=extensions) return hasher.sequence def _convert_to_hashable(data, types=True, extensions=None): r""" Converts ``data`` into a hashable byte representation if an appropriate hashing function is known. Args: data (object): ordered data with structure types (bool): include type prefixes in the hash Returns: Tuple[bytes, bytes]: prefix, hashable: a prefix hinting the original data type and the byte representation of ``data``. Raises: TypeError : if data has no registered hash methods Example: >>> from ubelt.util_hash import _convert_to_hashable >>> assert _convert_to_hashable(None) == (b'NULL', b'NONE') >>> assert _convert_to_hashable('string') == (b'TXT', b'string') >>> assert _convert_to_hashable(1) == (b'INT', b'\x01') >>> assert _convert_to_hashable(1.0) == (b'FLT', b'\x01/\x01') >>> assert _convert_to_hashable(int(1)) == (b'INT', b'\x01') >>> import uuid >>> data = uuid.UUID('7e9d206b-dc02-4240-8bdb-fffe858121d0') >>> assert _convert_to_hashable(data) == (b'UUID', b'~\x9d k\xdc\x02B@\x8b\xdb\xff\xfe\x85\x81!\xd0') >>> # Test special floats >>> assert _convert_to_hashable(float('nan')) == (b'FLT', b'nan') >>> assert _convert_to_hashable(float('inf')) == (b'FLT', b'inf') >>> assert _convert_to_hashable(-float('inf')) == (b'FLT', b'-inf') >>> assert _convert_to_hashable(-0.) == (b'FLT', b'\x00/\x01') >>> assert _convert_to_hashable(+0.) 
== (b'FLT', b'\x00/\x01') """ # HANDLE MOST COMMON TYPES FIRST if data is None: hashable = b'NONE' prefix = b'NULL' elif isinstance(data, bytes): hashable = data prefix = b'TXT' elif isinstance(data, str): # convert unicode into bytes hashable = data.encode('utf-8') prefix = b'TXT' elif isinstance(data, int): # warnings.warn('Hashing ints is slow, numpy is preferred') hashable = _int_to_bytes(data) # hashable = data.to_bytes(8, byteorder='big') prefix = b'INT' elif isinstance(data, float): data_ = float(data) # convert to a base-float try: a, b = data_.as_integer_ratio() except (ValueError, OverflowError): hashable = str(data_).encode('utf-8') # handle and nan, inf else: hashable = _int_to_bytes(a) + b'/' + _int_to_bytes(b) prefix = b'FLT' else: if extensions is None: extensions = _HASHABLE_EXTENSIONS # Then dynamically look up any other type hash_func = extensions.lookup(data) prefix, hashable = hash_func(data) if types: return prefix, hashable else: return b'', hashable # TODO: convert to an iterative variant? # See ~/code/ubelt/dev/bench/bench_hash_impls.py _SEP = b'_,_' _ITER_PREFIX = b'_[_' _ITER_SUFFIX = b'_]_' def _update_hasher(hasher, data, types=True, extensions=None): """ Converts ``data`` into a byte representation and calls update on the hasher :class:`hashlib._hashlib.HASH` algorithm. Args: hasher (Hasher): instance of a hashlib algorithm data (object): ordered data with structure types (bool): include type prefixes in the hash extensions (HashableExtensions | None): overrides global extensions Example: >>> hasher = hashlib.sha512() >>> data = [1, 2, ['a', 2, 'c']] >>> _update_hasher(hasher, data) >>> print(hasher.hexdigest()[0:8]) e2c67675 """ if extensions is None: extensions = _HASHABLE_EXTENSIONS # bugfix: ensure the lazy registration queue is evaluated before running # the iterable checks. if extensions._lazy_queue: extensions._evaluate_lazy_queue() # Determine if the data should be hashed directly or iterated through if isinstance(data, (tuple, list, zip)): needs_iteration = True else: needs_iteration = any(check(data) for check in extensions.iterable_checks) if needs_iteration: # Denote that we are hashing over an iterable # Multiple structure bytes make it harder to accidentally introduce # conflicts, but this is not perfect. # SEP = b'_,_' # ITER_PREFIX = b'_[_' # ITER_SUFFIX = b'_]_' iter_ = iter(data) hasher.update(_ITER_PREFIX) # first, try to nest quickly without recursive calls # (this works if all data in the sequence is a non-iterable) try: for item in iter_: prefix, hashable = _convert_to_hashable(item, types, extensions=extensions) binary_data = prefix + hashable + _SEP hasher.update(binary_data) hasher.update(_ITER_SUFFIX) except TypeError: # need to use recursive calls # Update based on current item _update_hasher(hasher, item, types, extensions=extensions) # !>> WHOOPS: THIS IS A BUG. THERE SHOULD BE A # !>> hasher.update(_SEP) # !>> SEPARATOR HERE. # !>> BUT FIXING IT WILL BREAK BACKWARDS COMPAT. # !>> We will need to expose versions of the hasher that can be # configured, and ideally new versions will have speed improvements. for item in iter_: # Ensure the items have a spacer between them _update_hasher(hasher, item, types, extensions=extensions) hasher.update(_SEP) hasher.update(_ITER_SUFFIX) else: prefix, hashable = _convert_to_hashable(data, types, extensions=extensions) binary_data = prefix + hashable hasher.update(binary_data) def _convert_hexstr_base(hexstr, base): r""" Packs a long hexstr into a shorter length string with a larger base. 
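    In general an input of ``M`` hex symbols carries ``4 * M`` bits, so a base
    with ``B`` symbols needs about ``ceil(4 * M / log2(B))`` symbols to
    represent it losslessly. For example, a 128 hex-digit sha512 digest (512
    bits) packs into ``ceil(512 / log2(26)) = 109`` symbols of the alphabetic
    base.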
    Args:
        hexstr (str): string of hexadecimal symbols to convert
        base (list): symbols of the conversion base

    Example:
        >>> print(_convert_hexstr_base('ffffffff', _ALPHABET_26))
        nxmrlxv
        >>> print(_convert_hexstr_base('0', _ALPHABET_26))
        0
        >>> print(_convert_hexstr_base('-ffffffff', _ALPHABET_26))
        -nxmrlxv
        >>> print(_convert_hexstr_base('aafffff1', _ALPHABET_16))
        aafffff1

    Sympy:
        >>> import sympy as sy
        >>> # Determine the length savings with lossless conversion
        >>> consts = dict(hexbase=16, hexlen=256, baselen=27)
        >>> symbols = sy.symbols('hexbase, hexlen, baselen, newlen')
        >>> hexbase, hexlen, baselen, newlen = symbols
        >>> eqn = sy.Eq(16 ** hexlen, baselen ** newlen)
        >>> newlen_ans = sy.solve(eqn, newlen)[0].subs(consts).evalf()
        >>> print('newlen_ans = %r' % (newlen_ans,))
        >>> # for a 26 char base we can get 216
        >>> print('Required length for lossless conversion = %r' % (newlen_ans,))
        >>> def info(base, len):
        >>>     bits = base ** len
        >>>     print('base = %r' % (base,))
        >>>     print('len = %r' % (len,))
        >>>     print('bits = %r' % (bits,))
        >>> info(16, 256)
        >>> info(27, 16)
        >>> info(27, 64)
        >>> info(27, 216)
    """
    if base is _ALPHABET_16:
        # already in hex, no conversion needed
        return hexstr
    baselen = len(base)
    if 1:
        # NOTE: This code has an incompatibility with standard base encodings
        # because it does not pad the bytes.
        # In general for an input of M bytes, we need to use N = log(256 **
        # M)/log(B) symbols in base B to represent it.
        # This is not always an integer, so RFC encodings use paddings, but
        # we don't do that here.
        # TODO: we can probably do this faster if we implement the logic in
        # a similar way to base64, but for now this is fine.
        x = int(hexstr, 16)  # first convert to an integer in base 16
        if x == 0:
            return '0'  # bug: should be base[0]
        sign = 1 if x > 0 else -1
        x *= sign
        digits = []
        while x:
            digits.append(base[x % baselen])
            x //= baselen
        if sign < 0:
            digits.append('-')
        digits.reverse()
        newbase_str = ''.join(digits)
    return newbase_str


def _digest_hasher(hasher, base):
    """ counterpart to _update_hasher """
    # Get a 128 character hex string
    hex_text = hasher.hexdigest()
    # Shorten length of string (by increasing base)
    base_text = _convert_hexstr_base(hex_text, base)
    # Truncate
    text = base_text
    return text


# @profile
def hash_data(data, hasher=NoParam, base=NoParam, types=False, convert=False,
              extensions=None):
    """
    Get a unique hash depending on the state of the data.

    Args:
        data (object):
            Any sort of loosely organized data

        hasher (str | Hasher | NoParamType):
            string code or a hash algorithm from hashlib. Valid hashing
            algorithms are defined by :py:obj:`hashlib.algorithms_guaranteed`
            (e.g. 'sha1', 'sha512', 'md5') as well as 'xxh32' and 'xxh64' if
            :mod:`xxhash` is installed. Defaults to 'sha512'.

        base (List[str] | str | NoParamType):
            list of symbols or shorthand key. Valid keys are 'dec', 'hex',
            'abc', and 'alphanum', 10, 16, 26, 32. Defaults to 'hex'.

        types (bool):
            If True data types are included in the hash, otherwise only the
            raw data is hashed. Defaults to False.

        convert (bool):
            if True, try to convert the data to json and hash the json
            instead. This can improve runtime in some instances, however the
            hash will likely differ from the case where convert=False.

        extensions (HashableExtensions | None):
            a custom :class:`HashableExtensions` instance that can overwrite
            or define how different types of objects are hashed.

    Note:
        The types allowed are specified by the HashableExtensions object.
By default ubelt will register: OrderedDict, uuid.UUID, np.random.RandomState, np.int64, np.int32, np.int16, np.int8, np.uint64, np.uint32, np.uint16, np.uint8, np.float16, np.float32, np.float64, np.float128, np.ndarray, bytes, str, int, float, long (in python2), list, tuple, set, and dict Returns: str: text representing the hashed data Note: The alphabet26 base is a pretty nice base, I recommend it. However we default to ``base='hex'`` because it is standard. You can try the alphabet26 base by setting ``base='abc'``. Example: >>> import ubelt as ub >>> print(ub.hash_data([1, 2, (3, '4')], convert=False)) 60b758587f599663931057e6ebdf185a... >>> print(ub.hash_data([1, 2, (3, '4')], base='abc', hasher='sha512')[:32]) hsrgqvfiuxvvhcdnypivhhthmrolkzej """ if convert and not isinstance(data, str): # nocover import json try: data = json.dumps(data) except TypeError: # import warnings # warnings.warn('Unable to encode input as json due to: {!r}'.format(ex)) pass base = _rectify_base(base) hasher = _rectify_hasher(hasher)() # Feed the data into the hasher _update_hasher(hasher, data, types=types, extensions=extensions) # Get the hashed representation text = _digest_hasher(hasher, base) return text def hash_file(fpath, blocksize=1048576, stride=1, maxbytes=None, hasher=NoParam, base=NoParam): r""" Hashes the data in a file on disk. The results of this function agree with standard hashing programs (e.g. sha1sum, sha512sum, md5sum, etc...) Args: fpath (PathLike): location of the file to be hashed. blocksize (int): Amount of data to read and hash at a time. There is a trade off and the optimal number will depend on specific hardware. This number was chosen to be optimal on a developer system. See "dev/bench_hash_file" for methodology to choose this number for your use case. Defaults to 2 ** 20. stride (int): strides > 1 skip data to hash, useful for faster hashing, but less accurate, also makes hash dependent on blocksize. Defaults to 1. maxbytes (int | None): if specified, only hash the leading `maxbytes` of data in the file. hasher (str | Hasher | NoParamType): string code or a hash algorithm from hashlib. Valid hashing algorithms are defined by :py:obj:`hashlib.algorithms_guaranteed` (e.g. 'sha1', 'sha512', 'md5') as well as 'xxh32' and 'xxh64' if :mod:`xxhash` is installed. Defaults to 'sha512'. base (List[str] | int | str | NoParamType): list of symbols or shorthand key. Valid keys are 'dec', 'hex', 'abc', and 'alphanum', 10, 16, 26, 32. Defaults to 'hex'. Returns: str: the hash text References: .. [SO_3431825] http://stackoverflow.com/questions/3431825/md5-checksum-of-a-file .. 
[SO_5001893] http://stackoverflow.com/questions/5001893/when-to-use-sha-1-vs-sha-2 Example: >>> import ubelt as ub >>> from os.path import join >>> dpath = ub.Path.appdir('ubelt/tests/test-hash').ensuredir() >>> fpath = dpath / 'tmp1.txt' >>> fpath.write_text('foobar') >>> print(ub.hash_file(fpath, hasher='sha1', base='hex')) 8843d7f92416211de9ebb963ff4ce28125932878 Example: >>> import ubelt as ub >>> dpath = ub.Path.appdir('ubelt/tests/test-hash').ensuredir() >>> fpath = dpath / 'tmp2.txt' >>> # We have the ability to only hash at most ``maxbytes`` in a file >>> fpath.write_text('abcdefghijklmnop') >>> h0 = ub.hash_file(fpath, hasher='sha1', base='hex', maxbytes=11, blocksize=3) >>> h1 = ub.hash_file(fpath, hasher='sha1', base='hex', maxbytes=32, blocksize=3) >>> h2 = ub.hash_file(fpath, hasher='sha1', base='hex', maxbytes=32, blocksize=32) >>> h3 = ub.hash_file(fpath, hasher='sha1', base='hex', maxbytes=16, blocksize=1) >>> h4 = ub.hash_file(fpath, hasher='sha1', base='hex', maxbytes=16, blocksize=18) >>> assert h1 == h2 == h3 == h4 >>> assert h1 != h0 >>> # Using a stride makes the result dependent on the blocksize >>> h0 = ub.hash_file(fpath, hasher='sha1', base='hex', maxbytes=11, blocksize=3, stride=2) >>> h1 = ub.hash_file(fpath, hasher='sha1', base='hex', maxbytes=32, blocksize=3, stride=2) >>> h2 = ub.hash_file(fpath, hasher='sha1', base='hex', maxbytes=32, blocksize=32, stride=2) >>> h3 = ub.hash_file(fpath, hasher='sha1', base='hex', maxbytes=16, blocksize=1, stride=2) >>> h4 = ub.hash_file(fpath, hasher='sha1', base='hex', maxbytes=16, blocksize=18, stride=2) >>> assert h1 != h2 != h3 >>> assert h1 == h0 >>> assert h2 == h4 Example: >>> import ubelt as ub >>> from os.path import join >>> dpath = ub.Path.appdir('ubelt/tests/test-hash').ensuredir() >>> fpath = ub.touch(join(dpath, 'empty_file')) >>> # Test that the output is the same as sha1sum executable >>> if ub.find_exe('sha1sum'): >>> want = ub.cmd(['sha1sum', fpath], verbose=2)['out'].split(' ')[0] >>> got = ub.hash_file(fpath, hasher='sha1') >>> print('want = {!r}'.format(want)) >>> print('got = {!r}'.format(got)) >>> assert want.endswith(got) >>> # Do the same for sha512 sum and md5sum >>> if ub.find_exe('sha512sum'): >>> want = ub.cmd(['sha512sum', fpath], verbose=2)['out'].split(' ')[0] >>> got = ub.hash_file(fpath, hasher='sha512') >>> print('want = {!r}'.format(want)) >>> print('got = {!r}'.format(got)) >>> assert want.endswith(got) >>> if ub.find_exe('md5sum'): >>> want = ub.cmd(['md5sum', fpath], verbose=2)['out'].split(' ')[0] >>> got = ub.hash_file(fpath, hasher='md5') >>> print('want = {!r}'.format(want)) >>> print('got = {!r}'.format(got)) >>> assert want.endswith(got) Ignore: # Our hashdata with base32 should be compatible with the standard # (note: in general depending on the base it isn't because I think a padding issue) text = 'foobar2' std_result = ub.cmd(f'printf "{text}" | sha1sum', shell=True)['out'].split(' ')[0] our_result = ub.hash_data(text, hasher='sha1', types=False) print(f'std_result={std_result}') print(f'our_result={our_result}') assert our_result == std_result std_result = ub.cmd(fr'printf "{text}" | sha1sum | cut -f1 -d\ | xxd -r -p | base32', shell=True)['out'].split()[0] our_result = ub.hash_data(text, hasher='sha1', base=32) print(f'std_result={std_result}') print(f'our_result={our_result}') assert our_result == std_result """ # TODO: add logic such that you can update an existing hasher base = _rectify_base(base) hasher = _rectify_hasher(hasher)() with open(fpath, 'rb') as file: buf = 
file.read(blocksize)
        # We separate implementations for speed. Haven't benchmarked, but the
        # idea is to keep the inner loop extremely tight
        if maxbytes is None:
            if stride > 1:
                # skip blocks when stride is greater than 1
                while len(buf) > 0:
                    hasher.update(buf)
                    file.seek(blocksize * (stride - 1), 1)
                    buf = file.read(blocksize)
            else:
                # otherwise hash the entire file
                while len(buf) > 0:
                    hasher.update(buf)
                    buf = file.read(blocksize)
        else:
            # In this case we hash at most ``maxbytes``
            maxremain = maxbytes
            if stride > 1:
                while len(buf) > 0 and maxremain > 0:
                    buf = buf[:maxremain]
                    maxremain -= len(buf)
                    hasher.update(buf)
                    if maxremain > 0:
                        file.seek(blocksize * (stride - 1), 1)
                        buf = file.read(blocksize)
            else:
                while len(buf) > 0 and maxremain > 0:
                    buf = buf[:maxremain]
                    maxremain -= len(buf)
                    hasher.update(buf)
                    if maxremain > 0:
                        buf = file.read(blocksize)
    # Get the hashed representation
    text = _digest_hasher(hasher, base)
    return text


# Give the hash_data function itself a reference to the default extensions
# register method so the user can modify them without accessing this module
hash_data.extensions = _HASHABLE_EXTENSIONS
hash_data.register = _HASHABLE_EXTENSIONS.register

# class Hasher:
#     """
#     TODO
#     """
#     def __init__(self, hasher):
#         ...
#     hash_data = ...
#     hash_file = ...

ubelt-1.3.7/ubelt/util_hash.pyi
from typing import Dict
from typing import List
from ubelt.util_const import NoParam
from ubelt.util_const import NoParamType
from typing import Any
from typing import Callable
from typing import Tuple
from os import PathLike
from typing import Any, TypeVar

Hasher = TypeVar("Hasher")

HASH_VERSION: int
DEFAULT_ALPHABET: List[str]


def b(s: str) -> bytes:
    ...


DEFAULT_HASHER: Callable


class _Hashers:
    algos: Dict[str, object]
    aliases: Dict[str, str]

    def __init__(self) -> None:
        ...

    def available(self) -> List[str]:
        ...

    def __contains__(self, key: str) -> bool:
        ...

    def lookup(self, hasher: NoParamType | str | Any) -> Callable:
        ...


class HashableExtensions:
    iterable_checks: List[Callable]

    def __init__(self) -> None:
        ...

    def register(self, hash_types: type | Tuple[type]) -> Callable:
        ...

    def lookup(self, data: object) -> Callable:
        ...

    def add_iterable_check(self, func: Callable) -> Callable:
        ...


class _HashTracer:
    sequence: List[bytes]

    def __init__(self) -> None:
        ...

    def update(self, item: bytes) -> None:
        ...

    def hexdigest(self) -> bytes:
        ...


def hash_data(data: object,
              hasher: str | Hasher | NoParamType = NoParam,
              base: List[str] | str | NoParamType = NoParam,
              types: bool = False,
              convert: bool = False,
              extensions: HashableExtensions | None = None) -> str:
    ...


def hash_file(fpath: PathLike,
              blocksize: int = 1048576,
              stride: int = 1,
              maxbytes: int | None = None,
              hasher: str | Hasher | NoParamType = NoParam,
              base: List[str] | int | str | NoParamType = NoParam) -> str:
    ...

ubelt-1.3.7/ubelt/util_import.py
r"""
Expose functions to simplify importing from module names and paths.

The :func:`ubelt.import_module_from_path` function does its best to load a
python file into the current set of global modules. The
:func:`ubelt.import_module_from_name` works similarly.

The :func:`ubelt.modname_to_modpath` and :func:`ubelt.modpath_to_modname`
functions work statically and convert between module names and file paths on
disk.

The :func:`ubelt.split_modpath` function separates modules into a root and
base path depending on where the first ``__init__.py`` file is.
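Example:
    >>> # A quick round trip between module names and paths (a sketch that
    >>> # assumes ubelt itself is installed and importable)
    >>> import ubelt as ub
    >>> modpath = ub.modname_to_modpath('ubelt.util_hash')
    >>> print(ub.modpath_to_modname(modpath))
    ubelt.util_hash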
""" from os.path import (abspath, basename, dirname, exists, expanduser, isdir, isfile, join, realpath, relpath, split, splitext) import os import sys import warnings __all__ = [ 'split_modpath', 'modname_to_modpath', 'modpath_to_modname', 'import_module_from_name', 'import_module_from_path', ] IS_PY_GE_308 = sys.version_info[0] >= 3 and sys.version_info[1] >= 8 # type: bool class PythonPathContext(object): """ Context for temporarily adding a dir to the PYTHONPATH. Used in testing, and used as a helper in certain ubelt functions. Warning: Even though this context manager takes precautions, this modifies ``sys.path``, and things can go wrong when that happens. This is generally safe as long as nothing else you do inside of this context modifies the path. If the path is modified in this context, we will try to detect it and warn. Attributes: dpath (str | PathLike): directory to insert into the PYTHONPATH index (int): position to add to. Typically either -1 or 0. Example: >>> from ubelt.util_import import PythonPathContext >>> import sys >>> with PythonPathContext('foo', -1): >>> assert sys.path[-1] == 'foo' >>> assert sys.path[-1] != 'foo' >>> with PythonPathContext('bar', 0): >>> assert sys.path[0] == 'bar' >>> assert sys.path[0] != 'bar' Example: >>> # xdoctest: +REQUIRES(module:pytest) >>> # Mangle the path inside the context >>> from ubelt.util_import import PythonPathContext >>> import sys >>> self = PythonPathContext('foo', 0) >>> self.__enter__() >>> sys.path.insert(0, 'mangled') >>> import pytest >>> with pytest.warns(UserWarning): >>> self.__exit__(None, None, None) Example: >>> # xdoctest: +REQUIRES(module:pytest) >>> from ubelt.util_import import PythonPathContext >>> import sys >>> self = PythonPathContext('foo', 0) >>> self.__enter__() >>> sys.path.remove('foo') >>> import pytest >>> with pytest.raises(RuntimeError): >>> self.__exit__(None, None, None) """ def __init__(self, dpath, index=0): """ Args: dpath (str | PathLike): directory to insert into the PYTHONPATH index (int): position to add to. Typically either -1 or 0. """ self.dpath = os.fspath(dpath) self.index = index def __enter__(self): if self.index < 0: self.index = len(sys.path) + self.index + 1 sys.path.insert(self.index, self.dpath) def __exit__(self, ex_type, ex_value, ex_traceback): """ Args: ex_type (Type[BaseException] | None): ex_value (BaseException | None): ex_traceback (TracebackType | None): Returns: bool | None """ need_recover = False if len(sys.path) <= self.index: # nocover msg_parts = [ 'sys.path changed while in PythonPathContext.', 'len(sys.path) = {!r} but index is {!r}'.format( len(sys.path), self.index), ] need_recover = True if sys.path[self.index] != self.dpath: # nocover # The path is not where we put it, the path must have been mangled msg_parts = [ 'sys.path changed while in PythonPathContext', 'Expected dpath={!r} at index={!r} in sys.path, but got ' 'dpath={!r}'.format( self.dpath, self.index, sys.path[self.index] ) ] need_recover = True if need_recover: # Try and find where the temporary path went try: real_index = sys.path.index(self.dpath) except ValueError: msg_parts.append('Expected dpath was not in sys.path') raise RuntimeError('\n'.join(msg_parts)) else: # We were able to recover, but warn the user. This method of # recovery is a heuristic and does not work in some cases. msg_parts.append(( 'Expected dpath was at index {}. ' 'This could indicate conflicting module namespaces.' 
                ).format(real_index))
                warnings.warn('\n'.join(msg_parts))
                sys.path.pop(real_index)
        else:
            sys.path.pop(self.index)


def import_module_from_path(modpath, index=-1):
    """
    Imports a module via a filesystem path.

    This works by modifying ``sys.path``, importing the module name, and then
    attempting to undo the change to sys.path. This function may produce
    unexpected results in the case where the imported module itself modifies
    ``sys.path`` or if there is another conflicting module with the same name.

    Args:
        modpath (str | PathLike): Path to the module on disk or within a
            zipfile. Paths within a zipfile can be given as
            ``<path-to>.zip/<path-inside-zip>.py``.

        index (int): Location at which we modify PYTHONPATH if necessary.
            If your module name does not conflict, the safest value is -1.
            However, if there is a conflict, then use an index of 0.
            The default may change to 0 in the future.

    Returns:
        ModuleType: the imported module

    References:
        .. [SO_67631] https://stackoverflow.com/questions/67631/import-module-given-path

    Raises:
        IOError - when the path to the module does not exist
        ImportError - when the module is unable to be imported

    Note:
        If the module is part of a package, the package will be imported
        first. These modules may cause problems when reloading via IPython
        magic.

        This can import a module from within a zipfile. To do this modpath
        should specify the path to the zipfile and the path to the module
        within that zipfile separated by a colon or pathsep.
        E.g. "/path/to/archive.zip:mymodule.py"

    Warning:
        It is best to use this with paths that will not conflict with
        previously existing modules.

        If the modpath conflicts with a previously existing module name, and
        the target module does imports of its own relative to this
        conflicting path, then the module that was loaded first will win.

        For example if you try to import '/foo/bar/pkg/mod.py' from the folder
        structure:

        .. code::

            - foo/
              +- bar/
                 +- pkg/
                    +  __init__.py
                    |- mod.py
                    |- helper.py

        If there exists another module named ``pkg`` already in
        ``sys.modules`` and mod.py contains the code ``from . import helper``,
        Python will assume helper belongs to the ``pkg`` module already in
        ``sys.modules``. This can cause a NameError or worse --- an incorrect
        helper module.
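        When the conflict is only about which directory shadows which on
        ``sys.path`` (rather than an already-imported module), passing
        ``index=0`` as described in ``Args`` may help::

            module = import_module_from_path('/foo/bar/pkg/mod.py', index=0)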
SeeAlso: :func:`import_module_from_name` Example: >>> import ubelt as ub >>> import xdoctest >>> modpath = xdoctest.__file__ >>> module = ub.import_module_from_path(modpath) >>> assert module is xdoctest Example: >>> # Test importing a module from within a zipfile >>> import ubelt as ub >>> import zipfile >>> from xdoctest import utils >>> import os >>> from os.path import join, expanduser, normpath >>> dpath = expanduser('~/.cache/xdoctest') >>> dpath = utils.ensuredir(dpath) >>> #dpath = utils.TempDir().ensure() >>> # Write to an external module named bar >>> external_modpath = join(dpath, 'bar.py') >>> # For pypy support we have to write this using with >>> with open(external_modpath, 'w') as file: >>> file.write('testvar = 1') >>> internal = 'folder/bar.py' >>> # Move the external bar module into a zipfile >>> zippath = join(dpath, 'myzip.zip') >>> with zipfile.ZipFile(zippath, 'w') as myzip: >>> myzip.write(external_modpath, internal) >>> # Import the bar module from within the zipfile >>> modpath = zippath + ':' + internal >>> modpath = zippath + os.path.sep + internal >>> module = ub.import_module_from_path(modpath) >>> assert normpath(module.__name__) == normpath('folder/bar') >>> assert module.testvar == 1 Example: >>> import pytest >>> import ubelt as ub >>> with pytest.raises(IOError): >>> ub.import_module_from_path('does-not-exist') >>> with pytest.raises(IOError): >>> ub.import_module_from_path('does-not-exist.zip/') """ modpath = os.fspath(modpath) if not os.path.exists(modpath): import re import zipimport # We allow (if not prefer or force) the colon to be a path.sep in order # to agree with the mod.__name__ attribute that will be produced # TODO: we could codify this by using `util_zip.split_archive` # zip followed by colon or slash pat = '(.zip[' + re.escape(os.path.sep) + '/:])' parts = re.split(pat, modpath, flags=re.IGNORECASE) if len(parts) > 2: archivepath = ''.join(parts[:-1])[:-1] internal = parts[-1] modname = os.path.splitext(internal)[0] modname = os.path.normpath(modname) if os.path.exists(archivepath): zimp_file = zipimport.zipimporter(archivepath) try: try: module = zimp_file.load_module(modname) except Exception: # nocover module = zimp_file.load_module(modname.replace('\\', '/')) # hack except Exception as ex: # nocover text = ( 'Encountered error in import_module_from_path ' 'while calling load_module: ' 'modpath={modpath!r}, ' 'internal={internal!r}, ' 'modname={modname!r}, ' 'archivepath={archivepath!r}, ' 'ex={ex!r}' ).format( modpath=modpath, internal=internal, modname=modname, archivepath=archivepath, ex=ex) raise Exception(text) return module raise IOError('modpath={} does not exist'.format(modpath)) else: # the importlib version does not work in pytest module = _custom_import_modpath(modpath, index=index) # TODO: use this implementation once pytest fixes importlib # module = _importlib_import_modpath(modpath) return module def import_module_from_name(modname): """ Imports a module from its string name (i.e. ``__name__``) This is a simple wrapper around :func:`importlib.import_module`, but is provided as a companion function to :func:`import_module_from_path`, which contains functionality not provided in the Python standard library. 
Args: modname (str): module name Returns: ModuleType: module SeeAlso: :func:`import_module_from_path` Example: >>> # test with modules that won't be imported in normal circumstances >>> # todo write a test where we guarantee this >>> import ubelt as ub >>> import sys >>> modname_list = [ >>> 'pickletools', >>> 'email.mime.text', >>> ] >>> #assert not any(m in sys.modules for m in modname_list) >>> modules = [ub.import_module_from_name(modname) for modname in modname_list] >>> assert [m.__name__ for m in modules] == modname_list >>> assert all(m in sys.modules for m in modname_list) """ if True: # See if this fixes the Docker issue we saw but were unable to # reproduce on another environment. Either way its better to use the # standard importlib implementation than the one I wrote a long time # ago. import importlib module = importlib.import_module(modname) else: # nocover # The __import__ statement is weird if '.' in modname: fromlist = modname.split('.')[-1] fromlist_ = list(map(str, fromlist)) # needs to be ascii for python2.7 module = __import__(modname, {}, {}, fromlist_, 0) else: module = __import__(modname, {}, {}, [], 0) return module def _extension_module_tags(): """ Returns valid tags an extension module might have Returns: List[str] """ import sysconfig tags = [] # handle PEP 3149 -- ABI version tagged .so files # ABI = application binary interface tags.append(sysconfig.get_config_var('SOABI')) tags.append('abi3') # not sure why this one is valid but it is tags = [t for t in tags if t] return tags def _platform_pylib_exts(): # nocover """ Returns .so, .pyd, or .dylib depending on linux, win or mac. On python3 return the previous with and without abi (e.g. .cpython-35m-x86_64-linux-gnu) flags. On python2 returns with and without multiarch. Returns: tuple """ import sysconfig valid_exts = [] # return with and without API flags # handle PEP 3149 -- ABI version tagged .so files base_ext = '.' + sysconfig.get_config_var('EXT_SUFFIX').split('.')[-1] for tag in _extension_module_tags(): valid_exts.append('.' + tag + base_ext) valid_exts.append(base_ext) return tuple(valid_exts) def _syspath_modname_to_modpath(modname, sys_path=None, exclude=None): """ syspath version of modname_to_modpath Args: modname (str): name of module to find sys_path (None | List[str | PathLike]): The paths to search for the module. If unspecified, defaults to ``sys.path``. exclude (List[str | PathLike] | None): If specified prevents these directories from being searched. Defaults to None. Returns: str: path to the module. Note: This is much slower than the pkgutil mechanisms. There seems to be a change to the editable install mechanism: https://github.com/pypa/setuptools/issues/3548 Trying to find more docs about it. TODO: add a test where we make an editable install, regular install, standalone install, and check that we always find the right path. Example: >>> from ubelt.util_import import * # NOQA >>> from ubelt.util_import import _syspath_modname_to_modpath >>> print(_syspath_modname_to_modpath('xdoctest.static_analysis')) ...static_analysis.py >>> print(_syspath_modname_to_modpath('xdoctest')) ...xdoctest >>> # xdoctest: +REQUIRES(CPython) >>> print(_syspath_modname_to_modpath('_ctypes')) ..._ctypes... 
>>> assert _syspath_modname_to_modpath('xdoctest', sys_path=[]) is None >>> assert _syspath_modname_to_modpath('xdoctest.static_analysis', sys_path=[]) is None >>> assert _syspath_modname_to_modpath('_ctypes', sys_path=[]) is None >>> assert _syspath_modname_to_modpath('this', sys_path=[]) is None Example: >>> # test what happens when the module is not visible in the path >>> from ubelt.util_import import * # NOQA >>> from ubelt.util_import import _syspath_modname_to_modpath >>> modname = 'xdoctest.static_analysis' >>> modpath = _syspath_modname_to_modpath(modname) >>> exclude = [split_modpath(modpath)[0]] >>> found = _syspath_modname_to_modpath(modname, exclude=exclude) >>> if found is not None: >>> # Note: the basic form of this test may fail if there are >>> # multiple versions of the package installed. Try and fix that. >>> other = split_modpath(found)[0] >>> assert other not in exclude >>> exclude.append(other) >>> found = _syspath_modname_to_modpath(modname, exclude=exclude) >>> if found is not None: >>> raise AssertionError( >>> 'should not have found {}.'.format(found) + >>> ' because we excluded: {}.'.format(exclude) + >>> ' cwd={} '.format(os.getcwd()) + >>> ' sys.path={} '.format(sys.path) >>> ) """ import glob def _isvalid(modpath, base): # every directory up to the module, should have an init subdir = dirname(modpath) while subdir and subdir != base: if not exists(join(subdir, '__init__.py')): return False subdir = dirname(subdir) return True _fname_we = modname.replace('.', os.path.sep) candidate_fnames = [ _fname_we + '.py', # _fname_we + '.pyc', # _fname_we + '.pyo', ] # Add extension library suffixes candidate_fnames += [_fname_we + ext for ext in _platform_pylib_exts()] if sys_path is None: sys_path = sys.path # the empty string in sys.path indicates cwd. Change this to a '.' candidate_dpaths = ['.' if p == '' else p for p in sys_path] if exclude: def normalize(p): if sys.platform.startswith('win32'): # nocover return realpath(p).lower() else: return realpath(p) # Keep only the paths not in exclude real_exclude = {normalize(p) for p in exclude} candidate_dpaths = [p for p in candidate_dpaths if normalize(p) not in real_exclude] def check_dpath(dpath): # Check for directory-based modules (has precedence over files) modpath = join(dpath, _fname_we) if exists(modpath): if isfile(join(modpath, '__init__.py')): if _isvalid(modpath, dpath): return modpath # If that fails, check for file-based modules for fname in candidate_fnames: modpath = join(dpath, fname) if isfile(modpath): if _isvalid(modpath, dpath): return modpath _pkg_name = _fname_we.split(os.path.sep)[0] _pkg_name_hypen = _pkg_name.replace('_', '-') _egglink_fname1 = _pkg_name + '.egg-link' _egglink_fname2 = _pkg_name_hypen + '.egg-link' # FIXME! suffixed modules will clobber break! # Currently mitigating this by looping over all possible matches, # but it would be nice to ensure we are not matching suffixes. # however, we should probably match and handle different versions. _editable_fname_pth_pat = '__editable__.' + _pkg_name + '-*.pth' # NOTE: the __editable__ finders are named after the package, but the # module could have a different name, so we cannot use the package name # (which in this case is really the module name) in the pattern, and we # have to check all of the finders. 
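    # For reference, a generated finder module typically contains a literal
    # mapping such as (illustrative only; exact contents depend on the
    # setuptools version):
    #     MAPPING = {'mypkg': '/home/user/code/mypkg/src/mypkg'}
    # which is what the ``_static_parse('MAPPING', ...)`` call below extracts.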
# _editable_fname_finder_py_pat = '__editable___' + _pkg_name + '_*finder.py' _editable_fname_finder_py_pat = '__editable___*_*finder.py' found_modpath = None for dpath in candidate_dpaths: modpath = check_dpath(dpath) if modpath: found_modpath = modpath break # Attempt to handle PEP660 import hooks. # We should look for a finder path first, because a pth might # not contain a real path, but code to load the finder. # Which one is used is defined in setuptools/editable_wheel.py # It will depend on an "Editable Strategy". # Basically a finder will be used for "complex" structures and # basic pth will be used for "simple" structures (which means has a # src/modname folder). new_editable_finder_paths = sorted(glob.glob(join(dpath, _editable_fname_finder_py_pat))) if new_editable_finder_paths: # nocover # This makes some assumptions, which may not hold in general # We may need to fallback entirely on pkgutil, which would # ultimately be good. Hopefully the new standards mean it does not # break with pytest anymore? Nope, pytest still doesn't work right # with it. for finder_fpath in new_editable_finder_paths: try: mapping = _static_parse('MAPPING', finder_fpath) except AttributeError: ... else: try: target = dirname(mapping[_pkg_name]) except KeyError: ... else: if not exclude or normalize(target) not in real_exclude: # pragma: nobranch modpath = check_dpath(target) if modpath: # pragma: nobranch found_modpath = modpath break if found_modpath is not None: break # If a finder does not exist, then the __editable__ pth file might hold # the path itself. Check for that. new_editable_pth_paths = sorted(glob.glob(join(dpath, _editable_fname_pth_pat))) if new_editable_pth_paths: # nocover # Disable coverage because the test that covers this is too slow. # It can be made faster, re-enable when that lands. import pathlib for editable_pth in new_editable_pth_paths: editable_pth = pathlib.Path(editable_pth) target = editable_pth.read_text().strip().split('\n')[-1] if not exclude or normalize(target) not in real_exclude: modpath = check_dpath(target) if modpath: # pragma: nobranch found_modpath = modpath break if found_modpath is not None: break # If file path checks fails, check for egg-link based modules # (Python usually puts egg links into sys.path, but if the user is # providing the path then it is important to check them explicitly) linkpath1 = join(dpath, _egglink_fname1) linkpath2 = join(dpath, _egglink_fname2) linkpath = None if isfile(linkpath1): # nocover linkpath = linkpath1 elif isfile(linkpath2): # nocover linkpath = linkpath2 if linkpath is not None: # nocover # We exclude this from coverage because its difficult to write a # unit test where we can enforce that there is a module installed # in development mode. # Note: the new test_editable_modules.py test can do this, but # this old method may no longer be supported. # TODO: ensure this is the correct way to parse egg-link files # https://setuptools.readthedocs.io/en/latest/formats.html#egg-links # The docs state there should only be one line, but I see two. 
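            # For reference, an egg-link file usually looks like this
            # illustrative example (two lines in practice):
            #     /home/user/code/mypkg
            #     .
            # Only the first line, the target directory, is used here.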
            with open(linkpath, 'r') as file:
                target = file.readline().strip()
            if not exclude or normalize(target) not in real_exclude:
                modpath = check_dpath(target)
                if modpath:
                    found_modpath = modpath
                    break
    return found_modpath


def _custom_import_modpath(modpath, index=-1):
    dpath, rel_modpath = split_modpath(modpath)
    modname = modpath_to_modname(modpath)
    try:
        with PythonPathContext(dpath, index=index):
            module = import_module_from_name(modname)
    except Exception as ex:  # nocover
        msg_parts = [(
            'ERROR: Failed to import modname={} with modpath={} and '
            'sys.path modified with {} at index={}').format(
                modname, modpath, repr(dpath), index)
        ]
        msg_parts.append('Caused by: {}'.format(repr(ex)))
        raise RuntimeError('\n'.join(msg_parts))
    return module


def _importlib_import_modpath(modpath):  # nocover
    """
    Alternative to import_module_from_path using importlib mechanisms

    Args:
        modpath (str): path to the module file.
    """
    dpath, rel_modpath = split_modpath(modpath)
    modname = modpath_to_modname(modpath)
    import importlib.util
    spec = importlib.util.spec_from_file_location(modname, modpath)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module


def _importlib_modname_to_modpath(modname):  # nocover
    import importlib.util
    spec = importlib.util.find_spec(modname)
    modpath = spec.origin.replace('.pyc', '.py')
    return modpath


def _pkgutil_modname_to_modpath(modname):  # nocover
    """
    faster version of :func:`_syspath_modname_to_modpath` using builtin python
    mechanisms, but unfortunately it doesn't play nice with pytest.

    Note:
        pkgutil.find_loader is deprecated in 3.12 and removed in 3.14

    Args:
        modname (str): the module name.

    Example:
        >>> # xdoctest: +SKIP
        >>> from ubelt.util_import import _pkgutil_modname_to_modpath
        >>> modname = 'xdoctest.static_analysis'
        >>> _pkgutil_modname_to_modpath(modname)
        ...static_analysis.py
        >>> # xdoctest: +REQUIRES(CPython)
        >>> _pkgutil_modname_to_modpath('_ctypes')
        ..._ctypes...

    Ignore:
        >>> _pkgutil_modname_to_modpath('cv2')
    """
    import pkgutil
    loader = pkgutil.find_loader(modname)
    if loader is None:
        raise Exception('No module named {} in the PYTHONPATH'.format(modname))
    modpath = loader.get_filename().replace('.pyc', '.py')
    return modpath


def modname_to_modpath(modname, hide_init=True, hide_main=False, sys_path=None):
    """
    Finds the path to a python module from its name.

    Determines the path to a python module without directly importing it.
    Converts the name of a module (__name__) to the path (__file__) where it
    is located without importing the module. Returns None if the module does
    not exist.

    Args:
        modname (str): The name of a module in ``sys_path``.

        hide_init (bool): if False, __init__.py will be returned for packages.
            Defaults to True.

        hide_main (bool): if False, and ``hide_init`` is True, __main__.py
            will be returned for packages, if it exists. Defaults to False.

        sys_path (None | List[str | PathLike]):
            The paths to search for the module.
            If unspecified, defaults to ``sys.path``.
Returns: str | None: modpath - path to the module, or None if it doesn't exist Example: >>> from ubelt.util_import import modname_to_modpath >>> modname = 'xdoctest.__main__' >>> modpath = modname_to_modpath(modname, hide_main=False) >>> assert modpath.endswith('__main__.py') >>> modname = 'xdoctest' >>> modpath = modname_to_modpath(modname, hide_init=False) >>> assert modpath.endswith('__init__.py') >>> # xdoctest: +REQUIRES(CPython) >>> modpath = basename(modname_to_modpath('_ctypes')) >>> assert 'ctypes' in modpath """ if hide_main or sys_path: modpath = _syspath_modname_to_modpath(modname, sys_path) else: # import xdev # with xdev.embed_on_exception_context: # try: # modpath = _importlib_modname_to_modpath(modname) # except Exception: # modpath = _syspath_modname_to_modpath(modname, sys_path) # modpath = _pkgutil_modname_to_modpath(modname, sys_path) modpath = _syspath_modname_to_modpath(modname, sys_path) if modpath is None: return None modpath = normalize_modpath(modpath, hide_init=hide_init, hide_main=hide_main) return modpath def normalize_modpath(modpath, hide_init=True, hide_main=False): """ Normalizes __init__ and __main__ paths. Args: modpath (str | PathLike): path to a module hide_init (bool): if True, always return package modules as __init__.py files otherwise always return the dpath. Defaults to True. hide_main (bool): if True, always strip away main files otherwise ignore __main__.py. Defaults to False. Returns: str | PathLike: a normalized path to the module Note: Adds __init__ if reasonable, but only removes __main__ by default Example: >>> from ubelt.util_import import normalize_modpath >>> from xdoctest import static_analysis as module >>> modpath = module.__file__ >>> assert normalize_modpath(modpath) == modpath.replace('.pyc', '.py') >>> dpath = dirname(modpath) >>> res0 = normalize_modpath(dpath, hide_init=0, hide_main=0) >>> res1 = normalize_modpath(dpath, hide_init=0, hide_main=1) >>> res2 = normalize_modpath(dpath, hide_init=1, hide_main=0) >>> res3 = normalize_modpath(dpath, hide_init=1, hide_main=1) >>> assert res0.endswith('__init__.py') >>> assert res1.endswith('__init__.py') >>> assert not res2.endswith('.py') >>> assert not res3.endswith('.py') """ if hide_init: if basename(modpath) == '__init__.py': modpath = dirname(modpath) hide_main = True else: # add in init, if reasonable modpath_with_init = join(modpath, '__init__.py') if exists(modpath_with_init): modpath = modpath_with_init if hide_main: # We can remove main, but dont add it if basename(modpath) == '__main__.py': # corner case where main might just be a module name not in a pkg parallel_init = join(dirname(modpath), '__init__.py') if exists(parallel_init): modpath = dirname(modpath) return modpath def modpath_to_modname(modpath, hide_init=True, hide_main=False, check=True, relativeto=None): """ Determines importable name from file path Converts the path to a module (__file__) to the importable python name (__name__) without importing the module. The filename is converted to a module name, and parent directories are recursively included until a directory without an __init__.py file is encountered. Args: modpath (str): Module filepath hide_init (bool): Removes the __init__ suffix. Defaults to True. hide_main (bool): Removes the __main__ suffix. Defaults to False. check (bool): If False, does not raise an error if modpath is a dir and does not contain an __init__ file. Defaults to True. relativeto (str | None): If specified, all checks are ignored and this is considered the path to the root module. 
Defaults to None. TODO: - [ ] Does this need modification to support PEP 420? https://www.python.org/dev/peps/pep-0420/ Returns: str: modname Raises: ValueError: if check is True and the path does not exist Example: >>> from ubelt.util_import import modpath_to_modname >>> from xdoctest import static_analysis >>> modpath = static_analysis.__file__.replace('.pyc', '.py') >>> modpath = modpath.replace('.pyc', '.py') >>> modname = modpath_to_modname(modpath) >>> assert modname == 'xdoctest.static_analysis' Example: >>> from ubelt.util_import import modpath_to_modname >>> import xdoctest >>> assert modpath_to_modname(xdoctest.__file__.replace('.pyc', '.py')) == 'xdoctest' >>> assert modpath_to_modname(dirname(xdoctest.__file__.replace('.pyc', '.py'))) == 'xdoctest' Example: >>> # xdoctest: +REQUIRES(CPython) >>> from ubelt.util_import import modpath_to_modname >>> from ubelt.util_import import modname_to_modpath >>> modpath = modname_to_modpath('_ctypes') >>> modname = modpath_to_modname(modpath) >>> assert modname == '_ctypes' Example: >>> from ubelt.util_import import modpath_to_modname >>> modpath = '/foo/libfoobar.linux-x86_64-3.6.so' >>> modname = modpath_to_modname(modpath, check=False) >>> assert modname == 'libfoobar' """ if check and relativeto is None: if not exists(modpath): raise ValueError('modpath={} does not exist'.format(modpath)) modpath_ = abspath(expanduser(modpath)) modpath_ = normalize_modpath(modpath_, hide_init=hide_init, hide_main=hide_main) if relativeto: dpath = dirname(abspath(expanduser(relativeto))) rel_modpath = relpath(modpath_, dpath) else: dpath, rel_modpath = split_modpath(modpath_, check=check) modname = splitext(rel_modpath)[0] if '.' in modname: modname, abi_tag = modname.split('.', 1) modname = modname.replace('/', '.') modname = modname.replace('\\', '.') return modname def split_modpath(modpath, check=True): """ Splits the modpath into the dir that must be in PYTHONPATH for the module to be imported and the modulepath relative to this directory. Args: modpath (str): module filepath check (bool): if False, does not raise an error if modpath is a directory and does not contain an ``__init__.py`` file. 
Returns: Tuple[str, str]: (directory, rel_modpath) Raises: ValueError: if modpath does not exist or is not a package Example: >>> from xdoctest import static_analysis >>> from ubelt.util_import import split_modpath >>> modpath = static_analysis.__file__.replace('.pyc', '.py') >>> modpath = abspath(modpath) >>> dpath, rel_modpath = split_modpath(modpath) >>> recon = join(dpath, rel_modpath) >>> assert recon == modpath >>> assert rel_modpath == join('xdoctest', 'static_analysis.py') """ modpath_ = abspath(expanduser(modpath)) if check: if not exists(modpath_): if not exists(modpath): raise ValueError('modpath={} does not exist'.format(modpath)) raise ValueError('modpath={} is not a module'.format(modpath)) if isdir(modpath_) and not exists(join(modpath, '__init__.py')): # dirs without inits are not modules raise ValueError('modpath={} is not a module'.format(modpath)) full_dpath, fname_ext = split(modpath_) _relmod_parts = [fname_ext] # Recurse down directories until we are out of the package dpath = full_dpath while exists(join(dpath, '__init__.py')): dpath, dname = split(dpath) _relmod_parts.append(dname) relmod_parts = _relmod_parts[::-1] rel_modpath = os.path.sep.join(relmod_parts) return dpath, rel_modpath def is_modname_importable(modname, sys_path=None, exclude=None): """ Determines if a modname is importable based on your current sys.path Args: modname (str): name of module to check sys_path (list | None): if specified overrides ``sys.path`` exclude (list | None): list of directory paths. if specified prevents these directories from being searched. Returns: bool: True if the module can be imported Example: >>> from ubelt.util_import import is_modname_importable >>> is_modname_importable('xdoctest') True >>> is_modname_importable('not_a_real_module') False >>> is_modname_importable('xdoctest', sys_path=[]) False """ modpath = _syspath_modname_to_modpath(modname, sys_path=sys_path, exclude=exclude) flag = bool(modpath is not None) return flag def _static_parse(varname, fpath): """ Statically parse a constant variable from a python file Args: varname (str): variable name to extract fpath (str | PathLike): path to python file to parse Returns: Any: the static value Example: >>> import ubelt as ub >>> from ubelt.util_import import _static_parse >>> dpath = ub.Path.appdir('tests/import/staticparse').ensuredir() >>> fpath = (dpath / 'foo.py') >>> fpath.write_text('a = {1: 2}') >>> assert _static_parse('a', fpath) == {1: 2} >>> fpath.write_text('a = 2') >>> assert _static_parse('a', fpath) == 2 >>> fpath.write_text('a = "3"') >>> assert _static_parse('a', fpath) == "3" >>> fpath.write_text('a = ["3", 5, 6]') >>> assert _static_parse('a', fpath) == ["3", 5, 6] >>> fpath.write_text('a = ("3", 5, 6)') >>> assert _static_parse('a', fpath) == ("3", 5, 6) >>> fpath.write_text('b = 10' + chr(10) + 'a = None') >>> assert _static_parse('a', fpath) is None >>> import pytest >>> with pytest.raises(TypeError): >>> fpath.write_text('a = list(range(10))') >>> assert _static_parse('a', fpath) is None >>> with pytest.raises(AttributeError): >>> fpath.write_text('a = list(range(10))') >>> assert _static_parse('c', fpath) is None >>> if sys.version_info[0:2] >= (3, 6): >>> # Test with type annotations >>> fpath.write_text('b: int = 10') >>> assert _static_parse('b', fpath) == 10 """ import ast if not exists(fpath): raise ValueError('fpath={!r} does not exist'.format(fpath)) with open(fpath, 'r') as file_: sourcecode = file_.read() pt = ast.parse(sourcecode) class StaticVisitor(ast.NodeVisitor): def
visit_Assign(self, node): for target in node.targets: target_id = getattr(target, 'id', None) if target_id == varname: self.static_value = _parse_static_node_value(node.value) def visit_AnnAssign(self, node): target = node.target target_id = getattr(target, 'id', None) if target_id == varname: self.static_value = _parse_static_node_value(node.value) visitor = StaticVisitor() visitor.visit(pt) try: value = visitor.static_value except AttributeError: value = 'Unknown {}'.format(varname) raise AttributeError(value) return value def _parse_static_node_value(node): """ Extract a constant value from a node if possible """ import ast from collections import OrderedDict import numbers if (isinstance(node, ast.Constant) and isinstance(node.value, numbers.Number) if IS_PY_GE_308 else isinstance(node, ast.Num)): value = node.value if IS_PY_GE_308 else node.n elif (isinstance(node, ast.Constant) and isinstance(node.value, str) if IS_PY_GE_308 else isinstance(node, ast.Str)): value = node.value if IS_PY_GE_308 else node.s elif isinstance(node, ast.List): value = list(map(_parse_static_node_value, node.elts)) elif isinstance(node, ast.Tuple): value = tuple(map(_parse_static_node_value, node.elts)) elif isinstance(node, (ast.Dict)): keys = map(_parse_static_node_value, node.keys) values = map(_parse_static_node_value, node.values) value = OrderedDict(zip(keys, values)) # value = dict(zip(keys, values)) elif isinstance(node, (ast.NameConstant)): value = node.value else: raise TypeError('Cannot parse a static value from non-static node ' 'of type: {!r}'.format(type(node))) return value ubelt-1.3.7/ubelt/util_import.pyi000066400000000000000000000031201472470106000170370ustar00rootroot00000000000000from os import PathLike from typing import Type from types import TracebackType from types import ModuleType from typing import List from typing import Tuple IS_PY_GE_308: bool class PythonPathContext: dpath: str | PathLike index: int def __init__(self, dpath: str | PathLike, index: int = 0) -> None: ... def __enter__(self) -> None: ... def __exit__(self, ex_type: Type[BaseException] | None, ex_value: BaseException | None, ex_traceback: TracebackType | None) -> bool | None: ... def import_module_from_path(modpath: str | PathLike, index: int = ...) -> ModuleType: ... def import_module_from_name(modname: str) -> ModuleType: ... def modname_to_modpath( modname: str, hide_init: bool = True, hide_main: bool = False, sys_path: None | List[str | PathLike] = None) -> str | None: ... def normalize_modpath(modpath: str | PathLike, hide_init: bool = True, hide_main: bool = False) -> str | PathLike: ... def modpath_to_modname(modpath: str, hide_init: bool = True, hide_main: bool = False, check: bool = True, relativeto: str | None = None) -> str: ... def split_modpath(modpath: str, check: bool = True) -> Tuple[str, str]: ... def is_modname_importable(modname: str, sys_path: list | None = None, exclude: list | None = None) -> bool: ... ubelt-1.3.7/ubelt/util_indexable.py000066400000000000000000000631561472470106000173260ustar00rootroot00000000000000""" The util_indexable module defines ``IndexableWalker`` which is a powerful way to iterate through nested Python containers. RelatedWork: * [PypiDictDigger]_ References: .. [PypiDictDigger] https://pypi.org/project/dict_digger/ .. 
[PypiDeepDiff] https://pypi.org/project/deepdiff/ """ from math import isclose from collections.abc import Generator from typing import NamedTuple, Tuple, Any # from collections.abc import Iterable try: from functools import cache except ImportError: from ubelt.util_memoize import memoize as cache @cache def _lazy_numpy(): try: import numpy as np except ImportError: return None return np class Difference(NamedTuple): """ A result class of indexable_diff that organizes what the difference between the indexables is. """ path: Tuple value1: Any value2: Any class IndexableWalker(Generator): """ Traverses through a nested tree-liked indexable structure. Generates a path and value to each node in the structure. The path is a list of indexes which if applied in order will reach the value. The ``__setitem__`` method can be used to modify a nested value based on the path returned by the generator. When generating values, you can use "send" to prevent traversal of a particular branch. RelatedWork: * https://pypi.org/project/python-benedict/ - implements a dictionary subclass with similar nested indexing abilities. Attributes: data (dict | list | tuple): the wrapped indexable data dict_cls (Tuple[type]): the types that should be considered dictionary mappings for the purpose of nested iteration. Defaults to ``dict``. list_cls (Tuple[type]): the types that should be considered list-like for the purposes of nested iteration. Defaults to ``(list, tuple)``. indexable_cls (Tuple[type]): combined dict_cls and list_cls Example: >>> import ubelt as ub >>> # Given Nested Data >>> data = { >>> 'foo': {'bar': 1}, >>> 'baz': [{'biz': 3}, {'buz': [4, 5, 6]}], >>> } >>> # Create an IndexableWalker >>> walker = ub.IndexableWalker(data) >>> # We iterate over the data as if it was flat >>> # ignore the string due to order issues on older Pythons >>> # xdoctest: +IGNORE_WANT >>> for path, val in walker: >>> print(path) ['foo'] ['baz'] ['baz', 0] ['baz', 1] ['baz', 1, 'buz'] ['baz', 1, 'buz', 0] ['baz', 1, 'buz', 1] ['baz', 1, 'buz', 2] ['baz', 0, 'biz'] ['foo', 'bar'] >>> # We can use "paths" as keys to getitem into the walker >>> path = ['baz', 1, 'buz', 2] >>> val = walker[path] >>> assert val == 6 >>> # We can use "paths" as keys to setitem into the walker >>> assert data['baz'][1]['buz'][2] == 6 >>> walker[path] = 7 >>> assert data['baz'][1]['buz'][2] == 7 >>> # We can use "paths" as keys to delitem into the walker >>> assert data['baz'][1]['buz'][1] == 5 >>> del walker[['baz', 1, 'buz', 1]] >>> assert data['baz'][1]['buz'][1] == 7 Example: >>> # Create nested data >>> # xdoctest: +REQUIRES(module:numpy) >>> import numpy as np >>> import ubelt as ub >>> data = ub.ddict(lambda: int) >>> data['foo'] = ub.ddict(lambda: int) >>> data['bar'] = np.array([1, 2, 3]) >>> data['foo']['a'] = 1 >>> data['foo']['b'] = np.array([1, 2, 3]) >>> data['foo']['c'] = [1, 2, 3] >>> data['baz'] = 3 >>> print('data = {}'.format(ub.repr2(data, nl=True))) >>> # We can walk through every node in the nested tree >>> walker = ub.IndexableWalker(data) >>> for path, value in walker: >>> print('walk path = {}'.format(ub.repr2(path, nl=0))) >>> if path[-1] == 'c': >>> # Use send to prevent traversing this branch >>> got = walker.send(False) >>> # We can modify the value based on the returned path >>> walker[path] = 'changed the value of c' >>> print('data = {}'.format(ub.repr2(data, nl=True))) >>> assert data['foo']['c'] == 'changed the value of c' Example: >>> # Test sending false for every data item >>> import ubelt as ub >>> data = {1: [1, 2, 
3], 2: [1, 2, 3]} >>> walker = ub.IndexableWalker(data) >>> # Sending false means you wont traverse any further on that path >>> num_iters_v1 = 0 >>> for path, value in walker: >>> print('[v1] walk path = {}'.format(ub.repr2(path, nl=0))) >>> walker.send(False) >>> num_iters_v1 += 1 >>> num_iters_v2 = 0 >>> for path, value in walker: >>> # When we dont send false we walk all the way down >>> print('[v2] walk path = {}'.format(ub.repr2(path, nl=0))) >>> num_iters_v2 += 1 >>> assert num_iters_v1 == 2 >>> assert num_iters_v2 == 8 Example: >>> # Test numpy >>> # xdoctest: +REQUIRES(CPython) >>> # xdoctest: +REQUIRES(module:numpy) >>> import ubelt as ub >>> import numpy as np >>> # By default we don't recurse into ndarrays because they >>> # Are registered as an indexable class >>> data = {2: np.array([1, 2, 3])} >>> walker = ub.IndexableWalker(data) >>> num_iters = 0 >>> for path, value in walker: >>> print('walk path = {}'.format(ub.repr2(path, nl=0))) >>> num_iters += 1 >>> assert num_iters == 1 >>> # Currently to use top-level ndarrays, you need to extend what the >>> # list class is. This API may change in the future to be easier >>> # to work with. >>> data = np.random.rand(3, 5) >>> walker = ub.IndexableWalker(data, list_cls=(list, tuple, np.ndarray)) >>> num_iters = 0 >>> for path, value in walker: >>> print('walk path = {}'.format(ub.repr2(path, nl=0))) >>> num_iters += 1 >>> assert num_iters == 3 + 3 * 5 """ def __init__(self, data, dict_cls=(dict,), list_cls=(list, tuple)): self.data = data self.dict_cls = dict_cls self.list_cls = list_cls self.indexable_cls = self.dict_cls + self.list_cls self._walk_gen = None def __iter__(self): """ Iterates through the indexable ``self.data`` Can send a False flag to prevent a branch from being traversed Returns: Generator[Tuple[List, Any], Any, Any]: path (List): list of index operations to arrive at the value value (Any): the value at the path """ # Calling iterate multiple times will clobber the internal state self._walk_gen = self._walk() return self._walk_gen def __next__(self): """ returns next item from this generator Returns: Any """ if self._walk_gen is None: self._walk_gen = self._walk() return next(self._walk_gen) # TODO: maybe we implement a map function? def send(self, arg): """ send(arg) -> send 'arg' into generator, return next yielded value or raise StopIteration. """ # Note: this will error if called before __next__ self._walk_gen.send(arg) def throw(self, typ, val=None, tb=None): # type: ignore """ throw(typ[,val[,tb]]) -> raise exception in generator, return next yielded value or raise StopIteration. Args: typ (Any): Type of the exception. Should be a ``type[BaseException]``, type checking is not working right here. val (Optional[object]): tb (Optional[TracebackType]): Returns: Any Raises: StopIteration References: .. 
[GeneratorThrow] https://docs.python.org/3/reference/expressions.html#generator.throw """ raise StopIteration def __setitem__(self, path, value): """ Set nested value by path Args: path (List): list of indexes into the nested structure value (Any): new value """ import itertools as it d = self.data # note: slice unpack seems faster in 3.9 at least, dont change # ~/misc/tests/python/bench_unpack.py # Using islice allows path to be a list or deque key_index = len(path) - 1 prefix = it.islice(path, 0, key_index) key = path[key_index] # prefix, key = path[:-1], path[-1] # *prefix, key = path for k in prefix: d = d[k] d[key] = value def __getitem__(self, path): """ Get nested value by path Args: path (List): list of indexes into the nested structure Returns: Any: value """ import itertools as it d = self.data # Using islice allows path to be a list or deque key_index = len(path) - 1 prefix = it.islice(path, 0, key_index) key = path[key_index] # prefix, key = path[:-1], path[-1] # *prefix, key = path for k in prefix: d = d[k] return d[key] def __delitem__(self, path): """ Remove nested value by path Note: It can be dangerous to use this while iterating (because we may try to descend into a deleted location) or on leaf items that are list-like (because the indexes of all subsequent items will be modified). Args: path (List): list of indexes into the nested structure. The item at the last index will be removed. """ d = self.data prefix, key = path[:-1], path[-1] # *prefix, key = path for k in prefix: d = d[k] del d[key] def _walk(self, data=None, prefix=[]): """ Defines the underlying generator used by IndexableWalker Yields: Tuple[List, Any] | None: path (List) - a "path" through the nested data structure value (Any) - the value indexed by that "path". Can also yield None in the case that `send` is called on the generator. """ if data is None: # pragma: nobranch data = self.data stack = [(data, prefix)] while stack: _data, _prefix = stack.pop() # Create an items iterable of depending on the indexable data type if isinstance(_data, self.list_cls): items = enumerate(_data) elif isinstance(_data, self.dict_cls): items = _data.items() else: raise TypeError(type(_data)) for key, value in items: # Yield the full path to this position and its value path = _prefix + [key] message = yield path, value # If the value at this path is also indexable, then continue # the traversal, unless the False message was explicitly sent # by the caller. if message is False: # Because the `send` method will return the next value, # we yield a dummy value so we don't clobber the next # item in the traversal. yield None else: if isinstance(value, self.indexable_cls): stack.append((value, path)) def allclose(self, other, rel_tol=1e-9, abs_tol=0.0, equal_nan=False, return_info=False): """ Walks through this and another nested data structures and checks if everything is roughly the same. Args: other (IndexableWalker | List | Dict): a nested indexable item to compare against. rel_tol (float): maximum difference for being considered "close", relative to the magnitude of the input values abs_tol (float): maximum difference for being considered "close", regardless of the magnitude of the input values equal_nan (bool): if True, numpy must be available, and consider nans as equal. return_info (bool): if True, return extra info dict. Defaults to False. 
Returns: bool | Tuple[bool, Dict] : A boolean result if ``return_info`` is false, otherwise a tuple of the boolean result and an "info" dict containing detailed results indicating what matched and what did not. Example: >>> import ubelt as ub >>> items1 = ub.IndexableWalker({ >>> 'foo': [1.222222, 1.333], >>> 'bar': 1, >>> 'baz': [], >>> }) >>> items2 = ub.IndexableWalker({ >>> 'foo': [1.22222, 1.333], >>> 'bar': 1, >>> 'baz': [], >>> }) >>> flag, return_info = items1.allclose(items2, return_info=True) >>> print('return_info = {}'.format(ub.repr2(return_info, nl=1))) >>> print('flag = {!r}'.format(flag)) >>> for p1, v1, v2 in return_info['faillist']: >>> v1_ = items1[p1] >>> print('*fail p1, v1, v2 = {}, {}, {}'.format(p1, v1, v2)) >>> for p1 in return_info['passlist']: >>> v1_ = items1[p1] >>> print('*pass p1, v1_ = {}, {}'.format(p1, v1_)) >>> assert not flag >>> import ubelt as ub >>> items1 = ub.IndexableWalker({ >>> 'foo': [1.0000000000000000000000001, 1.], >>> 'bar': 1, >>> 'baz': [], >>> }) >>> items2 = ub.IndexableWalker({ >>> 'foo': [0.9999999999999999, 1.], >>> 'bar': 1, >>> 'baz': [], >>> }) >>> flag, return_info = items1.allclose(items2, return_info=True) >>> print('return_info = {}'.format(ub.repr2(return_info, nl=1))) >>> print('flag = {!r}'.format(flag)) >>> assert flag Example: >>> import ubelt as ub >>> flag, return_info = ub.IndexableWalker([]).allclose(ub.IndexableWalker([]), return_info=True) >>> print('return_info = {!r}'.format(return_info)) >>> print('flag = {!r}'.format(flag)) >>> assert flag Example: >>> import ubelt as ub >>> flag = ub.IndexableWalker([]).allclose([], return_info=False) >>> print('flag = {!r}'.format(flag)) >>> assert flag Example: >>> import ubelt as ub >>> flag, return_info = ub.IndexableWalker([]).allclose([1], return_info=True) >>> print('return_info = {!r}'.format(return_info)) >>> print('flag = {!r}'.format(flag)) >>> assert not flag Example: >>> # xdoctest: +REQUIRES(module:numpy) >>> import ubelt as ub >>> import numpy as np >>> a = np.random.rand(3, 5) >>> b = a + 1 >>> wa = ub.IndexableWalker(a, list_cls=(np.ndarray,)) >>> wb = ub.IndexableWalker(b, list_cls=(np.ndarray,)) >>> flag, return_info = wa.allclose(wb, return_info=True) >>> print('return_info = {!r}'.format(return_info)) >>> print('flag = {!r}'.format(flag)) >>> assert not flag >>> a = np.random.rand(3, 5) >>> b = a.copy() + 1e-17 >>> wa = ub.IndexableWalker([a], list_cls=(np.ndarray, list)) >>> wb = ub.IndexableWalker([b], list_cls=(np.ndarray, list)) >>> flag, return_info = wa.allclose(wb, return_info=True) >>> assert flag >>> print('return_info = {!r}'.format(return_info)) >>> print('flag = {!r}'.format(flag)) """ walker1 = self if isinstance(other, IndexableWalker): walker2 = other else: walker2 = IndexableWalker(other, dict_cls=self.dict_cls, list_cls=self.list_cls) _isclose_fn, _iskw = _make_isclose_fn(rel_tol, abs_tol, equal_nan) flat_items1 = [ (path, value) for path, value in walker1 if not isinstance(value, walker1.indexable_cls) or len(value) == 0] flat_items2 = [ (path, value) for path, value in walker2 if not isinstance(value, walker1.indexable_cls) or len(value) == 0] flat_items1 = sorted(flat_items1) flat_items2 = sorted(flat_items2) if len(flat_items1) != len(flat_items2): info = { 'faillist': ['length mismatch'] } final_flag = False else: passlist = [] faillist = [] for t1, t2 in zip(flat_items1, flat_items2): p1, v1 = t1 p2, v2 = t2 assert p1 == p2, 'paths to the nested items should be the same' # TODO: Could add a numpy optimization here. 
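# Exact equality handles non-float values; for a pair of floats we fall # back to the tolerance check. For example, with the default rel_tol=1e-9, # _isclose_fn(1.0, 1.0 + 1e-12) is True while _isclose_fn(1.0, 1.001) is not.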
flag = (v1 == v2) or ( isinstance(v1, float) and isinstance(v2, float) and _isclose_fn(v1, v2, **_iskw) ) if flag: passlist.append(p1) else: faillist.append((p1, v1, v2)) final_flag = len(faillist) == 0 info = { 'passlist': passlist, 'faillist': faillist, } if return_info: info.update({ 'walker1': walker1, 'walker2': walker2, }) return final_flag, info else: return final_flag def diff(self, other, rel_tol=1e-9, abs_tol=0.0, equal_nan=False): """ Walks through two nested data structures finds differences in the structures. Args: other (IndexableWalker | List | Dict): a nested indexable item to compare against. rel_tol (float): maximum difference for being considered "close", relative to the magnitude of the input values abs_tol (float): maximum difference for being considered "close", regardless of the magnitude of the input values equal_nan (bool): if True, numpy must be available, and consider nans as equal. Returns: dict: information about the diff with "similarity": a score between 0 and 1 "num_differences" being the number of paths not common plus the number of common paths with differing values. "unique1": being the paths that were unique to self "unique2": being the paths that were unique to other "faillist": a list 3-tuples of common path and differing values "num_approximations": is the number of approximately equal items (i.e. floats) there were Example: >>> import ubelt as ub >>> dct1 = { >>> 'foo': [1.222222, 1.333], >>> 'bar': 1, >>> 'baz': [], >>> 'top': [1, 2, 3], >>> 'L0': {'L1': {'L2': {'K1': 'V1', 'K2': 'V2', 'D1': 1, 'D2': 2}}}, >>> } >>> dct2 = { >>> 'foo': [1.22222, 1.333], >>> 'bar': 1, >>> 'baz': [], >>> 'buz': {1: 2}, >>> 'top': [1, 1, 2], >>> 'L0': {'L1': {'L2': {'K1': 'V1', 'K2': 'V2', 'D1': 10, 'D2': 20}}}, >>> } >>> info = ub.IndexableWalker(dct1).diff(dct2) >>> print(f'info = {ub.urepr(info, nl=2)}') Example: >>> # xdoctest: +REQUIRES(module:numpy) >>> import ubelt as ub >>> import numpy as np >>> a = np.random.rand(3, 5) >>> b = a + 1 >>> wa = ub.IndexableWalker(a, list_cls=(np.ndarray,)) >>> wb = ub.IndexableWalker(b, list_cls=(np.ndarray,)) >>> info = wa.diff(wb) >>> print(f'info = {ub.urepr(info, nl=2)}') >>> a = np.random.rand(3, 5) >>> b = a.copy() + 1e-17 >>> wa = ub.IndexableWalker([a], list_cls=(np.ndarray, list)) >>> wb = ub.IndexableWalker([b], list_cls=(np.ndarray, list)) >>> info = wa.diff(wb) >>> print(f'info = {ub.urepr(info, nl=2)}') Example: >>> import ubelt as ub >>> # test null similarity >>> wa = ub.IndexableWalker({}).diff({}) >>> assert wa['similarity'] == 1.0 """ walker1 = self if isinstance(other, IndexableWalker): walker2 = other else: walker2 = IndexableWalker(other, dict_cls=self.dict_cls, list_cls=self.list_cls) # TODO: numpy optimizations flat_items1 = { tuple(path): value for path, value in walker1 if not isinstance(value, walker1.indexable_cls) or len(value) == 0} flat_items2 = { tuple(path): value for path, value in walker2 if not isinstance(value, walker1.indexable_cls) or len(value) == 0} common = flat_items1.keys() & flat_items2.keys() unique1 = flat_items1.keys() - flat_items2.keys() unique2 = flat_items2.keys() - flat_items1.keys() num_approximations = 0 _isclose_fn, _iskw = _make_isclose_fn(rel_tol, abs_tol, equal_nan) faillist = [] passlist = [] for key in common: v1 = flat_items1[key] v2 = flat_items2[key] flag = (v1 == v2) if not flag: flag = ( isinstance(v1, float) and isinstance(v2, float) and _isclose_fn(v1, v2, **_iskw) ) num_approximations += flag if flag: passlist.append(key) else: faillist.append(Difference(key, 
v1, v2)) num_differences = len(unique1) + len(unique2) + len(faillist) num_similarities = len(passlist) if num_similarities == 0 and num_differences == 0: similarity = 1.0 else: similarity = num_similarities / (num_similarities + num_differences) info = { 'similarity': similarity, 'num_approximations': num_approximations, 'num_differences': num_differences, 'num_similarities': num_similarities, 'unique1': unique1, 'unique2': unique2, 'faillist': faillist, 'passlist': passlist, } return info def _make_isclose_fn(rel_tol, abs_tol, equal_nan): np = _lazy_numpy() if np is None: _isclose_fn = isclose _iskw = dict(rel_tol=rel_tol, abs_tol=abs_tol) if equal_nan: raise NotImplementedError('requires numpy') else: _isclose_fn = np.isclose _iskw = dict(rtol=rel_tol, atol=abs_tol, equal_nan=equal_nan) return _isclose_fn, _iskw def indexable_allclose(items1, items2, rel_tol=1e-9, abs_tol=0.0, return_info=False): """ Walks through two nested data structures and ensures that everything is roughly the same. NOTE: Deprecated. Instead use: ub.IndexableWalker(items1).allclose(items2) Args: items1 (dict | list | tuple): a nested indexable item items2 (dict | list | tuple): a nested indexable item rel_tol (float): maximum difference for being considered "close", relative to the magnitude of the input values abs_tol (float): maximum difference for being considered "close", regardless of the magnitude of the input values return_info (bool): if True, return extra info. Defaults to False. Returns: bool | Tuple[bool, Dict] : A boolean result if ``return_info`` is false, otherwise a tuple of the boolean result and an "info" dict containing detailed results indicating what matched and what did not. Example: >>> import ubelt as ub >>> items1 = { >>> 'foo': [1.222222, 1.333], >>> 'bar': 1, >>> 'baz': [], >>> } >>> items2 = { >>> 'foo': [1.22222, 1.333], >>> 'bar': 1, >>> 'baz': [], >>> } >>> flag, return_info = ub.indexable_allclose(items1, items2, return_info=True) >>> print('return_info = {}'.format(ub.repr2(return_info, nl=1))) >>> print('flag = {!r}'.format(flag)) """ from ubelt.util_deprecate import schedule_deprecation schedule_deprecation( 'ubelt', 'indexable_allclose', 'function', migration=( 'Use `ub.IndexableWalker(items1).allclose(items2)` instead' )) walker1 = IndexableWalker(items1) walker2 = IndexableWalker(items2) return walker1.allclose(walker2, rel_tol=rel_tol, abs_tol=abs_tol, return_info=return_info) # Nested = IndexableWalker # class Indexable(IndexableWalker): # """ # In the future IndexableWalker may simply change to Indexable or maybe Nested # """ # ... ubelt-1.3.7/ubelt/util_indexable.pyi000066400000000000000000000026751472470106000174760ustar00rootroot00000000000000from typing import Tuple from typing import List from typing import Any from typing import Optional from types import TracebackType from typing import Dict from collections.abc import Generator class IndexableWalker(Generator): data: dict | list | tuple dict_cls: Tuple[type] list_cls: Tuple[type] indexable_cls: Tuple[type] def __init__(self, data, dict_cls=..., list_cls=...) -> None: ... def __iter__(self) -> Generator[Tuple[List, Any], Any, Any]: ... def __next__(self) -> Any: ... def send(self, arg) -> None: ... def throw(self, typ: Any, val: Optional[object] = None, tb: Optional[TracebackType] = None) -> Any: ... def __setitem__(self, path: List, value: Any) -> None: ... def __getitem__(self, path: List) -> Any: ... def __delitem__(self, path: List) -> None: ... 
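# Note: the ``path`` lists accepted by __setitem__/__getitem__/__delitem__ # above are the same index paths yielded when iterating an IndexableWalker, # e.g. ['baz', 1, 'buz', 2].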
def allclose(self, other: IndexableWalker | List | Dict, rel_tol: float = 1e-09, abs_tol: float = 0.0, return_info: bool = False) -> bool | Tuple[bool, Dict]: ... def indexable_allclose(items1: dict | list | tuple, items2: dict | list | tuple, rel_tol: float = 1e-09, abs_tol: float = 0.0, return_info: bool = False) -> bool | Tuple[bool, Dict]: ... ubelt-1.3.7/ubelt/util_io.py000066400000000000000000000243211472470106000157710ustar00rootroot00000000000000""" Functions for reading and writing files on disk. :func:`writeto` and :func:`readfrom` wrap ``open().write()`` and ``open().read()`` and primarily serve to indicate that the type of data being written and read is unicode text. :func:`delete` wraps :func:`os.unlink` and :func:`shutil.rmtree` and does not throw an error if the file or directory does not exist. It also contains workarounds for win32 issues with :mod:`shutil`. """ import sys import os from os.path import exists __all__ = [ 'readfrom', 'writeto', 'touch', 'delete', ] def writeto(fpath, to_write, aslines=False, verbose=None): r""" Writes (utf8) text to a file. Args: fpath (str | PathLike): file path to_write (str): text to write (must be unicode text) aslines (bool): if True to_write is assumed to be a list of lines verbose (int | None): verbosity flag Note: In CPython you may want to use ``open().write()`` instead. This function exists as a convenience for writing in Python2. After 2020-01-01, we may consider deprecating the function. NOTE: In PyPy ``open().write()`` does not work. See `https://pypy.org/compat.html`. This is an argument for keeping this function. NOTE: With modern versions of Python, it is generally recommend to use :func:`pathlib.Path.write_text` instead. Although there does seem to be some corner case this handles better on win32, so maybe useful? Example: >>> import ubelt as ub >>> import os >>> from os.path import exists >>> dpath = ub.Path.appdir('ubelt').ensuredir() >>> fpath = dpath + '/' + 'testwrite.txt' >>> if exists(fpath): >>> os.remove(fpath) >>> to_write = 'utf-8 symbols Δ, Й, ק, م, ๗, あ, 叶, 葉, and 말.' >>> ub.writeto(fpath, to_write) >>> read_ = ub.readfrom(fpath) >>> print('read_ = ' + read_) >>> print('to_write = ' + to_write) >>> assert read_ == to_write Example: >>> import ubelt as ub >>> import os >>> from os.path import exists >>> dpath = ub.Path.appdir('ubelt').ensuredir() >>> fpath = dpath + '/' + 'testwrite2.txt' >>> if exists(fpath): >>> os.remove(fpath) >>> to_write = ['a\n', 'b\n', 'c\n', 'd\n'] >>> ub.writeto(fpath, to_write, aslines=True) >>> read_ = ub.readfrom(fpath, aslines=True) >>> print('read_ = {}'.format(read_)) >>> print('to_write = {}'.format(to_write)) >>> assert read_ == to_write Example: >>> # With modern Python, use pathlib.Path (or ub.Path) instead >>> import ubelt as ub >>> dpath = ub.Path.appdir('ubelt/tests/io').ensuredir() >>> fpath = (dpath / 'test_file.txt').delete() >>> to_write = 'utf-8 symbols Δ, Й, ק, م, ๗, あ, 叶, 葉, and 말.' 
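>>> # The deprecated writeto call and the explicit bytes-based call below >>> # are equivalent ways to write this text (writeto encodes to utf8 >>> # internally before writing).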
>>> ub.writeto(fpath, to_write) >>> fpath.write_bytes(to_write.encode('utf8')) >>> assert fpath.read_bytes().decode('utf8') == to_write """ if verbose: print('Writing to text file: %r ' % (fpath,)) from ubelt import schedule_deprecation schedule_deprecation( modname='ubelt', name='writeto', type='function', migration='use ubelt.Path(...).write_text() instead', deprecate='1.2.0', error='2.0.0', remove='2.1.0') with open(fpath, 'wb') as file: if aslines: to_write = map(_ensure_bytes, to_write) file.writelines(to_write) else: # convert to bytes for writing bytes = _ensure_bytes(to_write) file.write(bytes) def _ensure_bytes(text): """ ensures text is in a suitable format for writing """ return text.encode('utf8') def readfrom(fpath, aslines=False, errors='replace', verbose=None): """ Reads (utf8) text from a file. Note: You probably should use ``ub.Path().read_text()`` instead. This function exists as a convenience for writing in Python2. After 2020-01-01, we may consider deprecating the function. Args: fpath (str | PathLike): file path aslines (bool): if True returns list of lines errors (str): how to handle decoding errors verbose (int | None): verbosity flag Returns: str: text from fpath (this is unicode) """ if verbose: print('Reading text file: %r ' % (fpath,)) if not exists(fpath): raise IOError('File %r does not exist' % (fpath,)) from ubelt import schedule_deprecation schedule_deprecation( modname='ubelt', name='readfrom', type='function', migration='use ubelt.Path(...).read_text() instead', deprecate='1.2.0', error='2.0.0', remove='2.1.0') with open(fpath, 'rb') as file: if aslines: text = [line.decode('utf8', errors=errors) for line in file.readlines()] if sys.platform.startswith('win32'): # nocover # fix line endings on windows text = [ line[:-2] + '\n' if line.endswith('\r\n') else line for line in text ] else: text = file.read().decode('utf8', errors=errors) return text def touch(fpath, mode=0o666, dir_fd=None, verbose=0, **kwargs): """ Change file timestamps. Works like the touch unix utility. Args: fpath (str | PathLike): name of the file mode (int): file permissions (python3 and unix only) dir_fd (io.IOBase | None): optional directory file descriptor. If specified, fpath is interpreted as relative to this descriptor (python 3 only). verbose (int): verbosity **kwargs : extra args passed to :func:`os.utime` (python 3 only). Returns: str: path to the file References: .. [SO_1158076] https://stackoverflow.com/questions/1158076/implement-touch-using-python Example: >>> import ubelt as ub >>> from os.path import join >>> dpath = ub.Path.appdir('ubelt').ensuredir() >>> fpath = join(dpath, 'touch_file') >>> assert not exists(fpath) >>> ub.touch(fpath) >>> assert exists(fpath) >>> os.unlink(fpath) """ if verbose: print('Touching file {}'.format(fpath)) flags = os.O_CREAT | os.O_APPEND with os.fdopen(os.open(fpath, flags=flags, mode=mode, dir_fd=dir_fd)) as f: os.utime(f.fileno() if os.utime in os.supports_fd else fpath, dir_fd=None if os.supports_fd else dir_fd, **kwargs) return fpath def delete(path, verbose=False): """ Removes a file or recursively removes a directory. If a path does not exist, then this does nothing. Args: path (str | PathLike): file or directory to remove verbose (bool): if True prints what is being done SeeAlso: `send2trash <https://pypi.org/project/Send2Trash/>`_ - A cross-platform Python package for sending files to the trash instead of irreversibly deleting them.
:func:`ubelt.util_path.Path.delete` Notes: This can call :func:`os.unlink`, :func:`os.rmdir`, or :func:`shutil.rmtree`, depending on what ``path`` references on the filesystem. (On Windows this may also call a custom :func:`ubelt._win32_links._win32_rmtree`). Example: >>> import ubelt as ub >>> from os.path import join >>> base = ub.Path.appdir('ubelt', 'delete_test').ensuredir() >>> dpath1 = ub.ensuredir(join(base, 'dir')) >>> ub.ensuredir(join(base, 'dir', 'subdir')) >>> ub.touch(join(base, 'dir', 'to_remove1.txt')) >>> fpath1 = join(base, 'dir', 'subdir', 'to_remove3.txt') >>> fpath2 = join(base, 'dir', 'subdir', 'to_remove2.txt') >>> ub.touch(fpath1) >>> ub.touch(fpath2) >>> assert all(map(exists, (dpath1, fpath1, fpath2))) >>> ub.delete(fpath1) >>> assert all(map(exists, (dpath1, fpath2))) >>> assert not exists(fpath1) >>> ub.delete(dpath1) >>> assert not any(map(exists, (dpath1, fpath1, fpath2))) Example: >>> import ubelt as ub >>> from os.path import exists, join >>> dpath = ub.Path.appdir('ubelt', 'delete_test2').ensuredir() >>> dpath1 = ub.ensuredir(join(dpath, 'dir')) >>> fpath1 = ub.touch(join(dpath1, 'to_remove.txt')) >>> assert exists(fpath1) >>> ub.delete(dpath) >>> assert not exists(fpath1) """ if not os.path.exists(path): # the path does not exist, but it may still be a broken link if os.path.islink(path): if verbose: # nocover print('Deleting broken link="{}"'.format(path)) os.unlink(path) elif os.path.isdir(path): # nocover # Only on windows will a file be a directory and not exist if verbose: print('Deleting broken directory link="{}"'.format(path)) os.rmdir(path) elif os.path.isfile(path): # nocover # This is a windows only case if verbose: print('Deleting broken file link="{}"'.format(path)) os.unlink(path) else: if verbose: # nocover print('Not deleting non-existent path="{}"'.format(path)) else: if os.path.islink(path): if verbose: # nocover print('Deleting symbolic link="{}"'.format(path)) os.unlink(path) elif os.path.isfile(path): if verbose: # nocover print('Deleting file="{}"'.format(path)) os.unlink(path) elif os.path.isdir(path): if verbose: # nocover print('Deleting directory="{}"'.format(path)) if sys.platform.startswith('win32') and sys.version_info[0:2] < (3, 8): # nocover # Workaround bug that prevents shutil from working if # the directory contains junctions # https://bugs.python.org/issue36621 from ubelt import _win32_links _win32_links._win32_rmtree(path, verbose=verbose) else: import shutil shutil.rmtree(path) ubelt-1.3.7/ubelt/util_io.pyi000066400000000000000000000011331472470106000161360ustar00rootroot00000000000000from os import PathLike import io def writeto(fpath: str | PathLike, to_write: str, aslines: bool = False, verbose: int | None = None) -> None: ... def readfrom(fpath: str | PathLike, aslines: bool = False, errors: str = 'replace', verbose: int | None = None) -> str: ... def touch(fpath: str | PathLike, mode: int = 438, dir_fd: io.IOBase | None = None, verbose: int = 0, **kwargs) -> str: ... def delete(path: str | PathLike, verbose: bool = False) -> None: ... ubelt-1.3.7/ubelt/util_links.py000066400000000000000000000353071472470106000165100ustar00rootroot00000000000000r""" Cross-platform logic for dealing with symlinks. Basic functionality should work on all operating systems including everyone's favorite pathological OS (note that there is an additional helper file for this case), but there are some corner cases depending on your version. Recent versions of Windows tend to work, but there are certain system settings that cause issues.
Any POSIX system works without difficulty. Example: >>> import pytest >>> import ubelt as ub >>> if ub.WIN32: >>> pytest.skip() # hack for windows for now. Todo cleaner xdoctest conditional >>> import ubelt as ub >>> from os.path import normpath, join >>> dpath = ub.Path.appdir('ubelt', normpath('demo/symlink')).ensuredir() >>> real_path = dpath / 'real_file.txt' >>> link_path = dpath / 'link_file.txt' >>> ub.touch(real_path) >>> result = ub.symlink(real_path, link_path, overwrite=True, verbose=3) >>> parts = result.split(os.path.sep) >>> print(parts[-1]) link_file.txt """ from os.path import exists, islink, join, normpath import os import sys import warnings from ubelt import util_io from ubelt import util_platform __all__ = ['symlink'] if sys.platform.startswith('win32'): # nocover from ubelt import _win32_links else: _win32_links = None def symlink(real_path, link_path, overwrite=False, verbose=0): """ Create a link ``link_path`` that mirrors ``real_path``. This function attempts to create a real symlink, but will fall back on a hard link or junction if symlinks are not supported. Args: real_path (str | PathLike): path to real file or directory link_path (str | PathLike): path to desired location for symlink overwrite (bool): overwrite existing symlinks. This will not overwrite real files on systems with proper symlinks. However, on older versions of windows junctions are indistinguishable from real files, so we cannot make this guarantee. Defaults to False. verbose (int): verbosity level. Defaults to 0. Returns: str | PathLike: link path Note: In the future we may rework and rename this function to something like ``link``, ``pathlink``, ``fslink``, etc... to indicate that it may perform multiple types of links. We may also allow the user to specify which type of link (e.g. symlink, hardlink, reflink, junction) they would like to use. Note: On systems that do not support symlinks (e.g. some versions / configurations of Windows), this function will fall back on hard links or junctions [WikiNTFSLinks]_, [WikiHardLink]_. The differences between the two are explained in [WikiSymLink]_. If symlinks are not available, then ``link_path`` and ``real_path`` must exist on the same filesystem. Given that, this function always works in the sense that (1) ``link_path`` will mirror the data from ``real_path``, (2) updates to one will affect the other, and (3) no extra space will be used. More details can be found in :mod:`ubelt._win32_links`. On systems that support symlinks (e.g. Linux), none of the above applies. Note: This function may contain a bug when creating a relative link References: .. [WikiSymLink] https://en.wikipedia.org/wiki/Symbolic_link .. [WikiHardLink] https://en.wikipedia.org/wiki/Hard_link .. [WikiNTFSLinks] https://en.wikipedia.org/wiki/NTFS_links Example: >>> import pytest >>> import ubelt as ub >>> if ub.WIN32: >>> pytest.skip() # hack for windows for now. Todo cleaner xdoctest conditional >>> import ubelt as ub >>> dpath = ub.Path.appdir('ubelt', 'test_symlink0').delete().ensuredir() >>> real_path = (dpath / 'real_file.txt') >>> link_path = (dpath / 'link_file.txt') >>> real_path.write_text('foo') >>> result = ub.symlink(real_path, link_path) >>> assert ub.Path(result).read_text() == 'foo' >>> dpath.delete() # cleanup Example: >>> import pytest >>> import ubelt as ub >>> if ub.WIN32: >>> pytest.skip() # hack for windows for now.
Todo cleaner xdoctest conditional >>> import ubelt as ub >>> from ubelt.util_links import _dirstats >>> dpath = ub.Path.appdir('ubelt', 'test_symlink1').delete().ensuredir() >>> _dirstats(dpath) >>> real_dpath = (dpath / 'real_dpath').ensuredir() >>> link_dpath = real_dpath.augment(stem='link_dpath') >>> real_path = (real_dpath / 'afile.txt') >>> link_path = (link_dpath / 'afile.txt') >>> real_path.write_text('foo') >>> result = ub.symlink(real_dpath, link_dpath) >>> assert link_path.read_text() == 'foo', 'read should be same' >>> link_path.write_text('bar') >>> _dirstats(dpath) >>> assert link_path.read_text() == 'bar', 'very bad bar' >>> assert real_path.read_text() == 'bar', 'changing link did not change real' >>> real_path.write_text('baz') >>> _dirstats(dpath) >>> assert real_path.read_text() == 'baz', 'very bad baz' >>> assert link_path.read_text() == 'baz', 'changing real did not change link' >>> ub.delete(link_dpath, verbose=1) >>> _dirstats(dpath) >>> assert not link_dpath.exists(), 'link should not exist' >>> assert real_path.exists(), 'real path should exist' >>> _dirstats(dpath) >>> ub.delete(dpath, verbose=1) >>> _dirstats(dpath) >>> assert not real_path.exists() Example: >>> import pytest >>> import ubelt as ub >>> if ub.WIN32: >>> pytest.skip() # hack for windows for now. Todo cleaner xdoctest conditional >>> # Specifying bad paths should error. >>> import ubelt as ub >>> import pytest >>> dpath = ub.Path.appdir('ubelt', 'test_symlink2').ensuredir() >>> real_path = dpath / 'real_file.txt' >>> link_path = dpath / 'link_file.txt' >>> real_path.write_text('foo') >>> with pytest.raises(ValueError, match='link_path .* cannot be empty'): >>> ub.symlink(real_path, '') >>> with pytest.raises(ValueError, match='real_path .* cannot be empty'): >>> ub.symlink('', link_path) """ if not real_path: raise ValueError('The real_path argument cannot be empty') if not link_path: raise ValueError('The link_path argument cannot be empty') path = normpath(real_path) link = normpath(link_path) if not os.path.isabs(path): # if path is not absolute it must be specified relative to link if not _can_symlink(): # nocover # On windows, we need to use absolute paths path = os.path.abspath(path) else: # FIXME: This behavior seems like it might be wrong. path = os.path.relpath(path, os.path.dirname(link)) # abs_path = join(os.path.dirname(link), path) ... if verbose: print('Symlink: {link} -> {path}'.format(path=path, link=link)) if islink(link): if verbose: print('... already exists') pointed = _readlink(link) if pointed == path: if verbose > 1: print('... and points to the right place') return link if verbose > 1: if not exists(link): print('... but it is broken and points somewhere else: {}'.format(pointed)) else: # TODO: if we fix the relative symlink bug, this text might be better # import pathlib # abs_path = join(os.path.dirname(link), path) # resolved_path = pathlib.Path(abs_path).resolve() # resolved_pointed = (pathlib.Path(link).parent / pointed).resolve() # if resolved_path == resolved_pointed: # print('... and it resolves to the right location') # print('... but the pointer is different: {}'.format(pointed)) # else: print('... but it points somewhere else: {}'.format(pointed)) if overwrite: util_io.delete(link, verbose=verbose > 1) elif exists(link): if _win32_links is None: if verbose: print('... already exists, but its a file. This will error.') raise FileExistsError( 'cannot overwrite a physical path: "{}"'.format(path)) else: # nocover if verbose: print('...
already exists, and is either a file or hard link. ' 'Assuming it is a hard link. ' 'On non-win32 systems this would error.') if _win32_links is None: os.symlink(path, link) else: # nocover _win32_links._symlink(path, link, overwrite=overwrite, verbose=verbose) return link def _readlink(link): # Note: # https://docs.python.org/3/library/os.html#os.readlink # os.readlink was changed on win32 in version 3.8: Added support for # directory junctions, and changed to return the substitution path (which # typically includes \\?\ prefix) rather than the optional “print name” # field that was previously returned. if _win32_links: # nocover if _win32_links._win32_is_junction(link): import platform if platform.python_implementation() == 'PyPy': # On PyPy this test can have a false positive # for what should be a regular link. path = os.readlink(link) junction_prefix = '\\\\?\\' if path.startswith(junction_prefix): path = path[len(junction_prefix):] return path return _win32_links._win32_read_junction(link) try: path = os.readlink(link) if util_platform.WIN32: # nocover junction_prefix = '\\\\?\\' if path.startswith(junction_prefix): path = path[len(junction_prefix):] return path except Exception: # nocover # On modern operating systems, we should never get here. (I think) if exists(link): warnings.warn('Reading symlinks seems to not be supported') raise def _can_symlink(verbose=0): # nocover """ Return true if we have permission to create real symlinks. This check always returns True on non-win32 systems. If this check returns false, then we still may be able to use junctions. """ if _win32_links is not None: return _win32_links._win32_can_symlink(verbose) else: return True def _dirstats(dpath=None): # nocover """ Testing helper for printing directory information (mostly for investigating windows weirdness) The column prefixes stand for: (E - exists), (L - islink), (F - isfile), (D - isdir), (J - isjunction) Example: >>> from ubelt.util_links import _dirstats >>> _dirstats('.') """ from ubelt import util_colors if dpath is None: dpath = os.getcwd() print('+--------------') print('Listing for dpath={}'.format(dpath)) print('E L F D J - path') print('+--------------') if not os.path.exists(dpath): print('... does not exist') else: paths = sorted(os.listdir(dpath)) for path in paths: full_path = join(dpath, path) E = os.path.exists(full_path) L = os.path.islink(full_path) F = os.path.isfile(full_path) D = os.path.isdir(full_path) J = util_platform.WIN32 and _win32_links._win32_is_junction(full_path) ELFDJ = [E, L, F, D, J] if ELFDJ == [1, 0, 0, 1, 0]: # A directory path = util_colors.color_text(path, 'green') elif ELFDJ == [1, 0, 1, 0, 0]: # A file (or a hard link, they're indistinguishable with 1 query) path = util_colors.color_text(path, 'white') elif ELFDJ == [1, 0, 0, 1, 1]: # A directory junction path = util_colors.color_text(path, 'yellow') elif ELFDJ == [1, 1, 1, 0, 0]: # A file link path = util_colors.color_text(path, 'brightgreen') elif ELFDJ == [1, 1, 0, 1, 0]: # A directory link path = util_colors.color_text(path, 'brightcyan') elif ELFDJ == [0, 1, 0, 0, 0]: # A broken file link path = util_colors.color_text(path, 'red') elif ELFDJ == [0, 1, 0, 1, 0]: # A broken directory link path = util_colors.color_text(path, 'darkred') elif ELFDJ == [0, 0, 0, 1, 1]: # A broken directory junction path = util_colors.color_text(path, 'purple') elif ELFDJ == [1, 0, 1, 0, 1]: # A file junction? That's not good. # I guess this is a windows 7 thing? 
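# i.e. (E, L, F, D, J) == (1, 0, 1, 0, 1): the entry registers as a # plain file and as a junction at the same time.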
path = util_colors.color_text(path, 'red') elif ELFDJ == [1, 1, 0, 0, 0]: # Windows? Why? What does this mean!? # A directory link that can't be resolved? path = util_colors.color_text(path, 'red') elif ELFDJ == [0, 0, 0, 0, 0]: # Windows? AGAIN? HOW DO YOU LIST FILES THAT DONT EXIST? # I get it, they are probably broken junctions, but come on. # That should probably be 00011 not 00000 path = util_colors.color_text(path, 'red') elif ELFDJ == [1, 1, 0, 1, 1]: # Agg, on windows pypy, it looks like junctions and links are # harder to distinguish. See # https://github.com/pypy/pypy/issues/4976 path = util_colors.color_text(path, 'red') elif ELFDJ == [1, 1, 1, 0, 1]: # Again? on windows pypy, it's a link/file/junction... what? path = util_colors.color_text(path, 'red') else: print('dpath = {!r}'.format(dpath)) print('path = {!r}'.format(path)) raise AssertionError(str(ELFDJ) + str(path)) line = '{E:d} {L:d} {F:d} {D:d} {J:d} - {path}'.format(**locals()) if os.path.islink(full_path): # line += ' -> ' + os.readlink(full_path) line += ' -> ' + _readlink(full_path) elif _win32_links is not None: if _win32_links._win32_is_junction(full_path): resolved = _win32_links._win32_read_junction(full_path) line += ' => ' + resolved print(line) print('+--------------') ubelt-1.3.7/ubelt/util_links.pyi000066400000000000000000000003061472470106000166500ustar00rootroot00000000000000from os import PathLike def symlink(real_path: str | PathLike, link_path: str | PathLike, overwrite: bool = False, verbose: int = 0) -> str | PathLike: ... ubelt-1.3.7/ubelt/util_list.py000066400000000000000000001015541472470106000163410ustar00rootroot00000000000000""" Utility functions for manipulating iterables, lists, and sequences. The :func:`chunks` function splits a list into smaller parts. There are different strategies for how to do this. The :func:`flatten` function takes a list of lists and removes the inner lists. This only removes one level of nesting. The :func:`iterable` function checks if an object is iterable or not. Similar to the :func:`callable` builtin function. The :func:`argmax`, :func:`argmin`, and :func:`argsort` work similarly to the analogous :mod:`numpy` functions, except they operate on dictionaries and other Python builtin types. The :func:`take` and :func:`compress` are generators, and also similar to their lesser-known, but very useful numpy equivalents. There are also other numpy inspired functions: :func:`unique`, :func:`argunique`, :func:`unique_flags`, and :func:`boolmask`. """ import itertools as it import math import operator from collections import abc as collections_abc from itertools import zip_longest from ubelt import util_const from ubelt import util_dict __all__ = [ 'allsame', 'argmax', 'argmin', 'argsort', 'argunique', 'boolmask', 'chunks', 'compress', 'flatten', 'iter_window', 'iterable', 'peek', 'take', 'unique', 'unique_flags', ] class chunks(object): """ Generates successive n-sized chunks from ``items``. If the last chunk has less than n elements, ``bordermode`` is used to determine fill values. Note: FIXME: When nchunks is given, that's how many chunks we should get, but the issue is that chunksize is not well defined in that instance. For instance, how do we turn a list with 4 elements into 3 chunks? Where does the extra item go? In ubelt <= 0.10.3 there is a bug when specifying nchunks, where it chooses a chunksize that is too large. Specify ``legacy=True`` to get the old buggy behavior if needed.
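As a concrete illustration of the current (non-legacy) behavior, derived from the ``_new_iterator`` method defined later in this class (this is an observation of the implementation, not a documented guarantee): leftover items are absorbed one-per-chunk by the leading chunks. >>> import ubelt as ub >>> list(map(len, ub.chunks(range(7), nchunks=3))) [3, 2, 2]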
Notes: This is similar to functionality provided by :func:`more_itertools.chunked`, :func:`more_itertools.chunked_even`, :func:`more_itertools.sliced`, :func:`more_itertools.divide`, Yields: List[T]: subsequent non-overlapping chunks of the input items Attributes: remainder (int): number of leftover items that don't divide cleanly References: .. [SO_434287] http://stackoverflow.com/questions/434287/iterate-over-a-list-in-chunks Example: >>> import ubelt as ub >>> items = '1234567' >>> genresult = ub.chunks(items, chunksize=3) >>> list(genresult) [['1', '2', '3'], ['4', '5', '6'], ['7']] Example: >>> import ubelt as ub >>> items = [1, 2, 3, 4, 5, 6, 7] >>> genresult = ub.chunks(items, chunksize=3, bordermode='none') >>> assert list(genresult) == [[1, 2, 3], [4, 5, 6], [7]] >>> genresult = ub.chunks(items, chunksize=3, bordermode='cycle') >>> assert list(genresult) == [[1, 2, 3], [4, 5, 6], [7, 1, 2]] >>> genresult = ub.chunks(items, chunksize=3, bordermode='replicate') >>> assert list(genresult) == [[1, 2, 3], [4, 5, 6], [7, 7, 7]] Example: >>> import ubelt as ub >>> assert len(list(ub.chunks(range(2), nchunks=2))) == 2 >>> assert len(list(ub.chunks(range(3), nchunks=2))) == 2 >>> # Note: ub.chunks will not do the 2,1,1 split >>> assert len(list(ub.chunks(range(4), nchunks=3))) == 3 >>> assert len(list(ub.chunks([], 2, bordermode='none'))) == 0 >>> assert len(list(ub.chunks([], 2, bordermode='cycle'))) == 0 >>> assert len(list(ub.chunks([], 2, None, bordermode='replicate'))) == 0 Example: >>> from ubelt.util_list import * # NOQA >>> def _check_len(self): ... assert len(self) == len(list(self)) >>> _check_len(chunks(list(range(3)), nchunks=2)) >>> _check_len(chunks(list(range(2)), nchunks=2)) >>> _check_len(chunks(list(range(2)), nchunks=3)) Example: >>> from ubelt.util_list import * # NOQA >>> import pytest >>> assert pytest.raises(ValueError, chunks, range(9)) >>> assert pytest.raises(ValueError, chunks, range(9), chunksize=2, nchunks=2) >>> assert pytest.raises(TypeError, len, chunks((_ for _ in range(2)), 2)) Example: >>> from ubelt.util_list import * # NOQA >>> import ubelt as ub >>> basis = { >>> 'legacy': [False, True], >>> 'chunker': [{'nchunks': 3}, {'nchunks': 4}, {'nchunks': 5}, {'nchunks': 7}, {'chunksize': 3}], >>> 'items': [range(2), range(4), range(5), range(7), range(9)], >>> 'bordermode': ['none', 'cycle', 'replicate'], >>> } >>> grid_items = list(ub.named_product(basis)) >>> rows = [] >>> for grid_item in ub.ProgIter(grid_items): >>> chunker = grid_item.get('chunker') >>> grid_item.update(chunker) >>> kw = ub.dict_diff(grid_item, {'chunker'}) >>> self = chunk_iter = ub.chunks(**kw) >>> chunked = list(chunk_iter) >>> chunk_lens = list(map(len, chunked)) >>> row = ub.dict_union(grid_item, {'chunk_lens': chunk_lens, 'chunks': chunked}) >>> row['chunker'] = str(row['chunker']) >>> if not row['legacy'] and 'nchunks' in kw: >>> assert kw['nchunks'] == row['nchunks'] >>> row.update(chunk_iter.__dict__) >>> rows.append(row) >>> # xdoctest: +SKIP >>> import pandas as pd >>> df = pd.DataFrame(rows) >>> for _, subdf in df.groupby('chunker'): >>> print(subdf) """ def __init__(self, items, chunksize=None, nchunks=None, total=None, bordermode='none', legacy=False): """ Args: items (Iterable): input to iterate over chunksize (int | None): size of each sublist yielded nchunks (int | None): number of chunks to create ( cannot be specified if chunksize is specified) bordermode (str): determines how to handle the last case if the length of the input is not divisible by chunksize valid values 
are: {'none', 'cycle', 'replicate'} total (int | None): hints about the length of the input legacy (bool): if True use old behavior, defaults to False. This will be removed in the future. """ if nchunks is not None and chunksize is not None: # nocover raise ValueError('Cannot specify both chunksize and nchunks') if nchunks is None and chunksize is None: # nocover raise ValueError('Must specify either chunksize or nchunks') if total is None: try: total = len(items) except TypeError: pass # iterators dont know len if bordermode is None: # nocover bordermode = 'none' if nchunks is None: if total is not None: nchunks = int(math.ceil(total / chunksize)) remainder = 0 else: if total is None: raise ValueError( 'Need to specify total to use nchunks on an iterable ' 'without length hints') if legacy: chunksize: int = int(math.ceil(total / nchunks)) remainder = 0 else: if bordermode == 'none': # I feel like this could be simpler chunksize: int = max(int(math.floor(total / nchunks)), 1) nchunks: int = min(int(math.ceil(total / chunksize)), nchunks) chunked_total: int = chunksize * nchunks remainder: int = total - chunked_total else: # not working chunksize: int = max(int(math.ceil(total / nchunks)), 1) # Can artificially extend the size in this case # total = chunksize * nchunks remainder = 0 self.legacy = legacy self.remainder: int = remainder self.items = items self.total = total self.nchunks = nchunks self.chunksize = chunksize self.bordermode = bordermode def __len__(self): if self.nchunks is None: raise TypeError('length is unknown') return self.nchunks def __iter__(self): bordermode = self.bordermode items = self.items chunksize = self.chunksize if not self.legacy and self.nchunks is not None: return self._new_iterator() else: if bordermode is None or bordermode == 'none': return self.noborder(items, chunksize) elif bordermode == 'cycle': return self.cycle(items, chunksize) elif bordermode == 'replicate': return self.replicate(items, chunksize) else: raise ValueError('unknown bordermode=%r' % (bordermode,)) def _new_iterator(self): chunksize = self.chunksize nchunks = self.nchunks chunksize = self.chunksize remainder = self.remainder if self.bordermode == 'cycle': iterator = it.cycle(iter(self.items)) elif self.bordermode == 'replicate': def replicator(items): for item in items: yield item while True: yield item iterator = replicator(iter(self.items)) elif self.bordermode == 'none': iterator = iter(self.items) else: raise KeyError(self.bordermode) # Build an iterator that describes how big each chunk will be if remainder: # TODO: # handle replicate and cycle border modes # TODO: # benchmark different methods chunksize_iter = it.chain( it.repeat(chunksize + 1, remainder), it.repeat(chunksize, nchunks - remainder) ) else: chunksize_iter = it.repeat(chunksize, nchunks) for _chunksize in chunksize_iter: chunk = list(it.islice(iterator, _chunksize)) # if chunk: yield chunk @staticmethod def noborder(items, chunksize): # feed the same iter to zip_longest multiple times, this causes it to # consume successive values of the same sequence sentinel = object() copied_iters = [iter(items)] * chunksize chunks_with_sentinals = zip_longest(*copied_iters, fillvalue=sentinel) # Dont fill empty space in the last chunk, just return it as is for chunk in chunks_with_sentinals: yield [item for item in chunk if item is not sentinel] @staticmethod def cycle(items, chunksize): sentinel = object() copied_iters = [iter(items)] * chunksize chunks_with_sentinals = zip_longest(*copied_iters, fillvalue=sentinel) # Fill 
empty space in the last chunk with values from the beginning bordervalues = it.cycle(iter(items)) for chunk in chunks_with_sentinals: yield [item if item is not sentinel else next(bordervalues) for item in chunk] @staticmethod def replicate(items, chunksize): sentinel = object() copied_iters = [iter(items)] * chunksize # Fill empty space in the last chunk by replicating the last value chunks_with_sentinals = zip_longest(*copied_iters, fillvalue=sentinel) for chunk in chunks_with_sentinals: filt_chunk = [item for item in chunk if item is not sentinel] if len(filt_chunk) == chunksize: yield filt_chunk else: sizediff = (chunksize - len(filt_chunk)) padded_chunk = filt_chunk + [filt_chunk[-1]] * sizediff yield padded_chunk def iterable(obj, strok=False): """ Checks if the input implements the iterator interface. An exception is made for strings, which return False unless ``strok`` is True Args: obj (object): a scalar or iterable input strok (bool): if True allow strings to be interpreted as iterable. Defaults to False. Returns: bool: True if the input is iterable Example: >>> import ubelt as ub >>> obj_list = [3, [3], '3', (3,), [3, 4, 5], {}] >>> result = [ub.iterable(obj) for obj in obj_list] >>> assert result == [False, True, False, True, True, True] >>> result = [ub.iterable(obj, strok=True) for obj in obj_list] >>> assert result == [False, True, True, True, True, True] """ try: iter(obj) except Exception: return False else: return strok or not isinstance(obj, str) def take(items, indices, default=util_const.NoParam): """ Lookup a subset of an indexable object using a sequence of indices. The ``items`` input is usually a list or dictionary. When ``items`` is a list, this should be a sequence of integers. When ``items`` is a dict, this is a list of keys to lookup in that dictionary. For dictionaries, a default may be specified as a placeholder to use if a key from ``indices`` is not in ``items``. Args: items (Sequence[VT] | Mapping[KT, VT]): An indexable object to select items from. indices (Iterable[int | KT]): A sequence of indexes into ``items``. default (Any | NoParamType): if specified ``items`` must support the ``get`` method and this will be used as the default value. Yields: VT: a selected item within the list SeeAlso: :func:`ubelt.dict_subset` Note: ``ub.take(items, indices)`` is equivalent to ``(items[i] for i in indices)`` when ``default`` is unspecified. Notes: This is based on the :func:`numpy.take` function, but written in pure python. Do not confuse this with :func:`more_itertools.take`, the behavior is very different. Example: >>> import ubelt as ub >>> items = [0, 1, 2, 3] >>> indices = [2, 0] >>> list(ub.take(items, indices)) [2, 0] Example: >>> import ubelt as ub >>> dict_ = {1: 'a', 2: 'b', 3: 'c'} >>> keys = [1, 2, 3, 4, 5] >>> result = list(ub.take(dict_, keys, None)) >>> assert result == ['a', 'b', 'c', None, None] Example: >>> import ubelt as ub >>> dict_ = {1: 'a', 2: 'b', 3: 'c'} >>> keys = [1, 2, 3, 4, 5] >>> try: >>> print(list(ub.take(dict_, keys))) >>> raise AssertionError('did not get key error') >>> except KeyError: >>> print('correctly got key error') """ if default is util_const.NoParam: for index in indices: yield items[index] else: for index in indices: yield items.get(index, default) def compress(items, flags): """ Selects from ``items`` where the corresponding value in ``flags`` is True. 
Args: items (Iterable[Any]): a sequence to select items from flags (Iterable[bool]): corresponding sequence of bools Returns: Iterable[Any]: a subset of masked items Notes: This function is based on :func:`numpy.compress`, but is pure Python and swaps the condition and array argument to be consistent with :func:`ubelt.take`. This is equivalent to :func:`itertools.compress`. Example: >>> import ubelt as ub >>> items = [1, 2, 3, 4, 5] >>> flags = [False, True, True, False, True] >>> list(ub.compress(items, flags)) [2, 3, 5] """ return it.compress(items, flags) def flatten(nested): """ Transforms a nested iterable into a flat iterable. Args: nested (Iterable[Iterable[Any]]): list of lists Returns: Iterable[Any]: flattened items Notes: Equivalent to :func:`more_itertools.flatten` and :func:`itertools.chain.from_iterable`. Example: >>> import ubelt as ub >>> nested = [['a', 'b'], ['c', 'd']] >>> list(ub.flatten(nested)) ['a', 'b', 'c', 'd'] """ return it.chain.from_iterable(nested) def unique(items, key=None): """ Generates unique items in the order they appear. Args: items (Iterable[T]): list of items key (Callable[[T], Any] | None): Custom normalization function. If specified, this function generates items where ``key(item)`` is unique. Yields: T: a unique item from the input sequence Notes: Functionally equivalent to :func:`more_itertools.unique_everseen`. Example: >>> import ubelt as ub >>> items = [4, 6, 6, 0, 6, 1, 0, 2, 2, 1] >>> unique_items = list(ub.unique(items)) >>> assert unique_items == [4, 6, 0, 1, 2] Example: >>> import ubelt as ub >>> items = ['A', 'a', 'b', 'B', 'C', 'c', 'D', 'e', 'D', 'E'] >>> unique_items = list(ub.unique(items, key=str.lower)) >>> assert unique_items == ['A', 'b', 'C', 'D', 'e'] >>> unique_items = list(ub.unique(items)) >>> assert unique_items == ['A', 'a', 'b', 'B', 'C', 'c', 'D', 'e', 'E'] """ seen = set() if key is None: for item in items: if item not in seen: seen.add(item) yield item else: for item in items: norm = key(item) if norm not in seen: seen.add(norm) yield item def argunique(items, key=None): """ Returns indices corresponding to the first instance of each unique item. Args: items (Sequence[VT]): indexable collection of items key (Callable[[VT], Any] | None): Custom normalization function. If specified, this function generates indexes where ``key(item[index])`` is unique. Returns: Iterator[int] : indices of the unique items Example: >>> import ubelt as ub >>> items = [0, 2, 5, 1, 1, 0, 2, 4] >>> indices = list(ub.argunique(items)) >>> assert indices == [0, 1, 2, 3, 7] >>> indices = list(ub.argunique(items, key=lambda x: x % 2 == 0)) >>> assert indices == [0, 2] """ if key is None: return unique(range(len(items)), key=lambda i: items[i]) else: return unique(range(len(items)), key=lambda i: key(items[i])) def unique_flags(items, key=None): """ Returns a list of booleans corresponding to the first instance of each unique item. Args: items (Sequence[VT]): indexable collection of items key (Callable[[VT], Any] | None): Custom normalization function. If specified generates True if ``key(item)`` is unique and False otherwise. 
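    Note:
        The implementation builds a reverse mapping by zipping the reversed
        items with reversed indices, so earlier occurrences overwrite later
        ones and each item ends up mapped to its first index. A minimal
        standalone sketch of that trick (illustrative only, not part of the
        API):

        .. code:: python

            items = ['a', 'b', 'a']
            n = len(items)
            item_to_index = dict(zip(reversed(items), reversed(range(n))))
            # later pairs overwrite earlier ones, keeping first positions
            assert item_to_index == {'a': 0, 'b': 1}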
Returns: List[bool] : flags the items that are unique Example: >>> import ubelt as ub >>> items = [0, 2, 1, 1, 0, 9, 2] >>> flags = ub.unique_flags(items) >>> assert flags == [True, True, True, False, False, True, False] >>> flags = ub.unique_flags(items, key=lambda x: x % 2 == 0) >>> assert flags == [True, False, True, False, False, False, False] """ len_ = len(items) if key is None: item_to_index = dict(zip(reversed(items), reversed(range(len_)))) indices = item_to_index.values() else: indices = argunique(items, key=key) flags = boolmask(indices, len_) return flags def boolmask(indices, maxval=None): """ Constructs a list of booleans where an item is True if its position is in ``indices`` otherwise it is False. Args: indices (List[int]): list of integer indices maxval (int | None): length of the returned list. If not specified this is inferred using ``max(indices)`` Returns: List[bool]: mask - a list of booleans. mask[idx] is True if idx in indices Note: In the future the arg ``maxval`` may change its name to ``shape`` Example: >>> import ubelt as ub >>> indices = [0, 1, 4] >>> mask = ub.boolmask(indices, maxval=6) >>> assert mask == [True, True, False, False, True, False] >>> mask = ub.boolmask(indices) >>> assert mask == [True, True, False, False, True] """ if maxval is None: indices = list(indices) maxval = max(indices) + 1 mask = [False] * maxval for index in indices: mask[index] = True return mask def iter_window(iterable, size=2, step=1, wrap=False): """ Iterates through iterable with a window size. This is essentially a 1D sliding window. Args: iterable (Iterable[T]): an iterable sequence size (int): Sliding window size. Defaults to 2. step (int): Sliding step size. Default to 1. wrap (bool): If True, the last window will "wrap-around" to include items from the start of the input sequence in order to always produce consistently sized chunks. Otherwise, the last chunk may be smaller if there are not enough items in the sequence.. Defaults to False. 
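    Note:
        Wraparound is implemented by tee-ing the input and cycling every
        iterator except the first, so the trailing windows borrow items from
        the start of the sequence. A minimal sketch of the underlying pattern
        (illustrative only):

        .. code:: python

            import itertools as it
            a, b = it.tee([1, 2, 3], 2)
            b = it.cycle(b)
            next(b)  # offset the second iterator by one position
            assert list(zip(a, b)) == [(1, 2), (2, 3), (3, 1)]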
Returns: Iterable[T]: returns a possibly overlapping windows in a sequence Notes: Similar to :func:`more_itertools.windowed`, Similar to :func:`more_itertools.pairwise`, Similar to :func:`more_itertools.triplewise`, Similar to :func:`more_itertools.sliding_window` Example: >>> import ubelt as ub >>> iterable = [1, 2, 3, 4, 5, 6] >>> size, step, wrap = 3, 1, True >>> window_iter = ub.iter_window(iterable, size, step, wrap) >>> window_list = list(window_iter) >>> print('window_list = %r' % (window_list,)) window_list = [(1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6), (5, 6, 1), (6, 1, 2)] Example: >>> import ubelt as ub >>> iterable = [1, 2, 3, 4, 5, 6] >>> size, step, wrap = 3, 2, True >>> window_iter = ub.iter_window(iterable, size, step, wrap) >>> window_list = list(window_iter) >>> print('window_list = {!r}'.format(window_list)) window_list = [(1, 2, 3), (3, 4, 5), (5, 6, 1)] Example: >>> import ubelt as ub >>> iterable = [1, 2, 3, 4, 5, 6] >>> size, step, wrap = 3, 2, False >>> window_iter = ub.iter_window(iterable, size, step, wrap) >>> window_list = list(window_iter) >>> print('window_list = {!r}'.format(window_list)) window_list = [(1, 2, 3), (3, 4, 5)] Example: >>> import ubelt as ub >>> iterable = [] >>> size, step, wrap = 3, 2, False >>> window_iter = ub.iter_window(iterable, size, step, wrap) >>> window_list = list(window_iter) >>> print('window_list = {!r}'.format(window_list)) window_list = [] """ # it.tee may be slow, but works on all iterables iter_list = it.tee(iterable, size) if wrap: # Secondary iterables need to be cycled for wraparound iter_list = [iter_list[0]] + list(map(it.cycle, iter_list[1:])) # Step each iterator the appropriate number of times try: for count, iter_ in enumerate(iter_list[1:], start=1): for _ in range(count): next(iter_) except StopIteration: return iter(()) else: _window_iter = zip(*iter_list) # Account for the step size window_iter = it.islice(_window_iter, 0, None, step) return window_iter def allsame(iterable, eq=operator.eq): """ Determine if all items in a sequence are the same Args: iterable (Iterable[T]): items to determine if they are all the same eq (Callable[[T, T], bool]): function used to test for equality. Defaults to :func:`operator.eq`. Returns: bool: True if all items are equal, otherwise False Notes: Similar to :func:`more_itertools.all_equal` Example: >>> import ubelt as ub >>> ub.allsame([1, 1, 1, 1]) True >>> ub.allsame([]) True >>> ub.allsame([0, 1]) False >>> iterable = iter([0, 1, 1, 1]) >>> next(iterable) >>> ub.allsame(iterable) True >>> ub.allsame(range(10)) False >>> ub.allsame(range(10), lambda a, b: True) True """ iter_ = iter(iterable) try: first = next(iter_) except StopIteration: return True return all(eq(first, item) for item in iter_) def argsort(indexable, key=None, reverse=False): """ Returns the indices that would sort a indexable object. This is similar to :func:`numpy.argsort`, but it is written in pure python and works on both lists and dictionaries. Args: indexable (Iterable[VT] | Mapping[KT, VT]): indexable to sort by key (Callable[[VT], VT] | None): If specified, customizes the ordering of the indexable. reverse (bool): if True returns in descending order. Default to False. 
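    Note:
        When no custom ``key`` is given, the implementation sorts decorated
        ``(value, index)`` pairs, so ties between equal values are broken by
        index (or dictionary key) order. A small sketch of that
        decorate-sort pattern (illustrative only):

        .. code:: python

            vals = [2, 1, 2]
            pairs = sorted((v, i) for i, v in enumerate(vals))
            indices = [i for _, i in pairs]
            assert indices == [1, 0, 2]  # equal values keep index order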
Returns: List[int] | List[KT]: indices - list of indices that sorts the indexable Example: >>> import ubelt as ub >>> # argsort works on dicts by returning keys >>> dict_ = {'a': 3, 'b': 2, 'c': 100} >>> indices = ub.argsort(dict_) >>> assert list(ub.take(dict_, indices)) == sorted(dict_.values()) >>> # argsort works on lists by returning indices >>> indexable = [100, 2, 432, 10] >>> indices = ub.argsort(indexable) >>> assert list(ub.take(indexable, indices)) == sorted(indexable) >>> # Can use iterators, but be careful. It exhausts them. >>> indexable = reversed(range(100)) >>> indices = ub.argsort(indexable) >>> assert indices[0] == 99 >>> # Can use key just like sorted >>> indexable = [[0, 1, 2], [3, 4], [5]] >>> indices = ub.argsort(indexable, key=len) >>> assert indices == [2, 1, 0] >>> # Can use reverse just like sorted >>> indexable = [0, 2, 1] >>> indices = ub.argsort(indexable, reverse=True) >>> assert indices == [1, 2, 0] """ # Create an iterator of value/key pairs if isinstance(indexable, collections_abc.Mapping): vk_iter = ((v, k) for k, v in indexable.items()) else: vk_iter = ((v, k) for k, v in enumerate(indexable)) # Sort by values and extract the indices if key is None: indices = [k for v, k in sorted(vk_iter, reverse=reverse)] else: # If key is provided, call it using the value as input indices = [k for v, k in sorted(vk_iter, key=lambda vk: key(vk[0]), reverse=reverse)] return indices def argmax(indexable, key=None): """ Returns index / key of the item with the largest value. This is similar to :func:`numpy.argmax`, but it is written in pure python and works on both lists and dictionaries. Args: indexable (Iterable[VT] | Mapping[KT, VT]): indexable to sort by key (Callable[[VT], Any] | None): If specified, customizes the ordering of the indexable Returns: int | KT: the index of the item with the maximum value. Example: >>> import ubelt as ub >>> assert ub.argmax({'a': 3, 'b': 2, 'c': 100}) == 'c' >>> assert ub.argmax(['a', 'c', 'b', 'z', 'f']) == 3 >>> assert ub.argmax([[0, 1], [2, 3, 4], [5]], key=len) == 1 >>> assert ub.argmax({'a': 3, 'b': 2, 3: 100, 4: 4}) == 3 >>> assert ub.argmax(iter(['a', 'c', 'b', 'z', 'f'])) == 3 """ if key is None and isinstance(indexable, collections_abc.Mapping): return max(indexable.items(), key=operator.itemgetter(1))[0] elif hasattr(indexable, 'index'): if key is None: return indexable.index(max(indexable)) else: return indexable.index(max(indexable, key=key)) else: # less efficient, but catch all solution return argsort(indexable, key=key)[-1] def argmin(indexable, key=None): """ Returns index / key of the item with the smallest value. This is similar to :func:`numpy.argmin`, but it is written in pure python and works on both lists and dictionaries. Args: indexable (Iterable[VT] | Mapping[KT, VT]): indexable to sort by key (Callable[[VT], VT] | None): If specified, customizes the ordering of the indexable. Returns: int | KT: the index of the item with the minimum value. 
Example: >>> import ubelt as ub >>> assert ub.argmin({'a': 3, 'b': 2, 'c': 100}) == 'b' >>> assert ub.argmin(['a', 'c', 'b', 'z', 'f']) == 0 >>> assert ub.argmin([[0, 1], [2, 3, 4], [5]], key=len) == 2 >>> assert ub.argmin({'a': 3, 'b': 2, 3: 100, 4: 4}) == 'b' >>> assert ub.argmin(iter(['a', 'c', 'A', 'z', 'f'])) == 2 """ if key is None and isinstance(indexable, collections_abc.Mapping): return min(indexable.items(), key=operator.itemgetter(1))[0] elif hasattr(indexable, 'index'): if key is None: return indexable.index(min(indexable)) else: return indexable.index(min(indexable, key=key)) else: # less efficient, but catch all solution return argsort(indexable, key=key)[0] def peek(iterable, default=util_const.NoParam): """ Look at the first item of an iterable. If the input is an iterator, then the next element is exhausted (i.e. a pop operation). Args: iterable (Iterable[T]): an iterable default (T): default item to return if the iterable is empty, otherwise a StopIteration error is raised Returns: T: item - the first item of ordered sequence, a popped item from an iterator, or an arbitrary item from an unordered collection. Notes: Similar to :func:`more_itertools.peekable` Example: >>> import ubelt as ub >>> data = [0, 1, 2] >>> ub.peek(data) 0 >>> iterator = iter(data) >>> print(ub.peek(iterator)) 0 >>> print(ub.peek(iterator)) 1 >>> print(ub.peek(iterator)) 2 >>> ub.peek(range(3)) 0 >>> ub.peek([], 3) 3 """ if default is util_const.NoParam: return next(iter(iterable)) else: return next(iter(iterable), default) # Stubs for potential future object oriented wrappers class IterableMixin: """ """ unique = unique # chunks = chunks histogram = util_dict.dict_hist duplicates = util_dict.find_duplicates group = util_dict.group_items def chunks(self, size=None, num=None, bordermode='none'): return chunks(self, chunksize=size, nchunks=num, total=len(self), bordermode=bordermode) # def histogram(self, weights=None, ordered=False, labels=None): # util_dict.dict_hist.__doc__ # return util_dict.dict_hist(self, weights=weights, ordered=ordered) # def duplicates(self, k=2, key=None): # util_dict.find_duplicates.__doc__ # return util_dict.find_duplicates(self, k=k, key=key) # def group(self, key): # util_dict.group_items.__doc__ # return util_dict.group_items(self, key=key) class OrderedIterableMixin(IterableMixin): compress = compress argunique = argunique window = iter_window class UList(list, OrderedIterableMixin): """ An extended list class that features additional helper methods. 
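    Note:
        The helper methods in these mixins are plain module-level functions
        assigned as class attributes, so they bind like ordinary methods. A
        minimal sketch of the same pattern using a hypothetical ``first``
        helper (not part of ubelt):

        .. code:: python

            def first(items):
                # a plain function taking the collection as its argument
                return next(iter(items))

            class MyList(list):
                # assigning the function turns it into an instance method
                first = first

            assert MyList([9, 8]).first() == 9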
Example: >>> from ubelt.util_list import UList >>> self = UList() >>> self.append(1) >>> self += UList([1, 2, 3]) >>> self += UList([5, 7]) >>> # >>> print(f'unique: {list(self.unique())}') >>> print(f'argunique: {list(self.argunique())}') >>> # >>> print(f'chunks: {list(self.chunks(num=2))}') >>> print(f'chunks: {list(self.chunks(size=2))}') >>> # >>> print(f'window: {list(self.window(3))}') >>> # >>> print(f'take: {list(self.take([0, 2, 3]))}') >>> print(f'compress: {list(self.compress([0, 1, 0, 1]))}') >>> # >>> print(f'argsort: {self.argsort()}') >>> print(f'argmax: {self.argmax()}') >>> print(f'argmin: {self.argmin()}') >>> print(f'flatten: {list(UList([self, [2, 3, 3]]).flatten())}') >>> print(f'allsame: {self.allsame()}') >>> print(f'peek: {self.peek()}') >>> print(f'histogram: {self.histogram()}') >>> print(f'group: {self.group(key=lambda x: x % 2)}') >>> print(f'duplicates: {self.duplicates()}') """ peek = peek take = take flatten = flatten allsame = allsame argsort = argsort argmax = argmax argmin = argmin # class USet(set, IterableMixin): # ... # class Set(set, IterableMixin): # ... ubelt-1.3.7/ubelt/util_list.pyi000066400000000000000000000063701472470106000165120ustar00rootroot00000000000000from typing import Iterable from typing import Mapping from typing import Sequence from typing import Any from typing import Callable from typing import Iterator from typing import List from _typeshed import Incomplete from collections.abc import Generator from typing import Any, TypeVar VT = TypeVar("VT") T = TypeVar("T") KT = TypeVar("KT") class chunks: remainder: int legacy: bool items: Iterable total: int | None nchunks: int | None chunksize: int | None bordermode: str def __init__(self, items: Iterable, chunksize: int | None = None, nchunks: int | None = None, total: int | None = None, bordermode: str = 'none', legacy: bool = False) -> None: ... def __len__(self): ... def __iter__(self): ... @staticmethod def noborder(items, chunksize) -> Generator[Any, None, None]: ... @staticmethod def cycle(items, chunksize) -> Generator[Any, None, None]: ... @staticmethod def replicate(items, chunksize) -> Generator[Any, None, None]: ... def iterable(obj: object, strok: bool = False) -> bool: ... def take(items: Sequence[VT] | Mapping[KT, VT], indices: Iterable[int | KT], default: Any = ...) -> Generator[VT, None, None]: ... def compress(items: Iterable[Any], flags: Iterable[bool]) -> Iterable[Any]: ... def flatten(nested: Iterable[Iterable[Any]]) -> Iterable[Any]: ... def unique(items: Iterable[T], key: Callable[[T], Any] | None = None) -> Generator[T, None, None]: ... def argunique(items: Sequence[VT], key: Callable[[VT], Any] | None = None) -> Iterator[int]: ... def unique_flags(items: Sequence[VT], key: Callable[[VT], Any] | None = None) -> List[bool]: ... def boolmask(indices: List[int], maxval: int | None = None) -> List[bool]: ... def iter_window(iterable: Iterable[T], size: int = 2, step: int = 1, wrap: bool = False) -> Iterable[T]: ... def allsame(iterable: Iterable[T], eq: Callable[[T, T], bool] = ...) -> bool: ... def argsort(indexable: Iterable[VT] | Mapping[KT, VT], key: Callable[[VT], VT] | None = None, reverse: bool = False) -> List[int] | List[KT]: ... def argmax(indexable: Iterable[VT] | Mapping[KT, VT], key: Callable[[VT], Any] | None = None) -> int | KT: ... def argmin(indexable: Iterable[VT] | Mapping[KT, VT], key: Callable[[VT], VT] | None = None) -> int | KT: ... def peek(iterable: Iterable[T], default: T = ...) -> T: ... 
class IterableMixin: unique = unique histogram: Incomplete duplicates: Incomplete group: Incomplete def chunks(self, size: Incomplete | None = ..., num: Incomplete | None = ..., bordermode: str = ...): ... class OrderedIterableMixin(IterableMixin): compress = compress argunique = argunique window = iter_window class UList(list, OrderedIterableMixin): peek = peek take = take flatten = flatten allsame = allsame argsort = argsort argmax = argmax argmin = argmin ubelt-1.3.7/ubelt/util_memoize.py000066400000000000000000000265421472470106000170360ustar00rootroot00000000000000""" This module exposes decorators for in-memory caching of functional results. This is particularly useful when prototyping dynamic programming algorithms. Either :func:`memoize`, :func:`memoize_method`, and :func:`memoize_property` should be used depending on what type of function is being wrapped. The following example demonstrates this. In Python 3.8+ :func:`memoize` works similarly to the standard library :func:`functools.cache`, but the ubelt version makes use of :func:`ubelt.util_hash.hash_data`, which is slower, but handles inputs containing mutable containers. Example: >>> import ubelt as ub >>> # Memoize a function, the args are hashed >>> @ub.memoize >>> def func(a, b): >>> return a + b >>> # >>> class MyClass: >>> # Memoize a class method, the args are hashed >>> @ub.memoize_method >>> def my_method(self, a, b): >>> return a + b >>> # >>> # Memoize a property: there can be no args, >>> @ub.memoize_property >>> @property >>> def my_property1(self): >>> return 4 >>> # >>> # The property decorator is optional >>> def my_property2(self): >>> return 5 >>> # >>> func(1, 2) >>> func(1, 2) >>> self = MyClass() >>> self.my_method(1, 2) >>> self.my_method(1, 2) >>> self.my_property1 >>> self.my_property1 >>> self.my_property2 >>> self.my_property2 """ import functools import sys from ubelt import util_hash __all__ = ['memoize', 'memoize_method', 'memoize_property'] def _hashable(item): """ Returns the item if it is naturally hashable, otherwise it tries to use ubelt.util_hash.hash_data to make it hashable. Errors if it cannot. """ try: hash(item) except TypeError: return util_hash.hash_data(item) else: return item def _make_signature_key(args, kwargs): """ Transforms function args into a key that can be used by the cache Example: >>> from ubelt.util_memoize import _make_signature_key >>> args = (4, [1, 2]) >>> kwargs = {'a': 'b'} >>> key = _make_signature_key(args, kwargs) >>> print('key = {!r}'.format(key)) >>> # Some mutable types cannot be handled by ub.hash_data >>> import pytest >>> from collections import abc >>> # This used to error, in ubelt versions < 0.9.5 >>> _make_signature_key((4, [1, 2], {1: 2, 'a': 'b'}), kwargs={}) >>> class Dummy(abc.MutableSet): >>> def __contains__(self, item): return None >>> def __iter__(self): return iter([]) >>> def __len__(self): return 0 >>> def add(self, item, loc): return None >>> def discard(self, item): return None >>> with pytest.raises(TypeError): >>> _make_signature_key((Dummy(),), kwargs={}) """ kwitems = kwargs.items() # TODO: we should check if Python is at least 3.7 and sort by kwargs # keys otherwise. 
# Should we use hash_data for key generation?
    if (sys.version_info.major, sys.version_info.minor) < (3, 7):  # nocover
        # We can sort because the keys are guaranteed to be strings
        kwitems = sorted(kwitems)
    kwitems = tuple(kwitems)

    try:
        key = _hashable(args), _hashable(kwitems)
    except TypeError:
        msg = ('Signature is not hashable: '
               'args={} kwargs={}'.format(args, kwargs))
        raise TypeError(msg)
    return key


def memoize(func):
    """
    memoization decorator that respects args and kwargs

    In Python 3.9, the :mod:`functools` module introduced the ``cache``
    decorator, which is currently faster than memoize for simple functions
    [FunctoolsCache]_. However, memoize can handle more general,
    non-natively hashable inputs.

    Args:
        func (Callable): live python function

    Returns:
        Callable: memoized wrapper

    References:
        .. [WikiMemoize] https://wiki.python.org/moin/PythonDecoratorLibrary#Memoize
        .. [FunctoolsCache] https://docs.python.org/3/library/functools.html

    Example:
        >>> import ubelt as ub
        >>> closure = {'a': 'b', 'c': 'd'}
        >>> incr = [0]
        >>> def foo(key):
        >>>     value = closure[key]
        >>>     incr[0] += 1
        >>>     return value
        >>> foo_memo = ub.memoize(foo)
        >>> assert foo('a') == 'b' and foo('c') == 'd'
        >>> assert incr[0] == 2
        >>> print('Call memoized version')
        >>> assert foo_memo('a') == 'b' and foo_memo('c') == 'd'
        >>> assert incr[0] == 4
        >>> assert foo_memo('a') == 'b' and foo_memo('c') == 'd'
        >>> print('Counter should no longer increase')
        >>> assert incr[0] == 4
        >>> print('Closure changes result without memoization')
        >>> closure = {'a': 0, 'c': 1}
        >>> assert foo('a') == 0 and foo('c') == 1
        >>> assert incr[0] == 6
        >>> assert foo_memo('a') == 'b' and foo_memo('c') == 'd'
    """
    cache = {}

    @functools.wraps(func)
    def memoizer(*args, **kwargs):
        key = _make_signature_key(args, kwargs)
        if key not in cache:
            cache[key] = func(*args, **kwargs)
        return cache[key]
    memoizer.cache = cache
    return memoizer


class memoize_method:
    """
    memoization decorator for a method that respects args and kwargs

    References:
        .. [ActiveState_Miller_2010] http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods

    Attributes:
        __func__ (Callable): the wrapped function

    Note:
        This is very thread-unsafe, and has an issue as pointed out in
        [ActiveState_Miller_2010]_; a future version may work on fixing this.
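    Note:
        Like :func:`memoize`, the signature key falls back to
        :func:`ubelt.hash_data` for arguments that are not natively hashable,
        so methods taking mutable containers can also be memoized. A small
        illustrative sketch (the ``Accum`` class is hypothetical):

        .. code:: python

            import ubelt as ub

            class Accum:
                @ub.memoize_method
                def total(self, values):
                    # ``values`` may be a list, which is not hashable
                    return sum(values)

            a = Accum()
            assert a.total([1, 2, 3]) == 6
            # an equal list hits the per-instance cache on the second call
            assert a.total([1, 2, 3]) == 6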
Example: >>> import ubelt as ub >>> closure1 = closure = {'a': 'b', 'c': 'd', 'z': 'z1'} >>> incr = [0] >>> class Foo(object): >>> def __init__(self, instance_id): >>> self.instance_id = instance_id >>> @ub.memoize_method >>> def foo_memo(self, key): >>> "Wrapped foo_memo docstr" >>> value = closure[key] >>> incr[0] += 1 >>> return value, self.instance_id >>> def foo(self, key): >>> value = closure[key] >>> incr[0] += 1 >>> return value, self.instance_id >>> self1 = Foo('F1') >>> assert self1.foo('a') == ('b', 'F1') >>> assert self1.foo('c') == ('d', 'F1') >>> assert incr[0] == 2 >>> # >>> print('Call memoized version') >>> assert self1.foo_memo('a') == ('b', 'F1') >>> assert self1.foo_memo('c') == ('d', 'F1') >>> assert incr[0] == 4, 'should have called a function 4 times' >>> # >>> assert self1.foo_memo('a') == ('b', 'F1') >>> assert self1.foo_memo('c') == ('d', 'F1') >>> print('Counter should no longer increase') >>> assert incr[0] == 4 >>> # >>> print('Closure changes result without memoization') >>> closure2 = closure = {'a': 0, 'c': 1, 'z': 'z2'} >>> assert self1.foo('a') == (0, 'F1') >>> assert self1.foo('c') == (1, 'F1') >>> assert incr[0] == 6 >>> assert self1.foo_memo('a') == ('b', 'F1') >>> assert self1.foo_memo('c') == ('d', 'F1') >>> # >>> print('Constructing a new object should get a new cache') >>> self2 = Foo('F2') >>> self2.foo_memo('a') >>> assert incr[0] == 7 >>> self2.foo_memo('a') >>> assert incr[0] == 7 >>> # Check that the decorator preserves the name and docstring >>> assert self1.foo_memo.__doc__ == 'Wrapped foo_memo docstr' >>> assert self1.foo_memo.__name__ == 'foo_memo' >>> print(f'self1.foo_memo = {self1.foo_memo!r}, {hex(id(self1.foo_memo))}') >>> print(f'self2.foo_memo = {self2.foo_memo!r}, {hex(id(self2.foo_memo))}') >>> # >>> # Test for the issue in the active state recipe >>> method1 = self1.foo_memo >>> method2 = self2.foo_memo >>> assert method1('a') == ('b', 'F1') >>> assert method2('a') == (0, 'F2') >>> assert method1('z') == ('z2', 'F1') >>> assert method2('z') == ('z2', 'F2') """ def __init__(self, func): """ Args: func (Callable): method to wrap """ self._func = func self._cache_name = '_cache__' + func.__name__ # Mimic attributes of a bound method self.__func__ = func functools.update_wrapper(self, func) def __get__(self, instance, cls=None): """ Descriptor get method. Called when the decorated method is accessed from an object instance. Args: instance (object): the instance of the class with the memoized method cls (type | None): the type of the instance """ import types unbound = self._func cache = instance.__dict__.setdefault(self._cache_name, {}) # https://stackoverflow.com/questions/71413937/what-does-using-get-on-a-function-do @functools.wraps(unbound) def memoizer(instance, *args, **kwargs): key = _make_signature_key(args, kwargs) if key not in cache: cache[key] = unbound(instance, *args, **kwargs) return cache[key] # Bind the unbound memoizer to the instance bound_memoizer = types.MethodType(memoizer, instance) # Set the attribute to prevent calling __get__ again # Is there a better way to do this? setattr(instance, self._func.__name__, bound_memoizer) return bound_memoizer def memoize_property(fget): """ Return a property attribute for new-style classes that only calls its getter on the first access. The result is stored and on subsequent accesses is returned, preventing the need to call the getter any more. This decorator can either be used by itself or by decorating another property. 
In either case the method will always become a property.

    Note:
        implementation is a modified version of [estebistec_memoize]_.

    References:
        .. [estebistec_memoize] https://github.com/estebistec/python-memoized-property

    Args:
        fget (property | Callable): A property or a method.

    Example:
        >>> import ubelt as ub
        >>> class C(object):
        ...     load_name_count = 0
        ...     @ub.memoize_property
        ...     def name(self):
        ...         "name's docstring"
        ...         self.load_name_count += 1
        ...         return "the name"
        ...     @ub.memoize_property
        ...     @property
        ...     def another_name(self):
        ...         "name's docstring"
        ...         self.load_name_count += 1
        ...         return "the name"
        >>> c = C()
        >>> c.load_name_count
        0
        >>> c.name
        'the name'
        >>> c.load_name_count
        1
        >>> c.name
        'the name'
        >>> c.load_name_count
        1
        >>> c.another_name
    """
    # Unwrap any existing property decorator
    while hasattr(fget, 'fget'):
        fget = fget.fget

    attr_name = '_' + fget.__name__

    @functools.wraps(fget)
    def fget_memoized(self):
        if not hasattr(self, attr_name):
            setattr(self, attr_name, fget(self))
        return getattr(self, attr_name)

    return property(fget_memoized)
ubelt-1.3.7/ubelt/util_memoize.pyi000066400000000000000000000005731472470106000172030ustar00rootroot00000000000000from typing import Callable


def memoize(func: Callable) -> Callable:
    ...


class memoize_method:
    __func__: Callable

    def __init__(self, func: Callable) -> None:
        ...

    def __get__(self, instance: object, cls: type | None = None):
        ...

    def __call__(self, *args, **kwargs):
        ...


def memoize_property(fget: property | Callable):
    ...
ubelt-1.3.7/ubelt/util_mixins.py000066400000000000000000000133461472470106000166760ustar00rootroot00000000000000"""
This module defines the :class:`NiceRepr` mixin class, which defines a
``__repr__`` and ``__str__`` method that only depend on a custom ``__nice__``
method, which you must define. This means you only have to overload one
function instead of two. Furthermore, if the object defines a ``__len__``
method, then the ``__nice__`` method defaults to something sensible,
otherwise it is treated as abstract and raises ``NotImplementedError``.

To use, have your object inherit from :class:`NiceRepr`.
To customize, define the ``__nice__`` method.

Example:
    >>> # Objects that define __nice__ have a default __str__ and __repr__
    >>> import ubelt as ub
    >>> class Student(ub.NiceRepr):
    ...    def __init__(self, name):
    ...        self.name = name
    ...    def __nice__(self):
    ...        return self.name
    >>> s1 = Student('Alice')
    >>> s2 = Student('Bob')
    >>> # The __str__ representation looks nice
    >>> print('s1 = {}'.format(s1))
    >>> print('s2 = {}'.format(s2))
    s1 = <Student(Alice)>
    s2 = <Student(Bob)>
    >>> # xdoctest: +IGNORE_WANT
    >>> # The __repr__ representation also looks nice
    >>> print('s1 = {!r}'.format(s1))
    >>> print('s2 = {!r}'.format(s2))
    s1 = <Student(Alice) at 0x...>
    s2 = <Student(Bob) at 0x...>

Example:
    >>> # Objects that define __len__ have a default __nice__
    >>> import ubelt as ub
    >>> class Group(ub.NiceRepr):
    ...    def __init__(self, data):
    ...        self.data = data
    ...    def __len__(self):
    ...        return len(self.data)
    >>> g = Group([1, 2, 3])
    >>> print('g = {}'.format(g))
    g = <Group(3)>
"""
import warnings


class NiceRepr:
    """
    Inherit from this class and define ``__nice__`` to "nicely" print your
    objects.

    Defines ``__str__`` and ``__repr__`` in terms of the ``__nice__``
    function. Classes that inherit from :class:`NiceRepr` should redefine
    ``__nice__``. If the inheriting class has a ``__len__`` method, then the
    default ``__nice__`` method will return its length.

    Example:
        >>> import ubelt as ub
        >>> class Foo(ub.NiceRepr):
        ...    def __nice__(self):
        ...        return 'info'
        >>> foo = Foo()
        >>> assert str(foo) == '<Foo(info)>'
        >>> assert repr(foo).startswith('<Foo(info) at ')

    Example:
        >>> import ubelt as ub
        >>> class Bar(ub.NiceRepr):
        ...    pass
        >>> bar = Bar()
        >>> import pytest
        >>> with pytest.warns(RuntimeWarning) as record:
        >>>     assert 'object at' in str(bar)
        >>>     assert 'object at' in repr(bar)

    Example:
        >>> import ubelt as ub
        >>> class Baz(ub.NiceRepr):
        ...    def __len__(self):
        ...        return 5
        >>> baz = Baz()
        >>> assert str(baz) == '<Baz(5)>'

    Example:
        >>> import ubelt as ub
        >>> # If your nice message has a bug, it shouldn't bring down the house
        >>> class Foo(ub.NiceRepr):
        ...    def __nice__(self):
        ...        assert False
        >>> foo = Foo()
        >>> import pytest
        >>> with pytest.warns(RuntimeWarning) as record:
        >>>     print('foo = {!r}'.format(foo))
        foo = <...Foo ...>

    Example:
        >>> import ubelt as ub
        >>> class Animal(ub.NiceRepr):
        ...    def __init__(self):
        ...        ...
        ...    def __nice__(self):
        ...        return ''
        >>> class Cat(Animal):
        >>>    ...
        >>> class Dog(Animal):
        >>>    ...
        >>> class Beagle(Dog):
        >>>    ...
        >>> class Ragdoll(Cat):
        >>>    ...
        >>> instances = [Animal(), Cat(), Dog(), Beagle(), Ragdoll()]
        >>> for inst in instances:
        >>>     print(str(inst))
        <Animal()>
        <Cat()>
        <Dog()>
        <Beagle()>
        <Ragdoll()>

    In the case where you can't or don't want to use ubelt.NiceRepr you can
    get similar behavior by pasting the methods from the following snippet
    into your class:

    .. code:: python

        class MyClass:
            def __nice__(self):
                return 'your concise information'

            def __repr__(self):
                nice = self.__nice__()
                classname = self.__class__.__name__
                return '<{0}({1}) at {2}>'.format(classname, nice, hex(id(self)))

            def __str__(self):
                classname = self.__class__.__name__
                nice = self.__nice__()
                return '<{0}({1})>'.format(classname, nice)
    """

    def __nice__(self):
        """
        Returns:
            str
        """
        if hasattr(self, '__len__'):
            # It is a common pattern for objects to use __len__ in __nice__
            # As a convenience we define a default __nice__ for these objects
            return str(len(self))
        else:
            # In all other cases force the subclass to overload __nice__
            raise NotImplementedError(
                'Define the __nice__ method for {!r}'.format(self.__class__))

    def __repr__(self):
        """
        Returns:
            str
        """
        try:
            nice = self.__nice__()
            classname = self.__class__.__name__
            return '<{0}({1}) at {2}>'.format(classname, nice, hex(id(self)))
        except Exception as ex:
            warnings.warn(str(ex), category=RuntimeWarning)
            return object.__repr__(self)

    def __str__(self):
        """
        Returns:
            str
        """
        try:
            classname = self.__class__.__name__
            nice = self.__nice__()
            return '<{0}({1})>'.format(classname, nice)
        except Exception as ex:
            warnings.warn(str(ex), category=RuntimeWarning)
            return object.__repr__(self)
ubelt-1.3.7/ubelt/util_mixins.pyi000066400000000000000000000000741472470106000170410ustar00rootroot00000000000000class NiceRepr:
    def __nice__(self) -> str:
        ...
ubelt-1.3.7/ubelt/util_path.py000066400000000000000000002210531472470106000163170ustar00rootroot00000000000000"""
Path and filesystem utilities.

The :class:`Path` object is an extension of :class:`pathlib.Path` that
contains extra convenience methods corresponding to the extra functional
methods in this module. (New in 0.11.0). See the class documentation for
more details.

This module also defines functional path-related utilities, but moving
forward users should prefer using :class:`Path` over standalone functional
methods. The functional methods will still be available for the foreseeable
future, but their functionality is made redundant by :class:`Path`. For
completeness these functions are listed below.

The :func:`expandpath` function expands the tilde to ``$HOME`` and
environment variables to their values.
The :func:`augpath` function creates variants of an existing path without having to spend multiple lines of code splitting it up and stitching it back together. The :func:`shrinkuser` function replaces your home directory with a tilde. The :func:`userhome` function reports the home directory of the current user of the operating system. The :func:`ensuredir` function operates like ``mkdir -p`` in unix. Note: In the future the part of this module that defines Path may be renamed to util_pathlib. """ from os.path import ( dirname, exists, expanduser, expandvars, join, normpath, split, splitext, ) import os import sys import pathlib import platform import stat import warnings from ubelt import util_io __all__ = [ 'Path', 'TempDir', 'augpath', 'shrinkuser', 'userhome', 'ensuredir', 'expandpath', 'ChDir', ] WIN32 = sys.platform.startswith('win32') def augpath(path, suffix='', prefix='', ext=None, tail='', base=None, dpath=None, relative=None, multidot=False): """ Create a new path with a different extension, basename, directory, prefix, and/or suffix. A prefix is inserted before the basename. A suffix is inserted between the basename and the extension. The basename and extension can be replaced with a new one. Essentially a path is broken down into components (dpath, base, ext), and then recombined as (dpath, prefix, base, suffix, ext) after replacing any specified component. Args: path (str | PathLike): a path to augment suffix (str): placed between the basename and extension Note: this is referred to as stemsuffix in :func:`ub.Path.augment`. prefix (str): placed in front of the basename ext (str | None): if specified, replaces the extension tail (str | None): If specified, appends this text to the extension base (str | None): if specified, replaces the basename without extension. Note: this is referred to as stem in :func:`ub.Path.augment`. dpath (str | PathLike | None): if specified, replaces the specified "relative" directory, which by default is the parent directory. relative (str | PathLike | None): Replaces ``relative`` with ``dpath`` in ``path``. Has no effect if ``dpath`` is not specified. Defaults to the dirname of the input ``path``. *experimental* not currently implemented. multidot (bool): Allows extensions to contain multiple dots. Specifically, if False, everything after the last dot in the basename is the extension. If True, everything after the first dot in the basename is the extension. 
Returns: str: augmented path SeeAlso: :func:`ubelt.Path.augment` Example: >>> import ubelt as ub >>> path = 'foo.bar' >>> suffix = '_suff' >>> prefix = 'pref_' >>> ext = '.baz' >>> newpath = ub.augpath(path, suffix, prefix, ext=ext, base='bar') >>> print('newpath = %s' % (newpath,)) newpath = pref_bar_suff.baz Example: >>> from ubelt.util_path import * # NOQA >>> augpath('foo.bar') 'foo.bar' >>> augpath('foo.bar', ext='.BAZ') 'foo.BAZ' >>> augpath('foo.bar', suffix='_') 'foo_.bar' >>> augpath('foo.bar', prefix='_') '_foo.bar' >>> augpath('foo.bar', base='baz') 'baz.bar' >>> augpath('foo.tar.gz', ext='.zip', multidot=True) foo.zip >>> augpath('foo.tar.gz', ext='.zip', multidot=False) foo.tar.zip >>> augpath('foo.tar.gz', suffix='_new', multidot=True) foo_new.tar.gz >>> augpath('foo.tar.gz', suffix='_new', tail='.cache', multidot=True) foo_new.tar.gz.cache """ stem = base # new nomenclature # Breakup path if relative is None: orig_dpath, fname = split(path) else: # nocover # if path.startswith(relative): # orig_dpath = relative # fname = relpath(path, relative) # else: # orig_dpath, fname = split(path) raise NotImplementedError('Not implemented yet') if multidot: # The first dot defines the extension parts = fname.split('.', 1) orig_base = parts[0] orig_ext = '' if len(parts) == 1 else '.' + parts[1] else: # The last dot defines the extension orig_base, orig_ext = splitext(fname) # Replace parts with specified augmentations if dpath is None: dpath = orig_dpath if ext is None: ext = orig_ext if stem is None: stem = orig_base # Recombine into new path new_fname = ''.join((prefix, stem, suffix, ext, tail)) newpath = join(dpath, new_fname) return newpath def userhome(username=None): """ Returns the path to some user's home directory. Args: username (str | None): name of a user on the system. If unspecified, the current user is inferred from standard environment variables. 
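    Note:
        For the current user the resolution order (sketched from the logic
        below) is: the ``HOME`` environment variable first; on Windows the
        ``USERPROFILE`` and then ``HOMEDRIVE``/``HOMEPATH`` variables; on
        POSIX the ``pwd`` database is the final fallback. A simplified
        illustration of the first two steps (not the exact implementation):

        .. code:: python

            import os
            home = os.environ.get('HOME') or os.environ.get('USERPROFILE')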
Returns: str: path to the specified home directory Raises: KeyError: if the specified user does not exist on the system OSError: if username is unspecified and the current user cannot be inferred Example: >>> import ubelt as ub >>> import os >>> import getpass >>> username = getpass.getuser() >>> userhome_target = os.path.expanduser('~') >>> userhome_got1 = ub.userhome() >>> userhome_got2 = ub.userhome(username) >>> print(f'username={username}') >>> print(f'userhome_got1={userhome_got1}') >>> print(f'userhome_got2={userhome_got2}') >>> print(f'userhome_target={userhome_target}') >>> assert userhome_got1 == userhome_target >>> assert userhome_got2 == userhome_target """ if username is None: # get home directory for the current user if 'HOME' in os.environ: userhome_dpath = os.environ['HOME'] else: # nocover if WIN32: # win32 fallback when HOME is not defined if 'USERPROFILE' in os.environ: userhome_dpath = os.environ['USERPROFILE'] elif 'HOMEPATH' in os.environ: drive = os.environ.get('HOMEDRIVE', '') userhome_dpath = join(drive, os.environ['HOMEPATH']) else: raise OSError("Cannot determine the user's home directory") else: # posix fallback when HOME is not defined import pwd userhome_dpath = pwd.getpwuid(os.getuid()).pw_dir else: # A specific user directory was requested if WIN32: # nocover # get the directory name for the current user c_users = dirname(userhome()) userhome_dpath = join(c_users, username) if not exists(userhome_dpath): raise KeyError('Unknown user: {}'.format(username)) else: import pwd try: pwent = pwd.getpwnam(username) except KeyError: # nocover raise KeyError('Unknown user: {}'.format(username)) userhome_dpath = pwent.pw_dir return userhome_dpath def shrinkuser(path, home='~'): """ Inverse of :func:`os.path.expanduser`. Args: path (str | PathLike): path in system file structure home (str): symbol used to replace the home path. Defaults to ``'~'``, but you might want to use ``'$HOME'`` or ``'%USERPROFILE%'`` instead. Returns: str: shortened path replacing the home directory with a symbol SeeAlso: :func:`ubelt.Path.shrinkuser` Example: >>> from ubelt.util_path import * # NOQA >>> path = expanduser('~') >>> assert path != '~' >>> assert shrinkuser(path) == '~' >>> assert shrinkuser(path + '1') == path + '1' >>> assert shrinkuser(path + '/1') == join('~', '1') >>> assert shrinkuser(path + '/1', '$HOME') == join('$HOME', '1') >>> assert shrinkuser('.') == '.' """ path = normpath(path) userhome_dpath = userhome() if path.startswith(userhome_dpath): if len(path) == len(userhome_dpath): path = home elif path[len(userhome_dpath)] == os.path.sep: path = home + path[len(userhome_dpath):] return path def expandpath(path): """ Shell-like environment variable and tilde path expansion. Args: path (str | PathLike): string representation of a path Returns: str: expanded path SeeAlso: :func:`ubelt.Path.expand` Example: >>> from ubelt.util_path import * # NOQA >>> import ubelt as ub >>> assert normpath(ub.expandpath('~/foo')) == join(ub.userhome(), 'foo') >>> assert ub.expandpath('foo') == 'foo' """ path = expanduser(path) path = expandvars(path) return path def ensuredir(dpath, mode=0o1777, verbose=0, recreate=False): r""" Ensures that directory will exist. Creates new dir with sticky bits by default Args: dpath (str | PathLike | Tuple[str | PathLike]): directory to create if it does not exist. mode (int): octal permissions if a new directory is created. Defaults to 0o1777. 
verbose (int): verbosity recreate (bool): if True removes the directory and all of its contents and creates a new empty directory. DEPRECATED: Use ``ub.Path(dpath).delete().ensuredir()`` instead. Returns: str: the ensured directory SeeAlso: :func:`ubelt.Path.ensuredir` Example: >>> import ubelt as ub >>> dpath = ub.Path.appdir('ubelt', 'ensuredir') >>> dpath.delete() >>> assert not dpath.exists() >>> ub.ensuredir(dpath) >>> assert dpath.exists() >>> dpath.delete() """ if isinstance(dpath, (list, tuple)): dpath = join(*dpath) if recreate: from ubelt import schedule_deprecation schedule_deprecation( modname='ubelt', migration='Use ``ub.Path(dpath).delete().ensuredir()`` instead', name='recreate', type='argument of ensuredir', deprecate='1.3.0', error='2.0.0', remove='2.1.0', ) util_io.delete(dpath, verbose=verbose) if not exists(dpath): if verbose: print('Ensuring directory (creating {!r})'.format(dpath)) os.makedirs(normpath(dpath), mode=mode, exist_ok=True) else: if verbose: print('Ensuring directory (existing {!r})'.format(dpath)) return dpath class ChDir: """ Context manager that changes the current working directory and then returns you to where you were. This is nearly the same as the stdlib :func:`contextlib.chdir`, with the exception that it will do nothing if the input path is None (i.e. the user did not want to change directories). SeeAlso: :func:`contextlib.chdir` Example: >>> import ubelt as ub >>> dpath = ub.Path.appdir('ubelt/tests/chdir').ensuredir() >>> dir1 = (dpath / 'dir1').ensuredir() >>> dir2 = (dpath / 'dir2').ensuredir() >>> with ChDir(dpath): >>> assert ub.Path.cwd() == dpath >>> # change to the given directory, and then returns back >>> with ChDir(dir1): >>> assert ub.Path.cwd() == dir1 >>> with ChDir(dir2): >>> assert ub.Path.cwd() == dir2 >>> # changes inside the context manager will be reset >>> os.chdir(dpath) >>> assert ub.Path.cwd() == dir1 >>> assert ub.Path.cwd() == dpath >>> with ChDir(dir1): >>> assert ub.Path.cwd() == dir1 >>> with ChDir(None): >>> assert ub.Path.cwd() == dir1 >>> # When disabled, the cwd does *not* reset at context exit >>> os.chdir(dir2) >>> assert ub.Path.cwd() == dir2 >>> os.chdir(dir1) >>> # Dont change dirs, but reset to your cwd at context end >>> with ChDir('.'): >>> os.chdir(dir2) >>> assert ub.Path.cwd() == dir1 >>> assert ub.Path.cwd() == dpath """ def __init__(self, dpath): """ Args: dpath (str | PathLike | None): The new directory to work in. If None, then the context manager is disabled. """ self._context_dpath = dpath self._orig_dpath = None def __enter__(self): """ Returns: ChDir: self """ if self._context_dpath is not None: self._orig_dpath = os.getcwd() os.chdir(self._context_dpath) return self def __exit__(self, ex_type, ex_value, ex_traceback): """ Args: ex_type (Type[BaseException] | None): ex_value (BaseException | None): ex_traceback (TracebackType | None): Returns: bool | None """ if self._context_dpath is not None: os.chdir(self._orig_dpath) class TempDir: """ Context for creating and cleaning up temporary directories. Warning: DEPRECATED. Use :mod:`tempfile` instead. Note: This exists because :class:`tempfile.TemporaryDirectory` was introduced in Python 3.2. Thus once ubelt no longer supports python 2.7, this class will be deprecated. Attributes: dpath (str | None): the temporary path Note: # WE MAY WANT TO KEEP THIS FOR WINDOWS. 
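    Note:
        Migration sketch using only the standard library (the recommended
        replacement per the deprecation notice above):

        .. code:: python

            import tempfile
            with tempfile.TemporaryDirectory() as dpath:
                # dpath exists inside the block and is removed on exit
                print(dpath)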
Example: >>> from ubelt.util_path import * # NOQA >>> with TempDir() as self: >>> dpath = self.dpath >>> assert exists(dpath) >>> assert not exists(dpath) Example: >>> from ubelt.util_path import * # NOQA >>> self = TempDir() >>> dpath = self.ensure() >>> assert exists(dpath) >>> self.cleanup() >>> assert not exists(dpath) """ def __init__(self): from ubelt import schedule_deprecation schedule_deprecation( modname='ubelt', migration='Use tempfile instead', name='TempDir', type='class', deprecate='1.2.0', error='1.4.0', remove='1.5.0', ) self.dpath = None def __del__(self): self.cleanup() def ensure(self): """ Returns: str: the path """ import tempfile if not self.dpath: self.dpath = tempfile.mkdtemp() return self.dpath def cleanup(self): if self.dpath: import shutil shutil.rmtree(self.dpath) self.dpath = None def start(self): """ Returns: TempDir: self """ self.ensure() return self def __enter__(self): """ Returns: TempDir: self """ return self.start() def __exit__(self, ex_type, ex_value, ex_traceback): """ Args: ex_type (Type[BaseException] | None): ex_value (BaseException | None): ex_traceback (TracebackType | None): Returns: bool | None """ self.cleanup() _PathBase = pathlib.WindowsPath if os.name == 'nt' else pathlib.PosixPath class Path(_PathBase): """ This class extends :class:`pathlib.Path` with extra functionality and convenience methods. New methods are designed to support chaining. In addition to new methods this class supports the addition (``+``) operator via which allows for better drop-in compatibility with code using existing string-based paths. Note: On windows this inherits from :class:`pathlib.WindowsPath`. New methods are * :py:meth:`ubelt.Path.ensuredir` - Like mkdir but with easier defaults. * :py:meth:`ubelt.Path.delete` - Previously pathlib could only remove one file at a time. * :py:meth:`ubelt.Path.copy` - Pathlib has no similar functionality. * :py:meth:`ubelt.Path.move` - Pathlib has no similar functionality. * :py:meth:`ubelt.Path.augment` - Unifies and extends disparate functionality across pathlib. * :py:meth:`ubelt.Path.expand` - Unifies existing environ and home expansion. * :py:meth:`ubelt.Path.ls` - Like iterdir, but more interactive. * :py:meth:`ubelt.Path.shrinkuser` - Python has no similar functionality. * :py:meth:`ubelt.Path.walk` - Pathlib had no similar functionality. New classmethods are * :py:meth:`ubelt.Path.appdir` - application directories Modified methods are * :py:meth:`ubelt.Path.touch` - returns self to support chaining * :py:meth:`ubelt.Path.chmod` - returns self to support chaining and now accepts string-based permission codes. Example: >>> # Ubelt extends pathlib functionality >>> import ubelt as ub >>> # Chain expansion and mkdir with cumbersome args. >>> dpath = ub.Path('~/.cache/ubelt/demo_path').expand().ensuredir() >>> fpath = dpath / 'text_file.txt' >>> # Augment is concise and chainable >>> aug_fpath = fpath.augment(stemsuffix='.aux', ext='.jpg').touch() >>> aug_dpath = dpath.augment(stemsuffix='demo_path2') >>> assert aug_fpath.read_text() == '' >>> fpath.write_text('text data') >>> assert aug_fpath.exists() >>> # Delete is akin to "rm -rf" and is also chainable. 
>>> assert not aug_fpath.delete().exists() >>> assert dpath.exists() >>> assert not dpath.delete().exists() >>> print(f'{str(fpath.shrinkuser()).replace(os.path.sep, "/")}') >>> print(f'{str(dpath.shrinkuser()).replace(os.path.sep, "/")}') >>> print(f'{str(aug_fpath.shrinkuser()).replace(os.path.sep, "/")}') >>> print(f'{str(aug_dpath.shrinkuser()).replace(os.path.sep, "/")}') ~/.cache/ubelt/demo_path/text_file.txt ~/.cache/ubelt/demo_path ~/.cache/ubelt/demo_path/text_file.aux.jpg ~/.cache/ubelt/demo_pathdemo_path2 Inherited unmodified properties from :class:`pathlib.Path` are: * :py:data:`pathlib.PurePath.anchor` * :py:data:`pathlib.PurePath.name` * :py:data:`pathlib.PurePath.parts` * :py:data:`pathlib.PurePath.parent` * :py:data:`pathlib.PurePath.parents` * :py:data:`pathlib.PurePath.suffix` * :py:data:`pathlib.PurePath.suffixes` * :py:data:`pathlib.PurePath.stem` * :py:data:`pathlib.PurePath.drive` * :py:data:`pathlib.PurePath.root` Inherited unmodified classmethods from :class:`pathlib.Path` are: * :py:meth:`pathlib.Path.cwd` * :py:meth:`pathlib.Path.home` Inherited unmodified methods from :class:`pathlib.Path` are: * :py:meth:`pathlib.Path.samefile` * :py:meth:`pathlib.Path.iterdir` * :py:meth:`pathlib.Path.glob` * :py:meth:`pathlib.Path.rglob` * :py:meth:`pathlib.Path.resolve` * :py:meth:`pathlib.Path.lstat` * :py:meth:`pathlib.Path.stat` * :py:meth:`pathlib.Path.owner` * :py:meth:`pathlib.Path.group` * :py:meth:`pathlib.Path.open` * :py:meth:`pathlib.Path.read_bytes` * :py:meth:`pathlib.Path.read_text` * :py:meth:`pathlib.Path.write_bytes` * :py:meth:`pathlib.Path.write_text` * :py:meth:`pathlib.Path.readlink` * :py:meth:`pathlib.Path.mkdir` - we recommend :py:meth:`ubelt.Path.ensuredir` instead. * :py:meth:`pathlib.Path.lchmod` * :py:meth:`pathlib.Path.unlink` * :py:meth:`pathlib.Path.rmdir` * :py:meth:`pathlib.Path.rename` * :py:meth:`pathlib.Path.replace` * :py:meth:`pathlib.Path.symlink_to` * :py:meth:`pathlib.Path.hardlink_to` * :py:meth:`pathlib.Path.link_to` - deprecated * :py:meth:`pathlib.Path.exists` * :py:meth:`pathlib.Path.is_dir` * :py:meth:`pathlib.Path.is_file` * :py:meth:`pathlib.Path.is_mount` * :py:meth:`pathlib.Path.is_symlink` * :py:meth:`pathlib.Path.is_block_device` * :py:meth:`pathlib.Path.is_char_device` * :py:meth:`pathlib.Path.is_fifo` * :py:meth:`pathlib.Path.is_socket` * :py:meth:`pathlib.Path.expanduser` - we recommend :py:meth:`ubelt.Path.expand` instead. * :py:meth:`pathlib.PurePath.as_posix` * :py:meth:`pathlib.PurePath.as_uri` * :py:meth:`pathlib.PurePath.with_name` - we recommend :py:meth:`ubelt.Path.augment` instead. * :py:meth:`pathlib.PurePath.with_stem` - we recommend :py:meth:`ubelt.Path.augment` instead. * :py:meth:`pathlib.PurePath.with_suffix` - we recommend :py:meth:`ubelt.Path.augment` instead. * :py:meth:`pathlib.PurePath.relative_to` * :py:meth:`pathlib.PurePath.joinpath` * :py:meth:`pathlib.PurePath.is_relative_to` * :py:meth:`pathlib.PurePath.is_absolute` * :py:meth:`pathlib.PurePath.is_reserved` * :py:meth:`pathlib.PurePath.match` """ __slots__ = () @classmethod def appdir(cls, appname=None, *args, type='cache'): """ Returns a standard platform specific directory for an application to use as cache, config, or data. 
The default root location depends on the platform and is specified in the following table:

TextArt:

           | POSIX            | Windows        | MacOSX
    data   | $XDG_DATA_HOME   | %APPDATA%      | ~/Library/Application Support
    config | $XDG_CONFIG_HOME | %APPDATA%      | ~/Library/Application Support
    cache  | $XDG_CACHE_HOME  | %LOCALAPPDATA% | ~/Library/Caches

    If an environment variable is not specified the defaults are:

        APPDATA = ~/AppData/Roaming
        LOCALAPPDATA = ~/AppData/Local
        XDG_DATA_HOME = ~/.local/share
        XDG_CACHE_HOME = ~/.cache
        XDG_CONFIG_HOME = ~/.config

Args: appname (str | None): The name of the application. *args : optional subdirs type (str): the type of data expected to be stored in this application directory. Valid options are 'cache', 'config', or 'data'. Returns: Path: a new path object for the specified application directory. SeeAlso: This provides functionality similar to the `appdirs <https://pypi.org/project/appdirs>`_ and `platformdirs <https://pypi.org/project/platformdirs>`_ packages. Example: >>> # xdoctest: +IGNORE_WANT >>> import ubelt as ub >>> print(ub.Path.appdir('ubelt', type='cache').shrinkuser()) >>> print(ub.Path.appdir('ubelt', type='config').shrinkuser()) >>> print(ub.Path.appdir('ubelt', type='data').shrinkuser()) ~/.cache/ubelt ~/.config/ubelt ~/.local/share/ubelt >>> import pytest >>> with pytest.raises(KeyError): >>> ub.Path.appdir('ubelt', type='other') Example: >>> # xdoctest: +IGNORE_WANT >>> import ubelt as ub >>> # Can now call appdir without any arguments >>> print(ub.Path.appdir().shrinkuser()) ~/.cache """ from ubelt import util_platform if type == 'cache': base = util_platform.platform_cache_dir() elif type == 'config': base = util_platform.platform_config_dir() elif type == 'data': base = util_platform.platform_data_dir() else: raise KeyError(type) if appname is None: return cls(base, *args) else: return cls(base, appname, *args) def augment(self, prefix='', stemsuffix='', ext=None, stem=None, dpath=None, tail='', relative=None, multidot=False, suffix=''): """ Create a new path with a different extension, basename, directory, prefix, and/or suffix. A prefix is inserted before the basename. A stemsuffix is inserted between the basename and the extension. The tail is placed at the very end of the path. The basename and extension can be replaced with a new one. Essentially a path is broken down into components (dpath, stem, ext), and then recombined as (dpath, prefix, stem, stemsuffix, ext, tail) after replacing any specified component. Args: prefix (str): Text placed in front of the stem. Defaults to ''. stemsuffix (str): Text placed between the stem and extension. Defaults to ''. ext (str | None): If specified, replaces the extension stem (str | None): If specified, replaces the stem (i.e. basename without extension). dpath (str | PathLike | None): If specified, replaces the specified "relative" directory, which by default is the parent directory. tail (str | None): If specified, appends this text to the very end of the path - after the extension. relative (str | PathLike | None): Replaces ``relative`` with ``dpath`` in ``path``. Has no effect if ``dpath`` is not specified. Defaults to the dirname of the input ``path``. *experimental* not currently implemented. multidot (bool): Allows extensions to contain multiple dots. Specifically, if False, everything after the last dot in the basename is the extension. If True, everything after the first dot in the basename is the extension.
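For instance, a short sketch of the ``multidot`` behavior (hypothetical filenames; outputs follow the documented :func:`ubelt.augpath` semantics):

    >>> import ubelt as ub
    >>> p = ub.Path('archive.tar.gz')
    >>> print(p.augment(ext='.zip'))
    archive.tar.zip
    >>> print(p.augment(ext='.zip', multidot=True))
    archive.zip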
SeeAlso: * :func:`ubelt.augpath` * :py:meth:`pathlib.PurePath.with_stem` * :py:meth:`pathlib.PurePath.with_name` * :py:meth:`pathlib.PurePath.with_suffix` Returns: Path: augmented path Warning: NOTICE OF BACKWARDS INCOMPATIBILITY. THE INITIAL RELEASE OF Path.augment suffered from an unfortunate variable naming decision that conflicts with pathlib.Path. .. code:: python p = ub.Path('the.entire.fname.or.dname.is.the.name.exe') print(f'p ={p}') print(f'p.name={p.name}') p = ub.Path('the.stem.ends.here.ext') print(f'p ={p}') print(f'p.stem={p.stem}') p = ub.Path('only.the.last.dot.is.the.suffix') print(f'p ={p}') print(f'p.suffix={p.suffix}') p = ub.Path('but.all.suffixes.can.be.recovered') print(f'p ={p}') print(f'p.suffixes={p.suffixes}') Example: >>> import ubelt as ub >>> path = ub.Path('foo.bar') >>> suffix = '_suff' >>> prefix = 'pref_' >>> ext = '.baz' >>> newpath = path.augment(prefix=prefix, stemsuffix=suffix, ext=ext, stem='bar') >>> print('newpath = {!r}'.format(newpath)) newpath = Path('pref_bar_suff.baz') Example: >>> import ubelt as ub >>> path = ub.Path('foo.bar') >>> stemsuffix = '_suff' >>> prefix = 'pref_' >>> ext = '.baz' >>> newpath = path.augment(prefix=prefix, stemsuffix=stemsuffix, ext=ext, stem='bar') >>> print('newpath = {!r}'.format(newpath)) Example: >>> # Compare our augpath(ext=...) versus pathlib with_suffix(...) >>> import ubelt as ub >>> cases = [ >>> ub.Path('no_ext'), >>> ub.Path('one.ext'), >>> ub.Path('double..dot'), >>> ub.Path('two.many.cooks'), >>> ub.Path('path.with.three.dots'), >>> ub.Path('traildot.'), >>> ub.Path('doubletraildot..'), >>> ub.Path('.prefdot'), >>> ub.Path('..doubleprefdot'), >>> ] >>> for path in cases: >>> print('--') >>> print('path = {}'.format(ub.repr2(path, nl=1))) >>> ext = '.EXT' >>> method_pathlib = path.with_suffix(ext) >>> method_augment = path.augment(ext=ext) >>> if method_pathlib == method_augment: >>> print(ub.color_text('agree', 'green')) >>> else: >>> print(ub.color_text('disagree', 'red')) >>> print('path.with_suffix({}) = {}'.format(ext, ub.repr2(method_pathlib, nl=1))) >>> print('path.augment(ext={}) = {}'.format(ext, ub.repr2(method_augment, nl=1))) >>> print('--') """ if suffix: # nocover from ubelt.util_deprecate import schedule_deprecation schedule_deprecation( 'ubelt', 'suffix', 'arg', deprecate='1.1.3', remove='1.4.0', migration='Use stemsuffix instead', ) if not stemsuffix: stemsuffix = suffix warnings.warn( 'DEVELOPER NOTICE: The ubelt.Path.augment function may ' 'experience a BACKWARDS INCOMPATIBLE update in the future ' 'having to do with the suffix argument to ub.Path.augment. ' 'To avoid any issue use the ``stemsuffix`` argument or use the ' '``ubelt.augpath`` function instead. ' 'If you see this warning, please make an ' 'issue on https://github.com/Erotemic/ubelt/issues indicating ' 'that there are users of this function in the wild. If there ' 'are none, then this signature will be "fixed", but if anyone ' 'depends on this feature then we will continue to support it as ' 'is.' ) aug = augpath(self, suffix=stemsuffix, prefix=prefix, ext=ext, base=stem, dpath=dpath, relative=relative, multidot=multidot, tail=tail) new = self.__class__(aug) return new def delete(self): """ Removes a file or recursively removes a directory. If a path does not exist, then this does nothing.
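Unlike :py:meth:`pathlib.Path.unlink` or :py:meth:`pathlib.Path.rmdir`, this single method handles both files and directory trees, loosely analogous to ``rm -rf``.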
SeeAlso: :func:`ubelt.delete` Returns: Path: reference to self Example: >>> import ubelt as ub >>> from os.path import join >>> base = ub.Path.appdir('ubelt', 'delete_test2') >>> dpath1 = (base / 'dir').ensuredir() >>> (base / 'dir' / 'subdir').ensuredir() >>> (base / 'dir' / 'to_remove1.txt').touch() >>> fpath1 = (base / 'dir' / 'subdir' / 'to_remove3.txt').touch() >>> fpath2 = (base / 'dir' / 'subdir' / 'to_remove2.txt').touch() >>> assert all(p.exists() for p in [dpath1, fpath1, fpath2]) >>> fpath1.delete() >>> assert all(p.exists() for p in [dpath1, fpath2]) >>> assert not fpath1.exists() >>> dpath1.delete() >>> assert not any(p.exists() for p in [dpath1, fpath1, fpath2]) """ util_io.delete(self) return self def ensuredir(self, mode=0o777): """ Concise alias of ``self.mkdir(parents=True, exist_ok=True)`` Args: mode (int): octal permissions if a new directory is created. Defaults to 0o777. Returns: Path: returns itself Example: >>> import ubelt as ub >>> cache_dpath = ub.Path.appdir('ubelt').ensuredir() >>> dpath = ub.Path(cache_dpath, 'newdir') >>> dpath.delete() >>> assert not dpath.exists() >>> dpath.ensuredir() >>> assert dpath.exists() >>> dpath.rmdir() """ self.mkdir(mode=mode, parents=True, exist_ok=True) return self def mkdir(self, mode=511, parents=False, exist_ok=False): """ Create a new directory at this given path. Note: The ubelt extension is the same as the original pathlib method, except this returns the path instead of None. Args: mode (int) : permission bits parents (bool) : if True, create parent directories as needed exist_ok (bool) : if False, raise an error when the directory already exists Returns: Path: returns itself """ super().mkdir(mode=mode, parents=parents, exist_ok=exist_ok) return self def expand(self): """ Expands user tilde and environment variables. Concise alias of ``Path(os.path.expandvars(self.expanduser()))`` Returns: Path: path with expanded environment variables and tildes Example: >>> import ubelt as ub >>> home_v1 = ub.Path('~/').expand() >>> home_v2 = ub.Path.home() >>> print('home_v1 = {!r}'.format(home_v1)) >>> print('home_v2 = {!r}'.format(home_v2)) >>> assert home_v1 == home_v2 """ return self.expandvars().expanduser() def expandvars(self): """ As discussed in [CPythonIssue21301]_, CPython won't be adding expandvars to pathlib. I think this is a mistake, so I added it in this extension. Returns: Path: path with expanded environment variables References: .. [CPythonIssue21301] https://bugs.python.org/issue21301 """ return self.__class__(os.path.expandvars(self)) def ls(self, pattern=None): """ A convenience function to list all paths in a directory. This is a wrapper around iterdir that returns the results as a list instead of a generator. This is mainly for faster navigation in IPython. In production code ``iterdir`` or ``glob`` should be used instead. Args: pattern (None | str): if specified, performs a glob instead of an iterdir. Returns: List['Path']: an eagerly evaluated list of paths Note: When pattern is specified only paths matching the pattern are returned, not the paths inside matched directories. This is different than bash semantics where the pattern is first expanded and then ls is performed on all matching paths.
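For instance, a hedged sketch of that difference (using a tree like the one in the example below):

.. code:: python

    self.ls('dir1')  # -> [Path('dir1')]: the matching path itself
    # whereas bash ``ls dir1`` would instead list its contents, e.g. file3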
Example: >>> import ubelt as ub >>> self = ub.Path.appdir('ubelt/tests/ls') >>> (self / 'dir1').ensuredir() >>> (self / 'dir2').ensuredir() >>> (self / 'file1').touch() >>> (self / 'file2').touch() >>> (self / 'dir1/file3').touch() >>> (self / 'dir2/file4').touch() >>> children = self.ls() >>> assert isinstance(children, list) >>> print(ub.repr2(sorted([p.relative_to(self) for p in children]))) [ Path('dir1'), Path('dir2'), Path('file1'), Path('file2'), ] >>> children = self.ls('dir*/*') >>> assert isinstance(children, list) >>> print(ub.repr2(sorted([p.relative_to(self) for p in children]))) [ Path('dir1/file3'), Path('dir2/file4'), ] """ if pattern is None: return list(self.iterdir()) else: return list(self.glob(pattern)) # TODO: # def _glob(self): # """ # I would like some way of globbing using patterns contained in the path # itself. Perhaps this goes into expand? # """ # import glob # yield from map(self.__class__, glob.glob(self)) def shrinkuser(self, home='~'): """ Shrinks your home directory by replacing it with a tilde. This is the inverse of :func:`os.path.expanduser`. Args: home (str): symbol used to replace the home path. Defaults to '~', but you might want to use '$HOME' or '%USERPROFILE%' instead. Returns: Path: shortened path replacing the home directory with a symbol Example: >>> import ubelt as ub >>> path = ub.Path('~').expand() >>> assert str(path.shrinkuser()) == '~' >>> assert str(ub.Path((str(path) + '1')).shrinkuser()) == str(path) + '1' >>> assert str((path / '1').shrinkuser()) == join('~', '1') >>> assert str((path / '1').shrinkuser('$HOME')) == join('$HOME', '1') >>> assert str(ub.Path('.').shrinkuser()) == '.' """ shrunk = shrinkuser(self, home) new = self.__class__(shrunk) return new def chmod(self, mode, follow_symlinks=True): """ Change the permissions of the path, like os.chmod(). Args: mode (int | str): either a stat code to pass directly to :func:`os.chmod` or a string-based code to construct modified permissions. See note for details on the string-based chmod codes. follow_symlinks (bool): if True, and this path is a symlink, modify permission of the file it points to, otherwise if False, modify the link permission. Note: From the chmod man page: The format of a symbolic mode is [ugoa...][[-+=][perms...]...], where perms is either zero or more letters from the set rwxXst, or a single letter from the set ugo. Multiple symbolic modes can be given, separated by commas. Note: Like :func:`os.chmod`, this may not work on Windows or on certain filesystems. Returns: Path: returns self for chaining Example: >>> # xdoctest: +REQUIRES(POSIX) >>> import ubelt as ub >>> from ubelt.util_path import _encode_chmod_int >>> dpath = ub.Path.appdir('ubelt/tests/chmod').ensuredir() >>> fpath = (dpath / 'file.txt').touch() >>> fpath.chmod('ugo+rw,ugo-x') >>> print(_encode_chmod_int(fpath.stat().st_mode)) u=rw,g=rw,o=rw >>> fpath.chmod('o-rwx') >>> print(_encode_chmod_int(fpath.stat().st_mode)) u=rw,g=rw >>> fpath.chmod(0o646) >>> print(_encode_chmod_int(fpath.stat().st_mode)) u=rw,g=r,o=rw """ if isinstance(mode, str): # Resolve mode # Follow symlinks was added to pathlib.Path.stat in 3.10 # but os.stat has had it since 3.3, so use that instead. old_mode = os.stat(self, follow_symlinks=follow_symlinks).st_mode # old_mode = self.stat(follow_symlinks=follow_symlinks).st_mode mode = _resolve_chmod_code(old_mode, mode) os.chmod(self, mode, follow_symlinks=follow_symlinks) return self # Should not need to modify unless we want chainability here.
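# A hedged usage sketch of the string-based codes: because ``touch`` and
# ``chmod`` both return ``self``, permission setup can chain in a single
# expression (hypothetical path; assumes a POSIX filesystem where mode bits
# apply), e.g.:
#
#     fpath = ub.Path('script.sh').touch().chmod('u+x,go-w')
#
# which is equivalent to a touch followed by a separate chmod call.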
# def lchmod(self, mode): # """ # Like chmod(), except if the path points to a symlink, the symlink's # permissions are changed, rather than its target's. # # Args: # mode (int | str): either a stat code to pass directly to # :func:`os.chmod` or a string-based code to construct modified # permissions. # # Returns: # Path: returns self for chaining # # Example: # >>> import ubelt as ub # >>> from ubelt.util_path import _encode_chmod_int # >>> dpath = ub.Path.appdir('ubelt/tests/chmod').ensuredir() # >>> fpath = (dpath / 'file1.txt').delete().touch() # >>> lpath = (dpath / 'link1.txt').delete() # >>> lpath.symlink_to(fpath) # >>> print(_encode_chmod_int(fpath.stat().st_mode)) # >>> lpath.lchmod('a+rwx') # >>> print(_encode_chmod_int(fpath.stat().st_mode)) # """ # return self.chmod(mode, follow_symlinks=False) # TODO: # chainable symlink_to that returns the new link # chainable hardlink_to that returns the new link # probably can just uncomment when ready for a new feature # def symlink_to(self, target, target_is_directory=False): # """ # Make this path a symlink pointing to the target path. # """ # super().symlink_to(target, target_is_directory=target_is_directory) # return self # def hardlink_to(self, target): # """ # Make this path a hard link pointing to the same file as *target*. # """ # super().hardlink_to(target) # return self def touch(self, mode=0o0666, exist_ok=True): """ Create this file with the given access mode, if it doesn't exist. Returns: Path: returns itself Note: The :func:`ubelt.util_io.touch` function currently has a slightly different implementation. This uses whatever the pathlib version is. This may change in the future. """ # modify touch to return self # Note: util_io.touch is more expressive than standard python # touch, may want to use that instead. super().touch(mode=mode, exist_ok=exist_ok) return self def walk(self, topdown=True, onerror=None, followlinks=False): """ A variant of :func:`os.walk` for pathlib Args: topdown (bool): if True starts yield nodes closer to the root first otherwise yield nodes closer to the leaves first. onerror (Callable[[OSError], None] | None): A function with one argument of type OSError. If the error is raised the walk is aborted, otherwise it continues. followlinks (bool): if True recurse into symbolic directory links Yields: Tuple['Path', List[str], List[str]]: the root path, directory names, and file names Example: >>> import ubelt as ub >>> self = ub.Path.appdir('ubelt/tests/ls') >>> (self / 'dir1').ensuredir() >>> (self / 'dir2').ensuredir() >>> (self / 'file1').touch() >>> (self / 'file2').touch() >>> (self / 'dir1/file3').touch() >>> (self / 'dir2/file4').touch() >>> subdirs = list(self.walk()) >>> assert len(subdirs) == 3 Example: >>> # Modified from the stdlib >>> import os >>> from os.path import join, getsize >>> import email >>> import ubelt as ub >>> base = ub.Path(email.__file__).parent >>> for root, dirs, files in base.walk(): >>> print(root, " consumes", end="") >>> print(sum(getsize(join(root, name)) for name in files), end="") >>> print("bytes in ", len(files), " non-directory files") >>> if 'CVS' in dirs: >>> dirs.remove('CVS') # don't visit CVS directories """ cls = self.__class__ walker = os.walk(self, topdown=topdown, onerror=onerror, followlinks=followlinks) for root, dnames, fnames in walker: yield (cls(root), dnames, fnames) def __add__(self, other): """ Returns a new string starting with this fspath representation. 
Returns: str Allows ubelt.Path to be a better drop-in replacement when working with string-based paths. Note: It is not recommended to write new code that uses this behavior. This exists to make it easier to transition existing str-based paths to pathlib. Example: >>> import ubelt as ub >>> base = ub.Path('base') >>> base_ = ub.Path('base/') >>> base2 = ub.Path('base/2') >>> assert base + 'foo' == 'basefoo' >>> assert base_ + 'foo' == 'basefoo' >>> assert base2 + 'foo' == str(base2.augment(tail='foo')) """ return os.fspath(self) + other def __radd__(self, other): """ Returns a new string ending with this fspath representation. Returns: str Allows ubelt.Path to be a better drop-in replacement when working with string-based paths. Note: It is not recommended to write new code that uses this behavior. This exists to make it easier to transition existing str-based paths to pathlib. Example: >>> import ubelt as ub >>> base = ub.Path('base') >>> base_ = ub.Path('base/') >>> base2 = ub.Path('base/2') >>> assert 'foo' + base == 'foobase' >>> assert 'foo' + base_ == 'foobase' >>> assert 'foo' + base2 == str(base2.augment(dpath='foobase')) """ return other + os.fspath(self) def endswith(self, suffix, *args): """ Test if the fspath representation ends with ``suffix``. Allows ubelt.Path to be a better drop-in replacement when working with string-based paths. Args: suffix (str | Tuple[str, ...]): One or more suffixes to test for *args: start (int): if specified begin testing at this position. end (int): if specified stop testing at this position. Returns: bool: True if any of the suffixes match. Example: >>> import ubelt as ub >>> base = ub.Path('base') >>> assert base.endswith('se') >>> assert not base.endswith('be') >>> # test start / stop cases >>> assert ub.Path('aabbccdd').endswith('cdd', 5) >>> assert not ub.Path('aabbccdd').endswith('cdd', 6) >>> assert ub.Path('aabbccdd').endswith('cdd', 5, 10) >>> assert not ub.Path('aabbccdd').endswith('cdd', 5, 7) >>> # test tuple case >>> assert ub.Path('aabbccdd').endswith(('foo', 'cdd')) >>> assert ub.Path('foo').endswith(('foo', 'cdd')) >>> assert not ub.Path('bar').endswith(('foo', 'cdd')) """ return os.fspath(self).endswith(suffix, *args) def startswith(self, prefix, *args): """ Test if the fspath representation starts with ``prefix``. Allows ubelt.Path to be a better drop-in replacement when working with string-based paths. Args: prefix (str | Tuple[str, ...]): One or more prefixes to test for *args: start (int): if specified begin testing at this position. end (int): if specified stop testing at this position. Returns: bool: True if any of the prefixes match. 
Example: >>> import ubelt as ub >>> base = ub.Path('base') >>> assert base.startswith('base') >>> assert not base.startswith('all your') >>> # test start / stop cases >>> assert ub.Path('aabbccdd').startswith('aab', 0) >>> assert ub.Path('aabbccdd').startswith('aab', 0, 5) >>> assert not ub.Path('aabbccdd').startswith('aab', 1, 5) >>> assert not ub.Path('aabbccdd').startswith('aab', 0, 2) >>> # test tuple case >>> assert ub.Path('aabbccdd').startswith(('foo', 'aab')) >>> assert ub.Path('foo').startswith(('foo', 'aab')) >>> assert not ub.Path('bar').startswith(('foo', 'aab')) """ return os.fspath(self).startswith(prefix, *args) # More shutil functionality # This is discussed in https://peps.python.org/pep-0428/#filesystem-modification def _request_copy_function(self, follow_file_symlinks=True, follow_dir_symlinks=True, meta='stats'): """ Get a copy_function based on specified capabilities """ import shutil # Note: Avoiding the use of the partial enables shutil optimizations from functools import partial if meta is None: if follow_file_symlinks: copy_function = shutil.copyfile else: copy_function = partial(shutil.copyfile, follow_symlinks=follow_file_symlinks) elif meta == 'stats': if follow_file_symlinks: copy_function = shutil.copy2 else: copy_function = partial(shutil.copy2, follow_symlinks=follow_file_symlinks) elif meta == 'mode': if follow_file_symlinks: copy_function = shutil.copy else: copy_function = partial(shutil.copy, follow_symlinks=follow_file_symlinks) else: raise KeyError(meta) return copy_function def copy(self, dst, follow_file_symlinks=False, follow_dir_symlinks=False, meta='stats', overwrite=False): """ Copy this file or directory to dst. By default files are never overwritten and symlinks are copied as-is. At a basic level (i.e. ignoring symlinks), each path argument (``src`` and ``dst``) can be a file, a directory, or not exist. Given these three states, the following table summarizes how this function copies this path to its destination.

TextArt:

    +----------+------------------------+------------------------+----------+
    | dst      | dir                    | file                   | no-exist |
    +----------+                        |                        |          |
    | src      |                        |                        |          |
    +==========+========================+========================+==========+
    | dir      | error-or-overwrite-dst | error                  | dst      |
    +----------+------------------------+------------------------+----------+
    | file     | dst / src.name         | error-or-overwrite-dst | dst      |
    +----------+------------------------+------------------------+----------+
    | no-exist | error                  | error                  | error    |
    +----------+------------------------+------------------------+----------+

In general, the contents of dst will end up being the contents of src, except for the one case where a file is copied into an existing directory. In this case the name is used to construct a fully qualified destination. Args: dst (str | PathLike): if ``src`` is a file and ``dst`` does not exist, copies this to ``dst`` if ``src`` is a file and ``dst`` is a directory, copies this to ``dst / src.name`` if ``src`` is a directory and ``dst`` does not exist, copies this to ``dst`` if ``src`` is a directory and ``dst`` is a directory, errors unless overwrite is True, in which case, copies this to ``dst`` and overwrites any conflicting paths. follow_file_symlinks (bool): If True and src is a link, the link will be resolved before it is copied (i.e. the data is duplicated), otherwise just the link itself will be copied.
follow_dir_symlinks (bool): if True when src is a directory and contains symlinks to other directories, the contents of the linked data are copied, otherwise when False only the link itself is copied. meta (str | None): Indicates what metadata bits to copy. This can be 'stats' which tries to copy all metadata (i.e. like :py:func:`shutil.copy2`), 'mode' which copies just the permission bits (i.e. like :py:func:`shutil.copy`), or None, which ignores all metadata (i.e. like :py:func:`shutil.copyfile`). overwrite (bool): if False, and target file exists, this will raise an error, otherwise the file will be overwritten. Returns: Path: where the path was copied to Note: This is implemented with a combination of :func:`shutil.copy`, :func:`shutil.copy2`, and :func:`shutil.copytree`, but the defaults and behavior here are different (and ideally safer and more intuitive). Note: Unlike cp on Linux, copying a src directory into a dst directory will not implicitly add the src directory name to the dst directory. This means we cannot copy directory ``<parent>/<dname>`` to ``<dst>`` and expect the result to be ``<dst>/<dname>``. Conceptually you can expect ``<parent>/<dname>/<content>`` to exist in ``<dst>/<content>``. Example: >>> import ubelt as ub >>> root = ub.Path.appdir('ubelt', 'tests', 'path', 'copy').delete().ensuredir() >>> paths = {} >>> dpath = (root / 'orig').ensuredir() >>> clone0 = (root / 'dst_is_explicit').ensuredir() >>> clone1 = (root / 'dst_is_parent').ensuredir() >>> paths['fpath'] = (dpath / 'file0.txt').touch() >>> paths['empty_dpath'] = (dpath / 'empty_dpath').ensuredir() >>> paths['nested_dpath'] = (dpath / 'nested_dpath').ensuredir() >>> (dpath / 'nested_dpath/d0').ensuredir() >>> (dpath / 'nested_dpath/d0/f1.txt').touch() >>> (dpath / 'nested_dpath/d0/f2.txt').touch() >>> print('paths = {}'.format(ub.repr2(paths, nl=1))) >>> assert all(p.exists() for p in paths.values()) >>> paths['fpath'].copy(clone0 / 'file0.txt') >>> paths['fpath'].copy(clone1) >>> paths['empty_dpath'].copy(clone0 / 'empty_dpath') >>> paths['empty_dpath'].copy((clone1 / 'empty_dpath_alt').ensuredir(), overwrite=True) >>> paths['nested_dpath'].copy(clone0 / 'nested_dpath') >>> paths['nested_dpath'].copy((clone1 / 'nested_dpath_alt').ensuredir(), overwrite=True) Ignore: # Enumerate cases rows = [ {'src': 'no-exist', 'dst': 'no-exist', 'result': 'error'}, {'src': 'no-exist', 'dst': 'file', 'result': 'error'}, {'src': 'no-exist', 'dst': 'dir', 'result': 'error'}, {'src': 'file', 'dst': 'no-exist', 'result': 'dst'}, {'src': 'file', 'dst': 'dir', 'result': 'dst / src.name'}, {'src': 'file', 'dst': 'file', 'result': 'error-or-overwrite-dst'}, {'src': 'dir', 'dst': 'no-exist', 'result': 'dst'}, {'src': 'dir', 'dst': 'dir', 'result': 'error-or-overwrite-dst'}, {'src': 'dir', 'dst': 'file', 'result': 'error'}, ] import pandas as pd df = pd.DataFrame(rows) piv = df.pivot(index=['src'], columns=['dst'], values='result') print(piv.to_markdown(tablefmt="grid", index=True)) See: ~/code/ubelt/tests/test_path.py for test cases """ import shutil copy_function = self._request_copy_function( follow_file_symlinks=follow_file_symlinks, follow_dir_symlinks=follow_dir_symlinks, meta=meta) if WIN32 and platform.python_implementation() == 'PyPy': _patch_win32_stats_on_pypy() if self.is_dir(): if sys.version_info[0:2] < (3, 8): # nocover copytree = _compat_copytree else: copytree = shutil.copytree dst = copytree( self, dst, copy_function=copy_function, symlinks=not follow_dir_symlinks, dirs_exist_ok=overwrite) elif self.is_file(): if not overwrite: dst = Path(dst) if dst.is_dir(): real_dst = dst / 
self.name else: real_dst = dst if real_dst.exists(): raise FileExistsError('Cannot overwrite existing file unless overwrite=True') dst = copy_function(self, dst) else: raise FileNotFoundError('The source path does not exist') return Path(dst) def move(self, dst, follow_file_symlinks=False, follow_dir_symlinks=False, meta='stats'): """ Move a file from one location to another, or recursively move a directory from one location to another. This method will refuse to overwrite anything, and there is currently no overwrite option for technical reasons. This may change in the future. Args: dst (str | PathLike): A non-existing path where this file will be moved. follow_file_symlinks (bool): If True and src is a link, the link will be resolved before it is copied (i.e. the data is duplicated), otherwise just the link itself will be copied. follow_dir_symlinks (bool): if True when src is a directory and contains symlinks to other directories, the contents of the linked data are copied, otherwise when False only the link itself is copied. meta (str | None): Indicates what metadata bits to copy. This can be 'stats' which tries to copy all metadata (i.e. like shutil.copy2), 'mode' which copies just the permission bits (i.e. like shutil.copy), or None, which ignores all metadata (i.e. like shutil.copyfile). Note: This method will refuse to overwrite anything. This is implemented via :func:`shutil.move`, which depends heavily on :func:`os.rename` semantics. For this reason, this function will error if it would overwrite any data. If you want an overwriting variant of move we recommend you either copy the data, and then delete the original (potentially inefficient), or use :func:`shutil.move` directly if you know how :func:`os.rename` works on your system. Returns: Path: where the path was moved to Example: >>> import ubelt as ub >>> dpath = ub.Path.appdir('ubelt', 'tests', 'path', 'move').delete().ensuredir() >>> paths = {} >>> paths['dpath0'] = (dpath / 'dpath0').ensuredir() >>> paths['dpath00'] = (dpath / 'dpath0' / 'sub0').ensuredir() >>> paths['fpath000'] = (dpath / 'dpath0' / 'sub0' / 'f0.txt').touch() >>> paths['fpath001'] = (dpath / 'dpath0' / 'sub0' / 'f1.txt').touch() >>> paths['dpath01'] = (dpath / 'dpath0' / 'sub1').ensuredir() >>> print('paths = {}'.format(ub.repr2(paths, nl=1))) >>> assert all(p.exists() for p in paths.values()) >>> paths['dpath0'].move(dpath / 'dpath1') """ # Behave more like POSIX move to avoid potential confusing behavior if exists(dst): raise FileExistsError( "Moves are only allowed to locations that don't exist") import shutil if WIN32 and platform.python_implementation() == 'PyPy': _patch_win32_stats_on_pypy() copy_function = self._request_copy_function( follow_file_symlinks=follow_file_symlinks, follow_dir_symlinks=follow_dir_symlinks, meta=meta) real_dst = shutil.move(self, dst, copy_function=copy_function) return Path(real_dst) def _parse_chmod_code(code): """ Expand a chmod code into a list of actions. Args: code (str): of the form: [ugoa…][-+=]perms…[,…] perms is either zero or more letters from the set rwxXst, or a single letter from the set ugo. Yields: Tuple[str, str, str]: target, op, and perms. The target is modified by the operation using the value. target -- specified as 'u' for user, 'g' for group, 'o' for other. op -- specified as '+' to add, '-' to remove, or '=' to assign. val -- specified as 'r' for read, 'w' for write, or 'x' for execute.
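These action tuples are what :func:`_resolve_chmod_code` consumes; roughly speaking, a tuple like ``('u', '+', 'x')`` resolves to OR-ing ``stat.S_IXUSR`` into the existing mode.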
Notes: The perm symbol X shall represent the execute/search portion of the file mode bits if the file is a directory or if the current (unmodified) file mode bits have at least one of the execute bits (S_IXUSR, S_IXGRP, or S_IXOTH) set. It shall be ignored if the file is not a directory and none of the execute bits are set in the current file mode bits. [USE416877]_. References: .. [USE416877] https://unix.stackexchange.com/questions/416877/what-is-a-capital-x-in-posix-chmod Example: >>> from ubelt.util_path import _parse_chmod_code >>> print(list(_parse_chmod_code('ugo+rw,+r,g=rwx'))) >>> print(list(_parse_chmod_code('o+x'))) >>> print(list(_parse_chmod_code('u-x'))) >>> print(list(_parse_chmod_code('x'))) >>> print(list(_parse_chmod_code('ugo+rwx'))) [('ugo', '+', 'rw'), ('ugo', '+', 'r'), ('g', '=', 'rwx')] [('o', '+', 'x')] [('u', '-', 'x')] [('u', '+', 'x')] [('ugo', '+', 'rwx')] >>> import pytest >>> with pytest.raises(ValueError): >>> list(_parse_chmod_code('a+b+c')) """ import re pat = re.compile(r'([\+\-\=])') parts = code.split(',') for part in parts: ab = pat.split(part) len_ab = len(ab) if len_ab == 3: targets, op, perms = ab elif len_ab == 1: perms = ab[0] op = '+' targets = 'u' else: raise ValueError(f'unknown chmod code pattern: part={part}') if targets == '' or targets == 'a': targets = 'ugo' yield (targets, op, perms) def _resolve_chmod_code(old_mode, code): """ Modifies integer stat permissions based on a string code. Args: old_mode (int): old mode from st_stat code (str): chmod style code Returns: int : new code References: .. [RHEL_SpecialFilePerms] https://www.youtube.com/watch?v=Dn6b-mIKHmM&t=1970s Example: >>> # test normal user / group / other, read / write / execute perms >>> from ubelt.util_path import _resolve_chmod_code >>> print(oct(_resolve_chmod_code(0, '+rwx'))) >>> print(oct(_resolve_chmod_code(0, 'ugo+rwx'))) >>> print(oct(_resolve_chmod_code(0, 'a-rwx'))) >>> print(oct(_resolve_chmod_code(0, 'u+rw,go+r,go-wx'))) >>> print(oct(_resolve_chmod_code(0o0777, 'u+rw,go+r,go-wx'))) 0o777 0o777 0o0 0o644 0o744 >>> import pytest >>> with pytest.raises(NotImplementedError): >>> print(oct(_resolve_chmod_code(0, 'u=rw'))) >>> with pytest.raises(ValueError): >>> _resolve_chmod_code(0, 'u?w') Example: >>> # Test special suid, sgid, and sticky (svtx) codes >>> from ubelt.util_path import _resolve_chmod_code >>> print(oct(_resolve_chmod_code(0, 'u+s'))) >>> print(oct(_resolve_chmod_code(0o7777, 'u-s'))) 0o4000 0o3777 """ import itertools as it action_lut = { 'ur' : stat.S_IRUSR, 'uw' : stat.S_IWUSR, 'ux' : stat.S_IXUSR, 'gr' : stat.S_IRGRP, 'gw' : stat.S_IWGRP, 'gx' : stat.S_IXGRP, 'or' : stat.S_IROTH, 'ow' : stat.S_IWOTH, 'ox' : stat.S_IXOTH, # Special UNIX permissions 'us': stat.S_ISUID, # SUID (executables run as the file's owner) 'gs': stat.S_ISGID, # SGID (executables run as the file's group) and other uses, see: https://docs.python.org/3/library/stat.html#stat.S_ISGID 'ot': stat.S_ISVTX, # sticky (only owner can delete) } actions = _parse_chmod_code(code) new_mode = int(old_mode) # (could optimize to modify inplace if needed) for action in actions: targets, op, perms = action try: action_keys = (target + perm for target, perm in it.product(targets, perms)) action_values = (action_lut[key] for key in action_keys) action_values = list(action_values) if op == '+': for val in action_values: new_mode |= val elif op == '-': for val in action_values: new_mode &= (~val) elif op == '=': raise NotImplementedError(f'new chmod code for op={op}') else: raise 
AssertionError( f'should not be able to get here. unknown op code: op={op}') except KeyError: # Give a better error message if something goes wrong raise ValueError(f'Unknown action: {action}') return new_mode def _encode_chmod_int(int_code): """ Convert a chmod integer code to a string Currently unused, but may be useful in the future. Args: int_code (int): mode from st_stat Returns: str: the permissions code Example: >>> from ubelt.util_path import _encode_chmod_int >>> int_code = 0o744 >>> print(_encode_chmod_int(int_code)) u=rwx,g=r,o=r >>> int_code = 0o7777 >>> print(_encode_chmod_int(int_code)) u=rwxs,g=rwxs,o=rwxt """ from collections import defaultdict, OrderedDict action_lut = OrderedDict([ ('ur' , stat.S_IRUSR), ('uw' , stat.S_IWUSR), ('ux' , stat.S_IXUSR), ('gr' , stat.S_IRGRP), ('gw' , stat.S_IWGRP), ('gx' , stat.S_IXGRP), ('or' , stat.S_IROTH), ('ow' , stat.S_IWOTH), ('ox' , stat.S_IXOTH), # Special UNIX permissions ('us', stat.S_ISUID), # SUID (executables run as the file's owner) ('gs', stat.S_ISGID), # SGID (executables run as the file's group) ('ot', stat.S_ISVTX), # sticky (only owner can delete) ]) target_to_perms = defaultdict(list) for key, val in action_lut.items(): target, perm = key if int_code & val: target_to_perms[target].append(perm) # The following commented logic might be useful if we want to create the # "dashed" ls representation of permissions, but that is not needed for # chmod itself, so it is not necessary to implement here. # if concise: # special_chars = {'u': 's', 'g': 's', 'o': 't'} # for k, s in special_chars.items(): # if k in target_to_perms: # vs = target_to_perms[k] # # if the executable bit is not set, replace the lowercase # # with a capital S (or T for sticky) # if 'x' in vs: # if s in vs: # vs.remove('x') # elif s in vs: # vs.remove(s) # vs.append(s.upper()) parts = [k + '=' + ''.join(vs) for k, vs in target_to_perms.items()] code = ','.join(parts) return code def _patch_win32_stats_on_pypy(): """ Handle [PyPyIssue4953]_ [PyPyDiscuss4952]_. References: .. [PyPyIssue4953] https://github.com/pypy/pypy/issues/4953#event-12838738353 .. [PyPyDiscuss4952] https://github.com/orgs/pypy/discussions/4952#discussioncomment-9481845 """ if not hasattr(stat, 'IO_REPARSE_TAG_MOUNT_POINT'): os.supports_follow_symlinks.add(os.stat) stat.IO_REPARSE_TAG_APPEXECLINK = 0x8000001b # windows stat.IO_REPARSE_TAG_MOUNT_POINT = 0xa0000003 # windows stat.IO_REPARSE_TAG_SYMLINK = 0xa000000c # windows if sys.version_info[0:2] < (3, 8): # nocover # Vendor in a nearly modern copytree for Python 3.6 and 3.7 def _compat_copytree(src, dst, symlinks=False, ignore=None, copy_function=None, ignore_dangling_symlinks=False, dirs_exist_ok=False): """ A vendored shutil.copytree for older pythons based on the 3.10 implementation """ from shutil import Error, copystat, copy2, copy with os.scandir(src) as itr: entries = list(itr) if ignore is not None: ignored_names = ignore(os.fspath(src), [x.name for x in entries]) else: ignored_names = set() os.makedirs(dst, exist_ok=dirs_exist_ok) errors = [] use_srcentry = copy_function is copy2 or copy_function is copy for srcentry in entries: if srcentry.name in ignored_names: continue srcname = os.path.join(src, srcentry.name) dstname = os.path.join(dst, srcentry.name) srcobj = srcentry if use_srcentry else srcname try: is_symlink = srcentry.is_symlink() if is_symlink and os.name == 'nt': # Special check for directory junctions, which appear as # symlinks but we want to recurse.
# Not available on 3.6, use our impl instead # lstat = srcentry.stat(follow_symlinks=False) # if lstat.st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT: # is_symlink = False from ubelt._win32_links import _win32_is_junction if _win32_is_junction(srcentry): is_symlink = False if is_symlink: linkto = os.readlink(srcname) if symlinks: # We can't just leave it to `copy_function` because legacy # code with a custom `copy_function` may rely on copytree # doing the right thing. os.symlink(linkto, dstname) copystat(srcobj, dstname, follow_symlinks=not symlinks) else: # ignore dangling symlink if the flag is on if not os.path.exists(linkto) and ignore_dangling_symlinks: continue # otherwise let the copy occur. copy2 will raise an error if srcentry.is_dir(): _compat_copytree(srcobj, dstname, symlinks, ignore, copy_function, dirs_exist_ok=dirs_exist_ok) else: copy_function(srcobj, dstname) elif srcentry.is_dir(): _compat_copytree(srcobj, dstname, symlinks, ignore, copy_function, dirs_exist_ok=dirs_exist_ok) else: # Will raise a SpecialFileError for unsupported file types copy_function(srcobj, dstname) # catch the Error from the recursive copytree so that we can # continue with other files except Error as err: errors.extend(err.args[0]) except OSError as why: errors.append((srcname, dstname, str(why))) try: copystat(src, dst) except OSError as why: # Copying file access times may fail on Windows if getattr(why, 'winerror', None) is None: errors.append((src, dst, str(why))) if errors: raise Error(errors) return dst ubelt-1.3.7/ubelt/util_path.pyi000066400000000000000000000074641472470106000165000ustar00rootroot00000000000000from os import PathLike from typing import Tuple from typing import Type from types import TracebackType from typing import List from typing import Callable from collections.abc import Generator def augpath(path: str | PathLike, suffix: str = '', prefix: str = '', ext: str | None = None, tail: str | None = '', base: str | None = None, dpath: str | PathLike | None = None, relative: str | PathLike | None = None, multidot: bool = False) -> str: ... def userhome(username: str | None = None) -> str: ... def shrinkuser(path: str | PathLike, home: str = '~') -> str: ... def expandpath(path: str | PathLike) -> str: ... def ensuredir(dpath: str | PathLike | Tuple[str | PathLike], mode: int = 1023, verbose: int = 0, recreate: bool = False) -> str: ... class ChDir: def __init__(self, dpath: str | PathLike | None) -> None: ... def __enter__(self) -> ChDir: ... def __exit__(self, ex_type: Type[BaseException] | None, ex_value: BaseException | None, ex_traceback: TracebackType | None) -> bool | None: ... class TempDir: dpath: str | None def __init__(self) -> None: ... def __del__(self) -> None: ... def ensure(self) -> str: ... def cleanup(self) -> None: ... def start(self) -> TempDir: ... def __enter__(self) -> TempDir: ... def __exit__(self, ex_type: Type[BaseException] | None, ex_value: BaseException | None, ex_traceback: TracebackType | None) -> bool | None: ... class Path: @classmethod def appdir(cls, appname: str | None = None, *args, type: str = 'cache') -> 'Path': ... def augment(self, prefix: str = '', stemsuffix: str = '', ext: str | None = None, stem: str | None = None, dpath: str | PathLike | None = None, tail: str | None = '', relative: str | PathLike | None = None, multidot: bool = False, suffix: str = ...) -> 'Path': ... def delete(self) -> 'Path': ... def ensuredir(self, mode: int = 511) -> 'Path': ... 
def mkdir(self, mode: int = 511, parents: bool = False, exist_ok: bool = False) -> 'Path': ... def expand(self) -> 'Path': ... def expandvars(self) -> 'Path': ... def ls(self, pattern: None | str = None) -> List['Path']: ... def shrinkuser(self, home: str = '~') -> 'Path': ... def touch(self, mode: int = ..., exist_ok: bool = ...) -> 'Path': ... def walk( self, topdown: bool = True, onerror: Callable[[OSError], None] | None = None, followlinks: bool = False ) -> Generator[Tuple['Path', List[str], List[str]], None, None]: ... def __add__(self, other) -> str: ... def __radd__(self, other) -> str: ... def endswith(self, suffix: str | Tuple[str, ...], *args) -> bool: ... def startswith(self, prefix: str | Tuple[str, ...], *args) -> bool: ... def copy(self, dst: str | PathLike, follow_file_symlinks: bool = False, follow_dir_symlinks: bool = False, meta: str | None = 'stats', overwrite: bool = False) -> 'Path': ... def move(self, dst: str | PathLike, follow_file_symlinks: bool = False, follow_dir_symlinks: bool = False, meta: str | None = 'stats') -> 'Path': ... ubelt-1.3.7/ubelt/util_platform.py000066400000000000000000000351131472470106000172070ustar00rootroot00000000000000""" The goal of this module is to provide an idiomatic cross-platform pattern of accessing platform dependent file systems. Standard application directory structure: cache, config, and other XDG standards [XDG_Spec]_. This is similar to the more focused :mod:`appdirs` module [AS_appdirs]_ (deprecated as of 2023-02-10) and its successor :mod:`platformdirs` [PlatDirs]_. Note: Table mapping the type of directory to the system default environment variable. Inspired by [SO_43853548]_, [SO_11113974]_, and [harawata_appdirs]_. .. code-block:: none | Linux | Win32 | Darwin data | $XDG_DATA_HOME | %APPDATA% | ~/Library/Application Support config | $XDG_CONFIG_HOME | %APPDATA% | ~/Library/Application Support cache | $XDG_CACHE_HOME | %LOCALAPPDATA% | ~/Library/Caches If an environment variable is not specified the defaults are: APPDATA = ~/AppData/Roaming LOCALAPPDATA = ~/AppData/Local XDG_DATA_HOME = ~/.local/share XDG_CACHE_HOME = ~/.cache XDG_CONFIG_HOME = ~/.config References: .. [XDG_Spec] https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html .. [SO_43853548] https://stackoverflow.com/questions/43853548/xdg-windows .. [SO_11113974] https://stackoverflow.com/questions/11113974/cross-plat-path .. [harawata_appdirs] https://github.com/harawata/appdirs#supported-directories .. [AS_appdirs] https://github.com/ActiveState/appdirs .. 
[PlatDirs] https://pypi.org/project/platformdirs/ """ import os import sys import itertools as it from os.path import exists, join, isdir, expanduser, normpath __all__ = [ 'WIN32', 'LINUX', 'DARWIN', 'POSIX', 'find_exe', 'find_path', 'ensure_app_cache_dir', 'ensure_app_config_dir', 'ensure_app_data_dir', 'get_app_cache_dir', 'get_app_config_dir', 'get_app_data_dir', 'platform_cache_dir', 'platform_config_dir', 'platform_data_dir' ] # References: # https://stackoverflow.com/questions/446209/possible-values-from-sys-platform WIN32 = sys.platform == 'win32' # type: bool LINUX = sys.platform.startswith('linux') # type: bool FREEBSD = sys.platform.startswith('freebsd') # type: bool DARWIN = sys.platform == 'darwin' # type: bool POSIX = 'posix' in sys.builtin_module_names # type: bool def platform_data_dir(): """ Returns the path for user-specific data files. Returns: str : path to the data dir used by the current operating system """ if POSIX: # nocover dpath_ = os.environ.get('XDG_DATA_HOME', '~/.local/share') elif DARWIN: # nocover dpath_ = '~/Library/Application Support' elif WIN32: # nocover dpath_ = os.environ.get('APPDATA', '~/AppData/Roaming') else: # nocover raise NotImplementedError('Unknown Platform %r' % (sys.platform,)) dpath = normpath(expanduser(dpath_)) return dpath def platform_config_dir(): """ Returns a directory which should be writable for any application. This should be used for persistent configuration files. Returns: str : path to the config dir used by the current operating system """ if POSIX: # nocover dpath_ = os.environ.get('XDG_CONFIG_HOME', '~/.config') elif DARWIN: # nocover dpath_ = '~/Library/Application Support' elif WIN32: # nocover dpath_ = os.environ.get('APPDATA', '~/AppData/Roaming') else: # nocover raise NotImplementedError('Unknown Platform %r' % (sys.platform,)) dpath = normpath(expanduser(dpath_)) return dpath def platform_cache_dir(): """ Returns a directory which should be writable for any application. This should be used for temporary deletable data. Returns: str : path to the cache dir used by the current operating system """ if POSIX: # nocover dpath_ = os.environ.get('XDG_CACHE_HOME', '~/.cache') elif DARWIN: # nocover dpath_ = '~/Library/Caches' elif WIN32: # nocover dpath_ = os.environ.get('LOCALAPPDATA', '~/AppData/Local') else: # nocover raise NotImplementedError('Unknown Platform %r' % (sys.platform,)) dpath = normpath(expanduser(dpath_)) return dpath # --- def get_app_data_dir(appname, *args): r""" Returns a writable directory for an application. This should be used for persistent application data. Note: New applications should prefer :func:`ubelt.util_path.Path.appdir` i.e. ``ubelt.Path.appdir(appname, *args, type='data')``. Args: appname (str): the name of the application *args: any other subdirectories may be specified Returns: str : dpath - writable data directory for this application SeeAlso: :func:`ensure_app_data_dir` """ from ubelt.util_deprecate import schedule_deprecation schedule_deprecation( modname='ubelt', name='get_app_data_dir and ensure_app_data_dir', type='function', migration='use ubelt.Path.appdir(type="data") instead', deprecate='1.2.0', error='2.0.0', remove='2.1.0') dpath = join(platform_data_dir(), appname, *args) return dpath def ensure_app_data_dir(appname, *args): """ Calls :func:`get_app_data_dir` but ensures the directory exists. Note: New applications should prefer :func:`ubelt.util_path.Path.appdir` i.e. ``ubelt.Path.appdir(appname, *args, type='data').ensuredir()``.
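For instance, a hedged migration sketch (``'myapp'`` is a hypothetical application name):

.. code:: python

    # deprecated pattern
    dpath = ub.ensure_app_data_dir('myapp')
    # preferred replacement
    dpath = ub.Path.appdir('myapp', type='data').ensuredir()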
Args: appname (str): the name of the application *args: any other subdirectories may be specified Returns: str: the path to the ensured directory SeeAlso: :func:`get_app_data_dir` Example: >>> import ubelt as ub >>> dpath = ub.ensure_app_data_dir('ubelt') >>> assert exists(dpath) """ from ubelt import util_path dpath = get_app_data_dir(appname, *args) util_path.ensuredir(dpath) return dpath def get_app_config_dir(appname, *args): r""" Returns a writable directory for an application. This should be used for persistent configuration files. Note: New applications should prefer :func:`ubelt.util_path.Path.appdir` i.e. ``ubelt.Path.appdir(appname, *args, type='config')``. Args: appname (str): the name of the application *args: any other subdirectories may be specified Returns: str : dpath - writable config directory for this application SeeAlso: :func:`ensure_app_config_dir` """ from ubelt.util_deprecate import schedule_deprecation schedule_deprecation( modname='ubelt', name='get_app_config_dir and ensure_app_config_dir', type='function', migration='use ubelt.Path.appdir(type="config") instead', deprecate='1.2.0', error='2.0.0', remove='2.1.0') dpath = join(platform_config_dir(), appname, *args) return dpath def ensure_app_config_dir(appname, *args): """ Calls :func:`get_app_config_dir` but ensures the directory exists. Note: New applications should prefer :func:`ubelt.util_path.Path.appdir` i.e. ``ubelt.Path.appdir(appname, *args, type='config').ensuredir()``. Args: appname (str): the name of the application *args: any other subdirectories may be specified Returns: str: the path to the ensured directory SeeAlso: :func:`get_app_config_dir` Example: >>> import ubelt as ub >>> dpath = ub.ensure_app_config_dir('ubelt') >>> assert exists(dpath) """ from ubelt import util_path dpath = get_app_config_dir(appname, *args) util_path.ensuredir(dpath) return dpath def get_app_cache_dir(appname, *args): r""" Returns a writable directory for an application. This should be used for temporary deletable data. Note: New applications should prefer :func:`ubelt.util_path.Path.appdir` i.e. ``ubelt.Path.appdir(appname, *args, type='cache')``. Args: appname (str): the name of the application *args: any other subdirectories may be specified Returns: str : dpath - writable cache directory for this application SeeAlso: :func:`ensure_app_cache_dir` """ from ubelt.util_deprecate import schedule_deprecation schedule_deprecation( modname='ubelt', name='get_app_cache_dir and ensure_app_cache_dir', type='function', migration='use ubelt.Path.appdir(type="cache") instead', deprecate='1.2.0', error='2.0.0', remove='2.1.0') dpath = join(platform_cache_dir(), appname, *args) return dpath def ensure_app_cache_dir(appname, *args): """ Calls :func:`get_app_cache_dir` but ensures the directory exists. Note: New applications should prefer :func:`ubelt.util_path.Path.appdir` i.e. ``ubelt.Path.appdir(appname, *args, type='cache').ensuredir()``. Args: appname (str): the name of the application *args: any other subdirectories may be specified Returns: str: the path to the ensured directory SeeAlso: :func:`get_app_cache_dir` Example: >>> import ubelt as ub >>> dpath = ub.ensure_app_cache_dir('ubelt') >>> assert exists(dpath) """ from ubelt import util_path dpath = get_app_cache_dir(appname, *args) util_path.ensuredir(dpath) return dpath def find_exe(name, multi=False, path=None): """ Locate a command.
Search your local filesystem for an executable and return the first matching file with executable permission. Args: name (str | PathLike): globstr of matching filename multi (bool): if True return all matches instead of just the first. Defaults to False. path (str | PathLike | Iterable[str | PathLike] | None): If specified, overrides the system PATH variable. Returns: str | List[str] | None: returns matching executable(s). SeeAlso: :func:`shutil.which` - which is available in Python 3.3+. Note: This is essentially the ``which`` UNIX command References: .. [SO_377017] https://stackoverflow.com/questions/377017/test-if-executable-exists-in-python/377028#377028 .. [shutil_which] https://docs.python.org/dev/library/shutil.html#shutil.which Example: >>> # The following are programs commonly exposed via the PATH variable. >>> # Exact results may differ between machines. >>> # xdoctest: +IGNORE_WANT >>> import ubelt as ub >>> print(ub.find_exe('ls')) >>> print(ub.find_exe('ping')) >>> print(ub.find_exe('which')) >>> print(ub.find_exe('which', multi=True)) >>> print(ub.find_exe('ping', multi=True)) >>> print(ub.find_exe('noexist', multi=True)) /usr/bin/ls /usr/bin/ping /usr/bin/which ['/usr/bin/which', '/bin/which'] ['/usr/bin/ping', '/bin/ping'] [] Example: >>> import ubelt as ub >>> assert not ub.find_exe('!noexist', multi=False) >>> assert ub.find_exe('ping', multi=False) or ub.find_exe('ls', multi=False) >>> assert not ub.find_exe('!noexist', multi=True) >>> assert ub.find_exe('ping', multi=True) or ub.find_exe('ls', multi=True) Benchmark: >>> # xdoctest: +IGNORE_WANT >>> import ubelt as ub >>> import shutil >>> from timerit import Timerit >>> for timer in Timerit(1000, bestof=10, label='ub.find_exe'): >>> ub.find_exe('which') >>> for timer in Timerit(1000, bestof=10, label='shutil.which'): >>> shutil.which('which') Timed best=25.339 µs, mean=25.809 ± 0.3 µs for ub.find_exe Timed best=28.600 µs, mean=28.986 ± 0.3 µs for shutil.which """ candidates = find_path(name, path=path, exact=True) mode = os.X_OK | os.F_OK results = (fpath for fpath in candidates if os.access(fpath, mode) and not isdir(fpath)) if not multi: for fpath in results: return fpath else: return list(results) def find_path(name, path=None, exact=False): """ Search for a file or directory on your local filesystem by name (file must be in a directory specified in a PATH environment variable) Args: name (str | PathLike): file name to match. If exact is False this may be a glob pattern path (str | Iterable[str | PathLike] | None): list of directories to search either specified as an ``os.pathsep`` separated string or a list of directories. Defaults to environment PATH. exact (bool): if True, only returns exact matches. Defaults to False. Yields: str: candidate - a path that matches ``name`` Note: Running with ``name=''`` (i.e. ``ub.find_path('')``) will simply yield all directories in your PATH. Note: For recursive behavior set ``path=(d for d, _, _ in os.walk('.'))``, where ``'.'`` might be replaced by the root directory of interest. Example: >>> # xdoctest: +IGNORE_WANT >>> import ubelt as ub >>> print(list(ub.find_path('ping', exact=True))) >>> print(list(ub.find_path('bin'))) >>> print(list(ub.find_path('gcc*'))) >>> print(list(ub.find_path('cmake*'))) ['/usr/bin/ping', '/bin/ping'] [] [... '/usr/bin/gcc-11', '/usr/bin/gcc-ranlib', ...] [... '/usr/bin/cmake-gui', '/usr/bin/cmake', ...] 
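Example:
    >>> # A hedged sketch of the recursive pattern from the Note above;
    >>> # results depend on the contents of the current directory.
    >>> import ubelt as ub
    >>> import os
    >>> dpaths = (d for d, _, _ in os.walk('.'))
    >>> candidates = list(ub.find_path('*.py', path=dpaths))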
Example: >>> import ubelt as ub >>> from os.path import dirname >>> path = dirname(dirname(ub.util_platform.__file__)) >>> res = sorted(ub.find_path('ubelt/util_*.py', path=path)) >>> assert len(res) >= 10 >>> res = sorted(ub.find_path('ubelt/util_platform.py', path=path, exact=True)) >>> print(res) >>> assert len(res) == 1 """ if path is None: path = os.environ.get('PATH', os.defpath) if isinstance(path, str): dpaths = path.split(os.pathsep) else: dpaths = path candidates = (join(dpath, name) for dpath in dpaths) if exact: if WIN32: # nocover # on WIN32 allow ``name`` to omit the extension suffix by trying # to match with all possible "valid" suffixes specified by PATHEXT pathext = [''] + os.environ.get('PATHEXT', '').split(os.pathsep) candidates = (p + ext for p in candidates for ext in pathext) candidates = filter(exists, candidates) else: import glob candidates = it.chain.from_iterable( glob.glob(pattern) for pattern in candidates) for candidate in candidates: yield candidate ubelt-1.3.7/ubelt/util_platform.pyi000066400000000000000000000020061472470106000173530ustar00rootroot00000000000000from os import PathLike from typing import Iterable from typing import List from collections.abc import Generator WIN32: bool LINUX: bool FREEBSD: bool DARWIN: bool POSIX: bool def platform_data_dir() -> str: ... def platform_config_dir() -> str: ... def platform_cache_dir() -> str: ... def get_app_data_dir(appname: str, *args) -> str: ... def ensure_app_data_dir(appname: str, *args) -> str: ... def get_app_config_dir(appname: str, *args) -> str: ... def ensure_app_config_dir(appname: str, *args) -> str: ... def get_app_cache_dir(appname: str, *args) -> str: ... def ensure_app_cache_dir(appname: str, *args) -> str: ... def find_exe( name: str | PathLike, multi: bool = False, path: str | PathLike | Iterable[str | PathLike] | None = None ) -> str | List[str] | None: ... def find_path(name: str | PathLike, path: str | Iterable[str | PathLike] | None = None, exact: bool = False) -> Generator[str, None, None]: ... ubelt-1.3.7/ubelt/util_repr.py000066400000000000000000001243001472470106000163300ustar00rootroot00000000000000""" Defines the function :func:`urepr`, which allows for a bit more customization than :func:`repr` or :func:`pprint.pformat`. See the docstring for more details. Two main goals of urepr are to provide nice string representations of nested data structures and make those "eval-able" whenever possible. As an example take the value ``float('inf')``, which normally has a non-evalable repr of ``inf``: >>> import ubelt as ub >>> ub.urepr(float('inf')) "float('inf')" The ``newline`` (or ``nl``) keyword argument can control how deep in the nesting newlines are allowed. >>> print(ub.urepr({1: float('nan'), 2: float('inf'), 3: 3.0})) { 1: float('nan'), 2: float('inf'), 3: 3.0, } >>> print(ub.urepr({1: float('nan'), 2: float('inf'), 3: 3.0}, nl=0)) {1: float('nan'), 2: float('inf'), 3: 3.0} You can also define or overwrite how representations for different types are created. You can either create your own extension object, or you can monkey-patch ``ub.util_repr._REPR_EXTENSIONS`` without specifying the extensions keyword argument (although this will be a global change). 
>>> import ubelt as ub >>> extensions = ub.util_repr.ReprExtensions() >>> @extensions.register(float) >>> def my_float_formater(data, **kw): >>> return "monkey({})".format(data) >>> print(ub.urepr({1: float('nan'), 2: float('inf'), 3: 3.0}, nl=0, extensions=extensions)) {1: monkey(nan), 2: monkey(inf), 3: monkey(3.0)} As of ubelt 1.1.0 you can now access and update the default extensions via the ``EXTENSIONS`` attribute of the :func:`urepr` function itself. >>> # xdoctest: +SKIP >>> # We skip this at test time to not modify global state >>> import ubelt as ub >>> @ub.urepr.EXTENSIONS.register(float) >>> def my_float_formater(data, **kw): >>> return "monkey2({})".format(data) >>> print(ub.urepr({1: float('nan'), 2: float('inf'), 3: 3.0}, nl=0)) """ import collections from ubelt import util_str from ubelt import util_list __all__ = ['urepr', 'ReprExtensions'] def urepr(data, **kwargs): """ Makes a pretty and easy-to-doctest string representation of ``data``, with nice handling of common nested datatypes. This is an alternative to :func:`repr` and :func:`pprint.pformat`. The output of this function is configurable. By default it aims to produce strings that are consistent, compact, and executable. This makes them great for doctests. Note: This function has many keyword arguments that can be used to customize the final representation. For convenience some of the more frequently used kwargs have short aliases. See "Kwargs" for more details. Note: For large data items, this can be noticeably slower than pprint.pformat and much slower than the builtin repr. Benchmarks exist in the repo under dev/bench/bench_urepr_vs_alternatives.py Args: data (object): an arbitrary python object to form the string "representation" of Kwargs: si, stritems (bool): dict/list items use str instead of repr strkeys, sk (bool): dict keys use str instead of repr strvals, sv (bool): dict values use str instead of repr nl, newlines (int | bool): number of top level nestings to place a newline after. If true all items are followed by newlines regardless of nesting level. Defaults to 1 for lists and True for dicts. nobr, nobraces (bool): if True, text will not contain outer braces for containers. Defaults to False. cbr, compact_brace (bool): if True, braces are compactified (i.e. they will not have newlines placed directly after them, think java / K&R / 1TBS). Defaults to False. trailsep, trailing_sep (bool): if True, a separator is placed after the last item in a sequence. By default this is True if there are any ``nl > 0``. explicit (bool): changes dict representation from ``{k1: v1, ...}`` to ``dict(k1=v1, ...)``. Defaults to False. Modifies: default kvsep is modified to ``'='`` dict braces from ``{}`` to ``dict()``. compact (bool): Produces values more suitable for space constrained environments. Defaults to False. Modifies: default kvsep is modified to ``'='`` default itemsep is modified to ``''`` default nobraces is modified to ``1``. default newlines is modified to ``0``. default strkeys to ``True`` default strvals to ``True`` precision (int | None): if specified floats are formatted with this precision. Defaults to None kvsep (str): separator between keys and values. Defaults to ': ' itemsep (str): separator between items. This separator is placed after commas, which are currently not configurable. This may be modified in the future. Defaults to ' '. sort (bool | callable | None): if 'auto', then sort unordered collections, but keep the ordering of ordered collections.
This option attempts to be deterministic in most cases. Defaults to None. if True, then ALL collections will be sorted in the returned text. suppress_small (bool): passed to :func:`numpy.array2string` for ndarrays max_line_width (int): passed to :func:`numpy.array2string` for ndarrays with_dtype (bool): only relevant to numpy.ndarrays. if True includes the dtype. Defaults to `not strvals`. align (bool | str): if True, will align multi-line dictionaries by the kvsep. Defaults to False. extensions (ReprExtensions): a custom :class:`ReprExtensions` instance that can overwrite or define how different types of objects are formatted. Returns: str: outstr - output string Note: There are also internal kwargs, which should not be used: _return_info (bool): return information about child context _root_info (depth): information about parent context RelatedWork: :func:`rich.pretty.pretty_repr` :func:`pprint.pformat` Example: >>> import ubelt as ub >>> dict_ = { ... 'custom_types': [slice(0, 1, None), 1/3], ... 'nest_dict': {'k1': [1, 2, {3: {4, 5}}], ... 'key2': [1, 2, {3: {4, 5}}], ... 'key3': [1, 2, {3: {4, 5}}], ... }, ... 'nest_dict2': {'k': [1, 2, {3: {4, 5}}]}, ... 'nested_tuples': [tuple([1]), tuple([2, 3]), frozenset([4, 5, 6])], ... 'one_tup': tuple([1]), ... 'simple_dict': {'spam': 'eggs', 'ham': 'jam'}, ... 'simple_list': [1, 2, 'red', 'blue'], ... 'odict': ub.odict([(2, '1'), (1, '2')]), ... } >>> # In the interest of saving space we are only going to show the >>> # output for the first example. >>> result = ub.urepr(dict_, nl=1, precision=2) >>> import pytest >>> import sys >>> if sys.version_info[0:2] <= (3, 6): >>> # dictionary order is not guaranteed in 3.6 use repr2 instead >>> pytest.skip() >>> print(result) { 'custom_types': [slice(0, 1, None), 0.33], 'nest_dict': {'k1': [1, 2, {3: {4, 5}}], 'key2': [1, 2, {3: {4, 5}}], 'key3': [1, 2, {3: {4, 5}}]}, 'nest_dict2': {'k': [1, 2, {3: {4, 5}}]}, 'nested_tuples': [(1,), (2, 3), {4, 5, 6}], 'one_tup': (1,), 'simple_dict': {'spam': 'eggs', 'ham': 'jam'}, 'simple_list': [1, 2, 'red', 'blue'], 'odict': {2: '1', 1: '2'}, } >>> # You can try the rest yourself. >>> result = ub.urepr(dict_, nl=3, precision=2); print(result) >>> result = ub.urepr(dict_, nl=2, precision=2); print(result) >>> result = ub.urepr(dict_, nl=1, precision=2, itemsep='', explicit=True); print(result) >>> result = ub.urepr(dict_, nl=1, precision=2, nobr=1, itemsep='', explicit=True); print(result) >>> result = ub.urepr(dict_, nl=3, precision=2, cbr=True); print(result) >>> result = ub.urepr(dict_, nl=3, precision=2, si=True); print(result) >>> result = ub.urepr(dict_, nl=3, sort=True); print(result) >>> result = ub.urepr(dict_, nl=3, sort=False, trailing_sep=False); print(result) >>> result = ub.urepr(dict_, nl=3, sort=False, trailing_sep=False, nobr=True); print(result) Example: >>> import ubelt as ub >>> def _nest(d, w): ... if d == 0: ... return {} ... else: ... 
return {'n{}'.format(d): _nest(d - 1, w + 1), 'm{}'.format(d): _nest(d - 1, w + 1)} >>> dict_ = _nest(d=4, w=1) >>> result = ub.urepr(dict_, nl=6, precision=2, cbr=1) >>> print('---') >>> print(result) >>> result = ub.urepr(dict_, nl=-1, precision=2) >>> print('---') >>> print(result) Example: >>> import ubelt as ub >>> data = {'a': 100, 'b': [1, '2', 3], 'c': {20:30, 40: 'five'}} >>> print(ub.urepr(data, nl=1)) { 'a': 100, 'b': [1, '2', 3], 'c': {20: 30, 40: 'five'}, } >>> # Compact is useful for things like timerit.Timerit labels >>> print(ub.urepr(data, compact=True)) a=100,b=[1,2,3],c={20=30,40=five} >>> print(ub.urepr(data, compact=True, nobr=False)) {a=100,b=[1,2,3],c={20=30,40=five}} """ custom_extensions = kwargs.get('extensions', None) _return_info = kwargs.get('_return_info', False) kwargs['_root_info'] = _rectify_root_info(kwargs.get('_root_info', None)) if kwargs.get('compact', False): # Compact profile defaults kwargs['newlines'] = kwargs.get('newlines', 0) kwargs['strkeys'] = kwargs.get('strkeys', True) kwargs['strvals'] = kwargs.get('strvals', True) kwargs['nobraces'] = kwargs.get('nobraces', 1) kwargs['itemsep'] = kwargs.get('itemsep', '') kwargs['kvsep'] = kwargs.get('kvsep', '=') outstr = None _leaf_info = None if custom_extensions: func = custom_extensions.lookup(data) if func is not None: outstr = func(data, **kwargs) if outstr is None: if isinstance(data, dict): outstr, _leaf_info = _format_dict(data, **kwargs) elif isinstance(data, (list, tuple, set, frozenset)): outstr, _leaf_info = _format_list(data, **kwargs) if outstr is None: # check any globally registered functions for special formatters func = _REPR_EXTENSIONS.lookup(data) if func is not None: outstr = func(data, **kwargs) else: outstr = _format_object(data, **kwargs) if _return_info: _leaf_info = _rectify_leaf_info(_leaf_info) return outstr, _leaf_info else: return outstr def _rectify_root_info(_root_info): if _root_info is None: _root_info = { 'depth': 0, } return _root_info def _rectify_leaf_info(_leaf_info): if _leaf_info is None: _leaf_info = { 'max_height': 0, 'min_height': 0, } return _leaf_info class ReprExtensions(object): """ Helper class for managing non-builtin (e.g. numpy) format types. This module (:mod:`ubelt.util_repr`) maintains a global set of basic extensions, but it is also possible to create a locally scoped set of extensions and explicitly pass it to urepr. The following example demonstrates this. Example: >>> import ubelt as ub >>> class MyObject(object): >>> pass >>> data = {'a': [1, 2.2222, MyObject()], 'b': MyObject()} >>> # Create a custom set of extensions >>> extensions = ub.ReprExtensions() >>> # Register a function to format your specific type >>> @extensions.register(MyObject) >>> def format_myobject(data, **kwargs): >>> return 'I can do anything here' >>> # Repr2 will now respect the passed custom extensions >>> # Note that the global extensions will still be respected >>> # unless they are overloaded. 
>>> print(ub.urepr(data, nl=-1, precision=1, extensions=extensions)) { 'a': [1, 2.2, I can do anything here], 'b': I can do anything here } >>> # Overload the formatter for float and int >>> @extensions.register((float, int)) >>> def format_myobject(data, **kwargs): >>> return str((data + 10) // 2) >>> print(ub.urepr(data, nl=-1, precision=1, extensions=extensions)) { 'a': [5, 6.0, I can do anything here], 'b': I can do anything here } """ # set_types = [set, frozenset] # list_types = [list, tuple] # dict_types = [dict] # custom_types = { # 'numpy': [], # 'pandas': [], # } # @classmethod # def sequence_types(cls): # return cls.list_types + cls.set_types def __init__(self): self._type_registry = {} # type: Dict[Type, Callable] # NOQA self._typename_registry = {} # type: Dict[str, Callable] # NOQA self._lazy_queue = [] # type: List[Callable] # NOQA # self._lazy_registrations = [ # self._register_numpy_extensions, # self._register_builtin_extensions, # ] def register(self, key): """ Registers a custom formatting function with ub.urepr Args: key (Type | Tuple[Type] | str): indicator of the type Returns: Callable: decorator function """ def _decorator(func): if isinstance(key, tuple): for t in key: self._type_registry[t] = func elif isinstance(key, str): self._typename_registry[key] = func else: self._type_registry[key] = func return func return _decorator def lookup(self, data): """ Returns an appropriate function to format ``data`` if one has been registered. Args: data (Any): an instance that may have a registered formatter Returns: Callable: the formatter for the given type """ # Evaluate the lazy queue if anything is in it if self._lazy_queue: for func in self._lazy_queue: func() self._lazy_queue = [] for type_, func in self._type_registry.items(): if isinstance(data, type_): return func # Fallback to registered typenames.
# If we cannot find a formatter for this type, then return None typename = type(data).__name__ func = self._typename_registry.get(typename, None) return func def _register_pandas_extensions(self): """ Example: >>> # xdoctest: +REQUIRES(module:pandas) >>> # xdoctest: +IGNORE_WHITESPACE >>> import pandas as pd >>> import numpy as np >>> import ubelt as ub >>> rng = np.random.RandomState(0) >>> data = pd.DataFrame(rng.rand(3, 3)) >>> print(ub.urepr(data)) >>> print(ub.urepr(data, precision=2)) >>> print(ub.urepr({'akeyfdfj': data}, precision=2)) """ @self.register('DataFrame') def format_pandas(data, **kwargs): # nocover precision = kwargs.get('precision', None) float_format = (None if precision is None else '%.{}f'.format(precision)) formatted = data.to_string(float_format=float_format) return formatted # def _register_torch_extensions(self): # @self.register('Tensor') # def format_tensor(data, **kwargs): # """ # Example: # >>> # xdoctest: +REQUIRES(module:torch) # >>> # xdoctest: +IGNORE_WHITESPACE # >>> import torch # >>> import numpy as np # >>> data = np.array([[.2, 42, 5], [21.2, 3, .4]]) # >>> data = torch.from_numpy(data) # >>> data = torch.rand(100, 100) # >>> print('data = {}'.format(ub.urepr(data, nl=1))) # >>> print(ub.urepr(data)) # """ # import numpy as np # func = self._type_registry[np.ndarray] # npdata = data.data.cpu().numpy() # # kwargs['strvals'] = True # kwargs['with_dtype'] = False # formatted = func(npdata, **kwargs) # # hack for prefix class # formatted = formatted.replace('np.array', '__Tensor') # # import ubelt as ub # # formatted = ub.hzcat('Tensor(' + formatted + ')') # return formatted def _register_numpy_extensions(self): """ Example: >>> # xdoctest: +REQUIRES(module:numpy) >>> import sys >>> import pytest >>> import ubelt as ub >>> if not ub.modname_to_modpath('numpy'): ... raise pytest.skip() >>> # xdoctest: +IGNORE_WHITESPACE >>> import numpy as np >>> data = np.array([[.2, 42, 5], [21.2, 3, .4]]) >>> print(ub.urepr(data)) np.array([[ 0.2, 42. , 5. ], [21.2, 3. , 0.4]], dtype=np.float64) >>> print(ub.urepr(data, with_dtype=False)) np.array([[ 0.2, 42. , 5. ], [21.2, 3. , 0.4]]) >>> print(ub.urepr(data, strvals=True)) [[ 0.2, 42. , 5. ], [21.2, 3. , 0.4]] >>> data = np.empty((0, 10), dtype=np.float64) >>> print(ub.urepr(data, strvals=False)) np.empty((0, 10), dtype=np.float64) >>> print(ub.urepr(data, strvals=True)) [] >>> data = np.ma.empty((0, 10), dtype=np.float64) >>> print(ub.urepr(data, strvals=False)) np.ma.empty((0, 10), dtype=np.float64) """ # TODO: should we register numpy using the new string method? import numpy as np @self.register(np.ndarray) def format_ndarray(data, **kwargs): import re strvals = kwargs.get('sv', kwargs.get('strvals', False)) itemsep = kwargs.get('itemsep', ' ') precision = kwargs.get('precision', None) suppress_small = kwargs.get('suppress_small', kwargs.get('supress_small', None)) max_line_width = kwargs.get('max_line_width', None) with_dtype = kwargs.get('with_dtype', kwargs.get('dtype', not strvals)) newlines = kwargs.pop('nl', kwargs.pop('newlines', 1)) # if with_dtype and strvals: # raise ValueError('cannot format with strvals and dtype') separator = ',' + itemsep if strvals: prefix = '' suffix = '' else: modname = type(data).__module__ # substitute shorthand for numpy module names np_nice = 'np' modname = re.sub('\\bnumpy\\b', np_nice, modname) modname = re.sub('\\bma.core\\b', 'ma', modname) class_name = type(data).__name__ if class_name == 'ndarray': class_name = 'array' prefix = modname + '.'
+ class_name + '(' if with_dtype: dtype_repr = data.dtype.name # dtype_repr = np.core.arrayprint.dtype_short_repr(data.dtype) suffix = ',{}dtype={}.{})'.format(itemsep, np_nice, dtype_repr) else: suffix = ')' if not strvals and data.size == 0 and data.shape != (0,): # Special case for displaying empty data prefix = modname + '.empty(' body = repr(tuple(map(int, data.shape))) else: body = np.array2string(data, precision=precision, separator=separator, suppress_small=suppress_small, prefix=prefix, max_line_width=max_line_width) if not strvals: # Handle special float values inf / nan body = re.sub('\\binf\\b', np_nice + '.inf', body) body = re.sub('\\bnan\\b', np_nice + '.nan', body) if not newlines: # remove newlines if we need to body = re.sub('\n *', '', body) formatted = prefix + body + suffix return formatted # Hack, make sure we also register numpy floats self.register(np.float32)(self._type_registry[float]) def _register_builtin_extensions(self): @self.register(float) def format_float(data, **kwargs): precision = kwargs.get('precision', None) strvals = kwargs.get('sv', kwargs.get('strvals', False)) if precision is None: text = str(data) else: text = ('{:.%df}' % precision).format(data) if not strvals: # Ensure the representation of inf and nan is evaluatable # NOTE: sometimes this function is used to make json objects # how can we ensure that this doesn't break things? # Turns out json, never handled these cases. In the future we # may want to add a json flag to urepr to encourage it to # output json-like representations. # json.loads("[0, 1, 2, nan]") # json.loads("[Infinity, NaN]") # json.dumps([float('inf'), float('nan')]) import math if math.isinf(data) or math.isnan(data): text = "float('{}')".format(text) return text @self.register(slice) def format_slice(data, **kwargs): if kwargs.get('itemsep', ' ') == '': return 'slice(%r,%r,%r)' % (data.start, data.stop, data.step) else: return _format_object(data, **kwargs) _REPR_EXTENSIONS = ReprExtensions() _REPR_EXTENSIONS._register_builtin_extensions() def _lazy_init(): """ Only called in the case where we encounter an unknown type that a commonly used external library might have. For now this is just numpy. Numpy is ubiquitous. """ try: # TODO: can we use lazy loading to prevent trying to import numpy until # some attribute of _REPR_EXTENSIONS is used? _REPR_EXTENSIONS._register_numpy_extensions() _REPR_EXTENSIONS._register_pandas_extensions() # _REPR_EXTENSIONS._register_torch_extensions() except ImportError: # nocover pass _REPR_EXTENSIONS._lazy_queue.append(_lazy_init) def _format_object(val, **kwargs): stritems = kwargs.get('si', kwargs.get('stritems', False)) strvals = stritems or kwargs.get('sv', kwargs.get('strvals', False)) base_valfunc = str if strvals else repr itemstr = base_valfunc(val) return itemstr def _format_list(list_, **kwargs): """ Makes a pretty printable / human-readable string representation of a sequence. In most cases this string could be evaled. 
Args: list_ (list): input list **kwargs: nl, newlines, packed, nobr, nobraces, itemsep, trailing_sep, strvals indent_, precision, use_numpy, with_dtype, force_dtype, stritems, strkeys, explicit, sort, key_order, maxlen Returns: Tuple[str, Dict] : retstr, _leaf_info Example: >>> print(_format_list([])[0]) [] >>> print(_format_list([], nobr=True)[0]) [] >>> print(_format_list([1], nl=0)[0]) [1] >>> print(_format_list([1], nobr=True)[0]) 1, """ kwargs['_root_info'] = _rectify_root_info(kwargs.get('_root_info', None)) kwargs['_root_info']['depth'] += 1 newlines = kwargs.pop('nl', kwargs.pop('newlines', 1)) kwargs['nl'] = _rectify_countdown_or_bool(newlines) nobraces = kwargs.pop('nobr', kwargs.pop('nobraces', False)) kwargs['nobraces'] = _rectify_countdown_or_bool(nobraces) itemsep = kwargs.get('itemsep', ' ') compact_brace = kwargs.get('cbr', kwargs.get('compact_brace', False)) # kwargs['cbr'] = _rectify_countdown_or_bool(compact_brace) itemstrs, _leaf_info = _list_itemstrs(list_, **kwargs) if len(itemstrs) == 0: nobraces = False # force braces to prevent empty output is_tuple = isinstance(list_, tuple) is_set = isinstance(list_, (set, frozenset,)) if nobraces: lbr, rbr = '', '' elif is_tuple: lbr, rbr = '(', ')' elif is_set: lbr, rbr = '{', '}' else: lbr, rbr = '[', ']' # Doesn't actually put in trailing comma if on same line trailing_sep = kwargs.get('trailsep', kwargs.get('trailing_sep', newlines > 0 and len(itemstrs))) # The trailing separator is always needed for single item tuples if is_tuple and len(list_) <= 1: trailing_sep = True if len(itemstrs) == 0: newlines = False retstr = _join_itemstrs(itemstrs, itemsep, newlines, _leaf_info, nobraces, trailing_sep, compact_brace, lbr, rbr) return retstr, _leaf_info def _format_dict(dict_, **kwargs): """ Makes a pretty printable / human-readable string representation of a dictionary. In most cases this string could be evaled. Args: dict_ (dict): a dictionary **kwargs: si, stritems, strkeys, strvals, sk, sv, nl, newlines, nobr, nobraces, cbr, compact_brace, trailing_sep, explicit, itemsep, precision, kvsep, sort Kwargs: sort (None): if True, sorts ALL collections and subcollections, note, collections with undefined orders (e.g. dicts, sets) are sorted by default. Defaults to None. nl (int | None): preferred alias for newline. can be a countdown variable. Defaults to None. explicit (int | bool): can be a countdown variable. if True, uses dict(a=b) syntax instead of {'a': b}. Defaults to False. nobr (bool): removes outer braces. Defaults to False. 
Returns: Tuple[str, Dict] : retstr, _leaf_info Example: >>> from ubelt.util_repr import * # NOQA >>> dict_ = {'a': 'edf', 'bc': 'ghi'} >>> print(_format_dict(dict_)[0]) { 'a': 'edf', 'bc': 'ghi', } >>> print(_format_dict(dict_, align=True)[0]) >>> print(_format_dict(dict_, align=':')[0]) { 'a' : 'edf', 'bc': 'ghi', } >>> print(_format_dict(dict_, explicit=True, align=True)[0]) dict( a ='edf', bc='ghi', ) """ kwargs['_root_info'] = _rectify_root_info(kwargs.get('_root_info', None)) kwargs['_root_info']['depth'] += 1 stritems = kwargs.pop('si', kwargs.pop('stritems', False)) if stritems: kwargs['strkeys'] = True kwargs['strvals'] = True kwargs['strkeys'] = kwargs.pop('sk', kwargs.pop('strkeys', False)) kwargs['strvals'] = kwargs.pop('sv', kwargs.pop('strvals', False)) newlines = kwargs.pop('nl', kwargs.pop('newlines', True)) kwargs['nl'] = _rectify_countdown_or_bool(newlines) nobraces = kwargs.pop('nobr', kwargs.pop('nobraces', False)) kwargs['nobraces'] = _rectify_countdown_or_bool(nobraces) compact_brace = kwargs.get('cbr', kwargs.get('compact_brace', False)) # kwargs['cbr'] = _rectify_countdown_or_bool(compact_brace) # Doesn't actually put in trailing comma if on same line trailing_sep = kwargs.get('trailsep', kwargs.get('trailing_sep', newlines > 0)) explicit = kwargs.get('explicit', False) itemsep = kwargs.get('itemsep', ' ') align = kwargs.get('align', False) if align and not isinstance(align, str): default_kvsep = ': ' if explicit: default_kvsep = '=' kvsep = kwargs.get('kvsep', default_kvsep) align = kvsep if len(dict_) == 0: retstr = 'dict()' if explicit else '{}' _leaf_info = None else: itemstrs, _leaf_info = _dict_itemstrs(dict_, **kwargs) if nobraces: lbr, rbr = '', '' elif explicit: lbr, rbr = 'dict(', ')' else: lbr, rbr = '{', '}' retstr = _join_itemstrs(itemstrs, itemsep, newlines, _leaf_info, nobraces, trailing_sep, compact_brace, lbr, rbr, align) return retstr, _leaf_info def _join_itemstrs(itemstrs, itemsep, newlines, _leaf_info, nobraces, trailing_sep, compact_brace, lbr, rbr, align=False): """ Joins string-ified items with separators newlines and container-braces. """ # positive newlines means start counting from the root use_newline = newlines > 0 # negative countdown values mean start counting from the leafs # if compact_brace < 0: # compact_brace = (-compact_brace) >= _leaf_info['max_height'] if newlines < 0: use_newline = (-newlines) < _leaf_info['max_height'] if use_newline: sep = ',\n' if nobraces: if align: itemstrs = _align_lines(itemstrs, character=align) body_str = sep.join(itemstrs) if trailing_sep and len(itemstrs) > 0: body_str += ',' retstr = body_str else: if compact_brace: # Why must we modify the indentation below and not here? # prefix = '' # rest = [util_str.indent(s, prefix) for s in itemstrs[1:]] # indented = itemstrs[0:1] + rest indented = itemstrs else: prefix = ' ' * 4 indented = [util_str.indent(s, prefix) for s in itemstrs] if align: indented = _align_lines(indented, character=align) body_str = sep.join(indented) if trailing_sep and len(itemstrs) > 0: body_str += ',' if compact_brace: # Why can we modify the indentation here but not above? braced_body_str = (lbr + body_str.replace('\n', '\n ') + rbr) else: braced_body_str = (lbr + '\n' + body_str + '\n' + rbr) retstr = braced_body_str else: sep = ',' + itemsep body_str = sep.join(itemstrs) if trailing_sep and len(itemstrs) > 0: body_str += ',' retstr = (lbr + body_str + rbr) return retstr def _dict_itemstrs(dict_, **kwargs): """ Create a string representation for each item in a dict. 
Args: dict_ (dict): the dict **kwargs: explicit, precision, kvsep, strkeys, _return_info, cbr, compact_brace, sort Example: >>> from ubelt.util_repr import * >>> dict_ = {'b': .1, 'l': 'st', 'g': 1.0, 's': 10, 'm': 0.9, 'w': .5} >>> kwargs = {'strkeys': True, 'sort': True} >>> itemstrs, _ = _dict_itemstrs(dict_, **kwargs) >>> char_order = [p[0] for p in itemstrs] >>> assert char_order == ['b', 'g', 'l', 'm', 's', 'w'] """ import ubelt as ub explicit = kwargs.get('explicit', False) kwargs['explicit'] = _rectify_countdown_or_bool(explicit) precision = kwargs.get('precision', None) default_kvsep = ': ' default_strkeys = False if explicit: default_strkeys = True default_kvsep = '=' kvsep = kwargs.get('kvsep', default_kvsep) def make_item_str(key, val): if explicit or kwargs.get('strkeys', default_strkeys): key_str = str(key) else: key_str = urepr(key, precision=precision, newlines=0) prefix = key_str + kvsep kwargs['_return_info'] = True val_str, _leaf_info = urepr(val, **kwargs) # If the first line does not end with an open nest char # (e.g. for ndarrays), otherwise we need to worry about # residual indentation. pos = val_str.find('\n') first_line = val_str if pos == -1 else val_str[:pos] compact_brace = kwargs.get('cbr', kwargs.get('compact_brace', False)) if compact_brace or not first_line.rstrip().endswith(tuple('([{<')): rest = '' if pos == -1 else val_str[pos:] # val_str = first_line.lstrip() + rest val_str = first_line + rest if '\n' in prefix: # Fix issue with keys that span new lines item_str = prefix + val_str else: item_str = ub.hzcat([prefix, val_str]) else: item_str = prefix + val_str return item_str, _leaf_info items = list(dict_.items()) _tups = [make_item_str(key, val) for (key, val) in items] itemstrs = [t[0] for t in _tups] max_height = max([t[1]['max_height'] for t in _tups]) if _tups else 0 _leaf_info = { 'max_height': max_height + 1, } sort = kwargs.get('sort', None) if sort == 'auto': sort = None dict_sort_behavior = kwargs.get('_dict_sort_behavior', 'new') if dict_sort_behavior == 'old': if sort is None: # if sort is None, force orderings on unordered collections like dicts, # but keep ordering of ordered collections like OrderedDicts. # NOTE: WE WANT TO CHANGE THIS TO FALSE BY DEFAULT. # MIGHT REQUIRE DEPRECATING PYTHON 3.6 SUPPORT sort = True # LEGACY UBELT BEHAVIOR # HOW TO WE INTRODUCE A BACKWARDS COMPATIBLE WAY TO MAKE THIS CHANGE? # sort = False # cannot make this change safely if isinstance(dict_, collections.OrderedDict): # never sort ordered dicts; they are perfect just the way they are! sort = False else: if sort is None: # Dictionaries are sorted by default in 3.7+ so never sort dictionaries sort = False if sort: key = sort if callable(sort) else None itemstrs = _sort_itemstrs(items, itemstrs, key) return itemstrs, _leaf_info def _list_itemstrs(list_, **kwargs): """ Create a string representation for each item in a list. Args: list_ (Sequence): **kwargs: _return_info, sort """ items = list(list_) kwargs['_return_info'] = True _tups = [urepr(item, **kwargs) for item in items] itemstrs = [t[0] for t in _tups] max_height = max([t[1]['max_height'] for t in _tups]) if _tups else 0 _leaf_info = { 'max_height': max_height + 1, } sort = kwargs.get('sort', None) if sort == 'auto': sort = None if sort is None: # if sort is None, force orderings on unordered collections like sets, # but keep ordering of ordered collections like lists. 
sort = isinstance(list_, (set, frozenset)) if sort: key = sort if callable(sort) else None itemstrs = _sort_itemstrs(items, itemstrs, key) return itemstrs, _leaf_info def _sort_itemstrs(items, itemstrs, key=None): """ Equivalent to ``sorted(items)`` except if ``items`` are unorderable, then string values are used to define an ordering. """ # First try to sort items by their normal values # If that does not work, then sort by their string values try: # Set ordering is not unique. Sort by strings values instead. if len(items) > 0 and isinstance(items[0], (set, frozenset)): raise TypeError sortx = util_list.argsort(items, key=key) except TypeError: sortx = util_list.argsort(itemstrs, key=key) itemstrs = [itemstrs[x] for x in sortx] return itemstrs def _rectify_countdown_or_bool(count_or_bool): """ used by recursive functions to specify which level to turn a bool on in counting down yields True, True, ..., False counting up yields False, False, False, ... True Args: count_or_bool (bool | int): if positive and an integer, it will count down, otherwise it will remain the same. Returns: int or bool: count_or_bool_ Example: >>> from ubelt.util_repr import _rectify_countdown_or_bool # NOQA >>> count_or_bool = True >>> a1 = (_rectify_countdown_or_bool(2)) >>> a2 = (_rectify_countdown_or_bool(1)) >>> a3 = (_rectify_countdown_or_bool(0)) >>> a4 = (_rectify_countdown_or_bool(-1)) >>> a5 = (_rectify_countdown_or_bool(-2)) >>> a6 = (_rectify_countdown_or_bool(True)) >>> a7 = (_rectify_countdown_or_bool(False)) >>> a8 = (_rectify_countdown_or_bool(None)) >>> result = [a1, a2, a3, a4, a5, a6, a7, a8] >>> print(result) [1, 0, 0, -1, -2, True, False, False] """ if count_or_bool is True or count_or_bool is False: count_or_bool_ = count_or_bool elif isinstance(count_or_bool, int): if count_or_bool == 0: return 0 elif count_or_bool > 0: count_or_bool_ = count_or_bool - 1 else: # We dont countup negatives anymore count_or_bool_ = count_or_bool else: count_or_bool_ = False return count_or_bool_ def _align_text(text, character='=', replchar=None, pos=0): r""" Left justifies text on the left side of character Args: text (str): text to align character (str): character to align at replchar (str): replacement character (default=None) Returns: str: new_text Example: >>> character = '=' >>> text = 'a = b=\none = two\nthree = fish\n' >>> print(text) >>> result = (_align_text(text, '=')) >>> print(result) a = b= one = two three = fish """ line_list = text.splitlines() new_lines = _align_lines(line_list, character, replchar, pos=pos) new_text = '\n'.join(new_lines) return new_text def _align_lines(line_list, character='=', replchar=None, pos=0): r""" Left justifies text on the left side of character Args: line_list (list of strs): character (str): pos (int or list or None): does one alignment for all chars beyond this column position. If pos is None, then all chars are aligned. 
Returns: list: new_lines Example: >>> line_list = 'a = b\none = two\nthree = fish'.split('\n') >>> character = '=' >>> new_lines = _align_lines(line_list, character) >>> result = ('\n'.join(new_lines)) >>> print(result) a = b one = two three = fish Example: >>> line_list = 'foofish:\n a = b\n one = two\n three = fish'.split('\n') >>> character = '=' >>> new_lines = _align_lines(line_list, character) >>> result = ('\n'.join(new_lines)) >>> print(result) foofish: a = b one = two three = fish Example: >>> import ubelt as ub >>> character = ':' >>> text = ub.codeblock(''' {'max': '1970/01/01 02:30:13', 'mean': '1970/01/01 01:10:15', 'min': '1970/01/01 00:01:41', 'range': '2:28:32', 'std': '1:13:57',}''').split('\n') >>> new_lines = _align_lines(text, ':', ' :') >>> result = '\n'.join(new_lines) >>> print(result) {'max' : '1970/01/01 02:30:13', 'mean' : '1970/01/01 01:10:15', 'min' : '1970/01/01 00:01:41', 'range' : '2:28:32', 'std' : '1:13:57',} Example: >>> line_list = 'foofish:\n a = b = c\n one = two = three\nthree=4= fish'.split('\n') >>> character = '=' >>> # align the second occurrence of a character >>> new_lines = _align_lines(line_list, character, pos=None) >>> print(('\n'.join(line_list))) >>> result = ('\n'.join(new_lines)) >>> print(result) foofish: a = b = c one = two = three three=4 = fish """ import re # FIXME: continue to fix ansi if pos is None: # Align all occurrences num_pos = max([line.count(character) for line in line_list]) pos = list(range(num_pos)) # Allow multiple alignments if isinstance(pos, list): pos_list = pos # recursive calls new_lines = line_list for pos in pos_list: new_lines = _align_lines(new_lines, character=character, replchar=replchar, pos=pos) return new_lines # base case if replchar is None: replchar = character # the pos-th character to align lpos = pos rpos = lpos + 1 tup_list = [line.split(character) for line in line_list] handle_ansi = True if handle_ansi: # nocover # Remove ansi from length calculation # References: http://stackoverflow.com/questions/14693701remove-ansi ansi_escape = re.compile(r'\x1b[^m]*m') # Find how much padding is needed maxlen = 0 for tup in tup_list: if len(tup) >= rpos + 1: if handle_ansi: # nocover tup = [ansi_escape.sub('', x) for x in tup] left_lenlist = list(map(len, tup[0:rpos])) left_len = sum(left_lenlist) + lpos * len(replchar) maxlen = max(maxlen, left_len) # Pad each line to align the pos-th occurrence of the chosen character new_lines = [] for tup in tup_list: if len(tup) >= rpos + 1: lhs = character.join(tup[0:rpos]) rhs = character.join(tup[rpos:]) # pad the new line with requested justification newline = lhs.ljust(maxlen) + replchar + rhs new_lines.append(newline) else: new_lines.append(replchar.join(tup)) return new_lines # Give the urepr function itself a reference to the default extensions # register method so the user can modify them without accessing this module urepr.extensions = _REPR_EXTENSIONS urepr.register = _REPR_EXTENSIONS.register ubelt-1.3.7/ubelt/util_repr.pyi000066400000000000000000000005471472470106000165070ustar00rootroot00000000000000from typing import Type from typing import Tuple from typing import Callable from typing import Any def urepr(data: object, **kwargs) -> str: ... class ReprExtensions: def __init__(self) -> None: ... def register(self, key: Type | Tuple[Type] | str) -> Callable: ... def lookup(self, data: Any) -> Callable: ... 
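The module above ends by attaching ``urepr.extensions`` and ``urepr.register`` as aliases for the global ``_REPR_EXTENSIONS`` instance and its ``register`` method. The snippet below is a small illustrative sketch (not part of the archive contents; ``Point`` and ``_format_point`` are hypothetical names) of using that global alias. Note that this mutates global state, so a locally scoped ``ReprExtensions`` instance is preferable when the change should not be global.

import ubelt as ub

class Point:
    # A hypothetical user type whose default repr is not eval-able
    def __init__(self, x, y):
        self.x, self.y = x, y

# ``ub.urepr.register`` is the alias assigned at the end of util_repr.py
@ub.urepr.register(Point)
def _format_point(data, **kwargs):
    # kwargs carries the urepr options (precision, itemsep, ...)
    return 'Point({}, {})'.format(data.x, data.y)

print(ub.urepr({'a': Point(1, 2)}, nl=0))
# expected to print: {'a': Point(1, 2)}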
ubelt-1.3.7/ubelt/util_str.py000066400000000000000000000167361472470106000162050ustar00rootroot00000000000000""" Functions for working with text and strings. The :func:`codeblock` and :func:`paragraph` wrap multiline strings to help write text blocks without hindering the surrounding code indentation. The :func:`hzcat` function horizontally concatenates multiline text. The :func:`indent` prefixes all lines in a text block with a given prefix. By default that prefix is 4 spaces. """ __all__ = [ 'indent', 'codeblock', 'paragraph', 'hzcat', 'ensure_unicode', ] def indent(text, prefix=' '): """ Indents a block of text Args: text (str): text to indent prefix (str): prefix to add to each line. Defaults to ``' '`` Returns: str: indented text Example: >>> import ubelt as ub >>> NL = chr(10) # newline character >>> text = 'Lorem ipsum' + NL + 'dolor sit amet' >>> prefix = ' ' >>> result = ub.indent(text, prefix) >>> assert all(t.startswith(prefix) for t in result.split(NL)) """ return prefix + text.replace('\n', '\n' + prefix) def codeblock(text): """ Create a block of text that preserves all newlines and relative indentation Wraps multiline string blocks and returns unindented code. Useful for templated code defined in indented parts of code. Args: text (str): typically a multiline string Returns: str: the unindented string Example: >>> import ubelt as ub >>> # Simulate an indented part of code >>> if True: >>> # notice the indentation on this will be normal >>> codeblock_version = ub.codeblock( ... ''' ... def foo(): ... return 'bar' ... ''' ... ) >>> # notice the indentation and newlines on this will be odd >>> normal_version = (''' ... def foo(): ... return 'bar' ... ''') >>> assert normal_version != codeblock_version >>> print('Without codeblock') >>> print(normal_version) >>> print('With codeblock') >>> print(codeblock_version) """ import textwrap # this is a slow import, do it lazy return textwrap.dedent(text).strip('\n') def paragraph(text): r""" Wraps multi-line strings and restructures the text to remove all newlines, heading, trailing, and double spaces. Useful for writing help strings, log messages, and natural text. Args: text (str): typically a multiline string Returns: str: the reduced text block Example: >>> import ubelt as ub >>> text = ( >>> ''' >>> Lorem ipsum dolor sit amet, consectetur adipiscing >>> elit, sed do eiusmod tempor incididunt ut labore et >>> dolore magna aliqua. >>> ''') >>> out = ub.paragraph(text) >>> assert chr(10) in text >>> assert chr(10) not in out >>> print('text = {!r}'.format(text)) >>> print('out = {!r}'.format(out)) text = '\n Lorem ipsum dolor sit amet, consectetur adipiscing\n elit, sed do eiusmod tempor incididunt ut labore et\n dolore magna aliqua.\n ' out = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.' """ import re out = re.sub(r'\s\s*', ' ', text).strip() return out def hzcat(args, sep=''): """ Horizontally concatenates strings preserving indentation Concatenates a list of objects ensuring that the next item in the list is all the way to the right of any previous items. Args: args (List[str]): strings to concatenate sep (str): separator. Defaults to ``''``. 
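Returns: str: the horizontally concatenated text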
Example1: >>> import ubelt as ub >>> B = ub.repr2([[1, 2], [3, 457]], nl=1, cbr=True, trailsep=False) >>> C = ub.repr2([[5, 6], [7, 8]], nl=1, cbr=True, trailsep=False) >>> args = ['A = ', B, ' * ', C] >>> print(ub.hzcat(args)) A = [[1, 2], * [[5, 6], [3, 457]] [7, 8]] Example2: >>> import ubelt as ub >>> import unicodedata >>> aa = unicodedata.normalize('NFD', 'á') # a unicode char with len2 >>> B = ub.repr2([['θ', aa], [aa, aa, aa]], nl=1, si=True, cbr=True, trailsep=False) >>> C = ub.repr2([[5, 6], [7, 'θ']], nl=1, si=True, cbr=True, trailsep=False) >>> args = ['A', '=', B, '*', C] >>> print(ub.hzcat(args, sep='|')) A|=|[[θ, á], |*|[[5, 6], | | [á, á, á]]| | [7, θ]] """ import unicodedata if '\n' in sep or '\r' in sep: raise ValueError('`sep` cannot contain newline characters') # TODO: ensure unicode data works correctly for python2 # args = [unicodedata.normalize('NFC', ensure_unicode(val)) for val in args] args = [unicodedata.normalize('NFC', val) for val in args] arglines = [a.split('\n') for a in args] height = max(map(len, arglines)) # Do vertical padding arglines = [lines + [''] * (height - len(lines)) for lines in arglines] # Initialize output all_lines = ['' for _ in range(height)] width = 0 n_args = len(args) for sx, lines in enumerate(arglines): # Concatenate the new string for lx, line in enumerate(lines): all_lines[lx] += line # Find the new maximum horizontal width width = max(width, max(map(len, all_lines))) if sx < n_args - 1: # Horizontal padding on all but last iter for lx, line in list(enumerate(all_lines)): residual = width - len(line) all_lines[lx] = line + (' ' * residual) + sep width += len(sep) # Clean up trailing whitespace all_lines = [line.rstrip(' ') for line in all_lines] ret = '\n'.join(all_lines) return ret def ensure_unicode(text): r""" Casts bytes into utf8 (mostly for python2 compatibility). Warning: This function is deprecated and will no longer be available in version 2.0.0. Args: text (str | bytes): text to ensure is decoded as unicode Returns: str References: .. [SO_12561063] http://stackoverflow.com/questions/12561063/extract-data-from-file Example: >>> from ubelt.util_str import * >>> import codecs # NOQA >>> assert ensure_unicode('my ünicôdé strįng') == 'my ünicôdé strįng' >>> assert ensure_unicode('text1') == 'text1' >>> assert ensure_unicode('text1'.encode('utf8')) == 'text1' >>> assert ensure_unicode('text1'.encode('utf8')) == 'text1' >>> assert (codecs.BOM_UTF8 + 'text»¿'.encode('utf8')).decode('utf8') """ from ubelt.util_deprecate import schedule_deprecation schedule_deprecation( modname='ubelt', name='ensure_unicode', type='function', migration='This should not be needed in Python 3', deprecate='1.2.0', error='2.0.0', remove='2.1.0') if isinstance(text, str): return text elif isinstance(text, bytes): return text.decode('utf8') else: # nocover raise ValueError('unknown input type {!r}'.format(text)) # if something with the above code goes wrong, refer to this # except UnicodeDecodeError: # if text.startswith(codecs.BOM_UTF8): # # Can safely remove the utf8 marker # text = text[len(codecs.BOM_UTF8):] # return text.decode('utf-8') ubelt-1.3.7/ubelt/util_str.pyi000066400000000000000000000004311472470106000163370ustar00rootroot00000000000000from typing import List def indent(text: str, prefix: str = ' ') -> str: ... def codeblock(text: str) -> str: ... def paragraph(text: str) -> str: ... def hzcat(args: List[str], sep: str = ''): ... def ensure_unicode(text: str | bytes) -> str: ... 
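A brief illustrative sketch (not part of the archive contents) that exercises the public util_str helpers defined above together:

import ubelt as ub

# codeblock dedents a template written inside indented source code
snippet = ub.codeblock(
    '''
    def foo():
        return 'bar'
    ''')

# indent prefixes every line (4 spaces by default)
print(ub.indent(snippet))

# paragraph collapses a multiline block into a single line
assert ub.paragraph('hello\n   world') == 'hello world'

# hzcat pads lines so each argument begins to the right of the previous one
print(ub.hzcat(['code = ', snippet]))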
ubelt-1.3.7/ubelt/util_stream.py000066400000000000000000000233641472470106000166630ustar00rootroot00000000000000""" Functions for capturing and redirecting IO streams with optional tee-functionality. The :class:`CaptureStdout` captures all text sent to stdout and optionally prevents it from actually reaching stdout. The :class:`TeeStringIO` does the same thing but for arbitrary streams. It is how the former is implemented. """ import sys import io __all__ = [ 'TeeStringIO', 'CaptureStdout', 'CaptureStream', ] class TeeStringIO(io.StringIO): """ An IO object that writes to itself and another IO stream. Attributes: redirect (io.IOBase | None): The other stream to write to. Example: >>> import ubelt as ub >>> import io >>> redirect = io.StringIO() >>> self = ub.TeeStringIO(redirect) >>> self.write('spam') >>> assert self.getvalue() == 'spam' >>> assert redirect.getvalue() == 'spam' """ def __init__(self, redirect=None): """ Args: redirect (io.IOBase): The other stream to write to. """ self.redirect = redirect # type: io.IOBase super().__init__() # Logic taken from prompt_toolkit/output/vt100.py version 3.0.5 in # flush I don't have a full understanding of what the buffer # attribute is supposed to be capturing here, but this seems to # allow us to embed in IPython while still capturing and Teeing # stdout if hasattr(redirect, 'buffer'): self.buffer = redirect.buffer # Py3. else: self.buffer = redirect # Note: mypy doesn't like this type # buffer (io.BufferedIOBase | io.IOBase | None): the redirected buffer attribute def isatty(self): # nocover """ Returns true of the redirect is a terminal. Note: Needed for ``IPython.embed`` to work properly when this class is used to override stdout / stderr. SeeAlso: :meth:`io.IOBase.isatty` Returns: bool """ return (self.redirect is not None and hasattr(self.redirect, 'isatty') and self.redirect.isatty()) def fileno(self): """ Returns underlying file descriptor of the redirected IOBase object if one exists. Returns: int : the integer corresponding to the file descriptor SeeAlso: :meth:`io.IOBase.fileno` Example: >>> import ubelt as ub >>> dpath = ub.Path.appdir('ubelt/tests/util_stream').ensuredir() >>> fpath = dpath / 'fileno-test.txt' >>> with open(fpath, 'w') as file: >>> self = ub.TeeStringIO(file) >>> descriptor = self.fileno() >>> print(f'descriptor={descriptor}') >>> assert isinstance(descriptor, int) Example: >>> # Test errors >>> # Not sure the best way to test, this func is important for >>> # capturing stdout when ipython embedding >>> import io >>> import pytest >>> import ubelt as ub >>> with pytest.raises(io.UnsupportedOperation): >>> ub.TeeStringIO(redirect=io.StringIO()).fileno() >>> with pytest.raises(io.UnsupportedOperation): >>> ub.TeeStringIO(None).fileno() """ if self.redirect is not None: return self.redirect.fileno() else: return super().fileno() @property def encoding(self): """ Gets the encoding of the `redirect` IO object FIXME: My complains that this violates the Liskov substitution principle because the return type can be str or None, whereas the parent class always returns a None. In the future we may raise an exception instead of returning None. 
SeeAlso: :py:obj:`io.TextIOBase.encoding` Example: >>> import ubelt as ub >>> redirect = io.StringIO() >>> assert ub.TeeStringIO(redirect).encoding is None >>> assert ub.TeeStringIO(None).encoding is None >>> assert ub.TeeStringIO(sys.stdout).encoding is sys.stdout.encoding >>> redirect = io.TextIOWrapper(io.StringIO()) >>> assert ub.TeeStringIO(redirect).encoding is redirect.encoding """ # mypy correctly complains if we include the return type, but we need # to keep this buggy behavior for legacy reasons. # Returns: # None | str if self.redirect is not None: return self.redirect.encoding else: return super().encoding @encoding.setter def encoding(self, value): # Adding a setter to make mypy happy raise Exception('Cannot set encoding attribute') def write(self, msg): """ Write to this and the redirected stream Args: msg (str): the data to write SeeAlso: :meth:`io.TextIOBase.write` Example: >>> import ubelt as ub >>> dpath = ub.Path.appdir('ubelt/tests/util_stream').ensuredir() >>> fpath = dpath / 'write-test.txt' >>> with open(fpath, 'w') as file: >>> self = ub.TeeStringIO(file) >>> n = self.write('hello world') >>> assert n == 11 >>> assert self.getvalue() == 'hello world' >>> assert fpath.read_text() == 'hello world' """ if self.redirect is not None: self.redirect.write(msg) return super().write(msg) def flush(self): # nocover """ Flush to this and the redirected stream SeeAlso: :meth:`io.IOBase.flush` """ if self.redirect is not None: self.redirect.flush() return super().flush() class CaptureStream(object): """ Generic class for capturing streaming output from stdout or stderr """ class CaptureStdout(CaptureStream): r""" Context manager that captures stdout and stores it in an internal stream. Depending on the value of ``suppress``, the user can control if stdout is printed (i.e. if stdout is tee-ed or suppressed) while it is being captured. SeeAlso: :func:`contextlib.redirect_stdout` - similar, but does not have the ability to print stdout while it is being captured. Attributes: text (str | None): internal storage for the most recent part parts (List[str]): internal storage for all parts cap_stdout (None | TeeStringIO): internal stream proxy orig_stdout (io.TextIOBase): internal pointer to the original stdout stream Example: >>> import ubelt as ub >>> self = ub.CaptureStdout(suppress=True) >>> print('dont capture the table flip (╯°□°)╯︵ ┻━┻') >>> with self: ... text = 'capture the heart ♥' ... print(text) >>> print('dont capture look of disapproval ಠ_ಠ') >>> assert isinstance(self.text, str) >>> assert self.text == text + '\n', 'failed capture text' Example: >>> import ubelt as ub >>> self = ub.CaptureStdout(suppress=False) >>> with self: ... print('I am captured and printed in stdout') >>> assert self.text.strip() == 'I am captured and printed in stdout' Example: >>> import ubelt as ub >>> self = ub.CaptureStdout(suppress=True, enabled=False) >>> with self: ... print('dont capture') >>> assert self.text is None """ def __init__(self, suppress=True, enabled=True): """ Args: suppress (bool): if True, stdout is not printed while captured. Defaults to True. enabled (bool): does nothing if this is False. Defaults to True. 
""" self.text = None self._pos = 0 # keep track of how much has been logged self.parts = [] self.started = False self.cap_stdout = None self.enabled = enabled self.suppress = suppress self.orig_stdout = sys.stdout if suppress: redirect = None else: redirect = self.orig_stdout self.cap_stdout = TeeStringIO(redirect) def log_part(self): """ Log what has been captured so far """ self.cap_stdout.seek(self._pos) text = self.cap_stdout.read() self._pos = self.cap_stdout.tell() self.parts.append(text) self.text = text def start(self): if self.enabled: self.text = '' self.started = True sys.stdout = self.cap_stdout def stop(self): """ Example: >>> import ubelt as ub >>> ub.CaptureStdout(enabled=False).stop() >>> ub.CaptureStdout(enabled=True).stop() """ if self.enabled: self.started = False sys.stdout = self.orig_stdout def __enter__(self): self.start() return self def __del__(self): # nocover if self.started: self.stop() if self.cap_stdout is not None: self.close() def close(self): self.cap_stdout.close() self.cap_stdout = None def __exit__(self, ex_type, ex_value, ex_traceback): """ Args: ex_type (Type[BaseException] | None): ex_value (BaseException | None): ex_traceback (TracebackType | None): Returns: bool | None """ if self.enabled: try: self.log_part() finally: self.stop() if ex_traceback is not None: return False # return a falsey value on error ubelt-1.3.7/ubelt/util_stream.pyi000066400000000000000000000024771472470106000170360ustar00rootroot00000000000000import io from typing import List from typing import Type from types import TracebackType from _typeshed import Incomplete class TeeStringIO(io.StringIO): redirect: io.IOBase | None buffer: Incomplete def __init__(self, redirect: io.IOBase | None = None) -> None: ... def isatty(self) -> bool: ... def fileno(self) -> int: ... @property def encoding(self): ... @encoding.setter def encoding(self, value) -> None: ... def write(self, msg: str): ... def flush(self): ... class CaptureStream: ... class CaptureStdout(CaptureStream): text: str | None parts: List[str] cap_stdout: None | TeeStringIO orig_stdout: io.TextIOBase started: bool enabled: bool suppress: bool def __init__(self, suppress: bool = True, enabled: bool = True) -> None: ... def log_part(self) -> None: ... def start(self) -> None: ... def stop(self) -> None: ... def __enter__(self): ... def __del__(self) -> None: ... def close(self) -> None: ... def __exit__(self, ex_type: Type[BaseException] | None, ex_value: BaseException | None, ex_traceback: TracebackType | None) -> bool | None: ... ubelt-1.3.7/ubelt/util_time.py000066400000000000000000000553161472470106000163300ustar00rootroot00000000000000""" This is util_time, it contains functions for handling time related code. The :func:`timestamp` function returns an iso8601 timestamp without much fuss. The :func:`timeparse` is the inverse of `timestamp`, and makes use of :mod:`dateutil` if it is available. The :class:`Timer` class is a context manager that times a block of indented code. It includes `tic` and `toc` methods a more matlab like feel. Timerit is gone! Use the standalone and separate module :py:mod:`timerit`. 
See Also: :mod:`tempora` - https://github.com/jaraco/tempora - time related utility functions from Jaraco :mod:`pendulum` - https://github.com/sdispater/pendulum - drop in replacement for datetime :mod:`arrow` - https://github.com/arrow-py/arrow :mod:`kwutil.util_time` - https://kwutil.readthedocs.io/en/latest/auto/kwutil.util_time.html """ import time import sys from functools import lru_cache __all__ = ['timestamp', 'timeparse', 'Timer'] @lru_cache(maxsize=None) def _needs_workaround39103(): """ Depending on the system C library, either %04Y or %Y won't work. This is an actual Python bug: https://bugs.python.org/issue13305 singer-python also had a similar issue: https://github.com/singer-io/singer-python/issues/86 See Also: https://github.com/jaraco/tempora/blob/main/tempora/__init__.py#L59 """ from datetime import datetime as datetime_cls return len(datetime_cls(1, 1, 1).strftime('%Y')) != 4 def timestamp(datetime=None, precision=0, default_timezone='local', allow_dateutil=True): """ Make a concise iso8601 timestamp suitable for use in filenames. Args: datetime (datetime.datetime | datetime.date | None): A datetime to format into a timestamp. If unspecified, the current local time is used. If given as a date, the time 00:00 is used. precision (int): if non-zero, adds up to 6 digits of sub-second precision. default_timezone (str | datetime.timezone): if the input does not specify a timezone, assume this one. Can be "local" or "utc", or a standardized code if dateutil is installed. allow_dateutil (bool): if True, will use dateutil to look up the default timezone if needed Returns: str: The timestamp, which will always contain a date, time, and timezone. Note: For more info see [WikiISO8601]_, [PyStrptime]_, [PyTime]_. References: .. [WikiISO8601] https://en.wikipedia.org/wiki/ISO_8601 .. [PyStrptime] https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior .. [PyTime] https://docs.python.org/3/library/time.html Example: >>> import ubelt as ub >>> stamp = ub.timestamp() >>> print('stamp = {!r}'.format(stamp)) stamp = ...-...-...T...
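An additional minimal sketch using only arguments documented above: with ``default_timezone='utc'`` the stamp ends with the compact ``+0`` offset, and a nonzero ``precision`` adds fractional seconds.

Example:
    >>> import ubelt as ub
    >>> stamp = ub.timestamp(precision=3, default_timezone='utc')
    >>> assert stamp.endswith('+0') and '.' in stamp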
Example: >>> import ubelt as ub >>> import datetime as datetime_mod >>> from datetime import datetime as datetime_cls >>> # Create a datetime object with timezone information >>> ast_tzinfo = datetime_mod.timezone(datetime_mod.timedelta(hours=-4), 'AST') >>> datetime = datetime_cls.utcfromtimestamp(123456789.123456789).replace(tzinfo=ast_tzinfo) >>> stamp = ub.timestamp(datetime, precision=2) >>> print('stamp = {!r}'.format(stamp)) stamp = '1973-11-29T213309.12-4' >>> # Demo with a fractional hour timezone >>> act_tzinfo = datetime_mod.timezone(datetime_mod.timedelta(hours=+9.5), 'ACT') >>> datetime = datetime_cls.utcfromtimestamp(123456789.123456789).replace(tzinfo=act_tzinfo) >>> stamp = ub.timestamp(datetime, precision=2) >>> print('stamp = {!r}'.format(stamp)) stamp = '1973-11-29T213309.12+0930' >>> # Can accept datetime or date objects with local, utc, or custom default timezones >>> act_tzinfo = datetime_mod.timezone(datetime_mod.timedelta(hours=+9.5), 'ACT') >>> datetime_utc = ub.timeparse('2020-03-05T112233', default_timezone='utc') >>> datetime_act = ub.timeparse('2020-03-05T112233', default_timezone=act_tzinfo) >>> datetime_notz = datetime_utc.replace(tzinfo=None) >>> date = datetime_utc.date() >>> stamp_utc = ub.timestamp(datetime_utc) >>> stamp_act = ub.timestamp(datetime_act) >>> stamp_date_utc = ub.timestamp(date, default_timezone='utc') >>> print(f'stamp_utc = {stamp_utc}') >>> print(f'stamp_act = {stamp_act}') >>> print(f'stamp_date_utc = {stamp_date_utc}') stamp_utc = 2020-03-05T112233+0 stamp_act = 2020-03-05T112233+0930 stamp_date_utc = 2020-03-05T000000+0 Example: >>> # xdoctest: +REQUIRES(module:dateutil) >>> # Make sure we are compatible with dateutil >>> import ubelt as ub >>> from dateutil.tz import tzlocal >>> import datetime as datetime_mod >>> from datetime import datetime as datetime_cls >>> tz_act = datetime_mod.timezone(datetime_mod.timedelta(hours=+9.5), 'ACT') >>> tzinfo_list = [ >>> tz_act, >>> datetime_mod.timezone(datetime_mod.timedelta(hours=-4), 'AST'), >>> datetime_mod.timezone(datetime_mod.timedelta(hours=0), 'UTC'), >>> datetime_mod.timezone.utc, >>> None, >>> tzlocal() >>> ] >>> # Note: there is a win32 bug here >>> # https://bugs.python.org/issue37 that means we cant use >>> # dates close to the epoch >>> datetime_list = [ >>> datetime_cls.utcfromtimestamp(123456789.123456789 + 315360000), >>> datetime_cls.utcfromtimestamp(0 + 315360000), >>> ] >>> basis = { >>> 'precision': [0, 3, 9], >>> 'tzinfo': tzinfo_list, >>> 'datetime': datetime_list, >>> 'default_timezone': ['local', 'utc', tz_act], >>> } >>> for params in ub.named_product(basis): >>> dtime = params['datetime'].replace(tzinfo=params['tzinfo']) >>> precision = params.get('precision', 0) >>> stamp = ub.timestamp(datetime=dtime, precision=precision) >>> recon = ub.timeparse(stamp) >>> alt = recon.strftime('%Y-%m-%dT%H%M%S.%f%z') >>> print('---') >>> print('params = {}'.format(ub.repr2(params, nl=1))) >>> print(f'dtime={dtime}') >>> print(f'stamp={stamp}') >>> print(f'recon={recon}') >>> print(f'alt ={alt}') >>> shift = 10 ** precision >>> a = int(dtime.timestamp() * shift) >>> b = int(recon.timestamp() * shift) >>> assert a == b, f'{a} != {b}' """ import datetime as datetime_mod from datetime import datetime as datetime_cls datetime_obj = datetime offset_seconds = None # datetime inherits from date. Strange, so we cant use this. 
See: # https://github.com/python/typeshed/issues/4802 if isinstance(datetime_obj, datetime_mod.date) and not isinstance(datetime_obj, datetime_cls): # Coerce a date to datetime.datetime datetime_obj = datetime_cls.combine(datetime_obj, datetime_cls.min.time()) if datetime_obj is None or datetime_obj.tzinfo is None: # In either case, we need to construct a timezone object tzinfo = _timezone_coerce(default_timezone, allow_dateutil=allow_dateutil) # If datetime_obj is unspecified, create a timezone aware now object if datetime_obj is None: datetime_obj = datetime_cls.now(tzinfo) else: tzinfo = datetime_obj.tzinfo # the arg to utcoffset is confusing offset_seconds = tzinfo.utcoffset(datetime_obj).total_seconds() # offset_seconds = tzinfo.utcoffset(None).total_seconds() seconds_per_hour = 3600 tz_hour, tz_remain = divmod(offset_seconds, seconds_per_hour) tz_hour = int(tz_hour) if tz_remain: seconds_per_minute = 60 tz_min = int(tz_remain // seconds_per_minute) utc_offset = '{:+03d}{:02d}'.format(tz_hour, tz_min) else: utc_offset = str(tz_hour) if tz_hour < 0 else '+' + str(tz_hour) if precision > 0: fprecision = 6 # microseconds are padded to 6 decimals # NOTE: The time.strftime and datetime.datetime.strftime methods # seem to work differently. The former does not support %f if _needs_workaround39103(): # nocover local_stamp = datetime_obj.strftime('%04Y-%m-%dT%H%M%S.%f') else: # nocover local_stamp = datetime_obj.strftime('%Y-%m-%dT%H%M%S.%f') ms_offset = len(local_stamp) - max(0, fprecision - precision) local_stamp = local_stamp[:ms_offset] else: if _needs_workaround39103(): # nocover local_stamp = datetime_obj.strftime('%04Y-%m-%dT%H%M%S') else: # nocover local_stamp = datetime_obj.strftime('%Y-%m-%dT%H%M%S') stamp = local_stamp + utc_offset return stamp def timeparse(stamp, default_timezone='local', allow_dateutil=True): """ Create a :class:`datetime.datetime` object from a string timestamp. Without any extra dependencies this will parse the output of :func:`ubelt.util_time.timestamp` into a datetime object. In the case where the format differs, :func:`dateutil.parser.parse` will be used if the :py:mod:`python-dateutil` package is installed. Args: stamp (str): a string encoded timestamp default_timezone (str): if the input does not specify a timezone, assume this one. Can be "local" or "utc". allow_dateutil (bool): if False we only use the minimal parsing and do not allow a fallback to dateutil. Returns: datetime.datetime: the parsed datetime Raises: ValueError: if parsing fails.
TODO: - [ ] Allow defaulting to local or utm timezone (currently default is local) Example: >>> import ubelt as ub >>> # Demonstrate a round trip of timestamp and timeparse >>> stamp = ub.timestamp() >>> datetime = ub.timeparse(stamp) >>> assert ub.timestamp(datetime) == stamp >>> # Round trip with precision >>> stamp = ub.timestamp(precision=4) >>> datetime = ub.timeparse(stamp) >>> assert ub.timestamp(datetime, precision=4) == stamp Example: >>> import ubelt as ub >>> # We should always be able to parse these >>> good_stamps = [ >>> '2000-11-22', >>> '2000-11-22T111111.44444Z', >>> '2000-11-22T111111.44444+5', >>> '2000-11-22T111111.44444-05', >>> '2000-11-22T111111.44444-0500', >>> '2000-11-22T111111.44444+0530', >>> '2000-11-22T111111Z', >>> '2000-11-22T111111+5', >>> '2000-11-22T111111+0530', >>> ] >>> for stamp in good_stamps: >>> print(f'----') >>> print(f'stamp={stamp}') >>> result = ub.timeparse(stamp, allow_dateutil=0) >>> print(f'result={result!r}') >>> recon = ub.timestamp(result) >>> print(f'recon={recon}') Example: >>> import ubelt as ub >>> # We require dateutil to handle these types of stamps >>> import pytest >>> conditional_stamps = [ >>> '2000-01-02T11:23:58.12345+5:30', >>> '09/25/2003', >>> 'Thu Sep 25 10:36:28 2003', >>> ] >>> for stamp in conditional_stamps: >>> with pytest.raises(ValueError): >>> result = ub.timeparse(stamp, allow_dateutil=False) >>> have_dateutil = bool(ub.modname_to_modpath('dateutil')) >>> if have_dateutil: >>> for stamp in conditional_stamps: >>> result = ub.timeparse(stamp) Ignore: import timerit ti = timerit.Timerit(1000, 10) ti.reset('non-standard dateutil.parse').call(lambda: dateutil.parser.parse('2000-01-02T112358.12345+5')) ti.reset('non-standard ubelt.timeparse').call(lambda: ub.timeparse('2000-01-02T112358.12345+5')) ti.reset('standard dateutil.parse').call(lambda: dateutil.parser.parse('2000-01-02T112358.12345+0500')) ti.reset('standard dateutil.isoparse').call(lambda: dateutil.parser.isoparse('2000-01-02T112358.12345+0500')) ti.reset('standard ubelt.timeparse').call(lambda: ub.timeparse('2000-01-02T112358.12345+0500')) ti.reset('standard datetime_cls.strptime').call(lambda: datetime_cls.strptime('2000-01-02T112358.12345+0500', '%Y-%m-%dT%H%M%S.%f%z')) """ from datetime import datetime as datetime_cls datetime_obj = None # Check if we might have a minimal format maybe_minimal = ( len(stamp) >= 17 and 'T' in stamp[10:] ) fixed_stamp = stamp if maybe_minimal: # Note by default %z only handles the format `[+-]HHMM(SS(.ffffff))` # this means we have to handle the case where `[+-]HH` is given. 
        # We do this by checking the offset and padding it to at least the
        # `[+-]HHMM` format
        date_part, timetz_part = stamp.split('T', 1)
        if '-' in timetz_part[6:]:
            time_part, sign, tz_part = timetz_part.partition('-')
        elif '+' in timetz_part[6:]:
            time_part, sign, tz_part = timetz_part.partition('+')
        else:
            # In 3.7 a Z suffix is handled correctly.
            # For 3.6 compatibility, replace Z with +0000.
            if timetz_part.endswith('Z'):
                time_part = timetz_part[:-1]
                sign = '+'
                tz_part = '0000'
            else:
                tz_part = None

        if tz_part is not None:
            if len(tz_part) == 1:
                tz_part = '0{}00'.format(tz_part)
            elif len(tz_part) == 2:
                tz_part = '{}00'.format(tz_part)
            fixed_stamp = ''.join([date_part, 'T', time_part, sign, tz_part])

    if len(stamp) == 10:
        try:
            fmt = '%Y-%m-%d'
            datetime_obj = datetime_cls.strptime(fixed_stamp, fmt)
        except ValueError:
            pass

    if maybe_minimal and datetime_obj is None:
        minimal_formats = [
            '%Y-%m-%dT%H%M%S%z',
            '%Y-%m-%dT%H%M%S',
            '%Y-%m-%dT%H%M%S.%f%z',
            '%Y-%m-%dT%H%M%S.%f',
        ]
        for fmt in minimal_formats:
            try:
                datetime_obj = datetime_cls.strptime(fixed_stamp, fmt)
            except ValueError:
                pass
            else:
                break

    if datetime_obj is None:
        # Our minimal logic did not work, can we use dateutil?
        if not allow_dateutil:
            raise ValueError((
                'Cannot parse timestamp. '
                'Unknown string format: {!r}, and '
                'dateutil is not allowed').format(stamp))
        else:
            try:
                from dateutil.parser import parse as du_parse
            except (ModuleNotFoundError, ImportError):  # nocover
                raise ValueError((
                    'Cannot parse timestamp. '
                    'Unknown string format: {!r}, and '
                    'dateutil is not installed').format(stamp)) from None
            else:  # nocover
                datetime_obj = du_parse(stamp)

    if datetime_obj.tzinfo is None:
        # Timezone is unspecified, need to construct the default one.
        tzinfo = _timezone_coerce(default_timezone,
                                  allow_dateutil=allow_dateutil)
        datetime_obj = datetime_obj.replace(tzinfo=tzinfo)
    return datetime_obj


def _timezone_coerce(tzinfo, allow_dateutil=True):
    """
    Ensure the output is a timezone instance.

    Example:
        >>> import pytest
        >>> from ubelt.util_time import *  # NOQA
        >>> from ubelt.util_time import _timezone_coerce
        >>> results = []
        >>> write = results.append
        >>> tzinfo = _timezone_coerce('utc', allow_dateutil=0)
        >>> print(f'tzinfo={tzinfo}')
        tzinfo=UTC

    Example:
        >>> # xdoctest: +REQUIRES(module:dateutil)
        >>> import pytest
        >>> from ubelt.util_time import _timezone_coerce
        >>> results = []
        >>> write = results.append
        >>> write(_timezone_coerce('utc'))
        >>> write(_timezone_coerce('GMT'))
        >>> write(_timezone_coerce('EST'))
        >>> write(_timezone_coerce('HST'))
        >>> import datetime as datetime_mod
        >>> dt = datetime_mod.datetime.now()
        >>> for tzinfo in results:
        >>>     print(f'tzoffset={tzinfo.utcoffset(dt).total_seconds()}')
        tzoffset=0.0
        tzoffset=0.0
        tzoffset=-18000.0
        tzoffset=-36000.0

    Example:
        >>> import pytest
        >>> from ubelt.util_time import *  # NOQA
        >>> from ubelt.util_time import _timezone_coerce
        >>> with pytest.raises(ValueError):
        ...     _timezone_coerce('GMT', allow_dateutil=0)
        >>> with pytest.raises(TypeError):
        ...     _timezone_coerce(object(), allow_dateutil=0)
        >>> # xdoctest: +REQUIRES(module:dateutil)
        >>> with pytest.raises(KeyError):
        ...     _timezone_coerce('NotATimezone', allow_dateutil=1)

    Example:
        >>> import pytest
        >>> from ubelt.util_time import *  # NOQA
        >>> from ubelt.util_time import _timezone_coerce
        >>> import time
        >>> tz1 = _timezone_coerce('local', allow_dateutil=0)
        >>> tz2 = _timezone_coerce('local', allow_dateutil=1)
        >>> sec1 = tz1.utcoffset(None).total_seconds()
        >>> sec2 = tz2.utcoffset(None).total_seconds()
        >>> assert sec1 == sec2 == -time.timezone
    """
    import datetime as datetime_mod
    if isinstance(tzinfo, str):
        if tzinfo == 'local':
            # Note: the local timezone offset time.timezone is negated
            _delta = datetime_mod.timedelta(seconds=-time.timezone)
            out_tzinfo = datetime_mod.timezone(_delta)
        elif tzinfo == 'utc':
            out_tzinfo = datetime_mod.timezone.utc
        else:
            if allow_dateutil:
                from dateutil import tz as tz_mod
                out_tzinfo = tz_mod.gettz(tzinfo)
                if out_tzinfo is None:
                    raise KeyError(tzinfo)
            else:
                raise ValueError((
                    'Unrecognized timezone: {!r}, and '
                    'dateutil is not allowed').format(tzinfo))
    elif isinstance(tzinfo, datetime_mod.timezone):
        out_tzinfo = tzinfo
    else:
        raise TypeError(
            'Unknown type: {!r} for tzinfo'.format(type(tzinfo)))
    return out_tzinfo


class Timer(object):
    """
    Measures time elapsed between a start and an end point. Can be used as a
    with-statement context manager, or via the tic/toc API.

    Attributes:
        elapsed (float): number of seconds measured by the context manager
        tstart (float): time of last `tic` reported by `self._time()`
        write (Callable): function used to write
        flush (Callable): function used to flush

    Example:
        >>> # Create and start the timer using the context manager
        >>> import math
        >>> import ubelt as ub
        >>> timer = ub.Timer('Timer test!', verbose=1)
        >>> with timer:
        >>>     math.factorial(10)
        >>> assert timer.elapsed > 0
        tic('Timer test!')
        ...toc('Timer test!')=...

    Example:
        >>> # Create and start the timer using the tic/toc interface
        >>> import ubelt as ub
        >>> timer = ub.Timer().tic()
        >>> elapsed1 = timer.toc()
        >>> elapsed2 = timer.toc()
        >>> elapsed3 = timer.toc()
        >>> assert elapsed1 <= elapsed2
        >>> assert elapsed2 <= elapsed3

    Example:
        >>> # In Python 3.7+ nanosecond resolution can be enabled
        >>> import ubelt as ub
        >>> import sys
        >>> if sys.version_info[0:2] <= (3, 6):
        >>>     import pytest
        >>>     pytest.skip()
        >>> # xdoctest +REQUIRES(Python>=3.7)  # FIXME: directive doesn't exist yet
        >>> timer = ub.Timer(label='perf_counter_ns', ns=True).tic()
        >>> elapsed1 = timer.toc()
        >>> elapsed2 = timer.toc()
        >>> assert elapsed1 <= elapsed2
        >>> assert isinstance(elapsed1, int)
    """
    _default_time = time.perf_counter

    def __init__(self, label='', verbose=None, newline=True, ns=False):
        """
        Args:
            label (str): identifier for printing. Defaults to ''.

            verbose (int | None): verbosity flag, defaults to True if a label
                is given, otherwise 0.

            newline (bool): if False and verbose, print tic and toc on the
                same line. Defaults to True.

            ns (bool): if True, use a nanosecond-resolution timer to avoid
                the precision loss caused by the float type. Defaults to
                False.
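
        Example:
            >>> # A minimal sketch of the verbosity default: verbose turns
            >>> # on only when a label is given.
            >>> import ubelt as ub
            >>> assert ub.Timer('labeled').verbose
            >>> assert not ub.Timer().verbose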
""" if verbose is None: verbose = bool(label) self.label = label self.verbose = verbose self.newline = newline self.tstart = -1 self.elapsed = -1 self.write = sys.stdout.write self.flush = sys.stdout.flush self.ns = ns if self.ns: self._time = time.perf_counter_ns else: self._time = self._default_time def tic(self): """ starts the timer Returns: Timer: self """ if self.verbose: self.flush() self.write('\ntic(%r)' % self.label) if self.newline: self.write('\n') self.flush() self.tstart = self._time() return self def toc(self): """ stops the timer Returns: float | int: number of second or nanoseconds """ elapsed = self._time() - self.tstart if self.verbose: if self.ns: self.write('...toc(%r)=%.4fs\n' % (self.label, elapsed / 1e9)) else: self.write('...toc(%r)=%.4fs\n' % (self.label, elapsed)) self.flush() return elapsed def __enter__(self): """ Returns: Timer: self """ self.tic() return self def __exit__(self, ex_type, ex_value, ex_traceback): """ Args: ex_type (Type[BaseException] | None): ex_value (BaseException | None): ex_traceback (TracebackType | None): Returns: bool | None """ self.elapsed = self.toc() if ex_traceback is not None: return False # class Time: # """ # Stub for potential future time object # """ # def __init__(cls, datetime): # ... # @classmethod # def coerce(cls, data): # ... # @classmethod # def parse(cls, stamp): # ... # # class TimeDelta: # ... ubelt-1.3.7/ubelt/util_time.pyi000066400000000000000000000022211472470106000164640ustar00rootroot00000000000000import datetime from typing import Callable from typing import Type from types import TracebackType def timestamp(datetime: datetime.datetime | datetime.date | None = None, precision: int = 0, default_timezone: str | datetime.timezone = 'local', allow_dateutil: bool = True) -> str: ... def timeparse(stamp: str, default_timezone: str = 'local', allow_dateutil: bool = True) -> datetime.datetime: ... class Timer: elapsed: float tstart: float write: Callable flush: Callable label: str verbose: int | None newline: bool ns: bool def __init__(self, label: str = '', verbose: int | None = None, newline: bool = True, ns: bool = False) -> None: ... def tic(self) -> Timer: ... def toc(self) -> float | int: ... def __enter__(self) -> Timer: ... def __exit__(self, ex_type: Type[BaseException] | None, ex_value: BaseException | None, ex_traceback: TracebackType | None) -> bool | None: ... ubelt-1.3.7/ubelt/util_zip.py000066400000000000000000000362641472470106000161750ustar00rootroot00000000000000""" Abstractions for working with zipfiles and archives This may be renamed to util_archive in the future. The :func:`ubelt.split_archive` works with paths that reference a file inside of an archive (e.g. a zipfile). It splits it into two parts, the full path to the archive and then the path to the file inside of the archive. By convention these are separated with either a pathsep or a colon. The :func:`ubelt.zopen` works to open a file that lives inside of an archive without the user needing to worry about extracting it first. When possible it will read it directly from the archive, but in some cases it may extract it to a temporary directory first. """ import io import os from os.path import exists, join from ubelt.util_mixins import NiceRepr __all__ = ['zopen', 'split_archive'] def split_archive(fpath, ext='.zip'): """ If fpath specifies a file inside a zipfile, it breaks it into two parts the path to the zipfile and the internal path in the zipfile. 

    Args:
        fpath (str | PathLike): path that specifies a path inside of an
            archive
        ext (str): archive extension

    Returns:
        Tuple[str, str | None]

    Example:
        >>> split_archive('/a/b/foo.txt')
        >>> split_archive('/a/b/foo.zip/bar.txt')
        >>> split_archive('/a/b/foo.zip/baz/biz.zip/bar.py')
        >>> split_archive('archive.zip')
        >>> import ubelt as ub
        >>> split_archive(ub.Path('/a/b/foo.zip/baz/biz.zip/bar.py'))
        >>> split_archive('/a/b/foo.zip/baz.pt/bar.zip/bar.zip', '.pt')

    TODO:
        Fix got/want for win32

        (None, None)
        ('/a/b/foo.zip', 'bar.txt')
        ('/a/b/foo.zip/baz/biz.zip', 'bar.py')
        ('archive.zip', None)
        ('/a/b/foo.zip/baz/biz.zip', 'bar.py')
        ('/a/b/foo.zip/baz.pt', 'bar.zip/bar.zip')
    """
    import re
    fpath = os.fspath(fpath)
    pat = '({}[{}/:])'.format(re.escape(ext), re.escape(os.path.sep))
    parts = re.split(pat, fpath, flags=re.IGNORECASE)
    if len(parts) > 2:
        archivepath = ''.join(parts[:-1])[:-1]
        internal = parts[-1]
    elif len(parts) == 1:
        archivepath = parts[0]
        if not archivepath.endswith(ext):
            archivepath = None
        internal = None
    else:  # nocover
        raise AssertionError('impossible state')
    return archivepath, internal


class zopen(NiceRepr):
    """
    An abstraction of the normal :func:`open` function that can also handle
    reading data directly inside of zipfiles.

    This is a file-object like interface [FileObj] --- i.e. it supports the
    read and write methods to an underlying resource.

    Can open a file normally or open a file within a zip file (readonly).
    Tries to read from memory only, but will extract to a tempfile if
    necessary.

    Just treat the zipfile like a directory and separate the archive from
    the internal path with a path separator (or a colon), e.g.
    ``/path/to/myzip.zip/compressed/path.txt`` or
    ``/path/to/myzip.zip:compressed/path.txt``.

    References:
        .. [FileObj] https://docs.python.org/3/glossary.html#term-file-object

    TODO:
        - [ ] Fast way to open a base zipfile, query what is inside, and
              then choose a file to further zopen (and passing along the
              same open zipfile reference maybe?).
        - [ ] Write mode in some restricted setting?

    Attributes:
        name (str | PathLike):
            path to a file or reference to an item in a zipfile.
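
    Example:
        >>> # A small sketch of the colon-separated form mentioned above.
        >>> # Note: split_archive recognizes the colon convention, although
        >>> # zopen._open itself currently dispatches on the pathsep form.
        >>> from ubelt.util_zip import split_archive
        >>> split_archive('/path/to/myzip.zip:compressed/path.txt')
        ('/path/to/myzip.zip', 'compressed/path.txt')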

    Example:
        >>> from ubelt.util_zip import *  # NOQA
        >>> import pickle
        >>> import ubelt as ub
        >>> dpath = ub.Path.appdir('ubelt/tests/util_zip').ensuredir()
        >>> dpath = ub.Path(dpath)
        >>> data_fpath = dpath / 'test.pkl'
        >>> data = {'demo': 'data'}
        >>> with open(str(data_fpath), 'wb') as file:
        >>>     pickle.dump(data, file)
        >>> # Write data
        >>> import zipfile
        >>> zip_fpath = dpath / 'test_zip.archive'
        >>> stl_w_zfile = zipfile.ZipFile(os.fspath(zip_fpath), mode='w')
        >>> stl_w_zfile.write(os.fspath(data_fpath), os.fspath(data_fpath.relative_to(dpath)))
        >>> stl_w_zfile.close()
        >>> stl_r_zfile = zipfile.ZipFile(os.fspath(zip_fpath), mode='r')
        >>> stl_r_zfile.namelist()
        >>> stl_r_zfile.close()
        >>> # Test zopen
        >>> self = zopen(zip_fpath / 'test.pkl', mode='rb', ext='.archive')
        >>> print(self._split_archive())
        >>> print(self.namelist())
        >>> self.close()
        >>> self = zopen(zip_fpath / 'test.pkl', mode='rb', ext='.archive')
        >>> recon1 = pickle.loads(self.read())
        >>> self.close()
        >>> self = zopen(zip_fpath / 'test.pkl', mode='rb', ext='.archive')
        >>> recon2 = pickle.load(self)
        >>> self.close()
        >>> assert recon1 == recon2
        >>> assert recon1 is not recon2

    Example:
        >>> # Test we can load json data from a zipfile
        >>> from ubelt.util_zip import *  # NOQA
        >>> import ubelt as ub
        >>> import json
        >>> import zipfile
        >>> dpath = ub.Path.appdir('ubelt/tests/util_zip').ensuredir()
        >>> infopath = join(dpath, 'info.json')
        >>> ub.writeto(infopath, '{"x": "1"}')
        >>> zippath = join(dpath, 'infozip.zip')
        >>> internal = 'folder/info.json'
        >>> with zipfile.ZipFile(zippath, 'w') as myzip:
        >>>     myzip.write(infopath, internal)
        >>> fpath = zippath + '/' + internal
        >>> # Test context manager
        >>> with zopen(fpath, 'r') as self:
        >>>     info2 = json.load(self)
        >>> assert info2['x'] == '1'
        >>> # Test outside of context manager
        >>> self = zopen(fpath, 'r')
        >>> print(self._split_archive())
        >>> info2 = json.load(self)
        >>> assert info2['x'] == '1'
        >>> # Test nice repr (with zfile)
        >>> print('self = {!r}'.format(self))
        >>> self.close()

    Example:
        >>> # Coverage tests --- move to unit-test
        >>> from ubelt.util_zip import *  # NOQA
        >>> import ubelt as ub
        >>> import json
        >>> import zipfile
        >>> dpath = ub.Path.appdir('ubelt/tests/util_zip').ensuredir()
        >>> textpath = join(dpath, 'seekable_test.txt')
        >>> text = chr(10).join(['line{}'.format(i) for i in range(10)])
        >>> ub.writeto(textpath, text)
        >>> zippath = join(dpath, 'seekable_test.zip')
        >>> internal = 'folder/seekable_test.txt'
        >>> with zipfile.ZipFile(zippath, 'w') as myzip:
        >>>     myzip.write(textpath, internal)
        >>> ub.delete(textpath)
        >>> fpath = zippath + '/' + internal
        >>> # Test seekable
        >>> self_seekable = zopen(fpath, 'r', seekable=True)
        >>> assert self_seekable.seekable()
        >>> self_seekable.seek(8)
        >>> assert self_seekable.readline() == 'ne1' + chr(10)
        >>> assert self_seekable.readline() == 'line2' + chr(10)
        >>> self_seekable.seek(8)
        >>> assert self_seekable.readline() == 'ne1' + chr(10)
        >>> assert self_seekable.readline() == 'line2' + chr(10)
        >>> # Test non-seekable?
        >>> # Sometimes non-seekable files are still seekable
        >>> maybe_seekable = zopen(fpath, 'r', seekable=False)
        >>> if maybe_seekable.seekable():
        >>>     maybe_seekable.seek(8)
        >>>     assert maybe_seekable.readline() == 'ne1' + chr(10)
        >>>     assert maybe_seekable.readline() == 'line2' + chr(10)
        >>>     maybe_seekable.seek(8)
        >>>     assert maybe_seekable.readline() == 'ne1' + chr(10)
        >>>     assert maybe_seekable.readline() == 'line2' + chr(10)

    Example:
        >>> # More coverage tests --- move to unit-test
        >>> from ubelt.util_zip import *  # NOQA
        >>> import ubelt as ub
        >>> import pytest
        >>> dpath = ub.Path.appdir('ubelt/tests/util_zip').ensuredir()
        >>> with pytest.raises(OSError):
        >>>     self = zopen('', 'r')
        >>> # Test open non-zip existing file
        >>> existing_fpath = join(dpath, 'exists.json')
        >>> ub.writeto(existing_fpath, '{"x": "1"}')
        >>> self = zopen(existing_fpath, 'r')
        >>> assert self.read() == '{"x": "1"}'
        >>> # Test dir
        >>> dir(self)
        >>> # Test nice
        >>> print(self)
        >>> print('self = {!r}'.format(self))
        >>> self.close()
        >>> # Test open non-zip non-existing file
        >>> nonexisting_fpath = join(dpath, 'does-not-exist.txt')
        >>> ub.delete(nonexisting_fpath)
        >>> with pytest.raises(OSError):
        >>>     self = zopen(nonexisting_fpath, 'r')
        >>> with pytest.raises(NotImplementedError):
        >>>     self = zopen(nonexisting_fpath, 'w')
        >>> # Test nice-repr
        >>> self = zopen(existing_fpath, 'r')
        >>> print('self = {!r}'.format(self))
        >>> # pathological
        >>> self = zopen(existing_fpath, 'r')
        >>> self._handle = None
        >>> dir(self)
    """

    def __init__(self, fpath, mode='r', seekable=False, ext='.zip'):
        """
        Args:
            fpath (str | PathLike): path to a file, or a special path that
                denotes both a path to a zipfile and a path to an archived
                file inside of the zipfile.

            mode (str): Currently only "r" - readonly mode is supported

            seekable (bool): If True, attempts to force "seekability" of the
                underlying file-object, for compressed files this will first
                extract the file to a temporary location on disk. If False,
                any underlying compressed file will be opened directly which
                may result in the object being non-seekable.

            ext (str): The extension of the zipfile. Modify this if a
                non-standard extension is used (e.g. for torch packages).
        """
        self.fpath = fpath
        self.ext = ext
        self.name = fpath
        self.mode = mode
        self._seekable = seekable
        self._zfpath = None  # points to the base zipfile (if appropriate)
        self._temp_dpath = None  # for temporary extraction
        self._zfile_read = None  # underlying opened zipfile object
        # The _handle pointer should be a file-like object that this zopen
        # object impersonates by forwarding almost every getattr call to it.
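        # For example, attribute lookups like ``self.read`` or ``self.seek``
        # resolve via ``__getattr__`` below and are forwarded to whichever
        # backend object ``_open`` selected.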
        self._handle = None
        self._open()

    @property
    def zfile(self):
        """
        Access the underlying archive file
        """
        if self._zfile_read is None:
            import zipfile
            archivefile, internal = self._split_archive()
            myzip = zipfile.ZipFile(archivefile, 'r')
            self._zfile_read = myzip
        return self._zfile_read

    def namelist(self):
        """
        Lists the contents of this zipfile
        """
        myzip = self.zfile
        namelist = myzip.namelist()
        return namelist

    def __nice__(self):
        if self._zfpath is None:
            return 'handle={}, mode={}'.format(str(self._handle), self.mode)
        else:
            return 'handle={} in zipfpath={}, mode={}'.format(
                self._handle, self._zfpath, self.mode)

    def __getattr__(self, key):
        # Expose attributes of wrapped handle
        if hasattr(self._handle, key):
            assert self._handle is not self
            return getattr(self._handle, key)
        raise AttributeError(key)

    def __dir__(self):
        # Expose attributes of wrapped handle
        zopen_attributes = {
            'namelist',
            'zfile',
        }
        keyset = set(dir(super(zopen, self)))
        keyset.update(set(self.__dict__.keys()))
        if self._handle is not None:
            keyset.update(set(dir(self._handle)))
        return sorted(keyset | zopen_attributes)

    def _cleanup(self):
        # print('self._cleanup = {!r}'.format(self._cleanup))
        if self._handle is not None:
            if not getattr(self, 'closed', True):
                closemethod = getattr(self, 'close', None)
                if closemethod is not None:  # nocover
                    closemethod()
                closemethod = None
            self._handle = None
        if self._temp_dpath and exists(self._temp_dpath):
            # os.unlink(self._temp_dpath)
            from ubelt.util_io import delete
            delete(self._temp_dpath)

    def __del__(self):
        self._cleanup()

    def _split_archive(self):
        archivefile, internal = split_archive(self.fpath, self.ext)
        return archivefile, internal

    def _open(self):
        """
        This logic sets the "_handle" to the appropriate backend object
        such that zopen can behave like a standard IO object.

        In read-only mode:
            * If fpath is a normal file, _handle is the standard `open`
              object
            * If fpath is a seekable zipfile, _handle is an IOWrapper
              pointing to the internal data
            * If fpath is a non-seekable zipfile, the data is extracted
              behind the scenes and a standard `open` object to the
              extracted file is given.

        In write mode:
            * NotImplemented
        """
        if 'r' not in self.mode:
            raise NotImplementedError('Only read mode is supported for now')
        _handle = None
        fpath = os.fspath(self.fpath)
        if exists(fpath):
            _handle = open(fpath, self.mode)
        elif self.ext + '/' in fpath or self.ext + os.path.sep in fpath:
            archivefile, internal = self._split_archive()
            myzip = self.zfile
            if self._seekable:
                import tempfile
                # If we need data to be seekable, then we must extract it to
                # a temporary file first.
                self._temp_dpath = tempfile.mkdtemp(prefix='zopen_')
                temp_fpath = join(self._temp_dpath, internal)
                myzip.extract(internal, self._temp_dpath)
                _handle = open(temp_fpath, self.mode)
            else:
                # Try to load data directly from the zipfile
                _handle = myzip.open(internal, 'r')
                if self.mode == 'rb':
                    data = _handle.read()
                    _handle = io.BytesIO(data)
                elif self.mode == 'r':
                    # FIXME: does not always work. The handle seems to be
                    # closed too soon in the case
                    # util.zopen(module.__file__).read()
                    _handle = io.TextIOWrapper(_handle)
                else:
                    raise KeyError(self.mode)
            self._zfpath = archivefile
        if _handle is None:
            raise IOError('file {!r} does not exist'.format(fpath))
        self._handle = _handle

    def __enter__(self):
        return self

    def __exit__(self, ex_type, ex_value, ex_traceback):
        """
        Args:
            ex_type (Type[BaseException] | None):
            ex_value (BaseException | None):
            ex_traceback (TracebackType | None):

        Returns:
            bool | None
        """
        self.close()

    # TODO: Allow for navigating inside of the zipfile
    # TODO: opening a member should not force disk decompression unless we
    # really need to do real seeks. If we are just streaming the first few
    # bytes, then a standard handle will work fine.
ubelt-1.3.7/ubelt/util_zip.pyi000066400000000000000000000020121472470106000163300ustar00rootroot00000000000000from os import PathLike
from typing import Tuple
from typing import Type
from types import TracebackType
from ubelt.util_mixins import NiceRepr


def split_archive(fpath: str | PathLike,
                  ext: str = '.zip') -> Tuple[str, str | None]:
    ...


class zopen(NiceRepr):
    name: str | PathLike
    fpath: str | PathLike
    ext: str
    mode: str

    def __init__(self,
                 fpath: str | PathLike,
                 mode: str = 'r',
                 seekable: bool = False,
                 ext: str = '.zip') -> None:
        ...

    @property
    def zfile(self):
        ...

    def namelist(self):
        ...

    def __nice__(self):
        ...

    def __getattr__(self, key):
        ...

    def __dir__(self):
        ...

    def __del__(self) -> None:
        ...

    def __enter__(self):
        ...

    def __exit__(self, ex_type: Type[BaseException] | None,
                 ex_value: BaseException | None,
                 ex_traceback: TracebackType | None) -> bool | None:
        ...